1 /* Lower vector operations to scalar operations.
2    Copyright (C) 2004-2022 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "tree-pass.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "diagnostic.h"
32 #include "fold-const.h"
33 #include "stor-layout.h"
34 #include "langhooks.h"
35 #include "tree-eh.h"
36 #include "gimple-iterator.h"
37 #include "gimplify-me.h"
38 #include "gimplify.h"
39 #include "tree-cfg.h"
40 #include "tree-vector-builder.h"
41 #include "vec-perm-indices.h"
42 #include "insn-config.h"
43 #include "tree-ssa-dce.h"
44 #include "gimple-fold.h"
45 #include "gimple-match.h"
46 #include "recog.h"            /* FIXME: for insn_data */
47 
48 
49 /* Build a ternary operation and gimplify it.  Emit code before GSI.
50    Return the gimple_val holding the result.  */
51 
52 static tree
gimplify_build3(gimple_stmt_iterator * gsi,enum tree_code code,tree type,tree a,tree b,tree c)53 gimplify_build3 (gimple_stmt_iterator *gsi, enum tree_code code,
54                      tree type, tree a, tree b, tree c)
55 {
56   location_t loc = gimple_location (gsi_stmt (*gsi));
57   gimple_seq stmts = NULL;
58   tree ret = gimple_build (&stmts, loc, code, type, a, b, c);
59   gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
60   return ret;
61 }
62 
63 /* Build a binary operation and gimplify it.  Emit code before GSI.
64    Return the gimple_val holding the result.  */
65 
66 static tree
gimplify_build2(gimple_stmt_iterator * gsi,enum tree_code code,tree type,tree a,tree b)67 gimplify_build2 (gimple_stmt_iterator *gsi, enum tree_code code,
68                      tree type, tree a, tree b)
69 {
70   location_t loc = gimple_location (gsi_stmt (*gsi));
71   gimple_seq stmts = NULL;
72   tree ret = gimple_build (&stmts, loc, code, type, a, b);
73   gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
74   return ret;
75 }
76 
77 /* Build a unary operation and gimplify it.  Emit code before GSI.
78    Return the gimple_val holding the result.  */
79 
80 static tree
gimplify_build1(gimple_stmt_iterator * gsi,enum tree_code code,tree type,tree a)81 gimplify_build1 (gimple_stmt_iterator *gsi, enum tree_code code, tree type,
82                      tree a)
83 {
84   location_t loc = gimple_location (gsi_stmt (*gsi));
85   gimple_seq stmts = NULL;
86   tree ret = gimple_build (&stmts, loc, code, type, a);
87   gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
88   return ret;
89 }
90 
91 
92 static void expand_vector_operations_1 (gimple_stmt_iterator *, bitmap);
93 
94 /* Return the number of elements in a vector type TYPE that we have
95    already decided needs to be expanded piecewise.  We don't support
96    this kind of expansion for variable-length vectors, since we should
97    always check for target support before introducing uses of those.  */
98 static unsigned int
nunits_for_known_piecewise_op(const_tree type)99 nunits_for_known_piecewise_op (const_tree type)
100 {
101   return TYPE_VECTOR_SUBPARTS (type).to_constant ();
102 }
103 
104 /* Return true if TYPE1 has more elements than TYPE2, where either
105    type may be a vector or a scalar.  */
106 
107 static inline bool
subparts_gt(tree type1,tree type2)108 subparts_gt (tree type1, tree type2)
109 {
110   poly_uint64 n1 = VECTOR_TYPE_P (type1) ? TYPE_VECTOR_SUBPARTS (type1) : 1;
111   poly_uint64 n2 = VECTOR_TYPE_P (type2) ? TYPE_VECTOR_SUBPARTS (type2) : 1;
112   return known_gt (n1, n2);
113 }
114 
115 /* Build a constant of type TYPE, made of VALUE's bits replicated
116    every WIDTH bits to fit TYPE's precision.  */
117 static tree
build_replicated_const(tree type,unsigned int width,HOST_WIDE_INT value)118 build_replicated_const (tree type, unsigned int width, HOST_WIDE_INT value)
119 {
120   int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
121     / HOST_BITS_PER_WIDE_INT;
122   unsigned HOST_WIDE_INT low, mask;
123   HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
124   int i;
125 
126   gcc_assert (n && n <= WIDE_INT_MAX_ELTS);
127 
128   if (width == HOST_BITS_PER_WIDE_INT)
129     low = value;
130   else
131     {
132       mask = ((HOST_WIDE_INT)1 << width) - 1;
133       low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
134     }
135 
136   for (i = 0; i < n; i++)
137     a[i] = low;
138 
139   gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
140   return wide_int_to_tree
141     (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
142 }
143 
144 static GTY(()) tree vector_inner_type;
145 static GTY(()) tree vector_last_type;
146 static GTY(()) int vector_last_nunits;
147 
148 /* Return a suitable vector types made of SUBPARTS units each of mode
149    "word_mode" (the global variable).  */
150 static tree
build_word_mode_vector_type(int nunits)151 build_word_mode_vector_type (int nunits)
152 {
153   if (!vector_inner_type)
154     vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
155   else if (vector_last_nunits == nunits)
156     {
157       gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
158       return vector_last_type;
159     }
160 
161   vector_last_nunits = nunits;
162   vector_last_type = build_vector_type (vector_inner_type, nunits);
163   return vector_last_type;
164 }
165 
166 typedef tree (*elem_op_func) (gimple_stmt_iterator *,
167                                     tree, tree, tree, tree, tree, enum tree_code,
168                                     tree);
169 
170 /* Extract the vector element of type TYPE at BITPOS with BITSIZE from T
171    and return it.  */
172 
173 tree
tree_vec_extract(gimple_stmt_iterator * gsi,tree type,tree t,tree bitsize,tree bitpos)174 tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
175                       tree t, tree bitsize, tree bitpos)
176 {
177   /* We're using the resimplify API and maybe_push_res_to_seq to
178      simplify the BIT_FIELD_REF but restrict the simplification to
179      a single stmt while at the same time following SSA edges for
180      simplification with already emitted CTORs.  */
181   gimple_match_op opr;
182   opr.set_op (BIT_FIELD_REF, type, t, bitsize, bitpos);
183   opr.resimplify (NULL, follow_all_ssa_edges);
184   gimple_seq stmts = NULL;
185   tree res = maybe_push_res_to_seq (&opr, &stmts);
186   if (!res)
187     {
188       /* This can happen if SSA_NAME_OCCURS_IN_ABNORMAL_PHI are
189            used.  Build BIT_FIELD_REF manually otherwise.  */
190       t = build3 (BIT_FIELD_REF, type, t, bitsize, bitpos);
191       res = make_ssa_name (type);
192       gimple *g = gimple_build_assign (res, t);
193       gsi_insert_before (gsi, g, GSI_SAME_STMT);
194       return res;
195     }
196   gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
197   return res;
198 }
199 
200 static tree
do_unop(gimple_stmt_iterator * gsi,tree inner_type,tree a,tree b ATTRIBUTE_UNUSED,tree bitpos,tree bitsize,enum tree_code code,tree type ATTRIBUTE_UNUSED)201 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
202            tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
203            enum tree_code code, tree type ATTRIBUTE_UNUSED)
204 {
205   a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
206   return gimplify_build1 (gsi, code, inner_type, a);
207 }
208 
209 static tree
do_binop(gimple_stmt_iterator * gsi,tree inner_type,tree a,tree b,tree bitpos,tree bitsize,enum tree_code code,tree type ATTRIBUTE_UNUSED)210 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
211             tree bitpos, tree bitsize, enum tree_code code,
212             tree type ATTRIBUTE_UNUSED)
213 {
214   if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
215     a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
216   if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
217     b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
218   return gimplify_build2 (gsi, code, inner_type, a, b);
219 }
220 
221 /* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0
222 
223    INNER_TYPE is the type of A and B elements
224 
225    returned expression is of signed integer type with the
226    size equal to the size of INNER_TYPE.  */
227 static tree
do_compare(gimple_stmt_iterator * gsi,tree inner_type,tree a,tree b,tree bitpos,tree bitsize,enum tree_code code,tree type)228 do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
229               tree bitpos, tree bitsize, enum tree_code code, tree type)
230 {
231   tree stype = TREE_TYPE (type);
232   tree cst_false = build_zero_cst (stype);
233   tree cst_true = build_all_ones_cst (stype);
234   tree cmp;
235 
236   a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
237   b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
238 
239   cmp = build2 (code, boolean_type_node, a, b);
240   return gimplify_build3 (gsi, COND_EXPR, stype, cmp, cst_true, cst_false);
241 }
242 
243 /* Expand vector addition to scalars.  This does bit twiddling
244    in order to increase parallelism:
245 
246    a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
247            (a ^ b) & 0x80808080
248 
249    a - b =  (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
250             (a ^ ~b) & 0x80808080
251 
252    -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
253 
254    This optimization should be done only if 4 vector items or more
255    fit into a word.  */
256 static tree
do_plus_minus(gimple_stmt_iterator * gsi,tree word_type,tree a,tree b,tree bitpos ATTRIBUTE_UNUSED,tree bitsize ATTRIBUTE_UNUSED,enum tree_code code,tree type ATTRIBUTE_UNUSED)257 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
258                  tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
259                  enum tree_code code, tree type ATTRIBUTE_UNUSED)
260 {
261   unsigned int width = vector_element_bits (TREE_TYPE (a));
262   tree inner_type = TREE_TYPE (TREE_TYPE (a));
263   unsigned HOST_WIDE_INT max;
264   tree low_bits, high_bits, a_low, b_low, result_low, signs;
265 
266   max = GET_MODE_MASK (TYPE_MODE (inner_type));
267   low_bits = build_replicated_const (word_type, width, max >> 1);
268   high_bits = build_replicated_const (word_type, width, max & ~(max >> 1));
269 
270   a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
271   b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
272 
273   signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
274   b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
275   if (code == PLUS_EXPR)
276     a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
277   else
278     {
279       a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
280       signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
281     }
282 
283   signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
284   result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
285   return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
286 }
287 
288 static tree
do_negate(gimple_stmt_iterator * gsi,tree word_type,tree b,tree unused ATTRIBUTE_UNUSED,tree bitpos ATTRIBUTE_UNUSED,tree bitsize ATTRIBUTE_UNUSED,enum tree_code code ATTRIBUTE_UNUSED,tree type ATTRIBUTE_UNUSED)289 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
290              tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
291              tree bitsize ATTRIBUTE_UNUSED,
292              enum tree_code code ATTRIBUTE_UNUSED,
293              tree type ATTRIBUTE_UNUSED)
294 {
295   unsigned int width = vector_element_bits (TREE_TYPE (b));
296   tree inner_type = TREE_TYPE (TREE_TYPE (b));
297   HOST_WIDE_INT max;
298   tree low_bits, high_bits, b_low, result_low, signs;
299 
300   max = GET_MODE_MASK (TYPE_MODE (inner_type));
301   low_bits = build_replicated_const (word_type, width, max >> 1);
302   high_bits = build_replicated_const (word_type, width, max & ~(max >> 1));
303 
304   b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
305 
306   b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
307   signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
308   signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
309   result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
310   return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
311 }
312 
313 /* Expand a vector operation to scalars, by using many operations
314    whose type is the vector type's inner type.  */
315 static tree
expand_vector_piecewise(gimple_stmt_iterator * gsi,elem_op_func f,tree type,tree inner_type,tree a,tree b,enum tree_code code,bool parallel_p,tree ret_type=NULL_TREE)316 expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
317                                tree type, tree inner_type,
318                                tree a, tree b, enum tree_code code,
319                                bool parallel_p, tree ret_type = NULL_TREE)
320 {
321   vec<constructor_elt, va_gc> *v;
322   tree part_width = TYPE_SIZE (inner_type);
323   tree index = bitsize_int (0);
324   int nunits = nunits_for_known_piecewise_op (type);
325   int delta = tree_to_uhwi (part_width) / vector_element_bits (type);
326   int i;
327   location_t loc = gimple_location (gsi_stmt (*gsi));
328 
329   if (nunits == 1
330       || warning_suppressed_p (gsi_stmt (*gsi),
331                                      OPT_Wvector_operation_performance))
332     /* Do not diagnose decomposing single element vectors or when
333        decomposing vectorizer produced operations.  */
334     ;
335   else if (ret_type || !parallel_p)
336     warning_at (loc, OPT_Wvector_operation_performance,
337                     "vector operation will be expanded piecewise");
338   else
339     warning_at (loc, OPT_Wvector_operation_performance,
340                     "vector operation will be expanded in parallel");
341 
342   if (!ret_type)
343     ret_type = type;
344   vec_alloc (v, (nunits + delta - 1) / delta);
345   bool constant_p = true;
346   for (i = 0; i < nunits;
347        i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
348     {
349       tree result = f (gsi, inner_type, a, b, index, part_width, code,
350                            ret_type);
351       if (!CONSTANT_CLASS_P (result))
352           constant_p = false;
353       constructor_elt ce = {NULL_TREE, result};
354       v->quick_push (ce);
355     }
356 
357   if (constant_p)
358     return build_vector_from_ctor (ret_type, v);
359   else
360     return build_constructor (ret_type, v);
361 }
362 
363 /* Expand a vector operation to scalars with the freedom to use
364    a scalar integer type, or to use a different size for the items
365    in the vector type.  */
366 static tree
expand_vector_parallel(gimple_stmt_iterator * gsi,elem_op_func f,tree type,tree a,tree b,enum tree_code code)367 expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
368                               tree a, tree b, enum tree_code code)
369 {
370   tree result, compute_type;
371   int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
372   location_t loc = gimple_location (gsi_stmt (*gsi));
373 
374   /* We have three strategies.  If the type is already correct, just do
375      the operation an element at a time.  Else, if the vector is wider than
376      one word, do it a word at a time; finally, if the vector is smaller
377      than one word, do it as a scalar.  */
378   if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
379      return expand_vector_piecewise (gsi, f,
380                                              type, TREE_TYPE (type),
381                                              a, b, code, true);
382   else if (n_words > 1)
383     {
384       tree word_type = build_word_mode_vector_type (n_words);
385       result = expand_vector_piecewise (gsi, f,
386                                                 word_type, TREE_TYPE (word_type),
387                                                   a, b, code, true);
388       result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
389                                          GSI_SAME_STMT);
390     }
391   else
392     {
393       /* Use a single scalar operation with a mode no wider than word_mode.  */
394       if (!warning_suppressed_p (gsi_stmt (*gsi),
395                                          OPT_Wvector_operation_performance))
396           warning_at (loc, OPT_Wvector_operation_performance,
397                         "vector operation will be expanded with a "
398                         "single scalar operation");
399       scalar_int_mode mode
400           = int_mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), 0).require ();
401       compute_type = lang_hooks.types.type_for_mode (mode, 1);
402       result = f (gsi, compute_type, a, b, bitsize_zero_node,
403                       TYPE_SIZE (compute_type), code, type);
404     }
405 
406   return result;
407 }
408 
409 /* Expand a vector operation to scalars; for integer types we can use
410    special bit twiddling tricks to do the sums a word at a time, using
411    function F_PARALLEL instead of F.  These tricks are done only if
412    they can process at least four items, that is, only if the vector
413    holds at least four items and if a word can hold four items.  */
414 static tree
expand_vector_addition(gimple_stmt_iterator * gsi,elem_op_func f,elem_op_func f_parallel,tree type,tree a,tree b,enum tree_code code)415 expand_vector_addition (gimple_stmt_iterator *gsi,
416                               elem_op_func f, elem_op_func f_parallel,
417                               tree type, tree a, tree b, enum tree_code code)
418 {
419   int parts_per_word = BITS_PER_WORD / vector_element_bits (type);
420 
421   if (INTEGRAL_TYPE_P (TREE_TYPE (type))
422       && parts_per_word >= 4
423       && nunits_for_known_piecewise_op (type) >= 4)
424     return expand_vector_parallel (gsi, f_parallel,
425                                            type, a, b, code);
426   else
427     return expand_vector_piecewise (gsi, f,
428                                             type, TREE_TYPE (type),
429                                             a, b, code, false);
430 }
431 
432 static bool
433 expand_vector_condition (gimple_stmt_iterator *gsi, bitmap dce_ssa_names);
434 
435 /* Try to expand vector comparison expression OP0 CODE OP1 by
436    querying optab if the following expression:
437           VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
438    can be expanded.  */
439 static tree
expand_vector_comparison(gimple_stmt_iterator * gsi,tree type,tree op0,tree op1,enum tree_code code,bitmap dce_ssa_names)440 expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
441                                 tree op1, enum tree_code code,
442                                 bitmap dce_ssa_names)
443 {
444   tree lhs = gimple_assign_lhs (gsi_stmt (*gsi));
445   use_operand_p use_p;
446   imm_use_iterator iterator;
447   bool vec_cond_expr_only = true;
448 
449   /* As seen in PR95830, we should not expand comparisons that are only
450      feeding a VEC_COND_EXPR statement.  */
451   auto_vec<gimple *> uses;
452   FOR_EACH_IMM_USE_FAST (use_p, iterator, lhs)
453     {
454       gimple *use = USE_STMT (use_p);
455       if (is_gimple_debug (use))
456           continue;
457       if (is_gimple_assign (use)
458             && gimple_assign_rhs_code (use) == VEC_COND_EXPR
459             && gimple_assign_rhs1 (use) == lhs
460             && gimple_assign_rhs2 (use) != lhs
461             && gimple_assign_rhs3 (use) != lhs)
462           uses.safe_push (use);
463       else
464           vec_cond_expr_only = false;
465     }
466 
467   if (vec_cond_expr_only)
468     for (gimple *use : uses)
469       {
470           gimple_stmt_iterator it = gsi_for_stmt (use);
471           if (!expand_vector_condition (&it, dce_ssa_names))
472             {
473               vec_cond_expr_only = false;
474               break;
475             }
476       }
477 
478   if (!uses.is_empty () && vec_cond_expr_only)
479     return NULL_TREE;
480 
481   tree t;
482   if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code))
483     {
484       if (VECTOR_BOOLEAN_TYPE_P (type)
485             && SCALAR_INT_MODE_P (TYPE_MODE (type))
486             && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
487                            TYPE_VECTOR_SUBPARTS (type)
488                            * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
489                                                             (TREE_TYPE (type)))))
490           {
491             tree inner_type = TREE_TYPE (TREE_TYPE (op0));
492             tree part_width = vector_element_bits_tree (TREE_TYPE (op0));
493             tree index = bitsize_int (0);
494             int nunits = nunits_for_known_piecewise_op (TREE_TYPE (op0));
495             int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (type));
496             tree ret_type = build_nonstandard_integer_type (prec, 1);
497             tree ret_inner_type = boolean_type_node;
498             int i;
499             location_t loc = gimple_location (gsi_stmt (*gsi));
500             t = build_zero_cst (ret_type);
501 
502             if (TYPE_PRECISION (ret_inner_type) != 1)
503               ret_inner_type = build_nonstandard_integer_type (1, 1);
504             if (!warning_suppressed_p (gsi_stmt (*gsi),
505                                              OPT_Wvector_operation_performance))
506               warning_at (loc, OPT_Wvector_operation_performance,
507                               "vector operation will be expanded piecewise");
508             for (i = 0; i < nunits;
509                  i++, index = int_const_binop (PLUS_EXPR, index, part_width))
510               {
511                 tree a = tree_vec_extract (gsi, inner_type, op0, part_width,
512                                                    index);
513                 tree b = tree_vec_extract (gsi, inner_type, op1, part_width,
514                                                    index);
515                 tree result = gimplify_build2 (gsi, code, ret_inner_type, a, b);
516                 t = gimplify_build3 (gsi, BIT_INSERT_EXPR, ret_type, t, result,
517                                            bitsize_int (i));
518               }
519             t = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
520           }
521       else
522           t = expand_vector_piecewise (gsi, do_compare, type,
523                                              TREE_TYPE (TREE_TYPE (op0)), op0, op1,
524                                              code, false);
525     }
526   else
527     t = NULL_TREE;
528 
529   return t;
530 }
531 
532 /* Helper function of expand_vector_divmod.  Gimplify a RSHIFT_EXPR in type
533    of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
534    the result if successful, otherwise return NULL_TREE.  */
535 static tree
add_rshift(gimple_stmt_iterator * gsi,tree type,tree op0,int * shiftcnts)536 add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
537 {
538   optab op;
539   unsigned int i, nunits = nunits_for_known_piecewise_op (type);
540   bool scalar_shift = true;
541 
542   for (i = 1; i < nunits; i++)
543     {
544       if (shiftcnts[i] != shiftcnts[0])
545           scalar_shift = false;
546     }
547 
548   if (scalar_shift && shiftcnts[0] == 0)
549     return op0;
550 
551   if (scalar_shift)
552     {
553       op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
554       if (op != unknown_optab
555             && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
556           return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
557                                         build_int_cst (NULL_TREE, shiftcnts[0]));
558     }
559 
560   op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
561   if (op != unknown_optab
562       && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
563     {
564       tree_vector_builder vec (type, nunits, 1);
565       for (i = 0; i < nunits; i++)
566           vec.quick_push (build_int_cst (TREE_TYPE (type), shiftcnts[i]));
567       return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0, vec.build ());
568     }
569 
570   return NULL_TREE;
571 }
572 
573 /* Try to expand integer vector division by constant using
574    widening multiply, shifts and additions.  */
575 static tree
expand_vector_divmod(gimple_stmt_iterator * gsi,tree type,tree op0,tree op1,enum tree_code code)576 expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
577                           tree op1, enum tree_code code)
578 {
579   bool use_pow2 = true;
580   bool has_vector_shift = true;
581   bool use_abs_op1 = false;
582   int mode = -1, this_mode;
583   int pre_shift = -1, post_shift;
584   unsigned int nunits = nunits_for_known_piecewise_op (type);
585   int *shifts = XALLOCAVEC (int, nunits * 4);
586   int *pre_shifts = shifts + nunits;
587   int *post_shifts = pre_shifts + nunits;
588   int *shift_temps = post_shifts + nunits;
589   unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
590   int prec = TYPE_PRECISION (TREE_TYPE (type));
591   int dummy_int;
592   unsigned int i;
593   signop sign_p = TYPE_SIGN (TREE_TYPE (type));
594   unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
595   tree cur_op, mulcst, tem;
596   optab op;
597 
598   if (prec > HOST_BITS_PER_WIDE_INT)
599     return NULL_TREE;
600 
601   op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
602   if (op == unknown_optab
603       || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
604     has_vector_shift = false;
605 
606   /* Analysis phase.  Determine if all op1 elements are either power
607      of two and it is possible to expand it using shifts (or for remainder
608      using masking).  Additionally compute the multiplicative constants
609      and pre and post shifts if the division is to be expanded using
610      widening or high part multiplication plus shifts.  */
611   for (i = 0; i < nunits; i++)
612     {
613       tree cst = VECTOR_CST_ELT (op1, i);
614       unsigned HOST_WIDE_INT ml;
615 
616       if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
617           return NULL_TREE;
618       pre_shifts[i] = 0;
619       post_shifts[i] = 0;
620       mulc[i] = 0;
621       if (use_pow2
622             && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
623           use_pow2 = false;
624       if (use_pow2)
625           {
626             shifts[i] = tree_log2 (cst);
627             if (shifts[i] != shifts[0]
628                 && code == TRUNC_DIV_EXPR
629                 && !has_vector_shift)
630               use_pow2 = false;
631           }
632       if (mode == -2)
633           continue;
634       if (sign_p == UNSIGNED)
635           {
636             unsigned HOST_WIDE_INT mh;
637             unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;
638 
639             if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
640               /* FIXME: Can transform this into op0 >= op1 ? 1 : 0.  */
641               return NULL_TREE;
642 
643             if (d <= 1)
644               {
645                 mode = -2;
646                 continue;
647               }
648 
649             /* Find a suitable multiplier and right shift count
650                instead of multiplying with D.  */
651             mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
652 
653             /* If the suggested multiplier is more than SIZE bits, we can
654                do better for even divisors, using an initial right shift.  */
655             if ((mh != 0 && (d & 1) == 0)
656                 || (!has_vector_shift && pre_shift != -1))
657               {
658                 if (has_vector_shift)
659                     pre_shift = ctz_or_zero (d);
660                 else if (pre_shift == -1)
661                     {
662                       unsigned int j;
663                       for (j = 0; j < nunits; j++)
664                         {
665                           tree cst2 = VECTOR_CST_ELT (op1, j);
666                           unsigned HOST_WIDE_INT d2;
667                           int this_pre_shift;
668 
669                           if (!tree_fits_uhwi_p (cst2))
670                               return NULL_TREE;
671                           d2 = tree_to_uhwi (cst2) & mask;
672                           if (d2 == 0)
673                               return NULL_TREE;
674                           this_pre_shift = floor_log2 (d2 & -d2);
675                           if (pre_shift == -1 || this_pre_shift < pre_shift)
676                               pre_shift = this_pre_shift;
677                         }
678                       if (i != 0 && pre_shift != 0)
679                         {
680                           /* Restart.  */
681                           i = -1U;
682                           mode = -1;
683                           continue;
684                         }
685                     }
686                 if (pre_shift != 0)
687                     {
688                       if ((d >> pre_shift) <= 1)
689                         {
690                           mode = -2;
691                           continue;
692                         }
693                       mh = choose_multiplier (d >> pre_shift, prec,
694                                                     prec - pre_shift,
695                                                     &ml, &post_shift, &dummy_int);
696                       gcc_assert (!mh);
697                       pre_shifts[i] = pre_shift;
698                     }
699               }
700             if (!mh)
701               this_mode = 0;
702             else
703               this_mode = 1;
704           }
705       else
706           {
707             HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
708             unsigned HOST_WIDE_INT abs_d;
709 
710             if (d == -1)
711               return NULL_TREE;
712 
713             /* Since d might be INT_MIN, we have to cast to
714                unsigned HOST_WIDE_INT before negating to avoid
715                undefined signed overflow.  */
716             abs_d = (d >= 0
717                       ? (unsigned HOST_WIDE_INT) d
718                       : - (unsigned HOST_WIDE_INT) d);
719 
720             /* n rem d = n rem -d */
721             if (code == TRUNC_MOD_EXPR && d < 0)
722               {
723                 d = abs_d;
724                 use_abs_op1 = true;
725               }
726             if (abs_d == HOST_WIDE_INT_1U << (prec - 1))
727               {
728                 /* This case is not handled correctly below.  */
729                 mode = -2;
730                 continue;
731               }
732             if (abs_d <= 1)
733               {
734                 mode = -2;
735                 continue;
736               }
737 
738             choose_multiplier (abs_d, prec, prec - 1, &ml,
739                                    &post_shift, &dummy_int);
740             if (ml >= HOST_WIDE_INT_1U << (prec - 1))
741               {
742                 this_mode = 4 + (d < 0);
743                 ml |= HOST_WIDE_INT_M1U << (prec - 1);
744               }
745             else
746               this_mode = 2 + (d < 0);
747           }
748       mulc[i] = ml;
749       post_shifts[i] = post_shift;
750       if ((i && !has_vector_shift && post_shifts[0] != post_shift)
751             || post_shift >= prec
752             || pre_shifts[i] >= prec)
753           this_mode = -2;
754 
755       if (i == 0)
756           mode = this_mode;
757       else if (mode != this_mode)
758           mode = -2;
759     }
760 
761   if (use_pow2)
762     {
763       tree addend = NULL_TREE;
764       if (sign_p == SIGNED)
765           {
766             tree uns_type;
767 
768             /* Both division and remainder sequences need
769                op0 < 0 ? mask : 0 computed.  It can be either computed as
770                (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
771                if none of the shifts is 0, or as the conditional.  */
772             for (i = 0; i < nunits; i++)
773               if (shifts[i] == 0)
774                 break;
775             uns_type
776               = build_vector_type (build_nonstandard_integer_type (prec, 1),
777                                          nunits);
778             if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
779               {
780                 for (i = 0; i < nunits; i++)
781                     shift_temps[i] = prec - 1;
782                 cur_op = add_rshift (gsi, type, op0, shift_temps);
783                 if (cur_op != NULL_TREE)
784                     {
785                       cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
786                                                       uns_type, cur_op);
787                       for (i = 0; i < nunits; i++)
788                         shift_temps[i] = prec - shifts[i];
789                       cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
790                       if (cur_op != NULL_TREE)
791                         addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
792                                                         type, cur_op);
793                     }
794               }
795             if (addend == NULL_TREE
796                 && expand_vec_cond_expr_p (type, type, LT_EXPR))
797               {
798                 tree zero, cst, mask_type, mask;
799                 gimple *stmt, *cond;
800 
801                 mask_type = truth_type_for (type);
802                 zero = build_zero_cst (type);
803                 mask = make_ssa_name (mask_type);
804                 cond = gimple_build_assign (mask, LT_EXPR, op0, zero);
805                 gsi_insert_before (gsi, cond, GSI_SAME_STMT);
806                 tree_vector_builder vec (type, nunits, 1);
807                 for (i = 0; i < nunits; i++)
808                     vec.quick_push (build_int_cst (TREE_TYPE (type),
809                                                          (HOST_WIDE_INT_1U
810                                                             << shifts[i]) - 1));
811                 cst = vec.build ();
812                 addend = make_ssa_name (type);
813                 stmt
814                     = gimple_build_assign (addend, VEC_COND_EXPR, mask, cst, zero);
815                 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
816               }
817           }
818       if (code == TRUNC_DIV_EXPR)
819           {
820             if (sign_p == UNSIGNED)
821               {
822                 /* q = op0 >> shift;  */
823                 cur_op = add_rshift (gsi, type, op0, shifts);
824                 if (cur_op != NULL_TREE)
825                     return cur_op;
826               }
827             else if (addend != NULL_TREE)
828               {
829                 /* t1 = op0 + addend;
830                      q = t1 >> shift;  */
831                 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
832                 if (op != unknown_optab
833                       && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
834                     {
835                       cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
836                       cur_op = add_rshift (gsi, type, cur_op, shifts);
837                       if (cur_op != NULL_TREE)
838                         return cur_op;
839                     }
840               }
841           }
842       else
843           {
844             tree mask;
845             tree_vector_builder vec (type, nunits, 1);
846             for (i = 0; i < nunits; i++)
847               vec.quick_push (build_int_cst (TREE_TYPE (type),
848                                                      (HOST_WIDE_INT_1U
849                                                       << shifts[i]) - 1));
850             mask = vec.build ();
851             op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
852             if (op != unknown_optab
853                 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
854               {
855                 if (sign_p == UNSIGNED)
856                     /* r = op0 & mask;  */
857                     return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
858                 else if (addend != NULL_TREE)
859                     {
860                       /* t1 = op0 + addend;
861                          t2 = t1 & mask;
862                          r = t2 - addend;  */
863                       op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
864                       if (op != unknown_optab
865                           && optab_handler (op, TYPE_MODE (type))
866                                != CODE_FOR_nothing)
867                         {
868                           cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
869                                                             addend);
870                           cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
871                                                             cur_op, mask);
872                           op = optab_for_tree_code (MINUS_EXPR, type,
873                                                             optab_default);
874                           if (op != unknown_optab
875                                 && optab_handler (op, TYPE_MODE (type))
876                                    != CODE_FOR_nothing)
877                               return gimplify_build2 (gsi, MINUS_EXPR, type,
878                                                             cur_op, addend);
879                         }
880                     }
881               }
882           }
883     }
884 
885   if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
886     return NULL_TREE;
887 
888   if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
889     return NULL_TREE;
890 
891   cur_op = op0;
892 
893   switch (mode)
894     {
895     case 0:
896       gcc_assert (sign_p == UNSIGNED);
897       /* t1 = oprnd0 >> pre_shift;
898            t2 = t1 h* ml;
899            q = t2 >> post_shift;  */
900       cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
901       if (cur_op == NULL_TREE)
902           return NULL_TREE;
903       break;
904     case 1:
905       gcc_assert (sign_p == UNSIGNED);
906       for (i = 0; i < nunits; i++)
907           {
908             shift_temps[i] = 1;
909             post_shifts[i]--;
910           }
911       break;
912     case 2:
913     case 3:
914     case 4:
915     case 5:
916       gcc_assert (sign_p == SIGNED);
917       for (i = 0; i < nunits; i++)
918           shift_temps[i] = prec - 1;
919       break;
920     default:
921       return NULL_TREE;
922     }
923 
924   tree_vector_builder vec (type, nunits, 1);
925   for (i = 0; i < nunits; i++)
926     vec.quick_push (build_int_cst (TREE_TYPE (type), mulc[i]));
927   mulcst = vec.build ();
928 
929   cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
930 
931   switch (mode)
932     {
933     case 0:
934       /* t1 = oprnd0 >> pre_shift;
935            t2 = t1 h* ml;
936            q = t2 >> post_shift;  */
937       cur_op = add_rshift (gsi, type, cur_op, post_shifts);
938       break;
939     case 1:
940       /* t1 = oprnd0 h* ml;
941            t2 = oprnd0 - t1;
942            t3 = t2 >> 1;
943            t4 = t1 + t3;
944            q = t4 >> (post_shift - 1);  */
945       op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
946       if (op == unknown_optab
947             || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
948           return NULL_TREE;
949       tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
950       tem = add_rshift (gsi, type, tem, shift_temps);
951       op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
952       if (op == unknown_optab
953             || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
954           return NULL_TREE;
955       tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
956       cur_op = add_rshift (gsi, type, tem, post_shifts);
957       if (cur_op == NULL_TREE)
958           return NULL_TREE;
959       break;
960     case 2:
961     case 3:
962     case 4:
963     case 5:
964       /* t1 = oprnd0 h* ml;
965            t2 = t1; [ iff (mode & 2) != 0 ]
966            t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
967            t3 = t2 >> post_shift;
968            t4 = oprnd0 >> (prec - 1);
969            q = t3 - t4; [ iff (mode & 1) == 0 ]
970            q = t4 - t3; [ iff (mode & 1) != 0 ]  */
971       if ((mode & 2) == 0)
972           {
973             op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
974             if (op == unknown_optab
975                 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
976               return NULL_TREE;
977             cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
978           }
979       cur_op = add_rshift (gsi, type, cur_op, post_shifts);
980       if (cur_op == NULL_TREE)
981           return NULL_TREE;
982       tem = add_rshift (gsi, type, op0, shift_temps);
983       if (tem == NULL_TREE)
984           return NULL_TREE;
985       op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
986       if (op == unknown_optab
987             || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
988           return NULL_TREE;
989       if ((mode & 1) == 0)
990           cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
991       else
992           cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
993       break;
994     default:
995       gcc_unreachable ();
996     }
997 
998   if (code == TRUNC_DIV_EXPR)
999     return cur_op;
1000 
1001   /* We divided.  Now finish by:
1002      t1 = q * oprnd1;
1003      r = oprnd0 - t1;  */
1004   op = optab_for_tree_code (MULT_EXPR, type, optab_default);
1005   if (op == unknown_optab
1006       || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
1007     return NULL_TREE;
1008   if (use_abs_op1)
1009     {
1010       tree_vector_builder elts;
1011       if (!elts.new_unary_operation (type, op1, false))
1012           return NULL_TREE;
1013       unsigned int count = elts.encoded_nelts ();
1014       for (unsigned int i = 0; i < count; ++i)
1015           {
1016             tree elem1 = VECTOR_CST_ELT (op1, i);
1017 
1018             tree elt = const_unop (ABS_EXPR, TREE_TYPE (elem1), elem1);
1019             if (elt == NULL_TREE)
1020               return NULL_TREE;
1021             elts.quick_push (elt);
1022           }
1023       op1 = elts.build ();
1024     }
1025   tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
1026   op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
1027   if (op == unknown_optab
1028       || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
1029     return NULL_TREE;
1030   return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
1031 }
1032 
1033 /* Expand a vector condition to scalars, by using many conditions
1034    on the vector's elements.  */
1035 
1036 static bool
expand_vector_condition(gimple_stmt_iterator * gsi,bitmap dce_ssa_names)1037 expand_vector_condition (gimple_stmt_iterator *gsi, bitmap dce_ssa_names)
1038 {
1039   gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1040   tree type = TREE_TYPE (gimple_assign_lhs (stmt));
1041   tree a = gimple_assign_rhs1 (stmt);
1042   tree a1 = a;
1043   tree a2 = NULL_TREE;
1044   bool a_is_comparison = false;
1045   bool a_is_scalar_bitmask = false;
1046   tree b = gimple_assign_rhs2 (stmt);
1047   tree c = gimple_assign_rhs3 (stmt);
1048   vec<constructor_elt, va_gc> *v;
1049   tree constr;
1050   tree inner_type = TREE_TYPE (type);
1051   tree width = vector_element_bits_tree (type);
1052   tree cond_type = TREE_TYPE (TREE_TYPE (a));
1053   tree comp_inner_type = cond_type;
1054   tree index = bitsize_int (0);
1055   tree comp_width = width;
1056   tree comp_index = index;
1057   location_t loc = gimple_location (gsi_stmt (*gsi));
1058   tree_code code = TREE_CODE (a);
1059   gassign *assign = NULL;
1060 
1061   if (code == SSA_NAME)
1062     {
1063       assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (a));
1064       if (assign != NULL
1065             && TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) == tcc_comparison)
1066           {
1067             a_is_comparison = true;
1068             a1 = gimple_assign_rhs1 (assign);
1069             a2 = gimple_assign_rhs2 (assign);
1070             code = gimple_assign_rhs_code (assign);
1071             comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
1072             comp_width = vector_element_bits_tree (TREE_TYPE (a1));
1073           }
1074     }
1075 
1076   if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), code)
1077       || (integer_all_onesp (b) && integer_zerop (c)
1078             && expand_vec_cmp_expr_p (type, TREE_TYPE (a1), code)))
1079     {
1080       gcc_assert (TREE_CODE (a) == SSA_NAME || TREE_CODE (a) == VECTOR_CST);
1081       return true;
1082     }
1083 
1084   /* If a has vector boolean type and is a comparison, above
1085      expand_vec_cond_expr_p might fail, even if both the comparison and
1086      VEC_COND_EXPR could be supported individually.  See PR109176.  */
1087   if (a_is_comparison
1088       && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (a))
1089       && expand_vec_cond_expr_p (type, TREE_TYPE (a), SSA_NAME)
1090       && expand_vec_cmp_expr_p (TREE_TYPE (a1), TREE_TYPE (a), code))
1091     return true;
1092 
1093   /* Handle vector boolean types with bitmasks.  If there is a comparison
1094      and we can expand the comparison into the vector boolean bitmask,
1095      or otherwise if it is compatible with type, we can transform
1096       vbfld_1 = x_2 < y_3 ? vbfld_4 : vbfld_5;
1097      into
1098       tmp_6 = x_2 < y_3;
1099       tmp_7 = tmp_6 & vbfld_4;
1100       tmp_8 = ~tmp_6;
1101       tmp_9 = tmp_8 & vbfld_5;
1102       vbfld_1 = tmp_7 | tmp_9;
1103      Similarly for vbfld_10 instead of x_2 < y_3.  */
1104   if (VECTOR_BOOLEAN_TYPE_P (type)
1105       && SCALAR_INT_MODE_P (TYPE_MODE (type))
1106       && known_lt (GET_MODE_BITSIZE (TYPE_MODE (type)),
1107                        TYPE_VECTOR_SUBPARTS (type)
1108                        * GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (type))))
1109       && (a_is_comparison
1110             ? useless_type_conversion_p (type, TREE_TYPE (a))
1111             : expand_vec_cmp_expr_p (TREE_TYPE (a1), type, TREE_CODE (a))))
1112     {
1113       if (a_is_comparison)
1114           a = gimplify_build2 (gsi, code, type, a1, a2);
1115       a1 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a, b);
1116       a2 = gimplify_build1 (gsi, BIT_NOT_EXPR, type, a);
1117       a2 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a2, c);
1118       a = gimplify_build2 (gsi, BIT_IOR_EXPR, type, a1, a2);
1119       gimple_assign_set_rhs_from_tree (gsi, a);
1120       update_stmt (gsi_stmt (*gsi));
1121       return true;
1122     }
1123 
1124   /* TODO: try and find a smaller vector type.  */
1125 
1126   if (!warning_suppressed_p (stmt, OPT_Wvector_operation_performance))
1127     warning_at (loc, OPT_Wvector_operation_performance,
1128                     "vector condition will be expanded piecewise");
1129 
1130   if (!a_is_comparison
1131       && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (a))
1132       && SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (a)))
1133       && known_lt (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (a))),
1134                        TYPE_VECTOR_SUBPARTS (TREE_TYPE (a))
1135                        * GET_MODE_BITSIZE (SCALAR_TYPE_MODE
1136                                                             (TREE_TYPE (TREE_TYPE (a))))))
1137     {
1138       a_is_scalar_bitmask = true;
1139       int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (a)));
1140       tree atype = build_nonstandard_integer_type (prec, 1);
1141       a = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, atype, a);
1142     }
1143   else if (!a_is_comparison
1144              && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (a)))
1145     comp_width = vector_element_bits_tree (TREE_TYPE (a));
1146 
1147   int nunits = nunits_for_known_piecewise_op (type);
1148   vec_alloc (v, nunits);
1149   bool constant_p = true;
1150   for (int i = 0; i < nunits; i++)
1151     {
1152       tree aa, result;
1153       tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
1154       tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
1155       if (a_is_comparison)
1156           {
1157             tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1,
1158                                                comp_width, comp_index);
1159             tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2,
1160                                                comp_width, comp_index);
1161             aa = build2 (code, cond_type, aa1, aa2);
1162           }
1163       else if (a_is_scalar_bitmask)
1164           {
1165             wide_int w = wi::set_bit_in_zero (i, TYPE_PRECISION (TREE_TYPE (a)));
1166             result = gimplify_build2 (gsi, BIT_AND_EXPR, TREE_TYPE (a),
1167                                             a, wide_int_to_tree (TREE_TYPE (a), w));
1168             aa = build2 (NE_EXPR, boolean_type_node, result,
1169                            build_zero_cst (TREE_TYPE (a)));
1170           }
1171       else
1172           aa = tree_vec_extract (gsi, cond_type, a, comp_width, comp_index);
1173       result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
1174       if (!CONSTANT_CLASS_P (result))
1175           constant_p = false;
1176       constructor_elt ce = {NULL_TREE, result};
1177       v->quick_push (ce);
1178       index = int_const_binop (PLUS_EXPR, index, width);
1179       if (width == comp_width)
1180           comp_index = index;
1181       else
1182           comp_index = int_const_binop (PLUS_EXPR, comp_index, comp_width);
1183     }
1184 
1185   if (constant_p)
1186     constr = build_vector_from_ctor (type, v);
1187   else
1188     constr = build_constructor (type, v);
1189   gimple_assign_set_rhs_from_tree (gsi, constr);
1190   update_stmt (gsi_stmt (*gsi));
1191 
1192   if (a_is_comparison)
1193     bitmap_set_bit (dce_ssa_names,
1194                         SSA_NAME_VERSION (gimple_assign_lhs (assign)));
1195 
1196   return false;
1197 }
1198 
1199 static tree
expand_vector_operation(gimple_stmt_iterator * gsi,tree type,tree compute_type,gassign * assign,enum tree_code code,bitmap dce_ssa_names)1200 expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
1201                                gassign *assign, enum tree_code code,
1202                                bitmap dce_ssa_names)
1203 {
1204   machine_mode compute_mode = TYPE_MODE (compute_type);
1205 
1206   /* If the compute mode is not a vector mode (hence we are not decomposing
1207      a BLKmode vector to smaller, hardware-supported vectors), we may want
1208      to expand the operations in parallel.  */
1209   if (!VECTOR_MODE_P (compute_mode))
1210     switch (code)
1211       {
1212       case PLUS_EXPR:
1213       case MINUS_EXPR:
1214         if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
1215             return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
1216                                                    gimple_assign_rhs1 (assign),
1217                                                    gimple_assign_rhs2 (assign), code);
1218           break;
1219 
1220       case NEGATE_EXPR:
1221         if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
1222           return expand_vector_addition (gsi, do_unop, do_negate, type,
1223                                                  gimple_assign_rhs1 (assign),
1224                                                    NULL_TREE, code);
1225           break;
1226 
1227       case BIT_AND_EXPR:
1228       case BIT_IOR_EXPR:
1229       case BIT_XOR_EXPR:
1230         return expand_vector_parallel (gsi, do_binop, type,
1231                                                gimple_assign_rhs1 (assign),
1232                                                gimple_assign_rhs2 (assign), code);
1233 
1234       case BIT_NOT_EXPR:
1235         return expand_vector_parallel (gsi, do_unop, type,
1236                                                gimple_assign_rhs1 (assign),
1237                                      NULL_TREE, code);
1238       case EQ_EXPR:
1239       case NE_EXPR:
1240       case GT_EXPR:
1241       case LT_EXPR:
1242       case GE_EXPR:
1243       case LE_EXPR:
1244       case UNEQ_EXPR:
1245       case UNGT_EXPR:
1246       case UNLT_EXPR:
1247       case UNGE_EXPR:
1248       case UNLE_EXPR:
1249       case LTGT_EXPR:
1250       case ORDERED_EXPR:
1251       case UNORDERED_EXPR:
1252           {
1253             tree rhs1 = gimple_assign_rhs1 (assign);
1254             tree rhs2 = gimple_assign_rhs2 (assign);
1255 
1256             return expand_vector_comparison (gsi, type, rhs1, rhs2, code,
1257                                                      dce_ssa_names);
1258           }
1259 
1260       case TRUNC_DIV_EXPR:
1261       case TRUNC_MOD_EXPR:
1262           {
1263             tree rhs1 = gimple_assign_rhs1 (assign);
1264             tree rhs2 = gimple_assign_rhs2 (assign);
1265             tree ret;
1266 
1267             if (!optimize
1268                 || !VECTOR_INTEGER_TYPE_P (type)
1269                 || TREE_CODE (rhs2) != VECTOR_CST
1270                 || !VECTOR_MODE_P (TYPE_MODE (type)))
1271               break;
1272 
1273             ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
1274             if (ret != NULL_TREE)
1275               return ret;
1276             break;
1277           }
1278 
1279       default:
1280           break;
1281       }
1282 
1283   if (TREE_CODE_CLASS (code) == tcc_unary)
1284     return expand_vector_piecewise (gsi, do_unop, type, compute_type,
1285                                             gimple_assign_rhs1 (assign),
1286                                             NULL_TREE, code, false);
1287   else
1288     return expand_vector_piecewise (gsi, do_binop, type, compute_type,
1289                                             gimple_assign_rhs1 (assign),
1290                                             gimple_assign_rhs2 (assign), code, false);
1291 }
1292 
1293 /* Try to optimize
1294    a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
1295    style stmts into:
1296    _9 = { b_7, b_7, b_7, b_7 };
1297    a_5 = _9 + { 0, 3, 6, 9 };
1298    because vector splat operation is usually more efficient
1299    than piecewise initialization of the vector.  */
1300 
1301 static void
optimize_vector_constructor(gimple_stmt_iterator * gsi)1302 optimize_vector_constructor (gimple_stmt_iterator *gsi)
1303 {
1304   gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1305   tree lhs = gimple_assign_lhs (stmt);
1306   tree rhs = gimple_assign_rhs1 (stmt);
1307   tree type = TREE_TYPE (rhs);
1308   unsigned int i, j;
1309   unsigned HOST_WIDE_INT nelts;
1310   bool all_same = true;
1311   constructor_elt *elt;
1312   gimple *g;
1313   tree base = NULL_TREE;
1314   optab op;
1315 
1316   if (!TYPE_VECTOR_SUBPARTS (type).is_constant (&nelts)
1317       || nelts <= 2
1318       || CONSTRUCTOR_NELTS (rhs) != nelts)
1319     return;
1320   op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
1321   if (op == unknown_optab
1322       || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
1323     return;
1324   FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
1325     if (TREE_CODE (elt->value) != SSA_NAME
1326           || TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
1327       return;
1328     else
1329       {
1330           tree this_base = elt->value;
1331           if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
1332             all_same = false;
1333           for (j = 0; j < nelts + 1; j++)
1334             {
1335               g = SSA_NAME_DEF_STMT (this_base);
1336               if (is_gimple_assign (g)
1337                     && gimple_assign_rhs_code (g) == PLUS_EXPR
1338                     && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
1339                     && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
1340                     && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
1341                 this_base = gimple_assign_rhs1 (g);
1342               else
1343                 break;
1344             }
1345           if (i == 0)
1346             base = this_base;
1347           else if (this_base != base)
1348             return;
1349       }
1350   if (all_same)
1351     return;
1352   tree_vector_builder cst (type, nelts, 1);
1353   for (i = 0; i < nelts; i++)
1354     {
1355       tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;
1356       tree elt = build_zero_cst (TREE_TYPE (base));
1357       while (this_base != base)
1358           {
1359             g = SSA_NAME_DEF_STMT (this_base);
1360             elt = fold_binary (PLUS_EXPR, TREE_TYPE (base),
1361                                    elt, gimple_assign_rhs2 (g));
1362             if (elt == NULL_TREE
1363                 || TREE_CODE (elt) != INTEGER_CST
1364                 || TREE_OVERFLOW (elt))
1365               return;
1366             this_base = gimple_assign_rhs1 (g);
1367           }
1368       cst.quick_push (elt);
1369     }
1370   for (i = 0; i < nelts; i++)
1371     CONSTRUCTOR_ELT (rhs, i)->value = base;
1372   g = gimple_build_assign (make_ssa_name (type), rhs);
1373   gsi_insert_before (gsi, g, GSI_SAME_STMT);
1374   g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g),
1375                                  cst.build ());
1376   gsi_replace (gsi, g, false);
1377 }
1378 
1379 /* Return a type for the widest vector mode with the same element type as
1380    type ORIGINAL_VECTOR_TYPE, with at most the same number of elements as type
1381    ORIGINAL_VECTOR_TYPE and that is supported by the target for an operation
1382    with optab OP, or return NULL_TREE if none is found.  */
1383 
1384 static tree
type_for_widest_vector_mode(tree original_vector_type,optab op)1385 type_for_widest_vector_mode (tree original_vector_type, optab op)
1386 {
1387   gcc_assert (VECTOR_TYPE_P (original_vector_type));
1388   tree type = TREE_TYPE (original_vector_type);
1389   machine_mode inner_mode = TYPE_MODE (type);
1390   machine_mode best_mode = VOIDmode, mode;
1391   poly_int64 best_nunits = 0;
1392 
1393   if (SCALAR_FLOAT_MODE_P (inner_mode))
1394     mode = MIN_MODE_VECTOR_FLOAT;
1395   else if (SCALAR_FRACT_MODE_P (inner_mode))
1396     mode = MIN_MODE_VECTOR_FRACT;
1397   else if (SCALAR_UFRACT_MODE_P (inner_mode))
1398     mode = MIN_MODE_VECTOR_UFRACT;
1399   else if (SCALAR_ACCUM_MODE_P (inner_mode))
1400     mode = MIN_MODE_VECTOR_ACCUM;
1401   else if (SCALAR_UACCUM_MODE_P (inner_mode))
1402     mode = MIN_MODE_VECTOR_UACCUM;
1403   else if (inner_mode == BImode)
1404     mode = MIN_MODE_VECTOR_BOOL;
1405   else
1406     mode = MIN_MODE_VECTOR_INT;
1407 
1408   FOR_EACH_MODE_FROM (mode, mode)
1409     if (GET_MODE_INNER (mode) == inner_mode
1410           && maybe_gt (GET_MODE_NUNITS (mode), best_nunits)
1411           && optab_handler (op, mode) != CODE_FOR_nothing
1412           && known_le (GET_MODE_NUNITS (mode),
1413                          TYPE_VECTOR_SUBPARTS (original_vector_type)))
1414       best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
1415 
1416   if (best_mode == VOIDmode)
1417     return NULL_TREE;
1418   else
1419     return build_vector_type_for_mode (type, best_mode);
1420 }
1421 
1422 
1423 /* Build a reference to the element of the vector VECT.  Function
1424    returns either the element itself, either BIT_FIELD_REF, or an
1425    ARRAY_REF expression.
1426 
1427    GSI is required to insert temporary variables while building a
1428    refernece to the element of the vector VECT.
1429 
1430    PTMPVEC is a pointer to the temporary variable for caching
1431    purposes.  In case when PTMPVEC is NULL new temporary variable
1432    will be created.  */
1433 static tree
vector_element(gimple_stmt_iterator * gsi,tree vect,tree idx,tree * ptmpvec)1434 vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
1435 {
1436   tree vect_type, vect_elt_type;
1437   gimple *asgn;
1438   tree tmpvec;
1439   tree arraytype;
1440   bool need_asgn = true;
1441   unsigned int elements;
1442 
1443   vect_type = TREE_TYPE (vect);
1444   vect_elt_type = TREE_TYPE (vect_type);
1445   elements = nunits_for_known_piecewise_op (vect_type);
1446 
1447   if (TREE_CODE (idx) == INTEGER_CST)
1448     {
1449       unsigned HOST_WIDE_INT index;
1450 
1451       /* Given that we're about to compute a binary modulus,
1452            we don't care about the high bits of the value.  */
1453       index = TREE_INT_CST_LOW (idx);
1454       if (!tree_fits_uhwi_p (idx) || index >= elements)
1455           {
1456             index &= elements - 1;
1457             idx = build_int_cst (TREE_TYPE (idx), index);
1458           }
1459 
1460       /* When lowering a vector statement sequence do some easy
1461          simplification by looking through intermediate vector results.  */
1462       if (TREE_CODE (vect) == SSA_NAME)
1463           {
1464             gimple *def_stmt = SSA_NAME_DEF_STMT (vect);
1465             if (is_gimple_assign (def_stmt)
1466                 && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
1467                       || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
1468               vect = gimple_assign_rhs1 (def_stmt);
1469           }
1470 
1471       if (TREE_CODE (vect) == VECTOR_CST)
1472           return VECTOR_CST_ELT (vect, index);
1473       else if (TREE_CODE (vect) == CONSTRUCTOR
1474                  && (CONSTRUCTOR_NELTS (vect) == 0
1475                        || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
1476                           != VECTOR_TYPE))
1477         {
1478             if (index < CONSTRUCTOR_NELTS (vect))
1479               return CONSTRUCTOR_ELT (vect, index)->value;
1480           return build_zero_cst (vect_elt_type);
1481         }
1482       else
1483         {
1484             tree size = vector_element_bits_tree (vect_type);
1485             tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
1486                                           size);
1487             return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
1488         }
1489     }
1490 
1491   if (!ptmpvec)
1492     tmpvec = create_tmp_var (vect_type, "vectmp");
1493   else if (!*ptmpvec)
1494     tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
1495   else
1496     {
1497       tmpvec = *ptmpvec;
1498       need_asgn = false;
1499     }
1500 
1501   if (need_asgn)
1502     {
1503       TREE_ADDRESSABLE (tmpvec) = 1;
1504       asgn = gimple_build_assign (tmpvec, vect);
1505       gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
1506     }
1507 
1508   arraytype = build_array_type_nelts (vect_elt_type, elements);
1509   return build4 (ARRAY_REF, vect_elt_type,
1510                  build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
1511                  idx, NULL_TREE, NULL_TREE);
1512 }
1513 
1514 /* Check if VEC_PERM_EXPR within the given setting is supported
1515    by hardware, or lower it piecewise.
1516 
1517    When VEC_PERM_EXPR has the same first and second operands:
1518    VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
1519    {v0[mask[0]], v0[mask[1]], ...}
1520    MASK and V0 must have the same number of elements.
1521 
1522    Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
1523    {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
1524    V0 and V1 must have the same type.  MASK, V0, V1 must have the
1525    same number of arguments.  */
1526 
1527 static void
lower_vec_perm(gimple_stmt_iterator * gsi)1528 lower_vec_perm (gimple_stmt_iterator *gsi)
1529 {
1530   gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1531   tree mask = gimple_assign_rhs3 (stmt);
1532   tree vec0 = gimple_assign_rhs1 (stmt);
1533   tree vec1 = gimple_assign_rhs2 (stmt);
1534   tree vect_type = TREE_TYPE (vec0);
1535   tree mask_type = TREE_TYPE (mask);
1536   tree vect_elt_type = TREE_TYPE (vect_type);
1537   tree mask_elt_type = TREE_TYPE (mask_type);
1538   unsigned HOST_WIDE_INT elements;
1539   vec<constructor_elt, va_gc> *v;
1540   tree constr, t, si, i_val;
1541   tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
1542   bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
1543   location_t loc = gimple_location (gsi_stmt (*gsi));
1544   unsigned i;
1545 
1546   if (!TYPE_VECTOR_SUBPARTS (vect_type).is_constant (&elements))
1547     return;
1548 
1549   if (TREE_CODE (mask) == SSA_NAME)
1550     {
1551       gimple *def_stmt = SSA_NAME_DEF_STMT (mask);
1552       if (is_gimple_assign (def_stmt)
1553             && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
1554           mask = gimple_assign_rhs1 (def_stmt);
1555     }
1556 
1557   vec_perm_builder sel_int;
1558 
1559   if (TREE_CODE (mask) == VECTOR_CST
1560       && tree_to_vec_perm_builder (&sel_int, mask))
1561     {
1562       vec_perm_indices indices (sel_int, 2, elements);
1563       if (can_vec_perm_const_p (TYPE_MODE (vect_type), indices))
1564           {
1565             gimple_assign_set_rhs3 (stmt, mask);
1566             update_stmt (stmt);
1567             return;
1568           }
1569       /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero
1570            vector as VEC1 and a right element shift MASK.  */
1571       if (optab_handler (vec_shr_optab, TYPE_MODE (vect_type))
1572             != CODE_FOR_nothing
1573             && TREE_CODE (vec1) == VECTOR_CST
1574             && initializer_zerop (vec1)
1575             && maybe_ne (indices[0], 0)
1576             && known_lt (poly_uint64 (indices[0]), elements))
1577           {
1578             bool ok_p = indices.series_p (0, 1, indices[0], 1);
1579             if (!ok_p)
1580               {
1581                 for (i = 1; i < elements; ++i)
1582                     {
1583                       poly_uint64 actual = indices[i];
1584                       poly_uint64 expected = i + indices[0];
1585                       /* Indices into the second vector are all equivalent.  */
1586                       if (maybe_lt (actual, elements)
1587                           ? maybe_ne (actual, expected)
1588                           : maybe_lt (expected, elements))
1589                         break;
1590                     }
1591                 ok_p = i == elements;
1592               }
1593             if (ok_p)
1594               {
1595                 gimple_assign_set_rhs3 (stmt, mask);
1596                 update_stmt (stmt);
1597                 return;
1598               }
1599           }
1600       /* And similarly vec_shl pattern.  */
1601       if (optab_handler (vec_shl_optab, TYPE_MODE (vect_type))
1602             != CODE_FOR_nothing
1603             && TREE_CODE (vec0) == VECTOR_CST
1604             && initializer_zerop (vec0))
1605           {
1606             unsigned int first = 0;
1607             for (i = 0; i < elements; ++i)
1608               if (known_eq (poly_uint64 (indices[i]), elements))
1609                 {
1610                     if (i == 0 || first)
1611                       break;
1612                     first = i;
1613                 }
1614               else if (first
1615                          ? maybe_ne (poly_uint64 (indices[i]),
1616                                                         elements + i - first)
1617                          : maybe_ge (poly_uint64 (indices[i]), elements))
1618                 break;
1619             if (first && i == elements)
1620               {
1621                 gimple_assign_set_rhs3 (stmt, mask);
1622                 update_stmt (stmt);
1623                 return;
1624               }
1625           }
1626     }
1627   else if (can_vec_perm_var_p (TYPE_MODE (vect_type)))
1628     return;
1629 
1630   if (!warning_suppressed_p (stmt, OPT_Wvector_operation_performance))
1631     warning_at (loc, OPT_Wvector_operation_performance,
1632                     "vector shuffling operation will be expanded piecewise");
1633 
1634   vec_alloc (v, elements);
1635   bool constant_p = true;
1636   for (i = 0; i < elements; i++)
1637     {
1638       si = size_int (i);
1639       i_val = vector_element (gsi, mask, si, &masktmp);
1640 
1641       if (TREE_CODE (i_val) == INTEGER_CST)
1642         {
1643             unsigned HOST_WIDE_INT index;
1644 
1645             index = TREE_INT_CST_LOW (i_val);
1646             if (!tree_fits_uhwi_p (i_val) || index >= elements)
1647               i_val = build_int_cst (mask_elt_type, index & (elements - 1));
1648 
1649           if (two_operand_p && (index & elements) != 0)
1650               t = vector_element (gsi, vec1, i_val, &vec1tmp);
1651             else
1652               t = vector_element (gsi, vec0, i_val, &vec0tmp);
1653 
1654           t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1655                                                   true, GSI_SAME_STMT);
1656         }
1657       else
1658         {
1659             tree cond = NULL_TREE, v0_val;
1660 
1661             if (two_operand_p)
1662               {
1663                 cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1664                                         build_int_cst (mask_elt_type, elements));
1665                 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1666                                                          true, GSI_SAME_STMT);
1667               }
1668 
1669             i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1670                                      build_int_cst (mask_elt_type, elements - 1));
1671             i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
1672                                                       true, GSI_SAME_STMT);
1673 
1674             v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
1675             v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
1676                                                        true, GSI_SAME_STMT);
1677 
1678             if (two_operand_p)
1679               {
1680                 tree v1_val;
1681 
1682                 v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
1683                 v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
1684                                                              true, GSI_SAME_STMT);
1685 
1686                 cond = fold_build2 (EQ_EXPR, boolean_type_node,
1687                                           cond, build_zero_cst (mask_elt_type));
1688                 cond = fold_build3 (COND_EXPR, vect_elt_type,
1689                                           cond, v0_val, v1_val);
1690               t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1691                                                       true, GSI_SAME_STMT);
1692             }
1693             else
1694               t = v0_val;
1695         }
1696 
1697       if (!CONSTANT_CLASS_P (t))
1698           constant_p = false;
1699       CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
1700     }
1701 
1702   if (constant_p)
1703     constr = build_vector_from_ctor (vect_type, v);
1704   else
1705     constr = build_constructor (vect_type, v);
1706   gimple_assign_set_rhs_from_tree (gsi, constr);
1707   update_stmt (gsi_stmt (*gsi));
1708 }
1709 
1710 /* If OP is a uniform vector return the element it is a splat from.  */
1711 
1712 static tree
ssa_uniform_vector_p(tree op)1713 ssa_uniform_vector_p (tree op)
1714 {
1715   if (TREE_CODE (op) == VECTOR_CST
1716       || TREE_CODE (op) == VEC_DUPLICATE_EXPR
1717       || TREE_CODE (op) == CONSTRUCTOR)
1718     return uniform_vector_p (op);
1719   if (TREE_CODE (op) == SSA_NAME)
1720     {
1721       gimple *def_stmt = SSA_NAME_DEF_STMT (op);
1722       if (gimple_assign_single_p (def_stmt))
1723           return uniform_vector_p (gimple_assign_rhs1 (def_stmt));
1724     }
1725   return NULL_TREE;
1726 }
1727 
1728 /* Return type in which CODE operation with optab OP can be
1729    computed.  */
1730 
1731 static tree
get_compute_type(enum tree_code code,optab op,tree type)1732 get_compute_type (enum tree_code code, optab op, tree type)
1733 {
1734   /* For very wide vectors, try using a smaller vector mode.  */
1735   tree compute_type = type;
1736   if (op
1737       && (!VECTOR_MODE_P (TYPE_MODE (type))
1738             || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
1739     {
1740       tree vector_compute_type
1741           = type_for_widest_vector_mode (type, op);
1742       if (vector_compute_type != NULL_TREE
1743             && maybe_ne (TYPE_VECTOR_SUBPARTS (vector_compute_type), 1U)
1744             && (optab_handler (op, TYPE_MODE (vector_compute_type))
1745                 != CODE_FOR_nothing))
1746           compute_type = vector_compute_type;
1747     }
1748 
1749   /* If we are breaking a BLKmode vector into smaller pieces,
1750      type_for_widest_vector_mode has already looked into the optab,
1751      so skip these checks.  */
1752   if (compute_type == type)
1753     {
1754       machine_mode compute_mode = TYPE_MODE (compute_type);
1755       if (VECTOR_MODE_P (compute_mode))
1756           {
1757             if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
1758               return compute_type;
1759             if (code == MULT_HIGHPART_EXPR
1760                 && can_mult_highpart_p (compute_mode,
1761                                               TYPE_UNSIGNED (compute_type)))
1762               return compute_type;
1763           }
1764       /* There is no operation in hardware, so fall back to scalars.  */
1765       compute_type = TREE_TYPE (type);
1766     }
1767 
1768   return compute_type;
1769 }
1770 
1771 static tree
do_cond(gimple_stmt_iterator * gsi,tree inner_type,tree a,tree b,tree bitpos,tree bitsize,enum tree_code code,tree type ATTRIBUTE_UNUSED)1772 do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
1773            tree bitpos, tree bitsize, enum tree_code code,
1774            tree type ATTRIBUTE_UNUSED)
1775 {
1776   if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
1777     a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
1778   if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
1779     b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
1780   tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
1781   return gimplify_build3 (gsi, code, inner_type, unshare_expr (cond), a, b);
1782 }
1783 
1784 /* Expand a vector COND_EXPR to scalars, piecewise.  */
1785 static void
expand_vector_scalar_condition(gimple_stmt_iterator * gsi)1786 expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
1787 {
1788   gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1789   tree lhs = gimple_assign_lhs (stmt);
1790   tree type = TREE_TYPE (lhs);
1791   tree compute_type = get_compute_type (COND_EXPR, mov_optab, type);
1792   machine_mode compute_mode = TYPE_MODE (compute_type);
1793   gcc_assert (compute_mode != BLKmode);
1794   tree rhs2 = gimple_assign_rhs2 (stmt);
1795   tree rhs3 = gimple_assign_rhs3 (stmt);
1796   tree new_rhs;
1797 
1798   /* If the compute mode is not a vector mode (hence we are not decomposing
1799      a BLKmode vector to smaller, hardware-supported vectors), we may want
1800      to expand the operations in parallel.  */
1801   if (!VECTOR_MODE_P (compute_mode))
1802     new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
1803                                               COND_EXPR);
1804   else
1805     new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type,
1806                                                rhs2, rhs3, COND_EXPR, false);
1807   if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1808     new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1809                                      new_rhs);
1810 
1811   /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree. One
1812      way to do it is change expand_vector_operation and its callees to
1813      return a tree_code, RHS1 and RHS2 instead of a tree. */
1814   gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1815   update_stmt (gsi_stmt (*gsi));
1816 }
1817 
1818 /* Callback for expand_vector_piecewise to do VEC_CONVERT ifn call
1819    lowering.  If INNER_TYPE is not a vector type, this is a scalar
1820    fallback.  */
1821 
1822 static tree
do_vec_conversion(gimple_stmt_iterator * gsi,tree inner_type,tree a,tree decl,tree bitpos,tree bitsize,enum tree_code code,tree type)1823 do_vec_conversion (gimple_stmt_iterator *gsi, tree inner_type, tree a,
1824                        tree decl, tree bitpos, tree bitsize,
1825                        enum tree_code code, tree type)
1826 {
1827   a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
1828   if (!VECTOR_TYPE_P (inner_type))
1829     return gimplify_build1 (gsi, code, TREE_TYPE (type), a);
1830   if (code == CALL_EXPR)
1831     {
1832       gimple *g = gimple_build_call (decl, 1, a);
1833       tree lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (decl)));
1834       gimple_call_set_lhs (g, lhs);
1835       gsi_insert_before (gsi, g, GSI_SAME_STMT);
1836       return lhs;
1837     }
1838   else
1839     {
1840       tree outer_type = build_vector_type (TREE_TYPE (type),
1841                                                      TYPE_VECTOR_SUBPARTS (inner_type));
1842       return gimplify_build1 (gsi, code, outer_type, a);
1843     }
1844 }
1845 
1846 /* Similarly, but for narrowing conversion.  */
1847 
1848 static tree
do_vec_narrow_conversion(gimple_stmt_iterator * gsi,tree inner_type,tree a,tree,tree bitpos,tree,enum tree_code code,tree type)1849 do_vec_narrow_conversion (gimple_stmt_iterator *gsi, tree inner_type, tree a,
1850                                 tree, tree bitpos, tree, enum tree_code code,
1851                                 tree type)
1852 {
1853   tree itype = build_vector_type (TREE_TYPE (inner_type),
1854                                           exact_div (TYPE_VECTOR_SUBPARTS (inner_type),
1855                                                        2));
1856   tree b = tree_vec_extract (gsi, itype, a, TYPE_SIZE (itype), bitpos);
1857   tree c = tree_vec_extract (gsi, itype, a, TYPE_SIZE (itype),
1858                                    int_const_binop (PLUS_EXPR, bitpos,
1859                                                         TYPE_SIZE (itype)));
1860   tree outer_type = build_vector_type (TREE_TYPE (type),
1861                                                TYPE_VECTOR_SUBPARTS (inner_type));
1862   return gimplify_build2 (gsi, code, outer_type, b, c);
1863 }
1864 
1865 /* Expand VEC_CONVERT ifn call.  */
1866 
1867 static void
expand_vector_conversion(gimple_stmt_iterator * gsi)1868 expand_vector_conversion (gimple_stmt_iterator *gsi)
1869 {
1870   gimple *stmt = gsi_stmt (*gsi);
1871   gimple *g;
1872   tree lhs = gimple_call_lhs (stmt);
1873   if (lhs == NULL_TREE)
1874     {
1875       g = gimple_build_nop ();
1876       gsi_replace (gsi, g, false);
1877       return;
1878     }
1879   tree arg = gimple_call_arg (stmt, 0);
1880   tree ret_type = TREE_TYPE (lhs);
1881   tree arg_type = TREE_TYPE (arg);
1882   tree new_rhs, compute_type = TREE_TYPE (arg_type);
1883   enum tree_code code = NOP_EXPR;
1884   enum tree_code code1 = ERROR_MARK;
1885   enum { NARROW, NONE, WIDEN } modifier = NONE;
1886   optab optab1 = unknown_optab;
1887 
1888   gcc_checking_assert (VECTOR_TYPE_P (ret_type) && VECTOR_TYPE_P (arg_type));
1889   if (INTEGRAL_TYPE_P (TREE_TYPE (ret_type))
1890       && SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg_type)))
1891     code = FIX_TRUNC_EXPR;
1892   else if (INTEGRAL_TYPE_P (TREE_TYPE (arg_type))
1893              && SCALAR_FLOAT_TYPE_P (TREE_TYPE (ret_type)))
1894     code = FLOAT_EXPR;
1895   unsigned int ret_elt_bits = vector_element_bits (ret_type);
1896   unsigned int arg_elt_bits = vector_element_bits (arg_type);
1897   if (ret_elt_bits < arg_elt_bits)
1898     modifier = NARROW;
1899   else if (ret_elt_bits > arg_elt_bits)
1900     modifier = WIDEN;
1901 
1902   if (modifier == NONE && (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR))
1903     {
1904       if (supportable_convert_operation (code, ret_type, arg_type, &code1))
1905           {
1906             g = gimple_build_assign (lhs, code1, arg);
1907             gsi_replace (gsi, g, false);
1908             return;
1909           }
1910       /* Can't use get_compute_type here, as supportable_convert_operation
1911            doesn't necessarily use an optab and needs two arguments.  */
1912       tree vec_compute_type
1913           = type_for_widest_vector_mode (arg_type, mov_optab);
1914       if (vec_compute_type
1915             && VECTOR_MODE_P (TYPE_MODE (vec_compute_type)))
1916           {
1917             unsigned HOST_WIDE_INT nelts
1918               = constant_lower_bound (TYPE_VECTOR_SUBPARTS (vec_compute_type));
1919             while (nelts > 1)
1920               {
1921                 tree ret1_type = build_vector_type (TREE_TYPE (ret_type), nelts);
1922                 tree arg1_type = build_vector_type (TREE_TYPE (arg_type), nelts);
1923                 if (supportable_convert_operation (code, ret1_type, arg1_type,
1924                                                              &code1))
1925                     {
1926                       new_rhs = expand_vector_piecewise (gsi, do_vec_conversion,
1927                                                                  ret_type, arg1_type, arg,
1928                                                                  NULL_TREE, code1, false);
1929                       g = gimple_build_assign (lhs, new_rhs);
1930                       gsi_replace (gsi, g, false);
1931                       return;
1932                     }
1933                 nelts = nelts / 2;
1934               }
1935           }
1936     }
1937   else if (modifier == NARROW)
1938     {
1939       switch (code)
1940           {
1941           CASE_CONVERT:
1942             code1 = VEC_PACK_TRUNC_EXPR;
1943             optab1 = optab_for_tree_code (code1, arg_type, optab_default);
1944             break;
1945           case FIX_TRUNC_EXPR:
1946             code1 = VEC_PACK_FIX_TRUNC_EXPR;
1947             /* The signedness is determined from output operand.  */
1948             optab1 = optab_for_tree_code (code1, ret_type, optab_default);
1949             break;
1950           case FLOAT_EXPR:
1951             code1 = VEC_PACK_FLOAT_EXPR;
1952             optab1 = optab_for_tree_code (code1, arg_type, optab_default);
1953             break;
1954           default:
1955             gcc_unreachable ();
1956           }
1957 
1958       if (optab1)
1959           compute_type = get_compute_type (code1, optab1, arg_type);
1960       enum insn_code icode1;
1961       if (VECTOR_TYPE_P (compute_type)
1962             && ((icode1 = optab_handler (optab1, TYPE_MODE (compute_type)))
1963                 != CODE_FOR_nothing)
1964             && VECTOR_MODE_P (insn_data[icode1].operand[0].mode))
1965           {
1966             tree cretd_type
1967               = build_vector_type (TREE_TYPE (ret_type),
1968                                          TYPE_VECTOR_SUBPARTS (compute_type) * 2);
1969             if (insn_data[icode1].operand[0].mode == TYPE_MODE (cretd_type))
1970               {
1971                 if (compute_type == arg_type)
1972                     {
1973                       new_rhs = gimplify_build2 (gsi, code1, cretd_type,
1974                                                        arg, build_zero_cst (arg_type));
1975                       new_rhs = tree_vec_extract (gsi, ret_type, new_rhs,
1976                                                         TYPE_SIZE (ret_type),
1977                                                         bitsize_int (0));
1978                       g = gimple_build_assign (lhs, new_rhs);
1979                       gsi_replace (gsi, g, false);
1980                       return;
1981                     }
1982                 tree dcompute_type
1983                     = build_vector_type (TREE_TYPE (compute_type),
1984                                              TYPE_VECTOR_SUBPARTS (compute_type) * 2);
1985                 if (TYPE_MAIN_VARIANT (dcompute_type)
1986                       == TYPE_MAIN_VARIANT (arg_type))
1987                     new_rhs = do_vec_narrow_conversion (gsi, dcompute_type, arg,
1988                                                                 NULL_TREE, bitsize_int (0),
1989                                                                 NULL_TREE, code1,
1990                                                                 ret_type);
1991                 else
1992                     new_rhs = expand_vector_piecewise (gsi,
1993                                                                do_vec_narrow_conversion,
1994                                                                arg_type, dcompute_type,
1995                                                                arg, NULL_TREE, code1,
1996                                                                false, ret_type);
1997                 g = gimple_build_assign (lhs, new_rhs);
1998                 gsi_replace (gsi, g, false);
1999                 return;
2000               }
2001           }
2002     }
2003   else if (modifier == WIDEN)
2004     {
2005       enum tree_code code2 = ERROR_MARK;
2006       optab optab2 = unknown_optab;
2007       switch (code)
2008           {
2009           CASE_CONVERT:
2010             code1 = VEC_UNPACK_LO_EXPR;
2011           code2 = VEC_UNPACK_HI_EXPR;
2012             break;
2013           case FIX_TRUNC_EXPR:
2014             code1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
2015             code2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
2016             break;
2017           case FLOAT_EXPR:
2018             code1 = VEC_UNPACK_FLOAT_LO_EXPR;
2019             code2 = VEC_UNPACK_FLOAT_HI_EXPR;
2020             break;
2021           default:
2022             gcc_unreachable ();
2023           }
2024       if (BYTES_BIG_ENDIAN)
2025           std::swap (code1, code2);
2026 
2027       if (code == FIX_TRUNC_EXPR)
2028           {
2029             /* The signedness is determined from output operand.  */
2030             optab1 = optab_for_tree_code (code1, ret_type, optab_default);
2031             optab2 = optab_for_tree_code (code2, ret_type, optab_default);
2032           }
2033       else
2034           {
2035             optab1 = optab_for_tree_code (code1, arg_type, optab_default);
2036             optab2 = optab_for_tree_code (code2, arg_type, optab_default);
2037           }
2038 
2039       if (optab1 && optab2)
2040           compute_type = get_compute_type (code1, optab1, arg_type);
2041 
2042       enum insn_code icode1, icode2;
2043       if (VECTOR_TYPE_P (compute_type)
2044             && ((icode1 = optab_handler (optab1, TYPE_MODE (compute_type)))
2045                 != CODE_FOR_nothing)
2046             && ((icode2 = optab_handler (optab2, TYPE_MODE (compute_type)))
2047                 != CODE_FOR_nothing)
2048             && VECTOR_MODE_P (insn_data[icode1].operand[0].mode)
2049             && (insn_data[icode1].operand[0].mode
2050                 == insn_data[icode2].operand[0].mode))
2051           {
2052             poly_uint64 nunits
2053               = exact_div (TYPE_VECTOR_SUBPARTS (compute_type), 2);
2054             tree cretd_type = build_vector_type (TREE_TYPE (ret_type), nunits);
2055             if (insn_data[icode1].operand[0].mode == TYPE_MODE (cretd_type))
2056               {
2057                 vec<constructor_elt, va_gc> *v;
2058                 tree part_width = TYPE_SIZE (compute_type);
2059                 tree index = bitsize_int (0);
2060                 int nunits = nunits_for_known_piecewise_op (arg_type);
2061                 int delta = tree_to_uhwi (part_width) / arg_elt_bits;
2062                 int i;
2063                 location_t loc = gimple_location (gsi_stmt (*gsi));
2064 
2065                 if (compute_type != arg_type)
2066                     {
2067                       if (!warning_suppressed_p (gsi_stmt (*gsi),
2068                                                        OPT_Wvector_operation_performance))
2069                         warning_at (loc, OPT_Wvector_operation_performance,
2070                                         "vector operation will be expanded piecewise");
2071                     }
2072                 else
2073                     {
2074                       nunits = 1;
2075                       delta = 1;
2076                     }
2077 
2078                 vec_alloc (v, (nunits + delta - 1) / delta * 2);
2079                 bool constant_p = true;
2080                 for (i = 0; i < nunits;
2081                        i += delta, index = int_const_binop (PLUS_EXPR, index,
2082                                                                       part_width))
2083                     {
2084                       tree a = arg;
2085                       if (compute_type != arg_type)
2086                         a = tree_vec_extract (gsi, compute_type, a, part_width,
2087                                                     index);
2088                       tree result = gimplify_build1 (gsi, code1, cretd_type, a);
2089                       constructor_elt ce = { NULL_TREE, result };
2090                       if (!CONSTANT_CLASS_P (ce.value))
2091                         constant_p = false;
2092                       v->quick_push (ce);
2093                       ce.value = gimplify_build1 (gsi, code2, cretd_type, a);
2094                       if (!CONSTANT_CLASS_P (ce.value))
2095                         constant_p = false;
2096                       v->quick_push (ce);
2097                     }
2098 
2099                 if (constant_p)
2100                     new_rhs = build_vector_from_ctor (ret_type, v);
2101                 else
2102                     new_rhs = build_constructor (ret_type, v);
2103                 g = gimple_build_assign (lhs, new_rhs);
2104                 gsi_replace (gsi, g, false);
2105                 return;
2106               }
2107           }
2108     }
2109 
2110   new_rhs = expand_vector_piecewise (gsi, do_vec_conversion, arg_type,
2111                                              TREE_TYPE (arg_type), arg,
2112                                              NULL_TREE, code, false, ret_type);
2113   g = gimple_build_assign (lhs, new_rhs);
2114   gsi_replace (gsi, g, false);
2115 }
2116 
2117 /* Process one statement.  If we identify a vector operation, expand it.  */
2118 
2119 static void
expand_vector_operations_1(gimple_stmt_iterator * gsi,bitmap dce_ssa_names)2120 expand_vector_operations_1 (gimple_stmt_iterator *gsi,
2121                                   bitmap dce_ssa_names)
2122 {
2123   tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
2124   enum tree_code code;
2125   optab op = unknown_optab;
2126   enum gimple_rhs_class rhs_class;
2127   tree new_rhs;
2128 
2129   /* Only consider code == GIMPLE_ASSIGN. */
2130   gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
2131   if (!stmt)
2132     {
2133       if (gimple_call_internal_p (gsi_stmt (*gsi), IFN_VEC_CONVERT))
2134           expand_vector_conversion (gsi);
2135       return;
2136     }
2137 
2138   code = gimple_assign_rhs_code (stmt);
2139   rhs_class = get_gimple_rhs_class (code);
2140   lhs = gimple_assign_lhs (stmt);
2141 
2142   if (code == VEC_PERM_EXPR)
2143     {
2144       lower_vec_perm (gsi);
2145       return;
2146     }
2147 
2148   if (code == VEC_COND_EXPR)
2149     {
2150       expand_vector_condition (gsi, dce_ssa_names);
2151       return;
2152     }
2153 
2154   if (code == COND_EXPR
2155       && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
2156       && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
2157     {
2158       expand_vector_scalar_condition (gsi);
2159       return;
2160     }
2161 
2162   if (code == CONSTRUCTOR
2163       && TREE_CODE (lhs) == SSA_NAME
2164       && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
2165       && !gimple_clobber_p (stmt)
2166       && optimize)
2167     {
2168       optimize_vector_constructor (gsi);
2169       return;
2170     }
2171 
2172   if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
2173     return;
2174 
2175   rhs1 = gimple_assign_rhs1 (stmt);
2176   if (rhs_class == GIMPLE_BINARY_RHS)
2177     rhs2 = gimple_assign_rhs2 (stmt);
2178 
2179   type = TREE_TYPE (lhs);
2180   if (!VECTOR_TYPE_P (type)
2181       || !VECTOR_TYPE_P (TREE_TYPE (rhs1)))
2182     return;
2183 
2184   /* A scalar operation pretending to be a vector one.  */
2185   if (VECTOR_BOOLEAN_TYPE_P (type)
2186       && !VECTOR_MODE_P (TYPE_MODE (type))
2187       && TYPE_MODE (type) != BLKmode
2188       && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) != tcc_comparison
2189             || (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1))
2190                 && !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs1)))
2191                 && TYPE_MODE (TREE_TYPE (rhs1)) != BLKmode)))
2192     return;
2193 
2194   /* If the vector operation is operating on all same vector elements
2195      implement it with a scalar operation and a splat if the target
2196      supports the scalar operation.  */
2197   tree srhs1, srhs2 = NULL_TREE;
2198   if ((srhs1 = ssa_uniform_vector_p (rhs1)) != NULL_TREE
2199       && (rhs2 == NULL_TREE
2200             || (! VECTOR_TYPE_P (TREE_TYPE (rhs2))
2201                 && (srhs2 = rhs2))
2202             || (srhs2 = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
2203       /* As we query direct optabs restrict to non-convert operations.  */
2204       && TYPE_MODE (TREE_TYPE (type)) == TYPE_MODE (TREE_TYPE (srhs1)))
2205     {
2206       op = optab_for_tree_code (code, TREE_TYPE (type), optab_scalar);
2207       if (op >= FIRST_NORM_OPTAB && op <= LAST_NORM_OPTAB
2208             && optab_handler (op, TYPE_MODE (TREE_TYPE (type))) != CODE_FOR_nothing)
2209           {
2210             tree stype = TREE_TYPE (TREE_TYPE (lhs));
2211             tree slhs = (rhs2 != NULL_TREE)
2212                           ? gimplify_build2 (gsi, code, stype, srhs1, srhs2)
2213                           : gimplify_build1 (gsi, code, stype, srhs1);
2214             gimple_assign_set_rhs_from_tree (gsi,
2215                                                      build_vector_from_val (type, slhs));
2216             update_stmt (stmt);
2217             return;
2218           }
2219     }
2220 
2221   if (CONVERT_EXPR_CODE_P (code)
2222       || code == FLOAT_EXPR
2223       || code == FIX_TRUNC_EXPR
2224       || code == VIEW_CONVERT_EXPR)
2225     return;
2226 
2227   /* The signedness is determined from input argument.  */
2228   if (code == VEC_UNPACK_FLOAT_HI_EXPR
2229       || code == VEC_UNPACK_FLOAT_LO_EXPR
2230       || code == VEC_PACK_FLOAT_EXPR)
2231     {
2232       /* We do not know how to scalarize those.  */
2233       return;
2234     }
2235 
2236   /* For widening/narrowing vector operations, the relevant type is of the
2237      arguments, not the widened result.  VEC_UNPACK_FLOAT_*_EXPR is
2238      calculated in the same way above.  */
2239   if (code == WIDEN_SUM_EXPR
2240       || code == VEC_WIDEN_PLUS_HI_EXPR
2241       || code == VEC_WIDEN_PLUS_LO_EXPR
2242       || code == VEC_WIDEN_MINUS_HI_EXPR
2243       || code == VEC_WIDEN_MINUS_LO_EXPR
2244       || code == VEC_WIDEN_MULT_HI_EXPR
2245       || code == VEC_WIDEN_MULT_LO_EXPR
2246       || code == VEC_WIDEN_MULT_EVEN_EXPR
2247       || code == VEC_WIDEN_MULT_ODD_EXPR
2248       || code == VEC_UNPACK_HI_EXPR
2249       || code == VEC_UNPACK_LO_EXPR
2250       || code == VEC_UNPACK_FIX_TRUNC_HI_EXPR
2251       || code == VEC_UNPACK_FIX_TRUNC_LO_EXPR
2252       || code == VEC_PACK_TRUNC_EXPR
2253       || code == VEC_PACK_SAT_EXPR
2254       || code == VEC_PACK_FIX_TRUNC_EXPR
2255       || code == VEC_WIDEN_LSHIFT_HI_EXPR
2256       || code == VEC_WIDEN_LSHIFT_LO_EXPR)
2257     {
2258       /* We do not know how to scalarize those.  */
2259       return;
2260     }
2261 
2262   /* Choose between vector shift/rotate by vector and vector shift/rotate by
2263      scalar */
2264   if (code == LSHIFT_EXPR
2265       || code == RSHIFT_EXPR
2266       || code == LROTATE_EXPR
2267       || code == RROTATE_EXPR)
2268     {
2269       optab opv;
2270 
2271       /* Check whether we have vector <op> {x,x,x,x} where x
2272          could be a scalar variable or a constant.  Transform
2273          vector <op> {x,x,x,x} ==> vector <op> scalar.  */
2274       if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
2275         {
2276           tree first;
2277 
2278           if ((first = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
2279             {
2280               gimple_assign_set_rhs2 (stmt, first);
2281               update_stmt (stmt);
2282               rhs2 = first;
2283             }
2284         }
2285 
2286       opv = optab_for_tree_code (code, type, optab_vector);
2287       if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
2288           op = opv;
2289       else
2290           {
2291           op = optab_for_tree_code (code, type, optab_scalar);
2292 
2293             compute_type = get_compute_type (code, op, type);
2294             if (compute_type == type)
2295               return;
2296             /* The rtl expander will expand vector/scalar as vector/vector
2297                if necessary.  Pick one with wider vector type.  */
2298             tree compute_vtype = get_compute_type (code, opv, type);
2299             if (subparts_gt (compute_vtype, compute_type))
2300               {
2301                 compute_type = compute_vtype;
2302                 op = opv;
2303               }
2304           }
2305 
2306       if (code == LROTATE_EXPR || code == RROTATE_EXPR)
2307           {
2308             if (compute_type == NULL_TREE)
2309               compute_type = get_compute_type (code, op, type);
2310             if (compute_type == type)
2311               return;
2312             /* Before splitting vector rotates into scalar rotates,
2313                see if we can't use vector shifts and BIT_IOR_EXPR
2314                instead.  For vector by vector rotates we'd also
2315                need to check BIT_AND_EXPR and NEGATE_EXPR, punt there
2316                for now, fold doesn't seem to create such rotates anyway.  */
2317             if (compute_type == TREE_TYPE (type)
2318                 && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
2319               {
2320                 optab oplv = vashl_optab, opl = ashl_optab;
2321                 optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
2322                 tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
2323                 tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
2324                 tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
2325                 tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
2326                 tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
2327                 /* The rtl expander will expand vector/scalar as vector/vector
2328                      if necessary.  Pick one with wider vector type.  */
2329                 if (subparts_gt (compute_lvtype, compute_ltype))
2330                     {
2331                       compute_ltype = compute_lvtype;
2332                       opl = oplv;
2333                     }
2334                 if (subparts_gt (compute_rvtype, compute_rtype))
2335                     {
2336                       compute_rtype = compute_rvtype;
2337                       opr = oprv;
2338                     }
2339                 /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
2340                      BIT_IOR_EXPR.  */
2341                 compute_type = compute_ltype;
2342                 if (subparts_gt (compute_type, compute_rtype))
2343                     compute_type = compute_rtype;
2344                 if (subparts_gt (compute_type, compute_otype))
2345                     compute_type = compute_otype;
2346                 /* Verify all 3 operations can be performed in that type.  */
2347                 if (compute_type != TREE_TYPE (type))
2348                     {
2349                       if (optab_handler (opl, TYPE_MODE (compute_type))
2350                           == CODE_FOR_nothing
2351                           || optab_handler (opr, TYPE_MODE (compute_type))
2352                                == CODE_FOR_nothing
2353                           || optab_handler (opo, TYPE_MODE (compute_type))
2354                                == CODE_FOR_nothing)
2355                         compute_type = TREE_TYPE (type);
2356                     }
2357               }
2358           }
2359     }
2360   else
2361     op = optab_for_tree_code (code, type, optab_default);
2362 
2363   /* Optabs will try converting a negation into a subtraction, so
2364      look for it as well.  TODO: negation of floating-point vectors
2365      might be turned into an exclusive OR toggling the sign bit.  */
2366   if (op == unknown_optab
2367       && code == NEGATE_EXPR
2368       && INTEGRAL_TYPE_P (TREE_TYPE (type)))
2369     op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
2370 
2371   if (compute_type == NULL_TREE)
2372     compute_type = get_compute_type (code, op, type);
2373   if (compute_type == type)
2374     return;
2375 
2376   new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code,
2377                                              dce_ssa_names);
2378 
2379   /* Leave expression untouched for later expansion.  */
2380   if (new_rhs == NULL_TREE)
2381     return;
2382 
2383   if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
2384     new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
2385                                new_rhs);
2386 
2387   /* NOTE:  We should avoid using gimple_assign_set_rhs_from_tree. One
2388      way to do it is change expand_vector_operation and its callees to
2389      return a tree_code, RHS1 and RHS2 instead of a tree. */
2390   gimple_assign_set_rhs_from_tree (gsi, new_rhs);
2391   update_stmt (gsi_stmt (*gsi));
2392 }
2393 
2394 /* Use this to lower vector operations introduced by the vectorizer,
2395    if it may need the bit-twiddling tricks implemented in this file.  */
2396 
2397 static unsigned int
expand_vector_operations(void)2398 expand_vector_operations (void)
2399 {
2400   gimple_stmt_iterator gsi;
2401   basic_block bb;
2402   bool cfg_changed = false;
2403 
2404   auto_bitmap dce_ssa_names;
2405 
2406   FOR_EACH_BB_FN (bb, cfun)
2407     {
2408       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2409           {
2410             expand_vector_operations_1 (&gsi, dce_ssa_names);
2411             /* ???  If we do not cleanup EH then we will ICE in
2412                verification.  But in reality we have created wrong-code
2413                as we did not properly transition EH info and edges to
2414                the piecewise computations.  */
2415             if (maybe_clean_eh_stmt (gsi_stmt (gsi))
2416                 && gimple_purge_dead_eh_edges (bb))
2417               cfg_changed = true;
2418           }
2419     }
2420 
2421   simple_dce_from_worklist (dce_ssa_names);
2422 
2423   return cfg_changed ? TODO_cleanup_cfg : 0;
2424 }
2425 
2426 namespace {
2427 
2428 const pass_data pass_data_lower_vector =
2429 {
2430   GIMPLE_PASS, /* type */
2431   "veclower", /* name */
2432   OPTGROUP_VEC, /* optinfo_flags */
2433   TV_NONE, /* tv_id */
2434   PROP_cfg, /* properties_required */
2435   PROP_gimple_lvec, /* properties_provided */
2436   0, /* properties_destroyed */
2437   0, /* todo_flags_start */
2438   TODO_update_ssa, /* todo_flags_finish */
2439 };
2440 
2441 class pass_lower_vector : public gimple_opt_pass
2442 {
2443 public:
pass_lower_vector(gcc::context * ctxt)2444   pass_lower_vector (gcc::context *ctxt)
2445     : gimple_opt_pass (pass_data_lower_vector, ctxt)
2446   {}
2447 
2448   /* opt_pass methods: */
gate(function * fun)2449   virtual bool gate (function *fun)
2450     {
2451       return !(fun->curr_properties & PROP_gimple_lvec);
2452     }
2453 
execute(function *)2454   virtual unsigned int execute (function *)
2455     {
2456       return expand_vector_operations ();
2457     }
2458 
2459 }; // class pass_lower_vector
2460 
2461 } // anon namespace
2462 
2463 gimple_opt_pass *
make_pass_lower_vector(gcc::context * ctxt)2464 make_pass_lower_vector (gcc::context *ctxt)
2465 {
2466   return new pass_lower_vector (ctxt);
2467 }
2468 
2469 namespace {
2470 
2471 const pass_data pass_data_lower_vector_ssa =
2472 {
2473   GIMPLE_PASS, /* type */
2474   "veclower2", /* name */
2475   OPTGROUP_VEC, /* optinfo_flags */
2476   TV_NONE, /* tv_id */
2477   PROP_cfg, /* properties_required */
2478   PROP_gimple_lvec, /* properties_provided */
2479   0, /* properties_destroyed */
2480   0, /* todo_flags_start */
2481   ( TODO_update_ssa
2482     | TODO_cleanup_cfg ), /* todo_flags_finish */
2483 };
2484 
2485 class pass_lower_vector_ssa : public gimple_opt_pass
2486 {
2487 public:
pass_lower_vector_ssa(gcc::context * ctxt)2488   pass_lower_vector_ssa (gcc::context *ctxt)
2489     : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
2490   {}
2491 
2492   /* opt_pass methods: */
clone()2493   opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
execute(function *)2494   virtual unsigned int execute (function *)
2495     {
2496       return expand_vector_operations ();
2497     }
2498 
2499 }; // class pass_lower_vector_ssa
2500 
2501 } // anon namespace
2502 
2503 gimple_opt_pass *
make_pass_lower_vector_ssa(gcc::context * ctxt)2504 make_pass_lower_vector_ssa (gcc::context *ctxt)
2505 {
2506   return new pass_lower_vector_ssa (ctxt);
2507 }
2508 
2509 #include "gt-tree-vect-generic.h"
2510