1 /* Statement Analysis and Transformation for Vectorization
2    Copyright (C) 2003-2022 Free Software Foundation, Inc.
3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
4    and Ira Rosen <irar@il.ibm.com>
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12 
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h"            /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
58 
59 /* For lang_hooks.types.type_for_mode.  */
60 #include "langhooks.h"
61 
62 /* Return the vectorized type for the given statement.  */
63 
64 tree
stmt_vectype(class _stmt_vec_info * stmt_info)65 stmt_vectype (class _stmt_vec_info *stmt_info)
66 {
67   return STMT_VINFO_VECTYPE (stmt_info);
68 }
69 
70 /* Return TRUE iff the given statement is in an inner loop relative to
71    the loop being vectorized.  */
72 bool
stmt_in_inner_loop_p(vec_info * vinfo,class _stmt_vec_info * stmt_info)73 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
74 {
75   gimple *stmt = STMT_VINFO_STMT (stmt_info);
76   basic_block bb = gimple_bb (stmt);
77   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
78   class loop* loop;
79 
80   if (!loop_vinfo)
81     return false;
82 
83   loop = LOOP_VINFO_LOOP (loop_vinfo);
84 
85   return (bb->loop_father == loop->inner);
86 }
87 
88 /* Record the cost of a statement, either by directly informing the
89    target model or by saving it in a vector for later processing.
90    Return a preliminary estimate of the statement's cost.  */
91 
92 static unsigned
record_stmt_cost(stmt_vector_for_cost * body_cost_vec,int count,enum vect_cost_for_stmt kind,stmt_vec_info stmt_info,slp_tree node,tree vectype,int misalign,enum vect_cost_model_location where)93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94                       enum vect_cost_for_stmt kind,
95                       stmt_vec_info stmt_info, slp_tree node,
96                       tree vectype, int misalign,
97                       enum vect_cost_model_location where)
98 {
99   if ((kind == vector_load || kind == unaligned_load)
100       && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
101     kind = vector_gather_load;
102   if ((kind == vector_store || kind == unaligned_store)
103       && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
104     kind = vector_scatter_store;
105 
106   stmt_info_for_cost si
107     = { count, kind, where, stmt_info, node, vectype, misalign };
108   body_cost_vec->safe_push (si);
109 
110   return (unsigned)
111       (builtin_vectorization_cost (kind, vectype, misalign) * count);
112 }
113 
114 unsigned
record_stmt_cost(stmt_vector_for_cost * body_cost_vec,int count,enum vect_cost_for_stmt kind,stmt_vec_info stmt_info,tree vectype,int misalign,enum vect_cost_model_location where)115 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
116                       enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
117                       tree vectype, int misalign,
118                       enum vect_cost_model_location where)
119 {
120   return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
121                                  vectype, misalign, where);
122 }
123 
124 unsigned
record_stmt_cost(stmt_vector_for_cost * body_cost_vec,int count,enum vect_cost_for_stmt kind,slp_tree node,tree vectype,int misalign,enum vect_cost_model_location where)125 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
126                       enum vect_cost_for_stmt kind, slp_tree node,
127                       tree vectype, int misalign,
128                       enum vect_cost_model_location where)
129 {
130   return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
131                                  vectype, misalign, where);
132 }
133 
134 unsigned
record_stmt_cost(stmt_vector_for_cost * body_cost_vec,int count,enum vect_cost_for_stmt kind,enum vect_cost_model_location where)135 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
136                       enum vect_cost_for_stmt kind,
137                       enum vect_cost_model_location where)
138 {
139   gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
140                 || kind == scalar_stmt);
141   return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
142                                  NULL_TREE, 0, where);
143 }
144 
145 /* Return a variable of type ELEM_TYPE[NELEMS].  */
146 
147 static tree
create_vector_array(tree elem_type,unsigned HOST_WIDE_INT nelems)148 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
149 {
150   return create_tmp_var (build_array_type_nelts (elem_type, nelems),
151                                "vect_array");
152 }
153 
154 /* ARRAY is an array of vectors created by create_vector_array.
155    Return an SSA_NAME for the vector in index N.  The reference
156    is part of the vectorization of STMT_INFO and the vector is associated
157    with scalar destination SCALAR_DEST.  */
158 
159 static tree
read_vector_array(vec_info * vinfo,stmt_vec_info stmt_info,gimple_stmt_iterator * gsi,tree scalar_dest,tree array,unsigned HOST_WIDE_INT n)160 read_vector_array (vec_info *vinfo,
161                        stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
162                        tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
163 {
164   tree vect_type, vect, vect_name, array_ref;
165   gimple *new_stmt;
166 
167   gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
168   vect_type = TREE_TYPE (TREE_TYPE (array));
169   vect = vect_create_destination_var (scalar_dest, vect_type);
170   array_ref = build4 (ARRAY_REF, vect_type, array,
171                           build_int_cst (size_type_node, n),
172                           NULL_TREE, NULL_TREE);
173 
174   new_stmt = gimple_build_assign (vect, array_ref);
175   vect_name = make_ssa_name (vect, new_stmt);
176   gimple_assign_set_lhs (new_stmt, vect_name);
177   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
178 
179   return vect_name;
180 }
181 
182 /* ARRAY is an array of vectors created by create_vector_array.
183    Emit code to store SSA_NAME VECT in index N of the array.
184    The store is part of the vectorization of STMT_INFO.  */
185 
186 static void
write_vector_array(vec_info * vinfo,stmt_vec_info stmt_info,gimple_stmt_iterator * gsi,tree vect,tree array,unsigned HOST_WIDE_INT n)187 write_vector_array (vec_info *vinfo,
188                         stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
189                         tree vect, tree array, unsigned HOST_WIDE_INT n)
190 {
191   tree array_ref;
192   gimple *new_stmt;
193 
194   array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
195                           build_int_cst (size_type_node, n),
196                           NULL_TREE, NULL_TREE);
197 
198   new_stmt = gimple_build_assign (array_ref, vect);
199   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
200 }
201 
202 /* PTR is a pointer to an array of type TYPE.  Return a representation
203    of *PTR.  The memory reference replaces those in FIRST_DR
204    (and its group).  */
205 
206 static tree
create_array_ref(tree type,tree ptr,tree alias_ptr_type)207 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
208 {
209   tree mem_ref;
210 
211   mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
212   /* Arrays have the same alignment as their type.  */
213   set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
214   return mem_ref;
215 }
216 
217 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
218    Emit the clobber before *GSI.  */
219 
220 static void
vect_clobber_variable(vec_info * vinfo,stmt_vec_info stmt_info,gimple_stmt_iterator * gsi,tree var)221 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
222                            gimple_stmt_iterator *gsi, tree var)
223 {
224   tree clobber = build_clobber (TREE_TYPE (var));
225   gimple *new_stmt = gimple_build_assign (var, clobber);
226   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
227 }
228 
229 /* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
230 
231 /* Function vect_mark_relevant.
232 
233    Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST.  */
234 
235 static void
vect_mark_relevant(vec<stmt_vec_info> * worklist,stmt_vec_info stmt_info,enum vect_relevant relevant,bool live_p)236 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
237                         enum vect_relevant relevant, bool live_p)
238 {
239   enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
240   bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
241 
242   if (dump_enabled_p ())
243     dump_printf_loc (MSG_NOTE, vect_location,
244                          "mark relevant %d, live %d: %G", relevant, live_p,
245                          stmt_info->stmt);
246 
247   /* If this stmt is an original stmt in a pattern, we might need to mark its
248      related pattern stmt instead of the original stmt.  However, such stmts
249      may have their own uses that are not in any pattern, in such cases the
250      stmt itself should be marked.  */
251   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
252     {
253       /* This is the last stmt in a sequence that was detected as a
254            pattern that can potentially be vectorized.  Don't mark the stmt
255            as relevant/live because it's not going to be vectorized.
256            Instead mark the pattern-stmt that replaces it.  */
257 
258       if (dump_enabled_p ())
259           dump_printf_loc (MSG_NOTE, vect_location,
260                                "last stmt in pattern. don't mark"
261                                " relevant/live.\n");
262       stmt_vec_info old_stmt_info = stmt_info;
263       stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
264       gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
265       save_relevant = STMT_VINFO_RELEVANT (stmt_info);
266       save_live_p = STMT_VINFO_LIVE_P (stmt_info);
267     }
268 
269   STMT_VINFO_LIVE_P (stmt_info) |= live_p;
270   if (relevant > STMT_VINFO_RELEVANT (stmt_info))
271     STMT_VINFO_RELEVANT (stmt_info) = relevant;
272 
273   if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
274       && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
275     {
276       if (dump_enabled_p ())
277         dump_printf_loc (MSG_NOTE, vect_location,
278                          "already marked relevant/live.\n");
279       return;
280     }
281 
282   worklist->safe_push (stmt_info);
283 }
284 
285 
286 /* Function is_simple_and_all_uses_invariant
287 
288    Return true if STMT_INFO is simple and all uses of it are invariant.  */
289 
290 bool
is_simple_and_all_uses_invariant(stmt_vec_info stmt_info,loop_vec_info loop_vinfo)291 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
292                                           loop_vec_info loop_vinfo)
293 {
294   tree op;
295   ssa_op_iter iter;
296 
297   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
298   if (!stmt)
299     return false;
300 
301   FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
302     {
303       enum vect_def_type dt = vect_uninitialized_def;
304 
305       if (!vect_is_simple_use (op, loop_vinfo, &dt))
306           {
307             if (dump_enabled_p ())
308               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
309                                    "use not simple.\n");
310             return false;
311           }
312 
313       if (dt != vect_external_def && dt != vect_constant_def)
314           return false;
315     }
316   return true;
317 }
318 
319 /* Function vect_stmt_relevant_p.
320 
321    Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
322    is "relevant for vectorization".
323 
324    A stmt is considered "relevant for vectorization" if:
325    - it has uses outside the loop.
326    - it has vdefs (it alters memory).
327    - control stmts in the loop (except for the exit condition).
328 
329    CHECKME: what other side effects would the vectorizer allow?  */
330 
331 static bool
vect_stmt_relevant_p(stmt_vec_info stmt_info,loop_vec_info loop_vinfo,enum vect_relevant * relevant,bool * live_p)332 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
333                           enum vect_relevant *relevant, bool *live_p)
334 {
335   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
336   ssa_op_iter op_iter;
337   imm_use_iterator imm_iter;
338   use_operand_p use_p;
339   def_operand_p def_p;
340 
341   *relevant = vect_unused_in_scope;
342   *live_p = false;
343 
344   /* cond stmt other than loop exit cond.  */
345   if (is_ctrl_stmt (stmt_info->stmt)
346       && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
347     *relevant = vect_used_in_scope;
348 
349   /* changing memory.  */
350   if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
351     if (gimple_vdef (stmt_info->stmt)
352           && !gimple_clobber_p (stmt_info->stmt))
353       {
354           if (dump_enabled_p ())
355             dump_printf_loc (MSG_NOTE, vect_location,
356                            "vec_stmt_relevant_p: stmt has vdefs.\n");
357           *relevant = vect_used_in_scope;
358       }
359 
360   /* uses outside the loop.  */
361   FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
362     {
363       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
364           {
365             basic_block bb = gimple_bb (USE_STMT (use_p));
366             if (!flow_bb_inside_loop_p (loop, bb))
367               {
368                 if (is_gimple_debug (USE_STMT (use_p)))
369                     continue;
370 
371                 if (dump_enabled_p ())
372                     dump_printf_loc (MSG_NOTE, vect_location,
373                                  "vec_stmt_relevant_p: used out of loop.\n");
374 
375                 /* We expect all such uses to be in the loop exit phis
376                      (because of loop closed form)   */
377                 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
378                 gcc_assert (bb == single_exit (loop)->dest);
379 
380               *live_p = true;
381               }
382           }
383     }
384 
385   if (*live_p && *relevant == vect_unused_in_scope
386       && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
387     {
388       if (dump_enabled_p ())
389           dump_printf_loc (MSG_NOTE, vect_location,
390                                "vec_stmt_relevant_p: stmt live but not relevant.\n");
391       *relevant = vect_used_only_live;
392     }
393 
394   return (*live_p || *relevant);
395 }
396 
397 
398 /* Function exist_non_indexing_operands_for_use_p
399 
400    USE is one of the uses attached to STMT_INFO.  Check if USE is
401    used in STMT_INFO for anything other than indexing an array.  */
402 
403 static bool
exist_non_indexing_operands_for_use_p(tree use,stmt_vec_info stmt_info)404 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
405 {
406   tree operand;
407 
408   /* USE corresponds to some operand in STMT.  If there is no data
409      reference in STMT, then any operand that corresponds to USE
410      is not indexing an array.  */
411   if (!STMT_VINFO_DATA_REF (stmt_info))
412     return true;
413 
414   /* STMT has a data_ref. FORNOW this means that its of one of
415      the following forms:
416      -1- ARRAY_REF = var
417      -2- var = ARRAY_REF
418      (This should have been verified in analyze_data_refs).
419 
420      'var' in the second case corresponds to a def, not a use,
421      so USE cannot correspond to any operands that are not used
422      for array indexing.
423 
424      Therefore, all we need to check is if STMT falls into the
425      first case, and whether var corresponds to USE.  */
426 
427   gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
428   if (!assign || !gimple_assign_copy_p (assign))
429     {
430       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
431       if (call && gimple_call_internal_p (call))
432           {
433             internal_fn ifn = gimple_call_internal_fn (call);
434             int mask_index = internal_fn_mask_index (ifn);
435             if (mask_index >= 0
436                 && use == gimple_call_arg (call, mask_index))
437               return true;
438             int stored_value_index = internal_fn_stored_value_index (ifn);
439             if (stored_value_index >= 0
440                 && use == gimple_call_arg (call, stored_value_index))
441               return true;
442             if (internal_gather_scatter_fn_p (ifn)
443                 && use == gimple_call_arg (call, 1))
444               return true;
445           }
446       return false;
447     }
448 
449   if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
450     return false;
451   operand = gimple_assign_rhs1 (assign);
452   if (TREE_CODE (operand) != SSA_NAME)
453     return false;
454 
455   if (operand == use)
456     return true;
457 
458   return false;
459 }
460 
461 
462 /*
463    Function process_use.
464 
465    Inputs:
466    - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
467    - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
468      that defined USE.  This is done by calling mark_relevant and passing it
469      the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
470    - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
471      be performed.
472 
473    Outputs:
474    Generally, LIVE_P and RELEVANT are used to define the liveness and
475    relevance info of the DEF_STMT of this USE:
476        STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
477        STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
478    Exceptions:
479    - case 1: If USE is used only for address computations (e.g. array indexing),
480    which does not need to be directly vectorized, then the liveness/relevance
481    of the respective DEF_STMT is left unchanged.
482    - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
483    we skip DEF_STMT cause it had already been processed.
484    - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
485    "relevant" will be modified accordingly.
486 
487    Return true if everything is as expected. Return false otherwise.  */
488 
489 static opt_result
process_use(stmt_vec_info stmt_vinfo,tree use,loop_vec_info loop_vinfo,enum vect_relevant relevant,vec<stmt_vec_info> * worklist,bool force)490 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
491                enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
492                bool force)
493 {
494   stmt_vec_info dstmt_vinfo;
495   enum vect_def_type dt;
496 
497   /* case 1: we are only interested in uses that need to be vectorized.  Uses
498      that are used for address computation are not considered relevant.  */
499   if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
500     return opt_result::success ();
501 
502   if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
503     return opt_result::failure_at (stmt_vinfo->stmt,
504                                            "not vectorized:"
505                                            " unsupported use in stmt.\n");
506 
507   if (!dstmt_vinfo)
508     return opt_result::success ();
509 
510   basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
511   basic_block bb = gimple_bb (stmt_vinfo->stmt);
512 
513   /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
514      We have to force the stmt live since the epilogue loop needs it to
515      continue computing the reduction.  */
516   if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
517       && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
518       && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
519       && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
520       && bb->loop_father == def_bb->loop_father)
521     {
522       if (dump_enabled_p ())
523           dump_printf_loc (MSG_NOTE, vect_location,
524                                "reduc-stmt defining reduc-phi in the same nest.\n");
525       vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
526       return opt_result::success ();
527     }
528 
529   /* case 3a: outer-loop stmt defining an inner-loop stmt:
530           outer-loop-header-bb:
531                     d = dstmt_vinfo
532           inner-loop:
533                     stmt # use (d)
534           outer-loop-tail-bb:
535                     ...                   */
536   if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
537     {
538       if (dump_enabled_p ())
539           dump_printf_loc (MSG_NOTE, vect_location,
540                          "outer-loop def-stmt defining inner-loop stmt.\n");
541 
542       switch (relevant)
543           {
544           case vect_unused_in_scope:
545             relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
546                           vect_used_in_scope : vect_unused_in_scope;
547             break;
548 
549           case vect_used_in_outer_by_reduction:
550           gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
551             relevant = vect_used_by_reduction;
552             break;
553 
554           case vect_used_in_outer:
555           gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
556             relevant = vect_used_in_scope;
557             break;
558 
559           case vect_used_in_scope:
560             break;
561 
562           default:
563             gcc_unreachable ();
564           }
565     }
566 
567   /* case 3b: inner-loop stmt defining an outer-loop stmt:
568           outer-loop-header-bb:
569                     ...
570           inner-loop:
571                     d = dstmt_vinfo
572           outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
573                     stmt # use (d)                */
574   else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
575     {
576       if (dump_enabled_p ())
577           dump_printf_loc (MSG_NOTE, vect_location,
578                          "inner-loop def-stmt defining outer-loop stmt.\n");
579 
580       switch (relevant)
581         {
582         case vect_unused_in_scope:
583           relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
584             || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
585                       vect_used_in_outer_by_reduction : vect_unused_in_scope;
586           break;
587 
588         case vect_used_by_reduction:
589           case vect_used_only_live:
590           relevant = vect_used_in_outer_by_reduction;
591           break;
592 
593         case vect_used_in_scope:
594           relevant = vect_used_in_outer;
595           break;
596 
597         default:
598           gcc_unreachable ();
599         }
600     }
601   /* We are also not interested in uses on loop PHI backedges that are
602      inductions.  Otherwise we'll needlessly vectorize the IV increment
603      and cause hybrid SLP for SLP inductions.  Unless the PHI is live
604      of course.  */
605   else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
606              && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
607              && ! STMT_VINFO_LIVE_P (stmt_vinfo)
608              && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
609                                               loop_latch_edge (bb->loop_father))
610                  == use))
611     {
612       if (dump_enabled_p ())
613           dump_printf_loc (MSG_NOTE, vect_location,
614                          "induction value on backedge.\n");
615       return opt_result::success ();
616     }
617 
618 
619   vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
620   return opt_result::success ();
621 }
622 
623 
624 /* Function vect_mark_stmts_to_be_vectorized.
625 
626    Not all stmts in the loop need to be vectorized. For example:
627 
628      for i...
629        for j...
630    1.    T0 = i + j
631    2.      T1 = a[T0]
632 
633    3.    j = j + 1
634 
635    Stmt 1 and 3 do not need to be vectorized, because loop control and
636    addressing of vectorized data-refs are handled differently.
637 
638    This pass detects such stmts.  */
639 
640 opt_result
vect_mark_stmts_to_be_vectorized(loop_vec_info loop_vinfo,bool * fatal)641 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
642 {
643   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
644   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
645   unsigned int nbbs = loop->num_nodes;
646   gimple_stmt_iterator si;
647   unsigned int i;
648   basic_block bb;
649   bool live_p;
650   enum vect_relevant relevant;
651 
652   DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
653 
654   auto_vec<stmt_vec_info, 64> worklist;
655 
656   /* 1. Init worklist.  */
657   for (i = 0; i < nbbs; i++)
658     {
659       bb = bbs[i];
660       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
661           {
662             stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
663             if (dump_enabled_p ())
664               dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
665                                    phi_info->stmt);
666 
667             if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
668               vect_mark_relevant (&worklist, phi_info, relevant, live_p);
669           }
670       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
671           {
672             if (is_gimple_debug (gsi_stmt (si)))
673               continue;
674             stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
675             if (dump_enabled_p ())
676                 dump_printf_loc (MSG_NOTE, vect_location,
677                                      "init: stmt relevant? %G", stmt_info->stmt);
678 
679             if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
680               vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
681           }
682     }
683 
684   /* 2. Process_worklist */
685   while (worklist.length () > 0)
686     {
687       use_operand_p use_p;
688       ssa_op_iter iter;
689 
690       stmt_vec_info stmt_vinfo = worklist.pop ();
691       if (dump_enabled_p ())
692           dump_printf_loc (MSG_NOTE, vect_location,
693                                "worklist: examine stmt: %G", stmt_vinfo->stmt);
694 
695       /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
696            (DEF_STMT) as relevant/irrelevant according to the relevance property
697            of STMT.  */
698       relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
699 
700       /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
701            propagated as is to the DEF_STMTs of its USEs.
702 
703            One exception is when STMT has been identified as defining a reduction
704            variable; in this case we set the relevance to vect_used_by_reduction.
705            This is because we distinguish between two kinds of relevant stmts -
706            those that are used by a reduction computation, and those that are
707            (also) used by a regular computation.  This allows us later on to
708            identify stmts that are used solely by a reduction, and therefore the
709            order of the results that they produce does not have to be kept.  */
710 
711       switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
712         {
713           case vect_reduction_def:
714               gcc_assert (relevant != vect_unused_in_scope);
715               if (relevant != vect_unused_in_scope
716                     && relevant != vect_used_in_scope
717                     && relevant != vect_used_by_reduction
718                     && relevant != vect_used_only_live)
719                 return opt_result::failure_at
720                     (stmt_vinfo->stmt, "unsupported use of reduction.\n");
721               break;
722 
723           case vect_nested_cycle:
724               if (relevant != vect_unused_in_scope
725                     && relevant != vect_used_in_outer_by_reduction
726                     && relevant != vect_used_in_outer)
727                 return opt_result::failure_at
728                     (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
729             break;
730 
731           case vect_double_reduction_def:
732               if (relevant != vect_unused_in_scope
733                     && relevant != vect_used_by_reduction
734                     && relevant != vect_used_only_live)
735                 return opt_result::failure_at
736                     (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
737             break;
738 
739           default:
740             break;
741         }
742 
743       if (is_pattern_stmt_p (stmt_vinfo))
744         {
745           /* Pattern statements are not inserted into the code, so
746              FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
747              have to scan the RHS or function arguments instead.  */
748             if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
749               {
750                 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
751                 tree op = gimple_assign_rhs1 (assign);
752 
753                 i = 1;
754                 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
755                     {
756                       opt_result res
757                         = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
758                                            loop_vinfo, relevant, &worklist, false);
759                       if (!res)
760                         return res;
761                       res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
762                                              loop_vinfo, relevant, &worklist, false);
763                       if (!res)
764                         return res;
765                       i = 2;
766                     }
767                 for (; i < gimple_num_ops (assign); i++)
768                     {
769                       op = gimple_op (assign, i);
770                   if (TREE_CODE (op) == SSA_NAME)
771                         {
772                           opt_result res
773                               = process_use (stmt_vinfo, op, loop_vinfo, relevant,
774                                                &worklist, false);
775                           if (!res)
776                               return res;
777                         }
778                  }
779             }
780             else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
781               {
782                 for (i = 0; i < gimple_call_num_args (call); i++)
783                     {
784                       tree arg = gimple_call_arg (call, i);
785                       opt_result res
786                         = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
787                                            &worklist, false);
788                       if (!res)
789                         return res;
790                     }
791               }
792         }
793       else
794           FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
795           {
796             tree op = USE_FROM_PTR (use_p);
797               opt_result res
798                 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
799                                    &worklist, false);
800               if (!res)
801                 return res;
802           }
803 
804       if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
805           {
806             gather_scatter_info gs_info;
807             if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
808               gcc_unreachable ();
809             opt_result res
810               = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
811                                  &worklist, true);
812             if (!res)
813               {
814                 if (fatal)
815                     *fatal = false;
816                 return res;
817               }
818           }
819     } /* while worklist */
820 
821   return opt_result::success ();
822 }
823 
824 /* Function vect_model_simple_cost.
825 
826    Models cost for simple operations, i.e. those that only emit ncopies of a
827    single op.  Right now, this does not account for multiple insns that could
828    be generated for the single vector op.  We will handle that shortly.  */
829 
830 static void
vect_model_simple_cost(vec_info *,stmt_vec_info stmt_info,int ncopies,enum vect_def_type * dt,int ndts,slp_tree node,stmt_vector_for_cost * cost_vec,vect_cost_for_stmt kind=vector_stmt)831 vect_model_simple_cost (vec_info *,
832                               stmt_vec_info stmt_info, int ncopies,
833                               enum vect_def_type *dt,
834                               int ndts,
835                               slp_tree node,
836                               stmt_vector_for_cost *cost_vec,
837                               vect_cost_for_stmt kind = vector_stmt)
838 {
839   int inside_cost = 0, prologue_cost = 0;
840 
841   gcc_assert (cost_vec != NULL);
842 
843   /* ???  Somehow we need to fix this at the callers.  */
844   if (node)
845     ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
846 
847   if (!node)
848     /* Cost the "broadcast" of a scalar operand in to a vector operand.
849        Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
850        cost model.  */
851     for (int i = 0; i < ndts; i++)
852       if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
853           prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
854                                                      stmt_info, 0, vect_prologue);
855 
856   /* Pass the inside-of-loop statements to the target-specific cost model.  */
857   inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
858                                            stmt_info, 0, vect_body);
859 
860   if (dump_enabled_p ())
861     dump_printf_loc (MSG_NOTE, vect_location,
862                      "vect_model_simple_cost: inside_cost = %d, "
863                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
864 }
865 
866 
867 /* Model cost for type demotion and promotion operations.  PWR is
868    normally zero for single-step promotions and demotions.  It will be
869    one if two-step promotion/demotion is required, and so on.  NCOPIES
870    is the number of vector results (and thus number of instructions)
871    for the narrowest end of the operation chain.  Each additional
872    step doubles the number of instructions required.  If WIDEN_ARITH
873    is true the stmt is doing widening arithmetic.  */
874 
875 static void
vect_model_promotion_demotion_cost(stmt_vec_info stmt_info,enum vect_def_type * dt,unsigned int ncopies,int pwr,stmt_vector_for_cost * cost_vec,bool widen_arith)876 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
877                                             enum vect_def_type *dt,
878                                             unsigned int ncopies, int pwr,
879                                             stmt_vector_for_cost *cost_vec,
880                                             bool widen_arith)
881 {
882   int i;
883   int inside_cost = 0, prologue_cost = 0;
884 
885   for (i = 0; i < pwr + 1; i++)
886     {
887       inside_cost += record_stmt_cost (cost_vec, ncopies,
888                                                widen_arith
889                                                ? vector_stmt : vec_promote_demote,
890                                                stmt_info, 0, vect_body);
891       ncopies *= 2;
892     }
893 
894   /* FORNOW: Assuming maximum 2 args per stmts.  */
895   for (i = 0; i < 2; i++)
896     if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
897       prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
898                                                    stmt_info, 0, vect_prologue);
899 
900   if (dump_enabled_p ())
901     dump_printf_loc (MSG_NOTE, vect_location,
902                      "vect_model_promotion_demotion_cost: inside_cost = %d, "
903                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
904 }
905 
906 /* Returns true if the current function returns DECL.  */
907 
908 static bool
cfun_returns(tree decl)909 cfun_returns (tree decl)
910 {
911   edge_iterator ei;
912   edge e;
913   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
914     {
915       greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
916       if (!ret)
917           continue;
918       if (gimple_return_retval (ret) == decl)
919           return true;
920       /* We often end up with an aggregate copy to the result decl,
921          handle that case as well.  First skip intermediate clobbers
922            though.  */
923       gimple *def = ret;
924       do
925           {
926             def = SSA_NAME_DEF_STMT (gimple_vuse (def));
927           }
928       while (gimple_clobber_p (def));
929       if (is_a <gassign *> (def)
930             && gimple_assign_lhs (def) == gimple_return_retval (ret)
931             && gimple_assign_rhs1 (def) == decl)
932           return true;
933     }
934   return false;
935 }
936 
937 /* Function vect_model_store_cost
938 
939    Models cost for stores.  In the case of grouped accesses, one access
940    has the overhead of the grouped access attributed to it.  */
941 
942 static void
vect_model_store_cost(vec_info * vinfo,stmt_vec_info stmt_info,int ncopies,vect_memory_access_type memory_access_type,dr_alignment_support alignment_support_scheme,int misalignment,vec_load_store_type vls_type,slp_tree slp_node,stmt_vector_for_cost * cost_vec)943 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
944                            vect_memory_access_type memory_access_type,
945                            dr_alignment_support alignment_support_scheme,
946                            int misalignment,
947                            vec_load_store_type vls_type, slp_tree slp_node,
948                            stmt_vector_for_cost *cost_vec)
949 {
950   unsigned int inside_cost = 0, prologue_cost = 0;
951   stmt_vec_info first_stmt_info = stmt_info;
952   bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
953 
954   /* ???  Somehow we need to fix this at the callers.  */
955   if (slp_node)
956     ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
957 
958   if (vls_type == VLS_STORE_INVARIANT)
959     {
960       if (!slp_node)
961           prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
962                                                      stmt_info, 0, vect_prologue);
963     }
964 
965   /* Grouped stores update all elements in the group at once,
966      so we want the DR for the first statement.  */
967   if (!slp_node && grouped_access_p)
968     first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
969 
970   /* True if we should include any once-per-group costs as well as
971      the cost of the statement itself.  For SLP we only get called
972      once per group anyhow.  */
973   bool first_stmt_p = (first_stmt_info == stmt_info);
974 
975   /* We assume that the cost of a single store-lanes instruction is
976      equivalent to the cost of DR_GROUP_SIZE separate stores.  If a grouped
977      access is instead being provided by a permute-and-store operation,
978      include the cost of the permutes.  */
979   if (first_stmt_p
980       && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
981     {
982       /* Uses a high and low interleave or shuffle operations for each
983            needed permute.  */
984       int group_size = DR_GROUP_SIZE (first_stmt_info);
985       int nstmts = ncopies * ceil_log2 (group_size) * group_size;
986       inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
987                                               stmt_info, 0, vect_body);
988 
989       if (dump_enabled_p ())
990         dump_printf_loc (MSG_NOTE, vect_location,
991                          "vect_model_store_cost: strided group_size = %d .\n",
992                          group_size);
993     }
994 
995   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
996   /* Costs of the stores.  */
997   if (memory_access_type == VMAT_ELEMENTWISE
998       || memory_access_type == VMAT_GATHER_SCATTER)
999     {
1000       /* N scalar stores plus extracting the elements.  */
1001       unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1002       inside_cost += record_stmt_cost (cost_vec,
1003                                                ncopies * assumed_nunits,
1004                                                scalar_store, stmt_info, 0, vect_body);
1005     }
1006   else
1007     vect_get_store_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
1008                                misalignment, &inside_cost, cost_vec);
1009 
1010   if (memory_access_type == VMAT_ELEMENTWISE
1011       || memory_access_type == VMAT_STRIDED_SLP)
1012     {
1013       /* N scalar stores plus extracting the elements.  */
1014       unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1015       inside_cost += record_stmt_cost (cost_vec,
1016                                                ncopies * assumed_nunits,
1017                                                vec_to_scalar, stmt_info, 0, vect_body);
1018     }
1019 
1020   /* When vectorizing a store into the function result assign
1021      a penalty if the function returns in a multi-register location.
1022      In this case we assume we'll end up with having to spill the
1023      vector result and do piecewise loads as a conservative estimate.  */
1024   tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
1025   if (base
1026       && (TREE_CODE (base) == RESULT_DECL
1027             || (DECL_P (base) && cfun_returns (base)))
1028       && !aggregate_value_p (base, cfun->decl))
1029     {
1030       rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
1031       /* ???  Handle PARALLEL in some way.  */
1032       if (REG_P (reg))
1033           {
1034             int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
1035             /* Assume that a single reg-reg move is possible and cheap,
1036                do not account for vector to gp register move cost.  */
1037             if (nregs > 1)
1038               {
1039                 /* Spill.  */
1040                 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1041                                                              vector_store,
1042                                                              stmt_info, 0, vect_epilogue);
1043                 /* Loads.  */
1044                 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1045                                                              scalar_load,
1046                                                              stmt_info, 0, vect_epilogue);
1047               }
1048           }
1049     }
1050 
1051   if (dump_enabled_p ())
1052     dump_printf_loc (MSG_NOTE, vect_location,
1053                      "vect_model_store_cost: inside_cost = %d, "
1054                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
1055 }
1056 
1057 
1058 /* Calculate cost of DR's memory access.  */
1059 void
vect_get_store_cost(vec_info *,stmt_vec_info stmt_info,int ncopies,dr_alignment_support alignment_support_scheme,int misalignment,unsigned int * inside_cost,stmt_vector_for_cost * body_cost_vec)1060 vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1061                          dr_alignment_support alignment_support_scheme,
1062                          int misalignment,
1063                          unsigned int *inside_cost,
1064                          stmt_vector_for_cost *body_cost_vec)
1065 {
1066   switch (alignment_support_scheme)
1067     {
1068     case dr_aligned:
1069       {
1070           *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1071                                                     vector_store, stmt_info, 0,
1072                                                     vect_body);
1073 
1074         if (dump_enabled_p ())
1075           dump_printf_loc (MSG_NOTE, vect_location,
1076                            "vect_model_store_cost: aligned.\n");
1077         break;
1078       }
1079 
1080     case dr_unaligned_supported:
1081       {
1082         /* Here, we assign an additional cost for the unaligned store.  */
1083           *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1084                                                     unaligned_store, stmt_info,
1085                                                     misalignment, vect_body);
1086         if (dump_enabled_p ())
1087           dump_printf_loc (MSG_NOTE, vect_location,
1088                            "vect_model_store_cost: unaligned supported by "
1089                            "hardware.\n");
1090         break;
1091       }
1092 
1093     case dr_unaligned_unsupported:
1094       {
1095         *inside_cost = VECT_MAX_COST;
1096 
1097         if (dump_enabled_p ())
1098           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1099                            "vect_model_store_cost: unsupported access.\n");
1100         break;
1101       }
1102 
1103     default:
1104       gcc_unreachable ();
1105     }
1106 }
1107 
1108 
1109 /* Function vect_model_load_cost
1110 
1111    Models cost for loads.  In the case of grouped accesses, one access has
1112    the overhead of the grouped access attributed to it.  Since unaligned
1113    accesses are supported for loads, we also account for the costs of the
1114    access scheme chosen.  */
1115 
1116 static void
vect_model_load_cost(vec_info * vinfo,stmt_vec_info stmt_info,unsigned ncopies,poly_uint64 vf,vect_memory_access_type memory_access_type,dr_alignment_support alignment_support_scheme,int misalignment,gather_scatter_info * gs_info,slp_tree slp_node,stmt_vector_for_cost * cost_vec)1117 vect_model_load_cost (vec_info *vinfo,
1118                           stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
1119                           vect_memory_access_type memory_access_type,
1120                           dr_alignment_support alignment_support_scheme,
1121                           int misalignment,
1122                           gather_scatter_info *gs_info,
1123                           slp_tree slp_node,
1124                           stmt_vector_for_cost *cost_vec)
1125 {
1126   unsigned int inside_cost = 0, prologue_cost = 0;
1127   bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1128 
1129   gcc_assert (cost_vec);
1130 
1131   /* ???  Somehow we need to fix this at the callers.  */
1132   if (slp_node)
1133     ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1134 
1135   if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1136     {
1137       /* If the load is permuted then the alignment is determined by
1138            the first group element not by the first scalar stmt DR.  */
1139       stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1140       /* Record the cost for the permutation.  */
1141       unsigned n_perms, n_loads;
1142       vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
1143                                             vf, true, &n_perms, &n_loads);
1144       inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1145                                                first_stmt_info, 0, vect_body);
1146 
1147       /* And adjust the number of loads performed.  This handles
1148            redundancies as well as loads that are later dead.  */
1149       ncopies = n_loads;
1150     }
1151 
1152   /* Grouped loads read all elements in the group at once,
1153      so we want the DR for the first statement.  */
1154   stmt_vec_info first_stmt_info = stmt_info;
1155   if (!slp_node && grouped_access_p)
1156     first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1157 
1158   /* True if we should include any once-per-group costs as well as
1159      the cost of the statement itself.  For SLP we only get called
1160      once per group anyhow.  */
1161   bool first_stmt_p = (first_stmt_info == stmt_info);
1162 
1163   /* An IFN_LOAD_LANES will load all its vector results, regardless of which
1164      ones we actually need.  Account for the cost of unused results.  */
1165   if (first_stmt_p && !slp_node && memory_access_type == VMAT_LOAD_STORE_LANES)
1166     {
1167       unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
1168       stmt_vec_info next_stmt_info = first_stmt_info;
1169       do
1170           {
1171             gaps -= 1;
1172             next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
1173           }
1174       while (next_stmt_info);
1175       if (gaps)
1176           {
1177             if (dump_enabled_p ())
1178               dump_printf_loc (MSG_NOTE, vect_location,
1179                                    "vect_model_load_cost: %d unused vectors.\n",
1180                                    gaps);
1181             vect_get_load_cost (vinfo, stmt_info, ncopies * gaps,
1182                                     alignment_support_scheme, misalignment, false,
1183                                     &inside_cost, &prologue_cost,
1184                                     cost_vec, cost_vec, true);
1185           }
1186     }
1187 
1188   /* We assume that the cost of a single load-lanes instruction is
1189      equivalent to the cost of DR_GROUP_SIZE separate loads.  If a grouped
1190      access is instead being provided by a load-and-permute operation,
1191      include the cost of the permutes.  */
1192   if (first_stmt_p
1193       && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1194     {
1195       /* Uses an even and odd extract operations or shuffle operations
1196            for each needed permute.  */
1197       int group_size = DR_GROUP_SIZE (first_stmt_info);
1198       int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1199       inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1200                                                stmt_info, 0, vect_body);
1201 
1202       if (dump_enabled_p ())
1203         dump_printf_loc (MSG_NOTE, vect_location,
1204                          "vect_model_load_cost: strided group_size = %d .\n",
1205                          group_size);
1206     }
1207 
1208   /* The loads themselves.  */
1209   if (memory_access_type == VMAT_ELEMENTWISE
1210       || memory_access_type == VMAT_GATHER_SCATTER)
1211     {
1212       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1213       unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1214       if (memory_access_type == VMAT_GATHER_SCATTER
1215             && gs_info->ifn == IFN_LAST && !gs_info->decl)
1216           /* For emulated gathers N offset vector element extracts
1217              (we assume the scalar scaling and ptr + offset add is consumed by
1218              the load).  */
1219           inside_cost += record_stmt_cost (cost_vec, ncopies * assumed_nunits,
1220                                                    vec_to_scalar, stmt_info, 0,
1221                                                    vect_body);
1222       /* N scalar loads plus gathering them into a vector.  */
1223       inside_cost += record_stmt_cost (cost_vec,
1224                                                ncopies * assumed_nunits,
1225                                                scalar_load, stmt_info, 0, vect_body);
1226     }
1227   else if (memory_access_type == VMAT_INVARIANT)
1228     {
1229       /* Invariant loads will ideally be hoisted and splat to a vector.  */
1230       prologue_cost += record_stmt_cost (cost_vec, 1,
1231                                                    scalar_load, stmt_info, 0,
1232                                                    vect_prologue);
1233       prologue_cost += record_stmt_cost (cost_vec, 1,
1234                                                    scalar_to_vec, stmt_info, 0,
1235                                                    vect_prologue);
1236     }
1237   else
1238     vect_get_load_cost (vinfo, stmt_info, ncopies,
1239                               alignment_support_scheme, misalignment, first_stmt_p,
1240                               &inside_cost, &prologue_cost,
1241                               cost_vec, cost_vec, true);
1242   if (memory_access_type == VMAT_ELEMENTWISE
1243       || memory_access_type == VMAT_STRIDED_SLP
1244       || (memory_access_type == VMAT_GATHER_SCATTER
1245             && gs_info->ifn == IFN_LAST && !gs_info->decl))
1246     inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1247                                              stmt_info, 0, vect_body);
1248 
1249   if (dump_enabled_p ())
1250     dump_printf_loc (MSG_NOTE, vect_location,
1251                      "vect_model_load_cost: inside_cost = %d, "
1252                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
1253 }
1254 
1255 
1256 /* Calculate cost of DR's memory access.  */
1257 void
vect_get_load_cost(vec_info *,stmt_vec_info stmt_info,int ncopies,dr_alignment_support alignment_support_scheme,int misalignment,bool add_realign_cost,unsigned int * inside_cost,unsigned int * prologue_cost,stmt_vector_for_cost * prologue_cost_vec,stmt_vector_for_cost * body_cost_vec,bool record_prologue_costs)1258 vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1259                         dr_alignment_support alignment_support_scheme,
1260                         int misalignment,
1261                         bool add_realign_cost, unsigned int *inside_cost,
1262                         unsigned int *prologue_cost,
1263                         stmt_vector_for_cost *prologue_cost_vec,
1264                         stmt_vector_for_cost *body_cost_vec,
1265                         bool record_prologue_costs)
1266 {
1267   switch (alignment_support_scheme)
1268     {
1269     case dr_aligned:
1270       {
1271           *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1272                                                     stmt_info, 0, vect_body);
1273 
1274         if (dump_enabled_p ())
1275           dump_printf_loc (MSG_NOTE, vect_location,
1276                            "vect_model_load_cost: aligned.\n");
1277 
1278         break;
1279       }
1280     case dr_unaligned_supported:
1281       {
1282         /* Here, we assign an additional cost for the unaligned load.  */
1283           *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1284                                                     unaligned_load, stmt_info,
1285                                                     misalignment, vect_body);
1286 
1287         if (dump_enabled_p ())
1288           dump_printf_loc (MSG_NOTE, vect_location,
1289                            "vect_model_load_cost: unaligned supported by "
1290                            "hardware.\n");
1291 
1292         break;
1293       }
1294     case dr_explicit_realign:
1295       {
1296           *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1297                                                     vector_load, stmt_info, 0, vect_body);
1298           *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1299                                                     vec_perm, stmt_info, 0, vect_body);
1300 
1301         /* FIXME: If the misalignment remains fixed across the iterations of
1302            the containing loop, the following cost should be added to the
1303            prologue costs.  */
1304         if (targetm.vectorize.builtin_mask_for_load)
1305             *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1306                                                       stmt_info, 0, vect_body);
1307 
1308         if (dump_enabled_p ())
1309           dump_printf_loc (MSG_NOTE, vect_location,
1310                            "vect_model_load_cost: explicit realign\n");
1311 
1312         break;
1313       }
1314     case dr_explicit_realign_optimized:
1315       {
1316         if (dump_enabled_p ())
1317           dump_printf_loc (MSG_NOTE, vect_location,
1318                            "vect_model_load_cost: unaligned software "
1319                            "pipelined.\n");
1320 
1321         /* Unaligned software pipeline has a load of an address, an initial
1322            load, and possibly a mask operation to "prime" the loop.  However,
1323            if this is an access in a group of loads, which provide grouped
1324            access, then the above cost should only be considered for one
1325            access in the group.  Inside the loop, there is a load op
1326            and a realignment op.  */
1327 
1328         if (add_realign_cost && record_prologue_costs)
1329           {
1330               *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1331                                                             vector_stmt, stmt_info,
1332                                                             0, vect_prologue);
1333             if (targetm.vectorize.builtin_mask_for_load)
1334                 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1335                                                               vector_stmt, stmt_info,
1336                                                               0, vect_prologue);
1337           }
1338 
1339           *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1340                                                     stmt_info, 0, vect_body);
1341           *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1342                                                     stmt_info, 0, vect_body);
1343 
1344         if (dump_enabled_p ())
1345           dump_printf_loc (MSG_NOTE, vect_location,
1346                            "vect_model_load_cost: explicit realign optimized"
1347                            "\n");
1348 
1349         break;
1350       }
1351 
1352     case dr_unaligned_unsupported:
1353       {
1354         *inside_cost = VECT_MAX_COST;
1355 
1356         if (dump_enabled_p ())
1357           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1358                            "vect_model_load_cost: unsupported access.\n");
1359         break;
1360       }
1361 
1362     default:
1363       gcc_unreachable ();
1364     }
1365 }
1366 
1367 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1368    the loop preheader for the vectorized stmt STMT_VINFO.  */
1369 
1370 static void
vect_init_vector_1(vec_info * vinfo,stmt_vec_info stmt_vinfo,gimple * new_stmt,gimple_stmt_iterator * gsi)1371 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1372                         gimple_stmt_iterator *gsi)
1373 {
1374   if (gsi)
1375     vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1376   else
1377     vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1378 
1379   if (dump_enabled_p ())
1380     dump_printf_loc (MSG_NOTE, vect_location,
1381                          "created new init_stmt: %G", new_stmt);
1382 }
1383 
1384 /* Function vect_init_vector.
1385 
1386    Insert a new stmt (INIT_STMT) that initializes a new variable of type
1387    TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
1388    vector type a vector with all elements equal to VAL is created first.
1389    Place the initialization at GSI if it is not NULL.  Otherwise, place the
1390    initialization at the loop preheader.
1391    Return the DEF of INIT_STMT.
1392    It will be used in the vectorization of STMT_INFO.  */
1393 
1394 tree
vect_init_vector(vec_info * vinfo,stmt_vec_info stmt_info,tree val,tree type,gimple_stmt_iterator * gsi)1395 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1396                       gimple_stmt_iterator *gsi)
1397 {
1398   gimple *init_stmt;
1399   tree new_temp;
1400 
1401   /* We abuse this function to push sth to a SSA name with initial 'val'.  */
1402   if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1403     {
1404       gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1405       if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1406           {
1407             /* Scalar boolean value should be transformed into
1408                all zeros or all ones value before building a vector.  */
1409             if (VECTOR_BOOLEAN_TYPE_P (type))
1410               {
1411                 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1412                 tree false_val = build_zero_cst (TREE_TYPE (type));
1413 
1414                 if (CONSTANT_CLASS_P (val))
1415                     val = integer_zerop (val) ? false_val : true_val;
1416                 else
1417                     {
1418                       new_temp = make_ssa_name (TREE_TYPE (type));
1419                       init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1420                                                                val, true_val, false_val);
1421                       vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1422                       val = new_temp;
1423                     }
1424               }
1425             else
1426               {
1427                 gimple_seq stmts = NULL;
1428                 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1429                     val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1430                                             TREE_TYPE (type), val);
1431                 else
1432                     /* ???  Condition vectorization expects us to do
1433                        promotion of invariant/external defs.  */
1434                     val = gimple_convert (&stmts, TREE_TYPE (type), val);
1435                 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1436                        !gsi_end_p (gsi2); )
1437                     {
1438                       init_stmt = gsi_stmt (gsi2);
1439                       gsi_remove (&gsi2, false);
1440                       vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1441                     }
1442               }
1443           }
1444       val = build_vector_from_val (type, val);
1445     }
1446 
1447   new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1448   init_stmt = gimple_build_assign (new_temp, val);
1449   vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1450   return new_temp;
1451 }
1452 
1453 
1454 /* Function vect_get_vec_defs_for_operand.
1455 
1456    OP is an operand in STMT_VINFO.  This function returns a vector of
1457    NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1458 
1459    In the case that OP is an SSA_NAME which is defined in the loop, then
1460    STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1461 
1462    In case OP is an invariant or constant, a new stmt that creates a vector def
1463    needs to be introduced.  VECTYPE may be used to specify a required type for
1464    vector invariant.  */
1465 
1466 void
vect_get_vec_defs_for_operand(vec_info * vinfo,stmt_vec_info stmt_vinfo,unsigned ncopies,tree op,vec<tree> * vec_oprnds,tree vectype)1467 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1468                                      unsigned ncopies,
1469                                      tree op, vec<tree> *vec_oprnds, tree vectype)
1470 {
1471   gimple *def_stmt;
1472   enum vect_def_type dt;
1473   bool is_simple_use;
1474   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1475 
1476   if (dump_enabled_p ())
1477     dump_printf_loc (MSG_NOTE, vect_location,
1478                          "vect_get_vec_defs_for_operand: %T\n", op);
1479 
1480   stmt_vec_info def_stmt_info;
1481   is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1482                                               &def_stmt_info, &def_stmt);
1483   gcc_assert (is_simple_use);
1484   if (def_stmt && dump_enabled_p ())
1485     dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  %G", def_stmt);
1486 
1487   vec_oprnds->create (ncopies);
1488   if (dt == vect_constant_def || dt == vect_external_def)
1489     {
1490       tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1491       tree vector_type;
1492 
1493       if (vectype)
1494           vector_type = vectype;
1495       else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1496                  && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1497           vector_type = truth_type_for (stmt_vectype);
1498       else
1499           vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1500 
1501       gcc_assert (vector_type);
1502       tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1503       while (ncopies--)
1504           vec_oprnds->quick_push (vop);
1505     }
1506   else
1507     {
1508       def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1509       gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1510       for (unsigned i = 0; i < ncopies; ++i)
1511           vec_oprnds->quick_push (gimple_get_lhs
1512                                           (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1513     }
1514 }
1515 
1516 
1517 /* Get vectorized definitions for OP0 and OP1.  */
1518 
1519 void
vect_get_vec_defs(vec_info * vinfo,stmt_vec_info stmt_info,slp_tree slp_node,unsigned ncopies,tree op0,vec<tree> * vec_oprnds0,tree vectype0,tree op1,vec<tree> * vec_oprnds1,tree vectype1,tree op2,vec<tree> * vec_oprnds2,tree vectype2,tree op3,vec<tree> * vec_oprnds3,tree vectype3)1520 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1521                        unsigned ncopies,
1522                        tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1523                        tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1524                        tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1525                        tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1526 {
1527   if (slp_node)
1528     {
1529       if (op0)
1530           vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1531       if (op1)
1532           vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1533       if (op2)
1534           vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1535       if (op3)
1536           vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1537     }
1538   else
1539     {
1540       if (op0)
1541           vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1542                                                op0, vec_oprnds0, vectype0);
1543       if (op1)
1544           vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1545                                                op1, vec_oprnds1, vectype1);
1546       if (op2)
1547           vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1548                                                op2, vec_oprnds2, vectype2);
1549       if (op3)
1550           vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1551                                                op3, vec_oprnds3, vectype3);
1552     }
1553 }
1554 
1555 void
vect_get_vec_defs(vec_info * vinfo,stmt_vec_info stmt_info,slp_tree slp_node,unsigned ncopies,tree op0,vec<tree> * vec_oprnds0,tree op1,vec<tree> * vec_oprnds1,tree op2,vec<tree> * vec_oprnds2,tree op3,vec<tree> * vec_oprnds3)1556 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1557                        unsigned ncopies,
1558                        tree op0, vec<tree> *vec_oprnds0,
1559                        tree op1, vec<tree> *vec_oprnds1,
1560                        tree op2, vec<tree> *vec_oprnds2,
1561                        tree op3, vec<tree> *vec_oprnds3)
1562 {
1563   vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1564                          op0, vec_oprnds0, NULL_TREE,
1565                          op1, vec_oprnds1, NULL_TREE,
1566                          op2, vec_oprnds2, NULL_TREE,
1567                          op3, vec_oprnds3, NULL_TREE);
1568 }
1569 
1570 /* Helper function called by vect_finish_replace_stmt and
1571    vect_finish_stmt_generation.  Set the location of the new
1572    statement and create and return a stmt_vec_info for it.  */
1573 
1574 static void
vect_finish_stmt_generation_1(vec_info *,stmt_vec_info stmt_info,gimple * vec_stmt)1575 vect_finish_stmt_generation_1 (vec_info *,
1576                                      stmt_vec_info stmt_info, gimple *vec_stmt)
1577 {
1578   if (dump_enabled_p ())
1579     dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1580 
1581   if (stmt_info)
1582     {
1583       gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1584 
1585       /* While EH edges will generally prevent vectorization, stmt might
1586            e.g. be in a must-not-throw region.  Ensure newly created stmts
1587            that could throw are part of the same region.  */
1588       int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1589       if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1590           add_stmt_to_eh_lp (vec_stmt, lp_nr);
1591     }
1592   else
1593     gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1594 }
1595 
1596 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1597    which sets the same scalar result as STMT_INFO did.  Create and return a
1598    stmt_vec_info for VEC_STMT.  */
1599 
1600 void
vect_finish_replace_stmt(vec_info * vinfo,stmt_vec_info stmt_info,gimple * vec_stmt)1601 vect_finish_replace_stmt (vec_info *vinfo,
1602                                 stmt_vec_info stmt_info, gimple *vec_stmt)
1603 {
1604   gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1605   gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1606 
1607   gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1608   gsi_replace (&gsi, vec_stmt, true);
1609 
1610   vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1611 }
1612 
1613 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1614    before *GSI.  Create and return a stmt_vec_info for VEC_STMT.  */
1615 
1616 void
vect_finish_stmt_generation(vec_info * vinfo,stmt_vec_info stmt_info,gimple * vec_stmt,gimple_stmt_iterator * gsi)1617 vect_finish_stmt_generation (vec_info *vinfo,
1618                                    stmt_vec_info stmt_info, gimple *vec_stmt,
1619                                    gimple_stmt_iterator *gsi)
1620 {
1621   gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1622 
1623   if (!gsi_end_p (*gsi)
1624       && gimple_has_mem_ops (vec_stmt))
1625     {
1626       gimple *at_stmt = gsi_stmt (*gsi);
1627       tree vuse = gimple_vuse (at_stmt);
1628       if (vuse && TREE_CODE (vuse) == SSA_NAME)
1629           {
1630             tree vdef = gimple_vdef (at_stmt);
1631             gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1632             gimple_set_modified (vec_stmt, true);
1633             /* If we have an SSA vuse and insert a store, update virtual
1634                SSA form to avoid triggering the renamer.  Do so only
1635                if we can easily see all uses - which is what almost always
1636                happens with the way vectorized stmts are inserted.  */
1637             if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1638                 && ((is_gimple_assign (vec_stmt)
1639                        && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1640                       || (is_gimple_call (vec_stmt)
1641                           && !(gimple_call_flags (vec_stmt)
1642                                  & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1643               {
1644                 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1645                 gimple_set_vdef (vec_stmt, new_vdef);
1646                 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1647               }
1648           }
1649     }
1650   gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1651   vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1652 }
1653 
1654 /* We want to vectorize a call to combined function CFN with function
1655    decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1656    as the types of all inputs.  Check whether this is possible using
1657    an internal function, returning its code if so or IFN_LAST if not.  */
1658 
1659 static internal_fn
vectorizable_internal_function(combined_fn cfn,tree fndecl,tree vectype_out,tree vectype_in)1660 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1661                                         tree vectype_out, tree vectype_in)
1662 {
1663   internal_fn ifn;
1664   if (internal_fn_p (cfn))
1665     ifn = as_internal_fn (cfn);
1666   else
1667     ifn = associated_internal_fn (fndecl);
1668   if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1669     {
1670       const direct_internal_fn_info &info = direct_internal_fn (ifn);
1671       if (info.vectorizable)
1672           {
1673             tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1674             tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1675             if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1676                                                         OPTIMIZE_FOR_SPEED))
1677               return ifn;
1678           }
1679     }
1680   return IFN_LAST;
1681 }
1682 
1683 
1684 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1685                                           gimple_stmt_iterator *);
1686 
1687 /* Check whether a load or store statement in the loop described by
1688    LOOP_VINFO is possible in a loop using partial vectors.  This is
1689    testing whether the vectorizer pass has the appropriate support,
1690    as well as whether the target does.
1691 
1692    VLS_TYPE says whether the statement is a load or store and VECTYPE
1693    is the type of the vector being loaded or stored.  SLP_NODE is the SLP
1694    node that contains the statement, or null if none.  MEMORY_ACCESS_TYPE
1695    says how the load or store is going to be implemented and GROUP_SIZE
1696    is the number of load or store statements in the containing group.
1697    If the access is a gather load or scatter store, GS_INFO describes
1698    its arguments.  If the load or store is conditional, SCALAR_MASK is the
1699    condition under which it occurs.
1700 
1701    Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1702    vectors is not supported, otherwise record the required rgroup control
1703    types.  */
1704 
1705 static void
check_load_store_for_partial_vectors(loop_vec_info loop_vinfo,tree vectype,slp_tree slp_node,vec_load_store_type vls_type,int group_size,vect_memory_access_type memory_access_type,gather_scatter_info * gs_info,tree scalar_mask)1706 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1707                                               slp_tree slp_node,
1708                                               vec_load_store_type vls_type,
1709                                               int group_size,
1710                                               vect_memory_access_type
1711                                               memory_access_type,
1712                                               gather_scatter_info *gs_info,
1713                                               tree scalar_mask)
1714 {
1715   /* Invariant loads need no special support.  */
1716   if (memory_access_type == VMAT_INVARIANT)
1717     return;
1718 
1719   unsigned int nvectors;
1720   if (slp_node)
1721     nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1722   else
1723     nvectors = vect_get_num_copies (loop_vinfo, vectype);
1724 
1725   vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1726   machine_mode vecmode = TYPE_MODE (vectype);
1727   bool is_load = (vls_type == VLS_LOAD);
1728   if (memory_access_type == VMAT_LOAD_STORE_LANES)
1729     {
1730       if (is_load
1731             ? !vect_load_lanes_supported (vectype, group_size, true)
1732             : !vect_store_lanes_supported (vectype, group_size, true))
1733           {
1734             if (dump_enabled_p ())
1735               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1736                                    "can't operate on partial vectors because"
1737                                    " the target doesn't have an appropriate"
1738                                    " load/store-lanes instruction.\n");
1739             LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1740             return;
1741           }
1742       vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1743                                    scalar_mask);
1744       return;
1745     }
1746 
1747   if (memory_access_type == VMAT_GATHER_SCATTER)
1748     {
1749       internal_fn ifn = (is_load
1750                                ? IFN_MASK_GATHER_LOAD
1751                                : IFN_MASK_SCATTER_STORE);
1752       if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1753                                                                gs_info->memory_type,
1754                                                                gs_info->offset_vectype,
1755                                                                gs_info->scale))
1756           {
1757             if (dump_enabled_p ())
1758               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1759                                    "can't operate on partial vectors because"
1760                                    " the target doesn't have an appropriate"
1761                                    " gather load or scatter store instruction.\n");
1762             LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1763             return;
1764           }
1765       vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1766                                    scalar_mask);
1767       return;
1768     }
1769 
1770   if (memory_access_type != VMAT_CONTIGUOUS
1771       && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1772     {
1773       /* Element X of the data must come from iteration i * VF + X of the
1774            scalar loop.  We need more work to support other mappings.  */
1775       if (dump_enabled_p ())
1776           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1777                                "can't operate on partial vectors because an"
1778                                " access isn't contiguous.\n");
1779       LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1780       return;
1781     }
1782 
1783   if (!VECTOR_MODE_P (vecmode))
1784     {
1785       if (dump_enabled_p ())
1786           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1787                                "can't operate on partial vectors when emulating"
1788                                " vector operations.\n");
1789       LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1790       return;
1791     }
1792 
1793   /* We might load more scalars than we need for permuting SLP loads.
1794      We checked in get_group_load_store_type that the extra elements
1795      don't leak into a new vector.  */
1796   auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
1797   {
1798     unsigned int nvectors;
1799     if (can_div_away_from_zero_p (size, nunits, &nvectors))
1800       return nvectors;
1801     gcc_unreachable ();
1802   };
1803 
1804   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1805   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1806   machine_mode mask_mode;
1807   bool using_partial_vectors_p = false;
1808   if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1809       && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1810     {
1811       nvectors = group_memory_nvectors (group_size * vf, nunits);
1812       vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1813       using_partial_vectors_p = true;
1814     }
1815 
1816   machine_mode vmode;
1817   if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1818     {
1819       nvectors = group_memory_nvectors (group_size * vf, nunits);
1820       vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1821       unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1822       vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1823       using_partial_vectors_p = true;
1824     }
1825 
1826   if (!using_partial_vectors_p)
1827     {
1828       if (dump_enabled_p ())
1829           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1830                                "can't operate on partial vectors because the"
1831                                " target doesn't have the appropriate partial"
1832                                " vectorization load or store.\n");
1833       LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1834     }
1835 }
1836 
1837 /* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
1838    form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1839    that needs to be applied to all loads and stores in a vectorized loop.
1840    Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
1841    otherwise return VEC_MASK & LOOP_MASK.
1842 
1843    MASK_TYPE is the type of both masks.  If new statements are needed,
1844    insert them before GSI.  */
1845 
1846 static tree
prepare_vec_mask(loop_vec_info loop_vinfo,tree mask_type,tree loop_mask,tree vec_mask,gimple_stmt_iterator * gsi)1847 prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
1848                       tree vec_mask, gimple_stmt_iterator *gsi)
1849 {
1850   gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1851   if (!loop_mask)
1852     return vec_mask;
1853 
1854   gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1855 
1856   if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
1857     return vec_mask;
1858 
1859   tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1860   gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1861                                                     vec_mask, loop_mask);
1862 
1863   gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1864   return and_res;
1865 }
1866 
1867 /* Determine whether we can use a gather load or scatter store to vectorize
1868    strided load or store STMT_INFO by truncating the current offset to a
1869    smaller width.  We need to be able to construct an offset vector:
1870 
1871      { 0, X, X*2, X*3, ... }
1872 
1873    without loss of precision, where X is STMT_INFO's DR_STEP.
1874 
1875    Return true if this is possible, describing the gather load or scatter
1876    store in GS_INFO.  MASKED_P is true if the load or store is conditional.  */
1877 
1878 static bool
vect_truncate_gather_scatter_offset(stmt_vec_info stmt_info,loop_vec_info loop_vinfo,bool masked_p,gather_scatter_info * gs_info)1879 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1880                                              loop_vec_info loop_vinfo, bool masked_p,
1881                                              gather_scatter_info *gs_info)
1882 {
1883   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1884   data_reference *dr = dr_info->dr;
1885   tree step = DR_STEP (dr);
1886   if (TREE_CODE (step) != INTEGER_CST)
1887     {
1888       /* ??? Perhaps we could use range information here?  */
1889       if (dump_enabled_p ())
1890           dump_printf_loc (MSG_NOTE, vect_location,
1891                                "cannot truncate variable step.\n");
1892       return false;
1893     }
1894 
1895   /* Get the number of bits in an element.  */
1896   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1897   scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1898   unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1899 
1900   /* Set COUNT to the upper limit on the number of elements - 1.
1901      Start with the maximum vectorization factor.  */
1902   unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1903 
1904   /* Try lowering COUNT to the number of scalar latch iterations.  */
1905   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1906   widest_int max_iters;
1907   if (max_loop_iterations (loop, &max_iters)
1908       && max_iters < count)
1909     count = max_iters.to_shwi ();
1910 
1911   /* Try scales of 1 and the element size.  */
1912   int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1913   wi::overflow_type overflow = wi::OVF_NONE;
1914   for (int i = 0; i < 2; ++i)
1915     {
1916       int scale = scales[i];
1917       widest_int factor;
1918       if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1919           continue;
1920 
1921       /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE.  */
1922       widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1923       if (overflow)
1924           continue;
1925       signop sign = range >= 0 ? UNSIGNED : SIGNED;
1926       unsigned int min_offset_bits = wi::min_precision (range, sign);
1927 
1928       /* Find the narrowest viable offset type.  */
1929       unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1930       tree offset_type = build_nonstandard_integer_type (offset_bits,
1931                                                                        sign == UNSIGNED);
1932 
1933       /* See whether the target supports the operation with an offset
1934            no narrower than OFFSET_TYPE.  */
1935       tree memory_type = TREE_TYPE (DR_REF (dr));
1936       if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1937                                              vectype, memory_type, offset_type, scale,
1938                                              &gs_info->ifn, &gs_info->offset_vectype)
1939             || gs_info->ifn == IFN_LAST)
1940           continue;
1941 
1942       gs_info->decl = NULL_TREE;
1943       /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1944            but we don't need to store that here.  */
1945       gs_info->base = NULL_TREE;
1946       gs_info->element_type = TREE_TYPE (vectype);
1947       gs_info->offset = fold_convert (offset_type, step);
1948       gs_info->offset_dt = vect_constant_def;
1949       gs_info->scale = scale;
1950       gs_info->memory_type = memory_type;
1951       return true;
1952     }
1953 
1954   if (overflow && dump_enabled_p ())
1955     dump_printf_loc (MSG_NOTE, vect_location,
1956                          "truncating gather/scatter offset to %d bits"
1957                          " might change its value.\n", element_bits);
1958 
1959   return false;
1960 }
1961 
1962 /* Return true if we can use gather/scatter internal functions to
1963    vectorize STMT_INFO, which is a grouped or strided load or store.
1964    MASKED_P is true if load or store is conditional.  When returning
1965    true, fill in GS_INFO with the information required to perform the
1966    operation.  */
1967 
1968 static bool
vect_use_strided_gather_scatters_p(stmt_vec_info stmt_info,loop_vec_info loop_vinfo,bool masked_p,gather_scatter_info * gs_info)1969 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1970                                             loop_vec_info loop_vinfo, bool masked_p,
1971                                             gather_scatter_info *gs_info)
1972 {
1973   if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1974       || gs_info->ifn == IFN_LAST)
1975     return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1976                                                             masked_p, gs_info);
1977 
1978   tree old_offset_type = TREE_TYPE (gs_info->offset);
1979   tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1980 
1981   gcc_assert (TYPE_PRECISION (new_offset_type)
1982                 >= TYPE_PRECISION (old_offset_type));
1983   gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1984 
1985   if (dump_enabled_p ())
1986     dump_printf_loc (MSG_NOTE, vect_location,
1987                          "using gather/scatter for strided/grouped access,"
1988                          " scale = %d\n", gs_info->scale);
1989 
1990   return true;
1991 }
1992 
1993 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1994    elements with a known constant step.  Return -1 if that step
1995    is negative, 0 if it is zero, and 1 if it is greater than zero.  */
1996 
1997 static int
compare_step_with_zero(vec_info * vinfo,stmt_vec_info stmt_info)1998 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1999 {
2000   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2001   return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
2002                                      size_zero_node);
2003 }
2004 
2005 /* If the target supports a permute mask that reverses the elements in
2006    a vector of type VECTYPE, return that mask, otherwise return null.  */
2007 
2008 static tree
perm_mask_for_reverse(tree vectype)2009 perm_mask_for_reverse (tree vectype)
2010 {
2011   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2012 
2013   /* The encoding has a single stepped pattern.  */
2014   vec_perm_builder sel (nunits, 1, 3);
2015   for (int i = 0; i < 3; ++i)
2016     sel.quick_push (nunits - 1 - i);
2017 
2018   vec_perm_indices indices (sel, 1, nunits);
2019   if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
2020     return NULL_TREE;
2021   return vect_gen_perm_mask_checked (vectype, indices);
2022 }
2023 
2024 /* A subroutine of get_load_store_type, with a subset of the same
2025    arguments.  Handle the case where STMT_INFO is a load or store that
2026    accesses consecutive elements with a negative step.  Sets *POFFSET
2027    to the offset to be applied to the DR for the first access.  */
2028 
2029 static vect_memory_access_type
get_negative_load_store_type(vec_info * vinfo,stmt_vec_info stmt_info,tree vectype,vec_load_store_type vls_type,unsigned int ncopies,poly_int64 * poffset)2030 get_negative_load_store_type (vec_info *vinfo,
2031                                     stmt_vec_info stmt_info, tree vectype,
2032                                     vec_load_store_type vls_type,
2033                                     unsigned int ncopies, poly_int64 *poffset)
2034 {
2035   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2036   dr_alignment_support alignment_support_scheme;
2037 
2038   if (ncopies > 1)
2039     {
2040       if (dump_enabled_p ())
2041           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2042                                "multiple types with negative step.\n");
2043       return VMAT_ELEMENTWISE;
2044     }
2045 
2046   /* For backward running DRs the first access in vectype actually is
2047      N-1 elements before the address of the DR.  */
2048   *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
2049                 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
2050 
2051   int misalignment = dr_misalignment (dr_info, vectype, *poffset);
2052   alignment_support_scheme
2053     = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
2054   if (alignment_support_scheme != dr_aligned
2055       && alignment_support_scheme != dr_unaligned_supported)
2056     {
2057       if (dump_enabled_p ())
2058           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2059                                "negative step but alignment required.\n");
2060       *poffset = 0;
2061       return VMAT_ELEMENTWISE;
2062     }
2063 
2064   if (vls_type == VLS_STORE_INVARIANT)
2065     {
2066       if (dump_enabled_p ())
2067           dump_printf_loc (MSG_NOTE, vect_location,
2068                                "negative step with invariant source;"
2069                                " no permute needed.\n");
2070       return VMAT_CONTIGUOUS_DOWN;
2071     }
2072 
2073   if (!perm_mask_for_reverse (vectype))
2074     {
2075       if (dump_enabled_p ())
2076           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2077                                "negative step and reversing not supported.\n");
2078       *poffset = 0;
2079       return VMAT_ELEMENTWISE;
2080     }
2081 
2082   return VMAT_CONTIGUOUS_REVERSE;
2083 }
2084 
2085 /* STMT_INFO is either a masked or unconditional store.  Return the value
2086    being stored.  */
2087 
2088 tree
vect_get_store_rhs(stmt_vec_info stmt_info)2089 vect_get_store_rhs (stmt_vec_info stmt_info)
2090 {
2091   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2092     {
2093       gcc_assert (gimple_assign_single_p (assign));
2094       return gimple_assign_rhs1 (assign);
2095     }
2096   if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2097     {
2098       internal_fn ifn = gimple_call_internal_fn (call);
2099       int index = internal_fn_stored_value_index (ifn);
2100       gcc_assert (index >= 0);
2101       return gimple_call_arg (call, index);
2102     }
2103   gcc_unreachable ();
2104 }
2105 
/* Function VECTOR_VECTOR_COMPOSITION_TYPE

   This function returns a vector type which can be composed with NELTS pieces,
   whose type is recorded in PTYPE.  VTYPE should be a vector type, and has the
   same vector size as the return vector.  It first checks whether the target
   supports a pieces-size vector mode for construction; if the target does
   not, it then checks whether a pieces-size scalar mode can be used for
   construction instead.  It returns NULL_TREE if it fails to find an
   available composition.

   For example, for (vtype=V16QI, nelts=4), we can probably get:
     - V16QI with PTYPE V4QI.
     - V4SI with PTYPE SI.
     - NULL_TREE.  */
2119 
2120 static tree
vector_vector_composition_type(tree vtype,poly_uint64 nelts,tree * ptype)2121 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2122 {
2123   gcc_assert (VECTOR_TYPE_P (vtype));
2124   gcc_assert (known_gt (nelts, 0U));
2125 
2126   machine_mode vmode = TYPE_MODE (vtype);
2127   if (!VECTOR_MODE_P (vmode))
2128     return NULL_TREE;
2129 
2130   poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2131   unsigned int pbsize;
2132   if (constant_multiple_p (vbsize, nelts, &pbsize))
2133     {
2134       /* First check if vec_init optab supports construction from
2135            vector pieces directly.  */
2136       scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2137       poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2138       machine_mode rmode;
2139       if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2140             && (convert_optab_handler (vec_init_optab, vmode, rmode)
2141                 != CODE_FOR_nothing))
2142           {
2143             *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2144             return vtype;
2145           }
2146 
2147       /* Otherwise check if exists an integer type of the same piece size and
2148            if vec_init optab supports construction from it directly.  */
2149       if (int_mode_for_size (pbsize, 0).exists (&elmode)
2150             && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2151             && (convert_optab_handler (vec_init_optab, rmode, elmode)
2152                 != CODE_FOR_nothing))
2153           {
2154             *ptype = build_nonstandard_integer_type (pbsize, 1);
2155             return build_vector_type (*ptype, nelts);
2156           }
2157     }
2158 
2159   return NULL_TREE;
2160 }
2161 
2162 /* A subroutine of get_load_store_type, with a subset of the same
2163    arguments.  Handle the case where STMT_INFO is part of a grouped load
2164    or store.
2165 
2166    For stores, the statements in the group are all consecutive
2167    and there is no gap at the end.  For loads, the statements in the
2168    group might not be consecutive; there can be gaps between statements
2169    as well as at the end.  */
2170 
2171 static bool
get_group_load_store_type(vec_info * vinfo,stmt_vec_info stmt_info,tree vectype,slp_tree slp_node,bool masked_p,vec_load_store_type vls_type,vect_memory_access_type * memory_access_type,poly_int64 * poffset,dr_alignment_support * alignment_support_scheme,int * misalignment,gather_scatter_info * gs_info)2172 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2173                                  tree vectype, slp_tree slp_node,
2174                                  bool masked_p, vec_load_store_type vls_type,
2175                                  vect_memory_access_type *memory_access_type,
2176                                  poly_int64 *poffset,
2177                                  dr_alignment_support *alignment_support_scheme,
2178                                  int *misalignment,
2179                                  gather_scatter_info *gs_info)
2180 {
2181   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2182   class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2183   stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2184   dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2185   unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2186   bool single_element_p = (stmt_info == first_stmt_info
2187                                  && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2188   unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2189   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2190 
2191   /* True if the vectorized statements would access beyond the last
2192      statement in the group.  */
2193   bool overrun_p = false;
2194 
2195   /* True if we can cope with such overrun by peeling for gaps, so that
2196      there is at least one final scalar iteration after the vector loop.  */
2197   bool can_overrun_p = (!masked_p
2198                               && vls_type == VLS_LOAD
2199                               && loop_vinfo
2200                               && !loop->inner);
2201 
2202   /* There can only be a gap at the end of the group if the stride is
2203      known at compile time.  */
2204   gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2205 
2206   /* Stores can't yet have gaps.  */
2207   gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2208 
2209   if (slp_node)
2210     {
2211       /* For SLP vectorization we directly vectorize a subchain
2212            without permutation.  */
2213       if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2214           first_dr_info
2215             = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2216       if (STMT_VINFO_STRIDED_P (first_stmt_info))
2217           {
2218             /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2219                separated by the stride, until we have a complete vector.
2220                Fall back to scalar accesses if that isn't possible.  */
2221             if (multiple_p (nunits, group_size))
2222               *memory_access_type = VMAT_STRIDED_SLP;
2223             else
2224               *memory_access_type = VMAT_ELEMENTWISE;
2225           }
2226       else
2227           {
2228             overrun_p = loop_vinfo && gap != 0;
2229             if (overrun_p && vls_type != VLS_LOAD)
2230               {
2231                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2232                                      "Grouped store with gaps requires"
2233                                      " non-consecutive accesses\n");
2234                 return false;
2235               }
2236             /* An overrun is fine if the trailing elements are smaller
2237                than the alignment boundary B.  Every vector access will
2238                be a multiple of B and so we are guaranteed to access a
2239                non-gap element in the same B-sized block.  */
2240             if (overrun_p
2241                 && gap < (vect_known_alignment_in_bytes (first_dr_info,
2242                                                                    vectype)
2243                               / vect_get_scalar_dr_size (first_dr_info)))
2244               overrun_p = false;
2245 
2246             /* If the gap splits the vector in half and the target
2247                can do half-vector operations avoid the epilogue peeling
2248                by simply loading half of the vector only.  Usually
2249                the construction with an upper zero half will be elided.  */
2250             dr_alignment_support alss;
2251             int misalign = dr_misalignment (first_dr_info, vectype);
2252             tree half_vtype;
2253             if (overrun_p
2254                 && !masked_p
2255                 && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
2256                                                                         vectype, misalign)))
2257                        == dr_aligned
2258                       || alss == dr_unaligned_supported)
2259                 && known_eq (nunits, (group_size - gap) * 2)
2260                 && known_eq (nunits, group_size)
2261                 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2262                       != NULL_TREE))
2263               overrun_p = false;
2264 
2265             if (overrun_p && !can_overrun_p)
2266               {
2267                 if (dump_enabled_p ())
2268                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2269                                          "Peeling for outer loop is not supported\n");
2270                 return false;
2271               }
2272             int cmp = compare_step_with_zero (vinfo, stmt_info);
2273             if (cmp < 0)
2274               {
2275                 if (single_element_p)
2276                     /* ???  The VMAT_CONTIGUOUS_REVERSE code generation is
2277                        only correct for single element "interleaving" SLP.  */
2278                     *memory_access_type = get_negative_load_store_type
2279                                    (vinfo, stmt_info, vectype, vls_type, 1, poffset);
2280                 else
2281                     {
2282                       /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2283                          separated by the stride, until we have a complete vector.
2284                          Fall back to scalar accesses if that isn't possible.  */
2285                       if (multiple_p (nunits, group_size))
2286                         *memory_access_type = VMAT_STRIDED_SLP;
2287                       else
2288                         *memory_access_type = VMAT_ELEMENTWISE;
2289                     }
2290               }
2291             else
2292               {
2293                 gcc_assert (!loop_vinfo || cmp > 0);
2294                 *memory_access_type = VMAT_CONTIGUOUS;
2295               }
2296 
2297             /* When we have a contiguous access across loop iterations
2298                but the access in the loop doesn't cover the full vector
2299                we can end up with no gap recorded but still excess
2300                elements accessed, see PR103116.  Make sure we peel for
2301                gaps if necessary and sufficient and give up if not.  */
2302             if (loop_vinfo
2303                 && *memory_access_type == VMAT_CONTIGUOUS
2304                 && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
2305                 && !multiple_p (group_size * LOOP_VINFO_VECT_FACTOR (loop_vinfo),
2306                                     nunits))
2307               {
2308                 unsigned HOST_WIDE_INT cnunits, cvf;
2309                 if (!can_overrun_p
2310                       || !nunits.is_constant (&cnunits)
2311                       || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&cvf)
2312                       /* Peeling for gaps assumes that a single scalar iteration
2313                          is enough to make sure the last vector iteration doesn't
2314                          access excess elements.
2315                          ???  Enhancements include peeling multiple iterations
2316                          or using masked loads with a static mask.  */
2317                       || (group_size * cvf) % cnunits + group_size < cnunits)
2318                     {
2319                       if (dump_enabled_p ())
2320                         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2321                                              "peeling for gaps insufficient for "
2322                                              "access\n");
2323                       return false;
2324                     }
2325                 overrun_p = true;
2326               }
2327           }
2328     }
2329   else
2330     {
2331       /* We can always handle this case using elementwise accesses,
2332            but see if something more efficient is available.  */
2333       *memory_access_type = VMAT_ELEMENTWISE;
2334 
2335       /* If there is a gap at the end of the group then these optimizations
2336            would access excess elements in the last iteration.  */
2337       bool would_overrun_p = (gap != 0);
2338       /* An overrun is fine if the trailing elements are smaller than the
2339            alignment boundary B.  Every vector access will be a multiple of B
2340            and so we are guaranteed to access a non-gap element in the
2341            same B-sized block.  */
2342       if (would_overrun_p
2343             && !masked_p
2344             && gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
2345                         / vect_get_scalar_dr_size (first_dr_info)))
2346           would_overrun_p = false;
2347 
2348       if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2349             && (can_overrun_p || !would_overrun_p)
2350             && compare_step_with_zero (vinfo, stmt_info) > 0)
2351           {
2352             /* First cope with the degenerate case of a single-element
2353                vector.  */
2354             if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2355               ;
2356 
2357             /* Otherwise try using LOAD/STORE_LANES.  */
2358             else if (vls_type == VLS_LOAD
2359                        ? vect_load_lanes_supported (vectype, group_size, masked_p)
2360                        : vect_store_lanes_supported (vectype, group_size,
2361                                                              masked_p))
2362               {
2363                 *memory_access_type = VMAT_LOAD_STORE_LANES;
2364                 overrun_p = would_overrun_p;
2365               }
2366 
2367             /* If that fails, try using permuting loads.  */
2368             else if (vls_type == VLS_LOAD
2369                        ? vect_grouped_load_supported (vectype, single_element_p,
2370                                                               group_size)
2371                        : vect_grouped_store_supported (vectype, group_size))
2372               {
2373                 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2374                 overrun_p = would_overrun_p;
2375               }
2376           }
2377 
2378       /* As a last resort, trying using a gather load or scatter store.
2379 
2380            ??? Although the code can handle all group sizes correctly,
2381            it probably isn't a win to use separate strided accesses based
2382            on nearby locations.  Or, even if it's a win over scalar code,
2383            it might not be a win over vectorizing at a lower VF, if that
2384            allows us to use contiguous accesses.  */
2385       if (*memory_access_type == VMAT_ELEMENTWISE
2386             && single_element_p
2387             && loop_vinfo
2388             && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2389                                                              masked_p, gs_info))
2390           *memory_access_type = VMAT_GATHER_SCATTER;
2391     }
2392 
2393   if (*memory_access_type == VMAT_GATHER_SCATTER
2394       || *memory_access_type == VMAT_ELEMENTWISE)
2395     {
2396       *alignment_support_scheme = dr_unaligned_supported;
2397       *misalignment = DR_MISALIGNMENT_UNKNOWN;
2398     }
2399   else
2400     {
2401       *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
2402       *alignment_support_scheme
2403           = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
2404                                                    *misalignment);
2405     }
2406 
2407   if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2408     {
2409       /* STMT is the leader of the group. Check the operands of all the
2410            stmts of the group.  */
2411       stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2412       while (next_stmt_info)
2413           {
2414             tree op = vect_get_store_rhs (next_stmt_info);
2415             enum vect_def_type dt;
2416             if (!vect_is_simple_use (op, vinfo, &dt))
2417               {
2418                 if (dump_enabled_p ())
2419                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2420                                          "use not simple.\n");
2421                 return false;
2422               }
2423             next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2424           }
2425     }
2426 
2427   if (overrun_p)
2428     {
2429       gcc_assert (can_overrun_p);
2430       if (dump_enabled_p ())
2431           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2432                                "Data access with gaps requires scalar "
2433                                "epilogue loop\n");
2434       LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2435     }
2436 
2437   return true;
2438 }
2439 
2440 /* Analyze load or store statement STMT_INFO of type VLS_TYPE.  Return true
2441    if there is a memory access type that the vectorized form can use,
2442    storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
2443    or scatters, fill in GS_INFO accordingly.  In addition
2444    *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2445    the target does not support the alignment scheme.  *MISALIGNMENT
2446    is set according to the alignment of the access (including
2447    DR_MISALIGNMENT_UNKNOWN when it is unknown).
2448 
2449    SLP says whether we're performing SLP rather than loop vectorization.
2450    MASKED_P is true if the statement is conditional on a vectorized mask.
2451    VECTYPE is the vector type that the vectorized statements will use.
2452    NCOPIES is the number of vector statements that will be needed.  */
2453 
2454 static bool
get_load_store_type(vec_info * vinfo,stmt_vec_info stmt_info,tree vectype,slp_tree slp_node,bool masked_p,vec_load_store_type vls_type,unsigned int ncopies,vect_memory_access_type * memory_access_type,poly_int64 * poffset,dr_alignment_support * alignment_support_scheme,int * misalignment,gather_scatter_info * gs_info)2455 get_load_store_type (vec_info  *vinfo, stmt_vec_info stmt_info,
2456                          tree vectype, slp_tree slp_node,
2457                          bool masked_p, vec_load_store_type vls_type,
2458                          unsigned int ncopies,
2459                          vect_memory_access_type *memory_access_type,
2460                          poly_int64 *poffset,
2461                          dr_alignment_support *alignment_support_scheme,
2462                          int *misalignment,
2463                          gather_scatter_info *gs_info)
2464 {
2465   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2466   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2467   *misalignment = DR_MISALIGNMENT_UNKNOWN;
2468   *poffset = 0;
2469   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2470     {
2471       *memory_access_type = VMAT_GATHER_SCATTER;
2472       if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2473           gcc_unreachable ();
2474       else if (!vect_is_simple_use (gs_info->offset, vinfo,
2475                                             &gs_info->offset_dt,
2476                                             &gs_info->offset_vectype))
2477           {
2478             if (dump_enabled_p ())
2479               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2480                                    "%s index use not simple.\n",
2481                                    vls_type == VLS_LOAD ? "gather" : "scatter");
2482             return false;
2483           }
2484       else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
2485           {
2486             if (vls_type != VLS_LOAD)
2487               {
2488                 if (dump_enabled_p ())
2489                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2490                                          "unsupported emulated scatter.\n");
2491                 return false;
2492               }
2493             else if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
2494                        || !TYPE_VECTOR_SUBPARTS
2495                                (gs_info->offset_vectype).is_constant ()
2496                        || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2497                                                         (gs_info->offset_vectype),
2498                                                       TYPE_VECTOR_SUBPARTS (vectype)))
2499               {
2500                 if (dump_enabled_p ())
2501                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2502                                          "unsupported vector types for emulated "
2503                                          "gather.\n");
2504                 return false;
2505               }
2506           }
2507       /* Gather-scatter accesses perform only component accesses, alignment
2508            is irrelevant for them.  */
2509       *alignment_support_scheme = dr_unaligned_supported;
2510     }
2511   else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2512     {
2513       if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2514                                               masked_p,
2515                                               vls_type, memory_access_type, poffset,
2516                                               alignment_support_scheme,
2517                                               misalignment, gs_info))
2518           return false;
2519     }
2520   else if (STMT_VINFO_STRIDED_P (stmt_info))
2521     {
2522       gcc_assert (!slp_node);
2523       if (loop_vinfo
2524             && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2525                                                              masked_p, gs_info))
2526           *memory_access_type = VMAT_GATHER_SCATTER;
2527       else
2528           *memory_access_type = VMAT_ELEMENTWISE;
2529       /* Alignment is irrelevant here.  */
2530       *alignment_support_scheme = dr_unaligned_supported;
2531     }
2532   else
2533     {
2534       int cmp = compare_step_with_zero (vinfo, stmt_info);
2535       if (cmp == 0)
2536           {
2537             gcc_assert (vls_type == VLS_LOAD);
2538             *memory_access_type = VMAT_INVARIANT;
2539             /* Invariant accesses perform only component accesses, alignment
2540                is irrelevant for them.  */
2541             *alignment_support_scheme = dr_unaligned_supported;
2542           }
2543       else
2544           {
2545             if (cmp < 0)
2546               *memory_access_type = get_negative_load_store_type
2547                  (vinfo, stmt_info, vectype, vls_type, ncopies, poffset);
2548             else
2549               *memory_access_type = VMAT_CONTIGUOUS;
2550             *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
2551                                                      vectype, *poffset);
2552             *alignment_support_scheme
2553               = vect_supportable_dr_alignment (vinfo,
2554                                                        STMT_VINFO_DR_INFO (stmt_info),
2555                                                        vectype, *misalignment);
2556           }
2557     }
2558 
2559   if ((*memory_access_type == VMAT_ELEMENTWISE
2560        || *memory_access_type == VMAT_STRIDED_SLP)
2561       && !nunits.is_constant ())
2562     {
2563       if (dump_enabled_p ())
2564           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2565                                "Not using elementwise accesses due to variable "
2566                                "vectorization factor.\n");
2567       return false;
2568     }
2569 
2570   if (*alignment_support_scheme == dr_unaligned_unsupported)
2571     {
2572       if (dump_enabled_p ())
2573           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2574                                "unsupported unaligned access\n");
2575       return false;
2576     }
2577 
2578   /* FIXME: At the moment the cost model seems to underestimate the
2579      cost of using elementwise accesses.  This check preserves the
2580      traditional behavior until that can be fixed.  */
2581   stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2582   if (!first_stmt_info)
2583     first_stmt_info = stmt_info;
2584   if (*memory_access_type == VMAT_ELEMENTWISE
2585       && !STMT_VINFO_STRIDED_P (first_stmt_info)
2586       && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2587              && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2588              && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2589     {
2590       if (dump_enabled_p ())
2591           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2592                                "not falling back to elementwise accesses\n");
2593       return false;
2594     }
2595   return true;
2596 }
2597 
2598 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2599    conditional operation STMT_INFO.  When returning true, store the mask
2600    in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2601    vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2602    to the mask in *MASK_NODE if MASK_NODE is not NULL.  */
2603 
2604 static bool
vect_check_scalar_mask(vec_info * vinfo,stmt_vec_info stmt_info,slp_tree slp_node,unsigned mask_index,tree * mask,slp_tree * mask_node,vect_def_type * mask_dt_out,tree * mask_vectype_out)2605 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
2606                               slp_tree slp_node, unsigned mask_index,
2607                               tree *mask, slp_tree *mask_node,
2608                               vect_def_type *mask_dt_out, tree *mask_vectype_out)
2609 {
2610   enum vect_def_type mask_dt;
2611   tree mask_vectype;
2612   slp_tree mask_node_1;
2613   if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
2614                                  mask, &mask_node_1, &mask_dt, &mask_vectype))
2615     {
2616       if (dump_enabled_p ())
2617           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2618                                "mask use not simple.\n");
2619       return false;
2620     }
2621 
2622   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
2623     {
2624       if (dump_enabled_p ())
2625           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2626                                "mask argument is not a boolean.\n");
2627       return false;
2628     }
2629 
2630   /* If the caller is not prepared for adjusting an external/constant
2631      SLP mask vector type fail.  */
2632   if (slp_node
2633       && !mask_node
2634       && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
2635     {
2636       if (dump_enabled_p ())
2637           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2638                                "SLP mask argument is not vectorized.\n");
2639       return false;
2640     }
2641 
2642   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2643   if (!mask_vectype)
2644     mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2645 
2646   if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2647     {
2648       if (dump_enabled_p ())
2649           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2650                                "could not find an appropriate vector mask type.\n");
2651       return false;
2652     }
2653 
2654   if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2655                     TYPE_VECTOR_SUBPARTS (vectype)))
2656     {
2657       if (dump_enabled_p ())
2658           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2659                                "vector mask type %T"
2660                                " does not match vector data type %T.\n",
2661                                mask_vectype, vectype);
2662 
2663       return false;
2664     }
2665 
2666   *mask_dt_out = mask_dt;
2667   *mask_vectype_out = mask_vectype;
2668   if (mask_node)
2669     *mask_node = mask_node_1;
2670   return true;
2671 }
2672 
2673 /* Return true if stored value RHS is suitable for vectorizing store
2674    statement STMT_INFO.  When returning true, store the type of the
2675    definition in *RHS_DT_OUT, the type of the vectorized store value in
2676    *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT.  */
2677 
2678 static bool
vect_check_store_rhs(vec_info * vinfo,stmt_vec_info stmt_info,slp_tree slp_node,tree rhs,vect_def_type * rhs_dt_out,tree * rhs_vectype_out,vec_load_store_type * vls_type_out)2679 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2680                           slp_tree slp_node, tree rhs,
2681                           vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2682                           vec_load_store_type *vls_type_out)
2683 {
2684   /* In the case this is a store from a constant make sure
2685      native_encode_expr can handle it.  */
2686   if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2687     {
2688       if (dump_enabled_p ())
2689           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2690                                "cannot encode constant as a byte sequence.\n");
2691       return false;
2692     }
2693 
2694   unsigned op_no = 0;
2695   if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2696     {
2697       if (gimple_call_internal_p (call)
2698             && internal_store_fn_p (gimple_call_internal_fn (call)))
2699           op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
2700     }
2701 
2702   enum vect_def_type rhs_dt;
2703   tree rhs_vectype;
2704   slp_tree slp_op;
2705   if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
2706                                  &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2707     {
2708       if (dump_enabled_p ())
2709           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2710                                "use not simple.\n");
2711       return false;
2712     }
2713 
2714   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2715   if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2716     {
2717       if (dump_enabled_p ())
2718           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2719                                "incompatible vector types.\n");
2720       return false;
2721     }
2722 
2723   *rhs_dt_out = rhs_dt;
2724   *rhs_vectype_out = rhs_vectype;
2725   if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2726     *vls_type_out = VLS_STORE_INVARIANT;
2727   else
2728     *vls_type_out = VLS_STORE;
2729   return true;
2730 }
2731 
2732 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2733    Note that we support masks with floating-point type, in which case the
2734    floats are interpreted as a bitmask.  */
2735 
2736 static tree
vect_build_all_ones_mask(vec_info * vinfo,stmt_vec_info stmt_info,tree masktype)2737 vect_build_all_ones_mask (vec_info *vinfo,
2738                                 stmt_vec_info stmt_info, tree masktype)
2739 {
2740   if (TREE_CODE (masktype) == INTEGER_TYPE)
2741     return build_int_cst (masktype, -1);
2742   else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2743     {
2744       tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2745       mask = build_vector_from_val (masktype, mask);
2746       return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2747     }
2748   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2749     {
2750       REAL_VALUE_TYPE r;
2751       long tmp[6];
2752       for (int j = 0; j < 6; ++j)
2753           tmp[j] = -1;
2754       real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2755       tree mask = build_real (TREE_TYPE (masktype), r);
2756       mask = build_vector_from_val (masktype, mask);
2757       return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2758     }
2759   gcc_unreachable ();
2760 }
2761 
2762 /* Build an all-zero merge value of type VECTYPE while vectorizing
2763    STMT_INFO as a gather load.  */
2764 
2765 static tree
vect_build_zero_merge_argument(vec_info * vinfo,stmt_vec_info stmt_info,tree vectype)2766 vect_build_zero_merge_argument (vec_info *vinfo,
2767                                         stmt_vec_info stmt_info, tree vectype)
2768 {
2769   tree merge;
2770   if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2771     merge = build_int_cst (TREE_TYPE (vectype), 0);
2772   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2773     {
2774       REAL_VALUE_TYPE r;
2775       long tmp[6];
2776       for (int j = 0; j < 6; ++j)
2777           tmp[j] = 0;
2778       real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2779       merge = build_real (TREE_TYPE (vectype), r);
2780     }
2781   else
2782     gcc_unreachable ();
2783   merge = build_vector_from_val (vectype, merge);
2784   return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2785 }
2786 
2787 /* Build a gather load call while vectorizing STMT_INFO.  Insert new
2788    instructions before GSI and add them to VEC_STMT.  GS_INFO describes
2789    the gather load operation.  If the load is conditional, MASK is the
2790    unvectorized condition and MASK_DT is its definition type, otherwise
2791    MASK is null.  */
2792 
2793 static void
vect_build_gather_load_calls(vec_info * vinfo,stmt_vec_info stmt_info,gimple_stmt_iterator * gsi,gimple ** vec_stmt,gather_scatter_info * gs_info,tree mask)2794 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2795                                     gimple_stmt_iterator *gsi,
2796                                     gimple **vec_stmt,
2797                                     gather_scatter_info *gs_info,
2798                                     tree mask)
2799 {
2800   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2801   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2802   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2803   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2804   int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2805   edge pe = loop_preheader_edge (loop);
2806   enum { NARROW, NONE, WIDEN } modifier;
2807   poly_uint64 gather_off_nunits
2808     = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2809 
2810   tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2811   tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2812   tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2813   tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2814   tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2815   tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2816   tree scaletype = TREE_VALUE (arglist);
2817   tree real_masktype = masktype;
2818   gcc_checking_assert (types_compatible_p (srctype, rettype)
2819                            && (!mask
2820                                  || TREE_CODE (masktype) == INTEGER_TYPE
2821                                  || types_compatible_p (srctype, masktype)));
2822   if (mask)
2823     masktype = truth_type_for (srctype);
2824 
2825   tree mask_halftype = masktype;
2826   tree perm_mask = NULL_TREE;
2827   tree mask_perm_mask = NULL_TREE;
2828   if (known_eq (nunits, gather_off_nunits))
2829     modifier = NONE;
2830   else if (known_eq (nunits * 2, gather_off_nunits))
2831     {
2832       modifier = WIDEN;
2833 
2834       /* Currently widening gathers and scatters are only supported for
2835            fixed-length vectors.  */
2836       int count = gather_off_nunits.to_constant ();
2837       vec_perm_builder sel (count, count, 1);
2838       for (int i = 0; i < count; ++i)
2839           sel.quick_push (i | (count / 2));
2840 
2841       vec_perm_indices indices (sel, 1, count);
2842       perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2843                                                         indices);
2844     }
2845   else if (known_eq (nunits, gather_off_nunits * 2))
2846     {
2847       modifier = NARROW;
2848 
2849       /* Currently narrowing gathers and scatters are only supported for
2850            fixed-length vectors.  */
2851       int count = nunits.to_constant ();
2852       vec_perm_builder sel (count, count, 1);
2853       sel.quick_grow (count);
2854       for (int i = 0; i < count; ++i)
2855           sel[i] = i < count / 2 ? i : i + count / 2;
2856       vec_perm_indices indices (sel, 2, count);
2857       perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2858 
2859       ncopies *= 2;
2860 
2861       if (mask && VECTOR_TYPE_P (real_masktype))
2862           {
2863             for (int i = 0; i < count; ++i)
2864               sel[i] = i | (count / 2);
2865             indices.new_vector (sel, 2, count);
2866             mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2867           }
2868       else if (mask)
2869           mask_halftype = truth_type_for (gs_info->offset_vectype);
2870     }
2871   else
2872     gcc_unreachable ();
2873 
2874   tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2875   tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2876 
2877   tree ptr = fold_convert (ptrtype, gs_info->base);
2878   if (!is_gimple_min_invariant (ptr))
2879     {
2880       gimple_seq seq;
2881       ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2882       basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2883       gcc_assert (!new_bb);
2884     }
2885 
2886   tree scale = build_int_cst (scaletype, gs_info->scale);
2887 
2888   tree vec_oprnd0 = NULL_TREE;
2889   tree vec_mask = NULL_TREE;
2890   tree src_op = NULL_TREE;
2891   tree mask_op = NULL_TREE;
2892   tree prev_res = NULL_TREE;
2893 
2894   if (!mask)
2895     {
2896       src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2897       mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2898     }
2899 
2900   auto_vec<tree> vec_oprnds0;
2901   auto_vec<tree> vec_masks;
2902   vect_get_vec_defs_for_operand (vinfo, stmt_info,
2903                                          modifier == WIDEN ? ncopies / 2 : ncopies,
2904                                          gs_info->offset, &vec_oprnds0);
2905   if (mask)
2906     vect_get_vec_defs_for_operand (vinfo, stmt_info,
2907                                            modifier == NARROW ? ncopies / 2 : ncopies,
2908                                            mask, &vec_masks, masktype);
2909   for (int j = 0; j < ncopies; ++j)
2910     {
2911       tree op, var;
2912       if (modifier == WIDEN && (j & 1))
2913           op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2914                                            perm_mask, stmt_info, gsi);
2915       else
2916           op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
2917 
2918       if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2919           {
2920             gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2921                                         TYPE_VECTOR_SUBPARTS (idxtype)));
2922             var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2923             op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2924             gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2925             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2926             op = var;
2927           }
2928 
2929       if (mask)
2930           {
2931             if (mask_perm_mask && (j & 1))
2932               mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2933                                                       mask_perm_mask, stmt_info, gsi);
2934             else
2935               {
2936                 if (modifier == NARROW)
2937                     {
2938                       if ((j & 1) == 0)
2939                         vec_mask = vec_masks[j / 2];
2940                     }
2941                 else
2942                     vec_mask = vec_masks[j];
2943 
2944                 mask_op = vec_mask;
2945                 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2946                     {
2947                       poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2948                       poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2949                       gcc_assert (known_eq (sub1, sub2));
2950                       var = vect_get_new_ssa_name (masktype, vect_simple_var);
2951                       mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2952                       gassign *new_stmt
2953                         = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2954                       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2955                       mask_op = var;
2956                     }
2957               }
2958             if (modifier == NARROW && !VECTOR_TYPE_P (real_masktype))
2959               {
2960                 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2961                 gassign *new_stmt
2962                     = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2963                                                                 : VEC_UNPACK_LO_EXPR,
2964                                                mask_op);
2965                 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2966                 mask_op = var;
2967               }
2968             src_op = mask_op;
2969           }
2970 
2971       tree mask_arg = mask_op;
2972       if (masktype != real_masktype)
2973           {
2974             tree utype, optype = TREE_TYPE (mask_op);
2975             if (VECTOR_TYPE_P (real_masktype)
2976                 || TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2977               utype = real_masktype;
2978             else
2979               utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2980             var = vect_get_new_ssa_name (utype, vect_scalar_var);
2981             mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2982             gassign *new_stmt
2983               = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2984             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2985             mask_arg = var;
2986             if (!useless_type_conversion_p (real_masktype, utype))
2987               {
2988                 gcc_assert (TYPE_PRECISION (utype)
2989                                 <= TYPE_PRECISION (real_masktype));
2990                 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2991                 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2992                 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2993                 mask_arg = var;
2994               }
2995             src_op = build_zero_cst (srctype);
2996           }
2997       gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2998                                                       mask_arg, scale);
2999 
3000       if (!useless_type_conversion_p (vectype, rettype))
3001           {
3002             gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
3003                                         TYPE_VECTOR_SUBPARTS (rettype)));
3004             op = vect_get_new_ssa_name (rettype, vect_simple_var);
3005             gimple_call_set_lhs (new_stmt, op);
3006             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3007             var = make_ssa_name (vec_dest);
3008             op = build1 (VIEW_CONVERT_EXPR, vectype, op);
3009             new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
3010             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3011           }
3012       else
3013           {
3014             var = make_ssa_name (vec_dest, new_stmt);
3015             gimple_call_set_lhs (new_stmt, var);
3016             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3017           }
3018 
3019       if (modifier == NARROW)
3020           {
3021             if ((j & 1) == 0)
3022               {
3023                 prev_res = var;
3024                 continue;
3025               }
3026             var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
3027                                               stmt_info, gsi);
3028             new_stmt = SSA_NAME_DEF_STMT (var);
3029           }
3030 
3031       STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3032     }
3033   *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3034 }
3035 
3036 /* Prepare the base and offset in GS_INFO for vectorization.
3037    Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
3038    to the vectorized offset argument for the first copy of STMT_INFO.
3039    STMT_INFO is the statement described by GS_INFO and LOOP is the
3040    containing loop.  */
3041 
3042 static void
vect_get_gather_scatter_ops(loop_vec_info loop_vinfo,class loop * loop,stmt_vec_info stmt_info,slp_tree slp_node,gather_scatter_info * gs_info,tree * dataref_ptr,vec<tree> * vec_offset)3043 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
3044                                    class loop *loop, stmt_vec_info stmt_info,
3045                                    slp_tree slp_node, gather_scatter_info *gs_info,
3046                                    tree *dataref_ptr, vec<tree> *vec_offset)
3047 {
3048   gimple_seq stmts = NULL;
3049   *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
3050   if (stmts != NULL)
3051     {
3052       basic_block new_bb;
3053       edge pe = loop_preheader_edge (loop);
3054       new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3055       gcc_assert (!new_bb);
3056     }
3057   if (slp_node)
3058     vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
3059   else
3060     {
3061       unsigned ncopies
3062           = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
3063       vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
3064                                              gs_info->offset, vec_offset,
3065                                              gs_info->offset_vectype);
3066     }
3067 }
3068 
3069 /* Prepare to implement a grouped or strided load or store using
3070    the gather load or scatter store operation described by GS_INFO.
3071    STMT_INFO is the load or store statement.
3072 
3073    Set *DATAREF_BUMP to the amount that should be added to the base
3074    address after each copy of the vectorized statement.  Set *VEC_OFFSET
3075    to an invariant offset vector in which element I has the value
3076    I * DR_STEP / SCALE.  */
3077 
3078 static void
vect_get_strided_load_store_ops(stmt_vec_info stmt_info,loop_vec_info loop_vinfo,gather_scatter_info * gs_info,tree * dataref_bump,tree * vec_offset)3079 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
3080                                          loop_vec_info loop_vinfo,
3081                                          gather_scatter_info *gs_info,
3082                                          tree *dataref_bump, tree *vec_offset)
3083 {
3084   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3085   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3086 
3087   tree bump = size_binop (MULT_EXPR,
3088                                 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3089                                 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
3090   *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
3091 
3092   /* The offset given in GS_INFO can have pointer type, so use the element
3093      type of the vector instead.  */
3094   tree offset_type = TREE_TYPE (gs_info->offset_vectype);
3095 
3096   /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type.  */
3097   tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
3098                                 ssize_int (gs_info->scale));
3099   step = fold_convert (offset_type, step);
3100 
3101   /* Create {0, X, X*2, X*3, ...}.  */
3102   tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
3103                                    build_zero_cst (offset_type), step);
3104   *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
3105 }
3106 
3107 /* Return the amount that should be added to a vector pointer to move
3108    to the next or previous copy of AGGR_TYPE.  DR_INFO is the data reference
3109    being vectorized and MEMORY_ACCESS_TYPE describes the type of
3110    vectorization.  */
3111 
3112 static tree
vect_get_data_ptr_increment(vec_info * vinfo,dr_vec_info * dr_info,tree aggr_type,vect_memory_access_type memory_access_type)3113 vect_get_data_ptr_increment (vec_info *vinfo,
3114                                    dr_vec_info *dr_info, tree aggr_type,
3115                                    vect_memory_access_type memory_access_type)
3116 {
3117   if (memory_access_type == VMAT_INVARIANT)
3118     return size_zero_node;
3119 
3120   tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3121   tree step = vect_dr_behavior (vinfo, dr_info)->step;
3122   if (tree_int_cst_sgn (step) == -1)
3123     iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3124   return iv_step;
3125 }
3126 
3127 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}.  */
3128 
3129 static bool
vectorizable_bswap(vec_info * vinfo,stmt_vec_info stmt_info,gimple_stmt_iterator * gsi,gimple ** vec_stmt,slp_tree slp_node,slp_tree * slp_op,tree vectype_in,stmt_vector_for_cost * cost_vec)3130 vectorizable_bswap (vec_info *vinfo,
3131                         stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3132                         gimple **vec_stmt, slp_tree slp_node,
3133                         slp_tree *slp_op,
3134                         tree vectype_in, stmt_vector_for_cost *cost_vec)
3135 {
3136   tree op, vectype;
3137   gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3138   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3139   unsigned ncopies;
3140 
3141   op = gimple_call_arg (stmt, 0);
3142   vectype = STMT_VINFO_VECTYPE (stmt_info);
3143   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3144 
3145   /* Multiple types in SLP are handled by creating the appropriate number of
3146      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
3147      case of SLP.  */
3148   if (slp_node)
3149     ncopies = 1;
3150   else
3151     ncopies = vect_get_num_copies (loop_vinfo, vectype);
3152 
3153   gcc_assert (ncopies >= 1);
3154 
3155   tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3156   if (! char_vectype)
3157     return false;
3158 
3159   poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3160   unsigned word_bytes;
3161   if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3162     return false;
3163 
3164   /* The encoding uses one stepped pattern for each byte in the word.  */
3165   vec_perm_builder elts (num_bytes, word_bytes, 3);
3166   for (unsigned i = 0; i < 3; ++i)
3167     for (unsigned j = 0; j < word_bytes; ++j)
3168       elts.quick_push ((i + 1) * word_bytes - j - 1);
3169 
3170   vec_perm_indices indices (elts, 1, num_bytes);
3171   if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
3172     return false;
3173 
3174   if (! vec_stmt)
3175     {
3176       if (slp_node
3177             && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3178           {
3179             if (dump_enabled_p ())
3180               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3181                                    "incompatible vector types for invariants\n");
3182             return false;
3183           }
3184 
3185       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3186       DUMP_VECT_SCOPE ("vectorizable_bswap");
3187       record_stmt_cost (cost_vec,
3188                               1, vector_stmt, stmt_info, 0, vect_prologue);
3189       record_stmt_cost (cost_vec,
3190                               slp_node
3191                               ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3192                               vec_perm, stmt_info, 0, vect_body);
3193       return true;
3194     }
3195 
3196   tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3197 
3198   /* Transform.  */
3199   vec<tree> vec_oprnds = vNULL;
3200   vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3201                          op, &vec_oprnds);
3202   /* Arguments are ready. create the new vector stmt.  */
3203   unsigned i;
3204   tree vop;
3205   FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3206     {
3207       gimple *new_stmt;
3208       tree tem = make_ssa_name (char_vectype);
3209       new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3210                                                                char_vectype, vop));
3211       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3212       tree tem2 = make_ssa_name (char_vectype);
3213       new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3214                                               tem, tem, bswap_vconst);
3215       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3216       tem = make_ssa_name (vectype);
3217       new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3218                                                                vectype, tem2));
3219       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3220       if (slp_node)
3221           SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3222       else
3223           STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3224     }
3225 
3226   if (!slp_node)
3227     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3228 
3229   vec_oprnds.release ();
3230   return true;
3231 }
3232 
3233 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3234    integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3235    in a single step.  On success, store the binary pack code in
3236    *CONVERT_CODE.  */
3237 
3238 static bool
simple_integer_narrowing(tree vectype_out,tree vectype_in,tree_code * convert_code)3239 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3240                                 tree_code *convert_code)
3241 {
3242   if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3243       || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3244     return false;
3245 
3246   tree_code code;
3247   int multi_step_cvt = 0;
3248   auto_vec <tree, 8> interm_types;
3249   if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3250                                                   &code, &multi_step_cvt, &interm_types)
3251       || multi_step_cvt)
3252     return false;
3253 
3254   *convert_code = code;
3255   return true;
3256 }
3257 
3258 /* Function vectorizable_call.
3259 
3260    Check if STMT_INFO performs a function call that can be vectorized.
3261    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3262    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3263    Return true if STMT_INFO is vectorizable in this way.  */
3264 
3265 static bool
vectorizable_call(vec_info * vinfo,stmt_vec_info stmt_info,gimple_stmt_iterator * gsi,gimple ** vec_stmt,slp_tree slp_node,stmt_vector_for_cost * cost_vec)3266 vectorizable_call (vec_info *vinfo,
3267                        stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3268                        gimple **vec_stmt, slp_tree slp_node,
3269                        stmt_vector_for_cost *cost_vec)
3270 {
3271   gcall *stmt;
3272   tree vec_dest;
3273   tree scalar_dest;
3274   tree op;
3275   tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3276   tree vectype_out, vectype_in;
3277   poly_uint64 nunits_in;
3278   poly_uint64 nunits_out;
3279   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3280   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3281   tree fndecl, new_temp, rhs_type;
3282   enum vect_def_type dt[4]
3283     = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3284           vect_unknown_def_type };
3285   tree vectypes[ARRAY_SIZE (dt)] = {};
3286   slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3287   int ndts = ARRAY_SIZE (dt);
3288   int ncopies, j;
3289   auto_vec<tree, 8> vargs;
3290   enum { NARROW, NONE, WIDEN } modifier;
3291   size_t i, nargs;
3292   tree lhs;
3293 
3294   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3295     return false;
3296 
3297   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3298       && ! vec_stmt)
3299     return false;
3300 
3301   /* Is STMT_INFO a vectorizable call?   */
3302   stmt = dyn_cast <gcall *> (stmt_info->stmt);
3303   if (!stmt)
3304     return false;
3305 
3306   if (gimple_call_internal_p (stmt)
3307       && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3308             || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3309     /* Handled by vectorizable_load and vectorizable_store.  */
3310     return false;
3311 
3312   if (gimple_call_lhs (stmt) == NULL_TREE
3313       || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3314     return false;
3315 
3316   gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3317 
3318   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3319 
3320   /* Process function arguments.  */
3321   rhs_type = NULL_TREE;
3322   vectype_in = NULL_TREE;
3323   nargs = gimple_call_num_args (stmt);
3324 
3325   /* Bail out if the function has more than four arguments, we do not have
3326      interesting builtin functions to vectorize with more than two arguments
3327      except for fma.  No arguments is also not good.  */
3328   if (nargs == 0 || nargs > 4)
3329     return false;
3330 
3331   /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic.  */
3332   combined_fn cfn = gimple_call_combined_fn (stmt);
3333   if (cfn == CFN_GOMP_SIMD_LANE)
3334     {
3335       nargs = 0;
3336       rhs_type = unsigned_type_node;
3337     }
3338 
3339   int mask_opno = -1;
3340   if (internal_fn_p (cfn))
3341     mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3342 
3343   for (i = 0; i < nargs; i++)
3344     {
3345       if ((int) i == mask_opno)
3346           {
3347             if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
3348                                                &op, &slp_op[i], &dt[i], &vectypes[i]))
3349               return false;
3350             continue;
3351           }
3352 
3353       if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3354                                      i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3355           {
3356             if (dump_enabled_p ())
3357               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3358                                    "use not simple.\n");
3359             return false;
3360           }
3361 
3362       /* We can only handle calls with arguments of the same type.  */
3363       if (rhs_type
3364             && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3365           {
3366             if (dump_enabled_p ())
3367               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3368                              "argument types differ.\n");
3369             return false;
3370           }
3371       if (!rhs_type)
3372           rhs_type = TREE_TYPE (op);
3373 
3374       if (!vectype_in)
3375           vectype_in = vectypes[i];
3376       else if (vectypes[i]
3377                  && !types_compatible_p (vectypes[i], vectype_in))
3378           {
3379             if (dump_enabled_p ())
3380               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3381                              "argument vector types differ.\n");
3382             return false;
3383           }
3384     }
3385   /* If all arguments are external or constant defs, infer the vector type
3386      from the scalar type.  */
3387   if (!vectype_in)
3388     vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3389   if (vec_stmt)
3390     gcc_assert (vectype_in);
3391   if (!vectype_in)
3392     {
3393       if (dump_enabled_p ())
3394           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3395                                "no vectype for scalar type %T\n", rhs_type);
3396 
3397       return false;
3398     }
3399   /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3400      just mixtures of nunits.  E.g. DI->SI versions of __builtin_ctz*
3401      are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3402      by a pack of the two vectors into an SI vector.  We would need
3403      separate code to handle direct VnDI->VnSI IFN_CTZs.  */
3404   if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3405     {
3406       if (dump_enabled_p ())
3407           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3408                                "mismatched vector sizes %T and %T\n",
3409                                vectype_in, vectype_out);
3410       return false;
3411     }
3412 
3413   if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3414       != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3415     {
3416       if (dump_enabled_p ())
3417           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3418                                "mixed mask and nonmask vector types\n");
3419       return false;
3420     }
3421 
3422   if (vect_emulated_vector_p (vectype_in) || vect_emulated_vector_p (vectype_out))
3423   {
3424       if (dump_enabled_p ())
3425           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3426                                "use emulated vector type for call\n");
3427       return false;
3428   }
3429 
3430   /* FORNOW */
3431   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3432   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3433   if (known_eq (nunits_in * 2, nunits_out))
3434     modifier = NARROW;
3435   else if (known_eq (nunits_out, nunits_in))
3436     modifier = NONE;
3437   else if (known_eq (nunits_out * 2, nunits_in))
3438     modifier = WIDEN;
3439   else
3440     return false;
3441 
3442   /* We only handle functions that do not read or clobber memory.  */
3443   if (gimple_vuse (stmt))
3444     {
3445       if (dump_enabled_p ())
3446           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3447                                "function reads from or writes to memory.\n");
3448       return false;
3449     }
3450 
3451   /* For now, we only vectorize functions if a target specific builtin
3452      is available.  TODO -- in some cases, it might be profitable to
3453      insert the calls for pieces of the vector, in order to be able
3454      to vectorize other operations in the loop.  */
3455   fndecl = NULL_TREE;
3456   internal_fn ifn = IFN_LAST;
3457   tree callee = gimple_call_fndecl (stmt);
3458 
3459   /* First try using an internal function.  */
3460   tree_code convert_code = ERROR_MARK;
3461   if (cfn != CFN_LAST
3462       && (modifier == NONE
3463             || (modifier == NARROW
3464                 && simple_integer_narrowing (vectype_out, vectype_in,
3465                                                      &convert_code))))
3466     ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3467                                                     vectype_in);
3468 
3469   /* If that fails, try asking for a target-specific built-in function.  */
3470   if (ifn == IFN_LAST)
3471     {
3472       if (cfn != CFN_LAST)
3473           fndecl = targetm.vectorize.builtin_vectorized_function
3474             (cfn, vectype_out, vectype_in);
3475       else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3476           fndecl = targetm.vectorize.builtin_md_vectorized_function
3477             (callee, vectype_out, vectype_in);
3478     }
3479 
3480   if (ifn == IFN_LAST && !fndecl)
3481     {
3482       if (cfn == CFN_GOMP_SIMD_LANE
3483             && !slp_node
3484             && loop_vinfo
3485             && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3486             && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3487             && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3488                == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3489           {
3490             /* We can handle IFN_GOMP_SIMD_LANE by returning a
3491                { 0, 1, 2, ... vf - 1 } vector.  */
3492             gcc_assert (nargs == 0);
3493           }
3494       else if (modifier == NONE
3495                  && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3496                        || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3497                        || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3498                        || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3499           return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3500                                            slp_op, vectype_in, cost_vec);
3501       else
3502           {
3503             if (dump_enabled_p ())
3504               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3505                                    "function is not vectorizable.\n");
3506             return false;
3507           }
3508     }
3509 
3510   if (slp_node)
3511     ncopies = 1;
3512   else if (modifier == NARROW && ifn == IFN_LAST)
3513     ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3514   else
3515     ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3516 
3517   /* Sanity check: make sure that at least one copy of the vectorized stmt
3518      needs to be generated.  */
3519   gcc_assert (ncopies >= 1);
3520 
3521   int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
3522   internal_fn cond_fn = get_conditional_internal_fn (ifn);
3523   vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3524   if (!vec_stmt) /* transformation not required.  */
3525     {
3526       if (slp_node)
3527           for (i = 0; i < nargs; ++i)
3528             if (!vect_maybe_update_slp_op_vectype (slp_op[i],
3529                                                              vectypes[i]
3530                                                              ? vectypes[i] : vectype_in))
3531               {
3532                 if (dump_enabled_p ())
3533                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3534                                          "incompatible vector types for invariants\n");
3535                 return false;
3536               }
3537       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3538       DUMP_VECT_SCOPE ("vectorizable_call");
3539       vect_model_simple_cost (vinfo, stmt_info,
3540                                     ncopies, dt, ndts, slp_node, cost_vec);
3541       if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3542           record_stmt_cost (cost_vec, ncopies / 2,
3543                                 vec_promote_demote, stmt_info, 0, vect_body);
3544 
3545       if (loop_vinfo
3546             && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
3547             && (reduc_idx >= 0 || mask_opno >= 0))
3548           {
3549             if (reduc_idx >= 0
3550                 && (cond_fn == IFN_LAST
3551                       || !direct_internal_fn_supported_p (cond_fn, vectype_out,
3552                                                                   OPTIMIZE_FOR_SPEED)))
3553               {
3554                 if (dump_enabled_p ())
3555                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3556                                          "can't use a fully-masked loop because no"
3557                                          " conditional operation is available.\n");
3558                 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
3559               }
3560             else
3561               {
3562                 unsigned int nvectors
3563                     = (slp_node
3564                        ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3565                        : ncopies);
3566                 tree scalar_mask = NULL_TREE;
3567                 if (mask_opno >= 0)
3568                     scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3569                 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3570                                              vectype_out, scalar_mask);
3571               }
3572           }
3573       return true;
3574     }
3575 
3576   /* Transform.  */
3577 
3578   if (dump_enabled_p ())
3579     dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3580 
3581   /* Handle def.  */
3582   scalar_dest = gimple_call_lhs (stmt);
3583   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3584 
3585   bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3586   unsigned int vect_nargs = nargs;
3587   if (masked_loop_p && reduc_idx >= 0)
3588     {
3589       ifn = cond_fn;
3590       vect_nargs += 2;
3591     }
3592 
3593   if (modifier == NONE || ifn != IFN_LAST)
3594     {
3595       tree prev_res = NULL_TREE;
3596       vargs.safe_grow (vect_nargs, true);
3597       auto_vec<vec<tree> > vec_defs (nargs);
3598       for (j = 0; j < ncopies; ++j)
3599           {
3600             /* Build argument list for the vectorized call.  */
3601             if (slp_node)
3602               {
3603                 vec<tree> vec_oprnds0;
3604 
3605                 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3606                 vec_oprnds0 = vec_defs[0];
3607 
3608                 /* Arguments are ready.  Create the new vector stmt.  */
3609                 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3610                     {
3611                       int varg = 0;
3612                       if (masked_loop_p && reduc_idx >= 0)
3613                         {
3614                           unsigned int vec_num = vec_oprnds0.length ();
3615                           /* Always true for SLP.  */
3616                           gcc_assert (ncopies == 1);
3617                           vargs[varg++] = vect_get_loop_mask (gsi, masks, vec_num,
3618                                                                         vectype_out, i);
3619                         }
3620                       size_t k;
3621                       for (k = 0; k < nargs; k++)
3622                         {
3623                           vec<tree> vec_oprndsk = vec_defs[k];
3624                           vargs[varg++] = vec_oprndsk[i];
3625                         }
3626                       if (masked_loop_p && reduc_idx >= 0)
3627                         vargs[varg++] = vargs[reduc_idx + 1];
3628                       gimple *new_stmt;
3629                       if (modifier == NARROW)
3630                         {
3631                           /* We don't define any narrowing conditional functions
3632                                at present.  */
3633                           gcc_assert (mask_opno < 0);
3634                           tree half_res = make_ssa_name (vectype_in);
3635                           gcall *call
3636                               = gimple_build_call_internal_vec (ifn, vargs);
3637                           gimple_call_set_lhs (call, half_res);
3638                           gimple_call_set_nothrow (call, true);
3639                           vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3640                           if ((i & 1) == 0)
3641                               {
3642                                 prev_res = half_res;
3643                                 continue;
3644                               }
3645                           new_temp = make_ssa_name (vec_dest);
3646                           new_stmt = gimple_build_assign (new_temp, convert_code,
3647                                                                   prev_res, half_res);
3648                           vect_finish_stmt_generation (vinfo, stmt_info,
3649                                                                new_stmt, gsi);
3650                         }
3651                       else
3652                         {
3653                           if (mask_opno >= 0 && masked_loop_p)
3654                               {
3655                                 unsigned int vec_num = vec_oprnds0.length ();
3656                                 /* Always true for SLP.  */
3657                                 gcc_assert (ncopies == 1);
3658                                 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3659                                                                         vectype_out, i);
3660                                 vargs[mask_opno] = prepare_vec_mask
3661                                   (loop_vinfo, TREE_TYPE (mask), mask,
3662                                    vargs[mask_opno], gsi);
3663                               }
3664 
3665                           gcall *call;
3666                           if (ifn != IFN_LAST)
3667                               call = gimple_build_call_internal_vec (ifn, vargs);
3668                           else
3669                               call = gimple_build_call_vec (fndecl, vargs);
3670                           new_temp = make_ssa_name (vec_dest, call);
3671                           gimple_call_set_lhs (call, new_temp);
3672                           gimple_call_set_nothrow (call, true);
3673                           vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3674                           new_stmt = call;
3675                         }
3676                       SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3677                     }
3678                 continue;
3679               }
3680 
3681             int varg = 0;
3682             if (masked_loop_p && reduc_idx >= 0)
3683               vargs[varg++] = vect_get_loop_mask (gsi, masks, ncopies,
3684                                                             vectype_out, j);
3685             for (i = 0; i < nargs; i++)
3686               {
3687                 op = gimple_call_arg (stmt, i);
3688                 if (j == 0)
3689                     {
3690                       vec_defs.quick_push (vNULL);
3691                       vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3692                                                              op, &vec_defs[i],
3693                                                              vectypes[i]);
3694                     }
3695                 vargs[varg++] = vec_defs[i][j];
3696               }
3697             if (masked_loop_p && reduc_idx >= 0)
3698               vargs[varg++] = vargs[reduc_idx + 1];
3699 
3700             if (mask_opno >= 0 && masked_loop_p)
3701               {
3702                 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3703                                                         vectype_out, j);
3704                 vargs[mask_opno]
3705                     = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
3706                                             vargs[mask_opno], gsi);
3707               }
3708 
3709             gimple *new_stmt;
3710             if (cfn == CFN_GOMP_SIMD_LANE)
3711               {
3712                 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3713                 tree new_var
3714                     = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3715                 gimple *init_stmt = gimple_build_assign (new_var, cst);
3716                 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3717                 new_temp = make_ssa_name (vec_dest);
3718                 new_stmt = gimple_build_assign (new_temp, new_var);
3719                 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3720               }
3721             else if (modifier == NARROW)
3722               {
3723                 /* We don't define any narrowing conditional functions at
3724                      present.  */
3725                 gcc_assert (mask_opno < 0);
3726                 tree half_res = make_ssa_name (vectype_in);
3727                 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3728                 gimple_call_set_lhs (call, half_res);
3729                 gimple_call_set_nothrow (call, true);
3730                 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3731                 if ((j & 1) == 0)
3732                     {
3733                       prev_res = half_res;
3734                       continue;
3735                     }
3736                 new_temp = make_ssa_name (vec_dest);
3737                 new_stmt = gimple_build_assign (new_temp, convert_code,
3738                                                         prev_res, half_res);
3739                 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3740               }
3741             else
3742               {
3743                 gcall *call;
3744                 if (ifn != IFN_LAST)
3745                     call = gimple_build_call_internal_vec (ifn, vargs);
3746                 else
3747                     call = gimple_build_call_vec (fndecl, vargs);
3748                 new_temp = make_ssa_name (vec_dest, call);
3749                 gimple_call_set_lhs (call, new_temp);
3750                 gimple_call_set_nothrow (call, true);
3751                 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3752                 new_stmt = call;
3753               }
3754 
3755             if (j == (modifier == NARROW ? 1 : 0))
3756               *vec_stmt = new_stmt;
3757             STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3758           }
3759       for (i = 0; i < nargs; i++)
3760           {
3761             vec<tree> vec_oprndsi = vec_defs[i];
3762             vec_oprndsi.release ();
3763           }
3764     }
3765   else if (modifier == NARROW)
3766     {
3767       auto_vec<vec<tree> > vec_defs (nargs);
3768       /* We don't define any narrowing conditional functions at present.  */
3769       gcc_assert (mask_opno < 0);
3770       for (j = 0; j < ncopies; ++j)
3771           {
3772             /* Build argument list for the vectorized call.  */
3773             if (j == 0)
3774               vargs.create (nargs * 2);
3775             else
3776               vargs.truncate (0);
3777 
3778             if (slp_node)
3779               {
3780                 vec<tree> vec_oprnds0;
3781 
3782                 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3783                 vec_oprnds0 = vec_defs[0];
3784 
3785                 /* Arguments are ready.  Create the new vector stmt.  */
3786                 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3787                     {
3788                       size_t k;
3789                       vargs.truncate (0);
3790                       for (k = 0; k < nargs; k++)
3791                         {
3792                           vec<tree> vec_oprndsk = vec_defs[k];
3793                           vargs.quick_push (vec_oprndsk[i]);
3794                           vargs.quick_push (vec_oprndsk[i + 1]);
3795                         }
3796                       gcall *call;
3797                       if (ifn != IFN_LAST)
3798                         call = gimple_build_call_internal_vec (ifn, vargs);
3799                       else
3800                         call = gimple_build_call_vec (fndecl, vargs);
3801                       new_temp = make_ssa_name (vec_dest, call);
3802                       gimple_call_set_lhs (call, new_temp);
3803                       gimple_call_set_nothrow (call, true);
3804                       vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3805                       SLP_TREE_VEC_STMTS (slp_node).quick_push (call);
3806                     }
3807                 continue;
3808               }
3809 
3810             for (i = 0; i < nargs; i++)
3811               {
3812                 op = gimple_call_arg (stmt, i);
3813                 if (j == 0)
3814                     {
3815                       vec_defs.quick_push (vNULL);
3816                       vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3817                                                              op, &vec_defs[i], vectypes[i]);
3818                     }
3819                 vec_oprnd0 = vec_defs[i][2*j];
3820                 vec_oprnd1 = vec_defs[i][2*j+1];
3821 
3822                 vargs.quick_push (vec_oprnd0);
3823                 vargs.quick_push (vec_oprnd1);
3824               }
3825 
3826             gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3827             new_temp = make_ssa_name (vec_dest, new_stmt);
3828             gimple_call_set_lhs (new_stmt, new_temp);
3829             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3830 
3831             STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3832           }
3833 
3834       if (!slp_node)
3835           *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3836 
3837       for (i = 0; i < nargs; i++)
3838           {
3839             vec<tree> vec_oprndsi = vec_defs[i];
3840             vec_oprndsi.release ();
3841           }
3842     }
3843   else
3844     /* No current target implements this case.  */
3845     return false;
3846 
3847   vargs.release ();
3848 
3849   /* The call in STMT might prevent it from being removed in dce.
3850      We however cannot remove it here, due to the way the ssa name
3851      it defines is mapped to the new definition.  So just replace
3852      rhs of the statement with something harmless.  */
3853 
3854   if (slp_node)
3855     return true;
3856 
3857   stmt_info = vect_orig_stmt (stmt_info);
3858   lhs = gimple_get_lhs (stmt_info->stmt);
3859 
3860   gassign *new_stmt
3861     = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3862   vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3863 
3864   return true;
3865 }
3866 
3867 
3868 struct simd_call_arg_info
3869 {
3870   tree vectype;
3871   tree op;
3872   HOST_WIDE_INT linear_step;
3873   enum vect_def_type dt;
3874   unsigned int align;
3875   bool simd_lane_linear;
3876 };
3877 
3878 /* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
3879    is linear within simd lane (but not within whole loop), note it in
3880    *ARGINFO.  */
3881 
3882 static void
vect_simd_lane_linear(tree op,class loop * loop,struct simd_call_arg_info * arginfo)3883 vect_simd_lane_linear (tree op, class loop *loop,
3884                            struct simd_call_arg_info *arginfo)
3885 {
3886   gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3887 
3888   if (!is_gimple_assign (def_stmt)
3889       || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3890       || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3891     return;
3892 
3893   tree base = gimple_assign_rhs1 (def_stmt);
3894   HOST_WIDE_INT linear_step = 0;
3895   tree v = gimple_assign_rhs2 (def_stmt);
3896   while (TREE_CODE (v) == SSA_NAME)
3897     {
3898       tree t;
3899       def_stmt = SSA_NAME_DEF_STMT (v);
3900       if (is_gimple_assign (def_stmt))
3901           switch (gimple_assign_rhs_code (def_stmt))
3902             {
3903             case PLUS_EXPR:
3904               t = gimple_assign_rhs2 (def_stmt);
3905               if (linear_step || TREE_CODE (t) != INTEGER_CST)
3906                 return;
3907               base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3908               v = gimple_assign_rhs1 (def_stmt);
3909               continue;
3910             case MULT_EXPR:
3911               t = gimple_assign_rhs2 (def_stmt);
3912               if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3913                 return;
3914               linear_step = tree_to_shwi (t);
3915               v = gimple_assign_rhs1 (def_stmt);
3916               continue;
3917             CASE_CONVERT:
3918               t = gimple_assign_rhs1 (def_stmt);
3919               if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3920                     || (TYPE_PRECISION (TREE_TYPE (v))
3921                         < TYPE_PRECISION (TREE_TYPE (t))))
3922                 return;
3923               if (!linear_step)
3924                 linear_step = 1;
3925               v = t;
3926               continue;
3927             default:
3928               return;
3929             }
3930       else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3931                  && loop->simduid
3932                  && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3933                  && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3934                        == loop->simduid))
3935           {
3936             if (!linear_step)
3937               linear_step = 1;
3938             arginfo->linear_step = linear_step;
3939             arginfo->op = base;
3940             arginfo->simd_lane_linear = true;
3941             return;
3942           }
3943     }
3944 }
3945 
3946 /* Return the number of elements in vector type VECTYPE, which is associated
3947    with a SIMD clone.  At present these vectors always have a constant
3948    length.  */
3949 
3950 static unsigned HOST_WIDE_INT
simd_clone_subparts(tree vectype)3951 simd_clone_subparts (tree vectype)
3952 {
3953   return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3954 }
3955 
3956 /* Function vectorizable_simd_clone_call.
3957 
3958    Check if STMT_INFO performs a function call that can be vectorized
3959    by calling a simd clone of the function.
3960    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3961    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3962    Return true if STMT_INFO is vectorizable in this way.  */
3963 
3964 static bool
vectorizable_simd_clone_call(vec_info * vinfo,stmt_vec_info stmt_info,gimple_stmt_iterator * gsi,gimple ** vec_stmt,slp_tree slp_node,stmt_vector_for_cost *)3965 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3966                                     gimple_stmt_iterator *gsi,
3967                                     gimple **vec_stmt, slp_tree slp_node,
3968                                     stmt_vector_for_cost *)
3969 {
3970   tree vec_dest;
3971   tree scalar_dest;
3972   tree op, type;
3973   tree vec_oprnd0 = NULL_TREE;
3974   tree vectype;
3975   poly_uint64 nunits;
3976   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3977   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3978   class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3979   tree fndecl, new_temp;
3980   int ncopies, j;
3981   auto_vec<simd_call_arg_info> arginfo;
3982   vec<tree> vargs = vNULL;
3983   size_t i, nargs;
3984   tree lhs, rtype, ratype;
3985   vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3986 
3987   /* Is STMT a vectorizable call?   */
3988   gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3989   if (!stmt)
3990     return false;
3991 
3992   fndecl = gimple_call_fndecl (stmt);
3993   if (fndecl == NULL_TREE)
3994     return false;
3995 
3996   struct cgraph_node *node = cgraph_node::get (fndecl);
3997   if (node == NULL || node->simd_clones == NULL)
3998     return false;
3999 
4000   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4001     return false;
4002 
4003   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4004       && ! vec_stmt)
4005     return false;
4006 
4007   if (gimple_call_lhs (stmt)
4008       && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
4009     return false;
4010 
4011   gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
4012 
4013   vectype = STMT_VINFO_VECTYPE (stmt_info);
4014 
4015   if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
4016     return false;
4017 
4018   /* FORNOW */
4019   if (slp_node)
4020     return false;
4021 
4022   /* Process function arguments.  */
4023   nargs = gimple_call_num_args (stmt);
4024 
4025   /* Bail out if the function has zero arguments.  */
4026   if (nargs == 0)
4027     return false;
4028 
4029   arginfo.reserve (nargs, true);
4030 
4031   for (i = 0; i < nargs; i++)
4032     {
4033       simd_call_arg_info thisarginfo;
4034       affine_iv iv;
4035 
4036       thisarginfo.linear_step = 0;
4037       thisarginfo.align = 0;
4038       thisarginfo.op = NULL_TREE;
4039       thisarginfo.simd_lane_linear = false;
4040 
4041       op = gimple_call_arg (stmt, i);
4042       if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
4043                                      &thisarginfo.vectype)
4044             || thisarginfo.dt == vect_uninitialized_def)
4045           {
4046             if (dump_enabled_p ())
4047               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4048                                    "use not simple.\n");
4049             return false;
4050           }
4051 
4052       if (thisarginfo.dt == vect_constant_def
4053             || thisarginfo.dt == vect_external_def)
4054           gcc_assert (thisarginfo.vectype == NULL_TREE);
4055       else
4056           {
4057             gcc_assert (thisarginfo.vectype != NULL_TREE);
4058             if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
4059               {
4060                 if (dump_enabled_p ())
4061                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4062                                          "vector mask arguments are not supported\n");
4063                 return false;
4064               }
4065           }
4066 
4067       /* For linear arguments, the analyze phase should have saved
4068            the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
4069       if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
4070             && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
4071           {
4072             gcc_assert (vec_stmt);
4073             thisarginfo.linear_step
4074               = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
4075             thisarginfo.op
4076               = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
4077             thisarginfo.simd_lane_linear
4078               = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
4079                  == boolean_true_node);
4080             /* If loop has been peeled for alignment, we need to adjust it.  */
4081             tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
4082             tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
4083             if (n1 != n2 && !thisarginfo.simd_lane_linear)
4084               {
4085                 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
4086                 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
4087                 tree opt = TREE_TYPE (thisarginfo.op);
4088                 bias = fold_convert (TREE_TYPE (step), bias);
4089                 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
4090                 thisarginfo.op
4091                     = fold_build2 (POINTER_TYPE_P (opt)
4092                                      ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
4093                                      thisarginfo.op, bias);
4094               }
4095           }
4096       else if (!vec_stmt
4097                  && thisarginfo.dt != vect_constant_def
4098                  && thisarginfo.dt != vect_external_def
4099                  && loop_vinfo
4100                  && TREE_CODE (op) == SSA_NAME
4101                  && simple_iv (loop, loop_containing_stmt (stmt), op,
4102                                    &iv, false)
4103                  && tree_fits_shwi_p (iv.step))
4104           {
4105             thisarginfo.linear_step = tree_to_shwi (iv.step);
4106             thisarginfo.op = iv.base;
4107           }
4108       else if ((thisarginfo.dt == vect_constant_def
4109                     || thisarginfo.dt == vect_external_def)
4110                  && POINTER_TYPE_P (TREE_TYPE (op)))
4111           thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
4112       /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4113            linear too.  */
4114       if (POINTER_TYPE_P (TREE_TYPE (op))
4115             && !thisarginfo.linear_step
4116             && !vec_stmt
4117             && thisarginfo.dt != vect_constant_def
4118             && thisarginfo.dt != vect_external_def
4119             && loop_vinfo
4120             && !slp_node
4121             && TREE_CODE (op) == SSA_NAME)
4122           vect_simd_lane_linear (op, loop, &thisarginfo);
4123 
4124       arginfo.quick_push (thisarginfo);
4125     }
4126 
4127   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4128   if (!vf.is_constant ())
4129     {
4130       if (dump_enabled_p ())
4131           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4132                                "not considering SIMD clones; not yet supported"
4133                                " for variable-width vectors.\n");
4134       return false;
4135     }
4136 
4137   unsigned int badness = 0;
4138   struct cgraph_node *bestn = NULL;
4139   if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
4140     bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
4141   else
4142     for (struct cgraph_node *n = node->simd_clones; n != NULL;
4143            n = n->simdclone->next_clone)
4144       {
4145           unsigned int this_badness = 0;
4146           unsigned int num_calls;
4147           if (!constant_multiple_p (vf, n->simdclone->simdlen, &num_calls)
4148               || n->simdclone->nargs != nargs)
4149             continue;
4150           if (num_calls != 1)
4151             this_badness += exact_log2 (num_calls) * 4096;
4152           if (n->simdclone->inbranch)
4153             this_badness += 8192;
4154           int target_badness = targetm.simd_clone.usable (n);
4155           if (target_badness < 0)
4156             continue;
4157           this_badness += target_badness * 512;
4158           /* FORNOW: Have to add code to add the mask argument.  */
4159           if (n->simdclone->inbranch)
4160             continue;
4161           for (i = 0; i < nargs; i++)
4162             {
4163               switch (n->simdclone->args[i].arg_type)
4164                 {
4165                 case SIMD_CLONE_ARG_TYPE_VECTOR:
4166                     if (!useless_type_conversion_p
4167                               (n->simdclone->args[i].orig_type,
4168                                TREE_TYPE (gimple_call_arg (stmt, i))))
4169                       i = -1;
4170                     else if (arginfo[i].dt == vect_constant_def
4171                                || arginfo[i].dt == vect_external_def
4172                                || arginfo[i].linear_step)
4173                       this_badness += 64;
4174                     break;
4175                 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4176                     if (arginfo[i].dt != vect_constant_def
4177                         && arginfo[i].dt != vect_external_def)
4178                       i = -1;
4179                     break;
4180                 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4181                 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4182                     if (arginfo[i].dt == vect_constant_def
4183                         || arginfo[i].dt == vect_external_def
4184                         || (arginfo[i].linear_step
4185                               != n->simdclone->args[i].linear_step))
4186                       i = -1;
4187                     break;
4188                 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4189                 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4190                 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4191                 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4192                 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4193                 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4194                     /* FORNOW */
4195                     i = -1;
4196                     break;
4197                 case SIMD_CLONE_ARG_TYPE_MASK:
4198                     gcc_unreachable ();
4199                 }
4200               if (i == (size_t) -1)
4201                 break;
4202               if (n->simdclone->args[i].alignment > arginfo[i].align)
4203                 {
4204                     i = -1;
4205                     break;
4206                 }
4207               if (arginfo[i].align)
4208                 this_badness += (exact_log2 (arginfo[i].align)
4209                                      - exact_log2 (n->simdclone->args[i].alignment));
4210             }
4211           if (i == (size_t) -1)
4212             continue;
4213           if (bestn == NULL || this_badness < badness)
4214             {
4215               bestn = n;
4216               badness = this_badness;
4217             }
4218       }
4219 
4220   if (bestn == NULL)
4221     return false;
4222 
4223   for (i = 0; i < nargs; i++)
4224     if ((arginfo[i].dt == vect_constant_def
4225            || arginfo[i].dt == vect_external_def)
4226           && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4227       {
4228           tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
4229           arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4230                                                                         slp_node);
4231           if (arginfo[i].vectype == NULL
4232               || !constant_multiple_p (bestn->simdclone->simdlen,
4233                                              simd_clone_subparts (arginfo[i].vectype)))
4234             return false;
4235       }
4236 
4237   fndecl = bestn->decl;
4238   nunits = bestn->simdclone->simdlen;
4239   ncopies = vector_unroll_factor (vf, nunits);
4240 
4241   /* If the function isn't const, only allow it in simd loops where user
4242      has asserted that at least nunits consecutive iterations can be
4243      performed using SIMD instructions.  */
4244   if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4245       && gimple_vuse (stmt))
4246     return false;
4247 
4248   /* Sanity check: make sure that at least one copy of the vectorized stmt
4249      needs to be generated.  */
4250   gcc_assert (ncopies >= 1);
4251 
4252   if (!vec_stmt) /* transformation not required.  */
4253     {
4254       STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4255       for (i = 0; i < nargs; i++)
4256           if ((bestn->simdclone->args[i].arg_type
4257                == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4258               || (bestn->simdclone->args[i].arg_type
4259                     == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4260             {
4261               STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4262                                                                                           + 1,
4263                                                                                       true);
4264               STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4265               tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4266                            ? size_type_node : TREE_TYPE (arginfo[i].op);
4267               tree ls = build_int_cst (lst, arginfo[i].linear_step);
4268               STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4269               tree sll = arginfo[i].simd_lane_linear
4270                            ? boolean_true_node : boolean_false_node;
4271               STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4272             }
4273       STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4274       DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4275 /*      vect_model_simple_cost (vinfo, stmt_info, ncopies,
4276                                         dt, slp_node, cost_vec); */
4277       return true;
4278     }
4279 
4280   /* Transform.  */
4281 
4282   if (dump_enabled_p ())
4283     dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4284 
4285   /* Handle def.  */
4286   scalar_dest = gimple_call_lhs (stmt);
4287   vec_dest = NULL_TREE;
4288   rtype = NULL_TREE;
4289   ratype = NULL_TREE;
4290   if (scalar_dest)
4291     {
4292       vec_dest = vect_create_destination_var (scalar_dest, vectype);
4293       rtype = TREE_TYPE (TREE_TYPE (fndecl));
4294       if (TREE_CODE (rtype) == ARRAY_TYPE)
4295           {
4296             ratype = rtype;
4297             rtype = TREE_TYPE (ratype);
4298           }
4299     }
4300 
4301   auto_vec<vec<tree> > vec_oprnds;
4302   auto_vec<unsigned> vec_oprnds_i;
4303   vec_oprnds.safe_grow_cleared (nargs, true);
4304   vec_oprnds_i.safe_grow_cleared (nargs, true);
4305   for (j = 0; j < ncopies; ++j)
4306     {
4307       /* Build argument list for the vectorized call.  */
4308       if (j == 0)
4309           vargs.create (nargs);
4310       else
4311           vargs.truncate (0);
4312 
4313       for (i = 0; i < nargs; i++)
4314           {
4315             unsigned int k, l, m, o;
4316             tree atype;
4317             op = gimple_call_arg (stmt, i);
4318             switch (bestn->simdclone->args[i].arg_type)
4319               {
4320               case SIMD_CLONE_ARG_TYPE_VECTOR:
4321                 atype = bestn->simdclone->args[i].vector_type;
4322                 o = vector_unroll_factor (nunits,
4323                                                   simd_clone_subparts (atype));
4324                 for (m = j * o; m < (j + 1) * o; m++)
4325                     {
4326                       if (simd_clone_subparts (atype)
4327                           < simd_clone_subparts (arginfo[i].vectype))
4328                         {
4329                           poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4330                           k = (simd_clone_subparts (arginfo[i].vectype)
4331                                  / simd_clone_subparts (atype));
4332                           gcc_assert ((k & (k - 1)) == 0);
4333                           if (m == 0)
4334                               {
4335                                 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4336                                                                        ncopies * o / k, op,
4337                                                                        &vec_oprnds[i]);
4338                                 vec_oprnds_i[i] = 0;
4339                                 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4340                               }
4341                           else
4342                               {
4343                                 vec_oprnd0 = arginfo[i].op;
4344                                 if ((m & (k - 1)) == 0)
4345                                   vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4346                               }
4347                           arginfo[i].op = vec_oprnd0;
4348                           vec_oprnd0
4349                               = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4350                                           bitsize_int (prec),
4351                                           bitsize_int ((m & (k - 1)) * prec));
4352                           gassign *new_stmt
4353                               = gimple_build_assign (make_ssa_name (atype),
4354                                                          vec_oprnd0);
4355                           vect_finish_stmt_generation (vinfo, stmt_info,
4356                                                                new_stmt, gsi);
4357                           vargs.safe_push (gimple_assign_lhs (new_stmt));
4358                         }
4359                       else
4360                         {
4361                           k = (simd_clone_subparts (atype)
4362                                  / simd_clone_subparts (arginfo[i].vectype));
4363                           gcc_assert ((k & (k - 1)) == 0);
4364                           vec<constructor_elt, va_gc> *ctor_elts;
4365                           if (k != 1)
4366                               vec_alloc (ctor_elts, k);
4367                           else
4368                               ctor_elts = NULL;
4369                           for (l = 0; l < k; l++)
4370                               {
4371                                 if (m == 0 && l == 0)
4372                                   {
4373                                     vect_get_vec_defs_for_operand (vinfo, stmt_info,
4374                                                                            k * o * ncopies,
4375                                                                            op,
4376                                                                            &vec_oprnds[i]);
4377                                     vec_oprnds_i[i] = 0;
4378                                     vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4379                                   }
4380                                 else
4381                                   vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4382                                 arginfo[i].op = vec_oprnd0;
4383                                 if (k == 1)
4384                                   break;
4385                                 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4386                                                               vec_oprnd0);
4387                               }
4388                           if (k == 1)
4389                               if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4390                                                                    atype))
4391                                 {
4392                                   vec_oprnd0
4393                                     = build1 (VIEW_CONVERT_EXPR, atype, vec_oprnd0);
4394                                   gassign *new_stmt
4395                                     = gimple_build_assign (make_ssa_name (atype),
4396                                                                  vec_oprnd0);
4397                                   vect_finish_stmt_generation (vinfo, stmt_info,
4398                                                                        new_stmt, gsi);
4399                                   vargs.safe_push (gimple_assign_lhs (new_stmt));
4400                                 }
4401                               else
4402                                 vargs.safe_push (vec_oprnd0);
4403                           else
4404                               {
4405                                 vec_oprnd0 = build_constructor (atype, ctor_elts);
4406                                 gassign *new_stmt
4407                                   = gimple_build_assign (make_ssa_name (atype),
4408                                                                vec_oprnd0);
4409                                 vect_finish_stmt_generation (vinfo, stmt_info,
4410                                                                    new_stmt, gsi);
4411                                 vargs.safe_push (gimple_assign_lhs (new_stmt));
4412                               }
4413                         }
4414                     }
4415                 break;
4416               case SIMD_CLONE_ARG_TYPE_UNIFORM:
4417                 vargs.safe_push (op);
4418                 break;
4419               case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4420               case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4421                 if (j == 0)
4422                     {
4423                       gimple_seq stmts;
4424                       arginfo[i].op
4425                         = force_gimple_operand (unshare_expr (arginfo[i].op),
4426                                                       &stmts, true, NULL_TREE);
4427                       if (stmts != NULL)
4428                         {
4429                           basic_block new_bb;
4430                           edge pe = loop_preheader_edge (loop);
4431                           new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4432                           gcc_assert (!new_bb);
4433                         }
4434                       if (arginfo[i].simd_lane_linear)
4435                         {
4436                           vargs.safe_push (arginfo[i].op);
4437                           break;
4438                         }
4439                       tree phi_res = copy_ssa_name (op);
4440                       gphi *new_phi = create_phi_node (phi_res, loop->header);
4441                       add_phi_arg (new_phi, arginfo[i].op,
4442                                      loop_preheader_edge (loop), UNKNOWN_LOCATION);
4443                       enum tree_code code
4444                         = POINTER_TYPE_P (TREE_TYPE (op))
4445                           ? POINTER_PLUS_EXPR : PLUS_EXPR;
4446                       tree type = POINTER_TYPE_P (TREE_TYPE (op))
4447                                     ? sizetype : TREE_TYPE (op);
4448                       poly_widest_int cst
4449                         = wi::mul (bestn->simdclone->args[i].linear_step,
4450                                      ncopies * nunits);
4451                       tree tcst = wide_int_to_tree (type, cst);
4452                       tree phi_arg = copy_ssa_name (op);
4453                       gassign *new_stmt
4454                         = gimple_build_assign (phi_arg, code, phi_res, tcst);
4455                       gimple_stmt_iterator si = gsi_after_labels (loop->header);
4456                       gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4457                       add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4458                                      UNKNOWN_LOCATION);
4459                       arginfo[i].op = phi_res;
4460                       vargs.safe_push (phi_res);
4461                     }
4462                 else
4463                     {
4464                       enum tree_code code
4465                         = POINTER_TYPE_P (TREE_TYPE (op))
4466                           ? POINTER_PLUS_EXPR : PLUS_EXPR;
4467                       tree type = POINTER_TYPE_P (TREE_TYPE (op))
4468                                     ? sizetype : TREE_TYPE (op);
4469                       poly_widest_int cst
4470                         = wi::mul (bestn->simdclone->args[i].linear_step,
4471                                      j * nunits);
4472                       tree tcst = wide_int_to_tree (type, cst);
4473                       new_temp = make_ssa_name (TREE_TYPE (op));
4474                       gassign *new_stmt
4475                         = gimple_build_assign (new_temp, code,
4476                                                      arginfo[i].op, tcst);
4477                       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4478                       vargs.safe_push (new_temp);
4479                     }
4480                 break;
4481               case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4482               case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4483               case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4484               case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4485               case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4486               case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4487               default:
4488                 gcc_unreachable ();
4489               }
4490           }
4491 
4492       gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4493       if (vec_dest)
4494           {
4495             gcc_assert (ratype
4496                           || known_eq (simd_clone_subparts (rtype), nunits));
4497             if (ratype)
4498               new_temp = create_tmp_var (ratype);
4499             else if (useless_type_conversion_p (vectype, rtype))
4500               new_temp = make_ssa_name (vec_dest, new_call);
4501             else
4502               new_temp = make_ssa_name (rtype, new_call);
4503             gimple_call_set_lhs (new_call, new_temp);
4504           }
4505       vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4506       gimple *new_stmt = new_call;
4507 
4508       if (vec_dest)
4509           {
4510             if (!multiple_p (simd_clone_subparts (vectype), nunits))
4511               {
4512                 unsigned int k, l;
4513                 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4514                 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4515                 k = vector_unroll_factor (nunits,
4516                                                   simd_clone_subparts (vectype));
4517                 gcc_assert ((k & (k - 1)) == 0);
4518                 for (l = 0; l < k; l++)
4519                     {
4520                       tree t;
4521                       if (ratype)
4522                         {
4523                           t = build_fold_addr_expr (new_temp);
4524                           t = build2 (MEM_REF, vectype, t,
4525                                           build_int_cst (TREE_TYPE (t), l * bytes));
4526                         }
4527                       else
4528                         t = build3 (BIT_FIELD_REF, vectype, new_temp,
4529                                         bitsize_int (prec), bitsize_int (l * prec));
4530                       new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4531                       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4532 
4533                       if (j == 0 && l == 0)
4534                         *vec_stmt = new_stmt;
4535                       STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4536                     }
4537 
4538                 if (ratype)
4539                     vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4540                 continue;
4541               }
4542             else if (!multiple_p (nunits, simd_clone_subparts (vectype)))
4543               {
4544                 unsigned int k = (simd_clone_subparts (vectype)
4545                                         / simd_clone_subparts (rtype));
4546                 gcc_assert ((k & (k - 1)) == 0);
4547                 if ((j & (k - 1)) == 0)
4548                     vec_alloc (ret_ctor_elts, k);
4549                 if (ratype)
4550                     {
4551                       unsigned int m, o;
4552                       o = vector_unroll_factor (nunits,
4553                                                       simd_clone_subparts (rtype));
4554                       for (m = 0; m < o; m++)
4555                         {
4556                           tree tem = build4 (ARRAY_REF, rtype, new_temp,
4557                                                    size_int (m), NULL_TREE, NULL_TREE);
4558                           new_stmt = gimple_build_assign (make_ssa_name (rtype),
4559                                                                   tem);
4560                           vect_finish_stmt_generation (vinfo, stmt_info,
4561                                                                new_stmt, gsi);
4562                           CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4563                                                         gimple_assign_lhs (new_stmt));
4564                         }
4565                       vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4566                     }
4567                 else
4568                     CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4569                 if ((j & (k - 1)) != k - 1)
4570                     continue;
4571                 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4572                 new_stmt
4573                     = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4574                 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4575 
4576                 if ((unsigned) j == k - 1)
4577                     *vec_stmt = new_stmt;
4578                 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4579                 continue;
4580               }
4581             else if (ratype)
4582               {
4583                 tree t = build_fold_addr_expr (new_temp);
4584                 t = build2 (MEM_REF, vectype, t,
4585                                 build_int_cst (TREE_TYPE (t), 0));
4586                 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4587                 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4588                 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4589               }
4590             else if (!useless_type_conversion_p (vectype, rtype))
4591               {
4592                 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4593                 new_stmt
4594                     = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4595                 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4596               }
4597           }
4598 
4599       if (j == 0)
4600           *vec_stmt = new_stmt;
4601       STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4602     }
4603 
4604   for (i = 0; i < nargs; ++i)
4605     {
4606       vec<tree> oprndsi = vec_oprnds[i];
4607       oprndsi.release ();
4608     }
4609   vargs.release ();
4610 
4611   /* The call in STMT might prevent it from being removed in dce.
4612      We however cannot remove it here, due to the way the ssa name
4613      it defines is mapped to the new definition.  So just replace
4614      rhs of the statement with something harmless.  */
4615 
4616   if (slp_node)
4617     return true;
4618 
4619   gimple *new_stmt;
4620   if (scalar_dest)
4621     {
4622       type = TREE_TYPE (scalar_dest);
4623       lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4624       new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4625     }
4626   else
4627     new_stmt = gimple_build_nop ();
4628   vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4629   unlink_stmt_vdef (stmt);
4630 
4631   return true;
4632 }
4633 
4634 
4635 /* Function vect_gen_widened_results_half
4636 
4637    Create a vector stmt whose code, type, number of arguments, and result
4638    variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
4639    VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
4640    In the case that CODE is a CALL_EXPR, this means that a call to DECL
4641    needs to be created (DECL is a function-decl of a target-builtin).
4642    STMT_INFO is the original scalar stmt that we are vectorizing.  */
4643 
4644 static gimple *
vect_gen_widened_results_half(vec_info * vinfo,enum tree_code code,tree vec_oprnd0,tree vec_oprnd1,int op_type,tree vec_dest,gimple_stmt_iterator * gsi,stmt_vec_info stmt_info)4645 vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
4646                                tree vec_oprnd0, tree vec_oprnd1, int op_type,
4647                                      tree vec_dest, gimple_stmt_iterator *gsi,
4648                                      stmt_vec_info stmt_info)
4649 {
4650   gimple *new_stmt;
4651   tree new_temp;
4652 
4653   /* Generate half of the widened result:  */
4654   gcc_assert (op_type == TREE_CODE_LENGTH (code));
4655   if (op_type != binary_op)
4656     vec_oprnd1 = NULL;
4657   new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4658   new_temp = make_ssa_name (vec_dest, new_stmt);
4659   gimple_assign_set_lhs (new_stmt, new_temp);
4660   vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4661 
4662   return new_stmt;
4663 }
4664 
4665 
4666 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4667    For multi-step conversions store the resulting vectors and call the function
4668    recursively.  */
4669 
4670 static void
vect_create_vectorized_demotion_stmts(vec_info * vinfo,vec<tree> * vec_oprnds,int multi_step_cvt,stmt_vec_info stmt_info,vec<tree> & vec_dsts,gimple_stmt_iterator * gsi,slp_tree slp_node,enum tree_code code)4671 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4672                                                int multi_step_cvt,
4673                                                stmt_vec_info stmt_info,
4674                                                vec<tree> &vec_dsts,
4675                                                gimple_stmt_iterator *gsi,
4676                                                slp_tree slp_node, enum tree_code code)
4677 {
4678   unsigned int i;
4679   tree vop0, vop1, new_tmp, vec_dest;
4680 
4681   vec_dest = vec_dsts.pop ();
4682 
4683   for (i = 0; i < vec_oprnds->length (); i += 2)
4684     {
4685       /* Create demotion operation.  */
4686       vop0 = (*vec_oprnds)[i];
4687       vop1 = (*vec_oprnds)[i + 1];
4688       gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4689       new_tmp = make_ssa_name (vec_dest, new_stmt);
4690       gimple_assign_set_lhs (new_stmt, new_tmp);
4691       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4692 
4693       if (multi_step_cvt)
4694           /* Store the resulting vector for next recursive call.  */
4695           (*vec_oprnds)[i/2] = new_tmp;
4696       else
4697           {
4698             /* This is the last step of the conversion sequence. Store the
4699                vectors in SLP_NODE or in vector info of the scalar statement
4700                (or in STMT_VINFO_RELATED_STMT chain).  */
4701             if (slp_node)
4702               SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4703             else
4704               STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4705           }
4706     }
4707 
4708   /* For multi-step demotion operations we first generate demotion operations
4709      from the source type to the intermediate types, and then combine the
4710      results (stored in VEC_OPRNDS) in demotion operation to the destination
4711      type.  */
4712   if (multi_step_cvt)
4713     {
4714       /* At each level of recursion we have half of the operands we had at the
4715            previous level.  */
4716       vec_oprnds->truncate ((i+1)/2);
4717       vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4718                                                        multi_step_cvt - 1,
4719                                                        stmt_info, vec_dsts, gsi,
4720                                                        slp_node, VEC_PACK_TRUNC_EXPR);
4721     }
4722 
4723   vec_dsts.quick_push (vec_dest);
4724 }
4725 
4726 
4727 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4728    and VEC_OPRNDS1, for a binary operation associated with scalar statement
4729    STMT_INFO.  For multi-step conversions store the resulting vectors and
4730    call the function recursively.  */
4731 
4732 static void
vect_create_vectorized_promotion_stmts(vec_info * vinfo,vec<tree> * vec_oprnds0,vec<tree> * vec_oprnds1,stmt_vec_info stmt_info,tree vec_dest,gimple_stmt_iterator * gsi,enum tree_code code1,enum tree_code code2,int op_type)4733 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4734                                                   vec<tree> *vec_oprnds0,
4735                                                   vec<tree> *vec_oprnds1,
4736                                                   stmt_vec_info stmt_info, tree vec_dest,
4737                                                   gimple_stmt_iterator *gsi,
4738                                                   enum tree_code code1,
4739                                                   enum tree_code code2, int op_type)
4740 {
4741   int i;
4742   tree vop0, vop1, new_tmp1, new_tmp2;
4743   gimple *new_stmt1, *new_stmt2;
4744   vec<tree> vec_tmp = vNULL;
4745 
4746   vec_tmp.create (vec_oprnds0->length () * 2);
4747   FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4748     {
4749       if (op_type == binary_op)
4750           vop1 = (*vec_oprnds1)[i];
4751       else
4752           vop1 = NULL_TREE;
4753 
4754       /* Generate the two halves of promotion operation.  */
4755       new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
4756                                                              op_type, vec_dest, gsi,
4757                                                              stmt_info);
4758       new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
4759                                                              op_type, vec_dest, gsi,
4760                                                              stmt_info);
4761       if (is_gimple_call (new_stmt1))
4762           {
4763             new_tmp1 = gimple_call_lhs (new_stmt1);
4764             new_tmp2 = gimple_call_lhs (new_stmt2);
4765           }
4766       else
4767           {
4768             new_tmp1 = gimple_assign_lhs (new_stmt1);
4769             new_tmp2 = gimple_assign_lhs (new_stmt2);
4770           }
4771 
4772       /* Store the results for the next step.  */
4773       vec_tmp.quick_push (new_tmp1);
4774       vec_tmp.quick_push (new_tmp2);
4775     }
4776 
4777   vec_oprnds0->release ();
4778   *vec_oprnds0 = vec_tmp;
4779 }
4780 
4781 /* Create vectorized promotion stmts for widening stmts using only half the
4782    potential vector size for input.  */
4783 static void
vect_create_half_widening_stmts(vec_info * vinfo,vec<tree> * vec_oprnds0,vec<tree> * vec_oprnds1,stmt_vec_info stmt_info,tree vec_dest,gimple_stmt_iterator * gsi,enum tree_code code1,int op_type)4784 vect_create_half_widening_stmts (vec_info *vinfo,
4785                                                   vec<tree> *vec_oprnds0,
4786                                                   vec<tree> *vec_oprnds1,
4787                                                   stmt_vec_info stmt_info, tree vec_dest,
4788                                                   gimple_stmt_iterator *gsi,
4789                                                   enum tree_code code1,
4790                                                   int op_type)
4791 {
4792   int i;
4793   tree vop0, vop1;
4794   gimple *new_stmt1;
4795   gimple *new_stmt2;
4796   gimple *new_stmt3;
4797   vec<tree> vec_tmp = vNULL;
4798 
4799   vec_tmp.create (vec_oprnds0->length ());
4800   FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4801     {
4802       tree new_tmp1, new_tmp2, new_tmp3, out_type;
4803 
4804       gcc_assert (op_type == binary_op);
4805       vop1 = (*vec_oprnds1)[i];
4806 
4807       /* Widen the first vector input.  */
4808       out_type = TREE_TYPE (vec_dest);
4809       new_tmp1 = make_ssa_name (out_type);
4810       new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
4811       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
4812       if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
4813           {
4814             /* Widen the second vector input.  */
4815             new_tmp2 = make_ssa_name (out_type);
4816             new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
4817             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
4818             /* Perform the operation.  With both vector inputs widened.  */
4819             new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, new_tmp2);
4820           }
4821       else
4822           {
4823             /* Perform the operation.  With the single vector input widened.  */
4824             new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, vop1);
4825       }
4826 
4827       new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
4828       gimple_assign_set_lhs (new_stmt3, new_tmp3);
4829       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
4830 
4831       /* Store the results for the next step.  */
4832       vec_tmp.quick_push (new_tmp3);
4833     }
4834 
4835   vec_oprnds0->release ();
4836   *vec_oprnds0 = vec_tmp;
4837 }
4838 
4839 
4840 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4841    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4842    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4843    Return true if STMT_INFO is vectorizable in this way.  */
4844 
4845 static bool
vectorizable_conversion(vec_info * vinfo,stmt_vec_info stmt_info,gimple_stmt_iterator * gsi,gimple ** vec_stmt,slp_tree slp_node,stmt_vector_for_cost * cost_vec)4846 vectorizable_conversion (vec_info *vinfo,
4847                                stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4848                                gimple **vec_stmt, slp_tree slp_node,
4849                                stmt_vector_for_cost *cost_vec)
4850 {
4851   tree vec_dest;
4852   tree scalar_dest;
4853   tree op0, op1 = NULL_TREE;
4854   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4855   enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4856   enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4857   tree new_temp;
4858   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4859   int ndts = 2;
4860   poly_uint64 nunits_in;
4861   poly_uint64 nunits_out;
4862   tree vectype_out, vectype_in;
4863   int ncopies, i;
4864   tree lhs_type, rhs_type;
4865   enum { NARROW, NONE, WIDEN } modifier;
4866   vec<tree> vec_oprnds0 = vNULL;
4867   vec<tree> vec_oprnds1 = vNULL;
4868   tree vop0;
4869   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4870   int multi_step_cvt = 0;
4871   vec<tree> interm_types = vNULL;
4872   tree intermediate_type, cvt_type = NULL_TREE;
4873   int op_type;
4874   unsigned short fltsz;
4875 
4876   /* Is STMT a vectorizable conversion?   */
4877 
4878   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4879     return false;
4880 
4881   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4882       && ! vec_stmt)
4883     return false;
4884 
4885   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4886   if (!stmt)
4887     return false;
4888 
4889   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4890     return false;
4891 
4892   code = gimple_assign_rhs_code (stmt);
4893   if (!CONVERT_EXPR_CODE_P (code)
4894       && code != FIX_TRUNC_EXPR
4895       && code != FLOAT_EXPR
4896       && code != WIDEN_PLUS_EXPR
4897       && code != WIDEN_MINUS_EXPR
4898       && code != WIDEN_MULT_EXPR
4899       && code != WIDEN_LSHIFT_EXPR)
4900     return false;
4901 
4902   bool widen_arith = (code == WIDEN_PLUS_EXPR
4903                           || code == WIDEN_MINUS_EXPR
4904                           || code == WIDEN_MULT_EXPR
4905                           || code == WIDEN_LSHIFT_EXPR);
4906   op_type = TREE_CODE_LENGTH (code);
4907 
4908   /* Check types of lhs and rhs.  */
4909   scalar_dest = gimple_assign_lhs (stmt);
4910   lhs_type = TREE_TYPE (scalar_dest);
4911   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4912 
4913   /* Check the operands of the operation.  */
4914   slp_tree slp_op0, slp_op1 = NULL;
4915   if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
4916                                  0, &op0, &slp_op0, &dt[0], &vectype_in))
4917     {
4918       if (dump_enabled_p ())
4919           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4920                                "use not simple.\n");
4921       return false;
4922     }
4923 
4924   rhs_type = TREE_TYPE (op0);
4925   if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4926       && !((INTEGRAL_TYPE_P (lhs_type)
4927               && INTEGRAL_TYPE_P (rhs_type))
4928              || (SCALAR_FLOAT_TYPE_P (lhs_type)
4929                  && SCALAR_FLOAT_TYPE_P (rhs_type))))
4930     return false;
4931 
4932   if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4933       && ((INTEGRAL_TYPE_P (lhs_type)
4934              && !type_has_mode_precision_p (lhs_type))
4935             || (INTEGRAL_TYPE_P (rhs_type)
4936                 && !type_has_mode_precision_p (rhs_type))))
4937     {
4938       if (dump_enabled_p ())
4939           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4940                          "type conversion to/from bit-precision unsupported."
4941                          "\n");
4942       return false;
4943     }
4944 
4945   if (op_type == binary_op)
4946     {
4947       gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR
4948                       || code == WIDEN_PLUS_EXPR || code == WIDEN_MINUS_EXPR);
4949 
4950       op1 = gimple_assign_rhs2 (stmt);
4951       tree vectype1_in;
4952       if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
4953                                      &op1, &slp_op1, &dt[1], &vectype1_in))
4954           {
4955           if (dump_enabled_p ())
4956             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4957                              "use not simple.\n");
4958             return false;
4959           }
4960       /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4961            OP1.  */
4962       if (!vectype_in)
4963           vectype_in = vectype1_in;
4964     }
4965 
4966   /* If op0 is an external or constant def, infer the vector type
4967      from the scalar type.  */
4968   if (!vectype_in)
4969     vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
4970   if (vec_stmt)
4971     gcc_assert (vectype_in);
4972   if (!vectype_in)
4973     {
4974       if (dump_enabled_p ())
4975           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4976                                "no vectype for scalar type %T\n", rhs_type);
4977 
4978       return false;
4979     }
4980 
4981   if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4982       && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4983     {
4984       if (dump_enabled_p ())
4985           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4986                                "can't convert between boolean and non "
4987                                "boolean vectors %T\n", rhs_type);
4988 
4989       return false;
4990     }
4991 
4992   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4993   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4994   if (known_eq (nunits_out, nunits_in))
4995     if (widen_arith)
4996       modifier = WIDEN;
4997     else
4998       modifier = NONE;
4999   else if (multiple_p (nunits_out, nunits_in))
5000     modifier = NARROW;
5001   else
5002     {
5003       gcc_checking_assert (multiple_p (nunits_in, nunits_out));
5004       modifier = WIDEN;
5005     }
5006 
5007   /* Multiple types in SLP are handled by creating the appropriate number of
5008      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
5009      case of SLP.  */
5010   if (slp_node)
5011     ncopies = 1;
5012   else if (modifier == NARROW)
5013     ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
5014   else
5015     ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
5016 
5017   /* Sanity check: make sure that at least one copy of the vectorized stmt
5018      needs to be generated.  */
5019   gcc_assert (ncopies >= 1);
5020 
5021   bool found_mode = false;
5022   scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
5023   scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
5024   opt_scalar_mode rhs_mode_iter;
5025 
5026   /* Supportable by target?  */
5027   switch (modifier)
5028     {
5029     case NONE:
5030       if (code != FIX_TRUNC_EXPR
5031             && code != FLOAT_EXPR
5032             && !CONVERT_EXPR_CODE_P (code))
5033           return false;
5034       if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
5035           break;
5036       /* FALLTHRU */
5037     unsupported:
5038       if (dump_enabled_p ())
5039           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5040                          "conversion not supported by target.\n");
5041       return false;
5042 
5043     case WIDEN:
5044       if (known_eq (nunits_in, nunits_out))
5045           {
5046             if (!supportable_half_widening_operation (code, vectype_out,
5047                                                                vectype_in, &code1))
5048               goto unsupported;
5049             gcc_assert (!(multi_step_cvt && op_type == binary_op));
5050             break;
5051           }
5052       if (supportable_widening_operation (vinfo, code, stmt_info,
5053                                                          vectype_out, vectype_in, &code1,
5054                                                          &code2, &multi_step_cvt,
5055                                                          &interm_types))
5056           {
5057             /* Binary widening operation can only be supported directly by the
5058                architecture.  */
5059             gcc_assert (!(multi_step_cvt && op_type == binary_op));
5060             break;
5061           }
5062 
5063       if (code != FLOAT_EXPR
5064             || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
5065           goto unsupported;
5066 
5067       fltsz = GET_MODE_SIZE (lhs_mode);
5068       FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
5069           {
5070             rhs_mode = rhs_mode_iter.require ();
5071             if (GET_MODE_SIZE (rhs_mode) > fltsz)
5072               break;
5073 
5074             cvt_type
5075               = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5076             cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5077             if (cvt_type == NULL_TREE)
5078               goto unsupported;
5079 
5080             if (GET_MODE_SIZE (rhs_mode) == fltsz)
5081               {
5082                 if (!supportable_convert_operation (code, vectype_out,
5083                                                               cvt_type, &codecvt1))
5084                     goto unsupported;
5085               }
5086             else if (!supportable_widening_operation (vinfo, code, stmt_info,
5087                                                                 vectype_out, cvt_type,
5088                                                                 &codecvt1, &codecvt2,
5089                                                                 &multi_step_cvt,
5090                                                                 &interm_types))
5091               continue;
5092             else
5093               gcc_assert (multi_step_cvt == 0);
5094 
5095             if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
5096                                                         cvt_type,
5097                                                         vectype_in, &code1, &code2,
5098                                                         &multi_step_cvt, &interm_types))
5099               {
5100                 found_mode = true;
5101                 break;
5102               }
5103           }
5104 
5105       if (!found_mode)
5106           goto unsupported;
5107 
5108       if (GET_MODE_SIZE (rhs_mode) == fltsz)
5109           codecvt2 = ERROR_MARK;
5110       else
5111           {
5112             multi_step_cvt++;
5113             interm_types.safe_push (cvt_type);
5114             cvt_type = NULL_TREE;
5115           }
5116       break;
5117 
5118     case NARROW:
5119       gcc_assert (op_type == unary_op);
5120       if (supportable_narrowing_operation (code, vectype_out, vectype_in,
5121                                                      &code1, &multi_step_cvt,
5122                                                      &interm_types))
5123           break;
5124 
5125       if (code != FIX_TRUNC_EXPR
5126             || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5127           goto unsupported;
5128 
5129       cvt_type
5130           = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5131       cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5132       if (cvt_type == NULL_TREE)
5133           goto unsupported;
5134       if (!supportable_convert_operation (code, cvt_type, vectype_in,
5135                                                     &codecvt1))
5136           goto unsupported;
5137       if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5138                                                      &code1, &multi_step_cvt,
5139                                                      &interm_types))
5140           break;
5141       goto unsupported;
5142 
5143     default:
5144       gcc_unreachable ();
5145     }
5146 
5147   if (!vec_stmt)              /* transformation not required.  */
5148     {
5149       if (slp_node
5150             && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
5151                 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
5152           {
5153             if (dump_enabled_p ())
5154               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5155                                    "incompatible vector types for invariants\n");
5156             return false;
5157           }
5158       DUMP_VECT_SCOPE ("vectorizable_conversion");
5159       if (modifier == NONE)
5160         {
5161             STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5162             vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5163                                           cost_vec);
5164           }
5165       else if (modifier == NARROW)
5166           {
5167             STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5168             /* The final packing step produces one vector result per copy.  */
5169             unsigned int nvectors
5170               = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5171             vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5172                                                         multi_step_cvt, cost_vec,
5173                                                         widen_arith);
5174           }
5175       else
5176           {
5177             STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5178             /* The initial unpacking step produces two vector results
5179                per copy.  MULTI_STEP_CVT is 0 for a single conversion,
5180                so >> MULTI_STEP_CVT divides by 2^(number of steps - 1).  */
5181             unsigned int nvectors
5182               = (slp_node
5183                  ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5184                  : ncopies * 2);
5185             vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5186                                                         multi_step_cvt, cost_vec,
5187                                                         widen_arith);
5188           }
5189       interm_types.release ();
5190       return true;
5191     }
5192 
5193   /* Transform.  */
5194   if (dump_enabled_p ())
5195     dump_printf_loc (MSG_NOTE, vect_location,
5196                      "transform conversion. ncopies = %d.\n", ncopies);
5197 
5198   if (op_type == binary_op)
5199     {
5200       if (CONSTANT_CLASS_P (op0))
5201           op0 = fold_convert (TREE_TYPE (op1), op0);
5202       else if (CONSTANT_CLASS_P (op1))
5203           op1 = fold_convert (TREE_TYPE (op0), op1);
5204     }
5205 
5206   /* In case of multi-step conversion, we first generate conversion operations
5207      to the intermediate types, and then from that types to the final one.
5208      We create vector destinations for the intermediate type (TYPES) received
5209      from supportable_*_operation, and store them in the correct order
5210      for future use in vect_create_vectorized_*_stmts ().  */
5211   auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5212   vec_dest = vect_create_destination_var (scalar_dest,
5213                                                     (cvt_type && modifier == WIDEN)
5214                                                     ? cvt_type : vectype_out);
5215   vec_dsts.quick_push (vec_dest);
5216 
5217   if (multi_step_cvt)
5218     {
5219       for (i = interm_types.length () - 1;
5220              interm_types.iterate (i, &intermediate_type); i--)
5221           {
5222             vec_dest = vect_create_destination_var (scalar_dest,
5223                                                               intermediate_type);
5224             vec_dsts.quick_push (vec_dest);
5225           }
5226     }
5227 
5228   if (cvt_type)
5229     vec_dest = vect_create_destination_var (scalar_dest,
5230                                                       modifier == WIDEN
5231                                                       ? vectype_out : cvt_type);
5232 
5233   int ninputs = 1;
5234   if (!slp_node)
5235     {
5236       if (modifier == WIDEN)
5237           ;
5238       else if (modifier == NARROW)
5239           {
5240             if (multi_step_cvt)
5241               ninputs = vect_pow2 (multi_step_cvt);
5242             ninputs *= 2;
5243           }
5244     }
5245 
5246   switch (modifier)
5247     {
5248     case NONE:
5249       vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5250                                op0, &vec_oprnds0);
5251       FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5252           {
5253             /* Arguments are ready, create the new vector stmt.  */
5254             gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5255             gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
5256             new_temp = make_ssa_name (vec_dest, new_stmt);
5257             gimple_assign_set_lhs (new_stmt, new_temp);
5258             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5259 
5260             if (slp_node)
5261               SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5262             else
5263               STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5264           }
5265       break;
5266 
5267     case WIDEN:
5268       /* In case the vectorization factor (VF) is bigger than the number
5269            of elements that we can fit in a vectype (nunits), we have to
5270            generate more than one vector stmt - i.e - we need to "unroll"
5271            the vector stmt by a factor VF/nunits.  */
5272       vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5273                                op0, &vec_oprnds0,
5274                                code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5275                                &vec_oprnds1);
5276       if (code == WIDEN_LSHIFT_EXPR)
5277           {
5278             int oprnds_size = vec_oprnds0.length ();
5279             vec_oprnds1.create (oprnds_size);
5280             for (i = 0; i < oprnds_size; ++i)
5281               vec_oprnds1.quick_push (op1);
5282           }
5283       /* Arguments are ready.  Create the new vector stmts.  */
5284       for (i = multi_step_cvt; i >= 0; i--)
5285           {
5286             tree this_dest = vec_dsts[i];
5287             enum tree_code c1 = code1, c2 = code2;
5288             if (i == 0 && codecvt2 != ERROR_MARK)
5289               {
5290                 c1 = codecvt1;
5291                 c2 = codecvt2;
5292               }
5293             if (known_eq (nunits_out, nunits_in))
5294               vect_create_half_widening_stmts (vinfo, &vec_oprnds0,
5295                                                                 &vec_oprnds1, stmt_info,
5296                                                                 this_dest, gsi,
5297                                                                 c1, op_type);
5298             else
5299               vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5300                                                                 &vec_oprnds1, stmt_info,
5301                                                                 this_dest, gsi,
5302                                                                 c1, c2, op_type);
5303           }
5304 
5305       FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5306           {
5307             gimple *new_stmt;
5308             if (cvt_type)
5309               {
5310                 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5311                 new_temp = make_ssa_name (vec_dest);
5312                 new_stmt = gimple_build_assign (new_temp, codecvt1, vop0);
5313                 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5314               }
5315             else
5316               new_stmt = SSA_NAME_DEF_STMT (vop0);
5317 
5318             if (slp_node)
5319               SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5320             else
5321               STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5322           }
5323       break;
5324 
5325     case NARROW:
5326       /* In case the vectorization factor (VF) is bigger than the number
5327            of elements that we can fit in a vectype (nunits), we have to
5328            generate more than one vector stmt - i.e - we need to "unroll"
5329            the vector stmt by a factor VF/nunits.  */
5330       vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5331                                op0, &vec_oprnds0);
5332       /* Arguments are ready.  Create the new vector stmts.  */
5333       if (cvt_type)
5334           FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5335             {
5336               gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5337               new_temp = make_ssa_name (vec_dest);
5338               gassign *new_stmt
5339                 = gimple_build_assign (new_temp, codecvt1, vop0);
5340               vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5341               vec_oprnds0[i] = new_temp;
5342             }
5343 
5344       vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5345                                                        multi_step_cvt,
5346                                                        stmt_info, vec_dsts, gsi,
5347                                                        slp_node, code1);
5348       break;
5349     }
5350   if (!slp_node)
5351     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5352 
5353   vec_oprnds0.release ();
5354   vec_oprnds1.release ();
5355   interm_types.release ();
5356 
5357   return true;
5358 }
5359 
5360 /* Return true if we can assume from the scalar form of STMT_INFO that
5361    neither the scalar nor the vector forms will generate code.  STMT_INFO
5362    is known not to involve a data reference.  */
5363 
5364 bool
vect_nop_conversion_p(stmt_vec_info stmt_info)5365 vect_nop_conversion_p (stmt_vec_info stmt_info)
5366 {
5367   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5368   if (!stmt)
5369     return false;
5370 
5371   tree lhs = gimple_assign_lhs (stmt);
5372   tree_code code = gimple_assign_rhs_code (stmt);
5373   tree rhs = gimple_assign_rhs1 (stmt);
5374 
5375   if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5376     return true;
5377 
5378   if (CONVERT_EXPR_CODE_P (code))
5379     return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5380 
5381   return false;
5382 }
5383 
5384 /* Function vectorizable_assignment.
5385 
5386    Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5387    If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5388    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5389    Return true if STMT_INFO is vectorizable in this way.  */
5390 
5391 static bool
vectorizable_assignment(vec_info * vinfo,stmt_vec_info stmt_info,gimple_stmt_iterator * gsi,gimple ** vec_stmt,slp_tree slp_node,stmt_vector_for_cost * cost_vec)5392 vectorizable_assignment (vec_info *vinfo,
5393                                stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5394                                gimple **vec_stmt, slp_tree slp_node,
5395                                stmt_vector_for_cost *cost_vec)
5396 {
5397   tree vec_dest;
5398   tree scalar_dest;
5399   tree op;
5400   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5401   tree new_temp;
5402   enum vect_def_type dt[1] = {vect_unknown_def_type};
5403   int ndts = 1;
5404   int ncopies;
5405   int i;
5406   vec<tree> vec_oprnds = vNULL;
5407   tree vop;
5408   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5409   enum tree_code code;
5410   tree vectype_in;
5411 
5412   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5413     return false;
5414 
5415   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5416       && ! vec_stmt)
5417     return false;
5418 
5419   /* Is vectorizable assignment?  */
5420   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5421   if (!stmt)
5422     return false;
5423 
5424   scalar_dest = gimple_assign_lhs (stmt);
5425   if (TREE_CODE (scalar_dest) != SSA_NAME)
5426     return false;
5427 
5428   if (STMT_VINFO_DATA_REF (stmt_info))
5429     return false;
5430 
5431   code = gimple_assign_rhs_code (stmt);
5432   if (!(gimple_assign_single_p (stmt)
5433           || code == PAREN_EXPR
5434           || CONVERT_EXPR_CODE_P (code)))
5435     return false;
5436 
5437   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5438   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5439 
5440   /* Multiple types in SLP are handled by creating the appropriate number of
5441      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
5442      case of SLP.  */
5443   if (slp_node)
5444     ncopies = 1;
5445   else
5446     ncopies = vect_get_num_copies (loop_vinfo, vectype);
5447 
5448   gcc_assert (ncopies >= 1);
5449 
5450   slp_tree slp_op;
5451   if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5452                                  &dt[0], &vectype_in))
5453     {
5454       if (dump_enabled_p ())
5455         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5456                          "use not simple.\n");
5457       return false;
5458     }
5459   if (!vectype_in)
5460     vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5461 
5462   /* We can handle NOP_EXPR conversions that do not change the number
5463      of elements or the vector size.  */
5464   if ((CONVERT_EXPR_CODE_P (code)
5465        || code == VIEW_CONVERT_EXPR)
5466       && (!vectype_in
5467             || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5468             || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5469                            GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5470     return false;
5471 
5472   if (VECTOR_BOOLEAN_TYPE_P (vectype) != VECTOR_BOOLEAN_TYPE_P (vectype_in))
5473     {
5474       if (dump_enabled_p ())
5475           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5476                                "can't convert between boolean and non "
5477                                "boolean vectors %T\n", TREE_TYPE (op));
5478 
5479       return false;
5480     }
5481 
5482   /* We do not handle bit-precision changes.  */
5483   if ((CONVERT_EXPR_CODE_P (code)
5484        || code == VIEW_CONVERT_EXPR)
5485       && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5486       && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5487             || !type_has_mode_precision_p (TREE_TYPE (op)))
5488       /* But a conversion that does not change the bit-pattern is ok.  */
5489       && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5490               > TYPE_PRECISION (TREE_TYPE (op)))
5491              && TYPE_UNSIGNED (TREE_TYPE (op))))
5492     {
5493       if (dump_enabled_p ())
5494         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5495                          "type conversion to/from bit-precision "
5496                          "unsupported.\n");
5497       return false;
5498     }
5499 
5500   if (!vec_stmt) /* transformation not required.  */
5501     {
5502       if (slp_node
5503             && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5504           {
5505             if (dump_enabled_p ())
5506               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5507                                    "incompatible vector types for invariants\n");
5508             return false;
5509           }
5510       STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5511       DUMP_VECT_SCOPE ("vectorizable_assignment");
5512       if (!vect_nop_conversion_p (stmt_info))
5513           vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5514                                         cost_vec);
5515       return true;
5516     }
5517 
5518   /* Transform.  */
5519   if (dump_enabled_p ())
5520     dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5521 
5522   /* Handle def.  */
5523   vec_dest = vect_create_destination_var (scalar_dest, vectype);
5524 
5525   /* Handle use.  */
5526   vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
5527 
5528   /* Arguments are ready. create the new vector stmt.  */
5529   FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5530     {
5531       if (CONVERT_EXPR_CODE_P (code)
5532             || code == VIEW_CONVERT_EXPR)
5533           vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5534       gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5535       new_temp = make_ssa_name (vec_dest, new_stmt);
5536       gimple_assign_set_lhs (new_stmt, new_temp);
5537       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5538       if (slp_node)
5539           SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5540       else
5541           STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5542     }
5543   if (!slp_node)
5544     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5545 
5546   vec_oprnds.release ();
5547   return true;
5548 }
5549 
5550 
5551 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5552    either as shift by a scalar or by a vector.  */
5553 
5554 bool
vect_supportable_shift(vec_info * vinfo,enum tree_code code,tree scalar_type)5555 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5556 {
5557 
5558   machine_mode vec_mode;
5559   optab optab;
5560   int icode;
5561   tree vectype;
5562 
5563   vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5564   if (!vectype)
5565     return false;
5566 
5567   optab = optab_for_tree_code (code, vectype, optab_scalar);
5568   if (!optab
5569       || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5570     {
5571       optab = optab_for_tree_code (code, vectype, optab_vector);
5572       if (!optab
5573           || (optab_handler (optab, TYPE_MODE (vectype))
5574                       == CODE_FOR_nothing))
5575         return false;
5576     }
5577 
5578   vec_mode = TYPE_MODE (vectype);
5579   icode = (int) optab_handler (optab, vec_mode);
5580   if (icode == CODE_FOR_nothing)
5581     return false;
5582 
5583   return true;
5584 }
5585 
5586 
5587 /* Function vectorizable_shift.
5588 
5589    Check if STMT_INFO performs a shift operation that can be vectorized.
5590    If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5591    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5592    Return true if STMT_INFO is vectorizable in this way.  */
5593 
5594 static bool
vectorizable_shift(vec_info * vinfo,stmt_vec_info stmt_info,gimple_stmt_iterator * gsi,gimple ** vec_stmt,slp_tree slp_node,stmt_vector_for_cost * cost_vec)5595 vectorizable_shift (vec_info *vinfo,
5596                         stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5597                         gimple **vec_stmt, slp_tree slp_node,
5598                         stmt_vector_for_cost *cost_vec)
5599 {
5600   tree vec_dest;
5601   tree scalar_dest;
5602   tree op0, op1 = NULL;
5603   tree vec_oprnd1 = NULL_TREE;
5604   tree vectype;
5605   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5606   enum tree_code code;
5607   machine_mode vec_mode;
5608   tree new_temp;
5609   optab optab;
5610   int icode;
5611   machine_mode optab_op2_mode;
5612   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5613   int ndts = 2;
5614   poly_uint64 nunits_in;
5615   poly_uint64 nunits_out;
5616   tree vectype_out;
5617   tree op1_vectype;
5618   int ncopies;
5619   int i;
5620   vec<tree> vec_oprnds0 = vNULL;
5621   vec<tree> vec_oprnds1 = vNULL;
5622   tree vop0, vop1;
5623   unsigned int k;
5624   bool scalar_shift_arg = true;
5625   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5626   bool incompatible_op1_vectype_p = false;
5627 
5628   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5629     return false;
5630 
5631   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5632       && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5633       && ! vec_stmt)
5634     return false;
5635 
5636   /* Is STMT a vectorizable binary/unary operation?   */
5637   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5638   if (!stmt)
5639     return false;
5640 
5641   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5642     return false;
5643 
5644   code = gimple_assign_rhs_code (stmt);
5645 
5646   if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5647       || code == RROTATE_EXPR))
5648     return false;
5649 
5650   scalar_dest = gimple_assign_lhs (stmt);
5651   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5652   if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5653     {
5654       if (dump_enabled_p ())
5655         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5656                          "bit-precision shifts not supported.\n");
5657       return false;
5658     }
5659 
5660   slp_tree slp_op0;
5661   if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5662                                  0, &op0, &slp_op0, &dt[0], &vectype))
5663     {
5664       if (dump_enabled_p ())
5665         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5666                          "use not simple.\n");
5667       return false;
5668     }
5669   /* If op0 is an external or constant def, infer the vector type
5670      from the scalar type.  */
5671   if (!vectype)
5672     vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5673   if (vec_stmt)
5674     gcc_assert (vectype);
5675   if (!vectype)
5676     {
5677       if (dump_enabled_p ())
5678         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5679                          "no vectype for scalar type\n");
5680       return false;
5681     }
5682 
5683   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5684   nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5685   if (maybe_ne (nunits_out, nunits_in))
5686     return false;
5687 
5688   stmt_vec_info op1_def_stmt_info;
5689   slp_tree slp_op1;
5690   if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
5691                                  &dt[1], &op1_vectype, &op1_def_stmt_info))
5692     {
5693       if (dump_enabled_p ())
5694         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5695                          "use not simple.\n");
5696       return false;
5697     }
5698 
5699   /* Multiple types in SLP are handled by creating the appropriate number of
5700      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
5701      case of SLP.  */
5702   if (slp_node)
5703     ncopies = 1;
5704   else
5705     ncopies = vect_get_num_copies (loop_vinfo, vectype);
5706 
5707   gcc_assert (ncopies >= 1);
5708 
5709   /* Determine whether the shift amount is a vector, or scalar.  If the
5710      shift/rotate amount is a vector, use the vector/vector shift optabs.  */
5711 
5712   if ((dt[1] == vect_internal_def
5713        || dt[1] == vect_induction_def
5714        || dt[1] == vect_nested_cycle)
5715       && !slp_node)
5716     scalar_shift_arg = false;
5717   else if (dt[1] == vect_constant_def
5718              || dt[1] == vect_external_def
5719              || dt[1] == vect_internal_def)
5720     {
5721       /* In SLP, need to check whether the shift count is the same,
5722            in loops if it is a constant or invariant, it is always
5723            a scalar shift.  */
5724       if (slp_node)
5725           {
5726             vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5727             stmt_vec_info slpstmt_info;
5728 
5729             FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5730               {
5731                 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5732                 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5733                     scalar_shift_arg = false;
5734               }
5735 
5736             /* For internal SLP defs we have to make sure we see scalar stmts
5737                for all vector elements.
5738                ???  For different vectors we could resort to a different
5739                scalar shift operand but code-generation below simply always
5740                takes the first.  */
5741             if (dt[1] == vect_internal_def
5742                 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5743                                  stmts.length ()))
5744               scalar_shift_arg = false;
5745           }
5746 
5747       /* If the shift amount is computed by a pattern stmt we cannot
5748          use the scalar amount directly thus give up and use a vector
5749            shift.  */
5750       if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5751           scalar_shift_arg = false;
5752     }
5753   else
5754     {
5755       if (dump_enabled_p ())
5756         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5757                          "operand mode requires invariant argument.\n");
5758       return false;
5759     }
5760 
5761   /* Vector shifted by vector.  */
5762   bool was_scalar_shift_arg = scalar_shift_arg;
5763   if (!scalar_shift_arg)
5764     {
5765       optab = optab_for_tree_code (code, vectype, optab_vector);
5766       if (dump_enabled_p ())
5767         dump_printf_loc (MSG_NOTE, vect_location,
5768                          "vector/vector shift/rotate found.\n");
5769 
5770       if (!op1_vectype)
5771           op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5772                                                                slp_op1);
5773       incompatible_op1_vectype_p
5774           = (op1_vectype == NULL_TREE
5775              || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5776                               TYPE_VECTOR_SUBPARTS (vectype))
5777              || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5778       if (incompatible_op1_vectype_p
5779             && (!slp_node
5780                 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
5781                 || slp_op1->refcnt != 1))
5782           {
5783             if (dump_enabled_p ())
5784               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5785                              "unusable type for last operand in"
5786                              " vector/vector shift/rotate.\n");
5787             return false;
5788           }
5789     }
5790   /* See if the machine has a vector shifted by scalar insn and if not
5791      then see if it has a vector shifted by vector insn.  */
5792   else
5793     {
5794       optab = optab_for_tree_code (code, vectype, optab_scalar);
5795       if (optab
5796           && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5797         {
5798           if (dump_enabled_p ())
5799             dump_printf_loc (MSG_NOTE, vect_location,
5800                              "vector/scalar shift/rotate found.\n");
5801         }
5802       else
5803         {
5804           optab = optab_for_tree_code (code, vectype, optab_vector);
5805           if (optab
5806                && (optab_handler (optab, TYPE_MODE (vectype))
5807                       != CODE_FOR_nothing))
5808             {
5809                 scalar_shift_arg = false;
5810 
5811               if (dump_enabled_p ())
5812                 dump_printf_loc (MSG_NOTE, vect_location,
5813                                  "vector/vector shift/rotate found.\n");
5814 
5815                 if (!op1_vectype)
5816                     op1_vectype = get_vectype_for_scalar_type (vinfo,
5817                                                                          TREE_TYPE (op1),
5818                                                                          slp_op1);
5819 
5820               /* Unlike the other binary operators, shifts/rotates have
5821                  the rhs being int, instead of the same type as the lhs,
5822                  so make sure the scalar is the right type if we are
5823                      dealing with vectors of long long/long/short/char.  */
5824                 incompatible_op1_vectype_p
5825                     = (!op1_vectype
5826                        || !tree_nop_conversion_p (TREE_TYPE (vectype),
5827                                                         TREE_TYPE (op1)));
5828                 if (incompatible_op1_vectype_p
5829                       && dt[1] == vect_internal_def)
5830                     {
5831                       if (dump_enabled_p ())
5832                         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5833                                              "unusable type for last operand in"
5834                                              " vector/vector shift/rotate.\n");
5835                       return false;
5836                     }
5837             }
5838         }
5839     }
5840 
5841   /* Supportable by target?  */
5842   if (!optab)
5843     {
5844       if (dump_enabled_p ())
5845         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5846                          "no optab.\n");
5847       return false;
5848     }
5849   vec_mode = TYPE_MODE (vectype);
5850   icode = (int) optab_handler (optab, vec_mode);
5851   if (icode == CODE_FOR_nothing)
5852     {
5853       if (dump_enabled_p ())
5854         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5855                          "op not supported by target.\n");
5856       return false;
5857     }
5858   /* vector lowering cannot optimize vector shifts using word arithmetic.  */
5859   if (vect_emulated_vector_p (vectype))
5860     return false;
5861 
5862   if (!vec_stmt) /* transformation not required.  */
5863     {
5864       if (slp_node
5865             && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5866                 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
5867                       && (!incompatible_op1_vectype_p
5868                           || dt[1] == vect_constant_def)
5869                       && !vect_maybe_update_slp_op_vectype
5870                               (slp_op1,
5871                                incompatible_op1_vectype_p ? vectype : op1_vectype))))
5872           {
5873             if (dump_enabled_p ())
5874               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5875                                    "incompatible vector types for invariants\n");
5876             return false;
5877           }
5878       /* Now adjust the constant shift amount in place.  */
5879       if (slp_node
5880             && incompatible_op1_vectype_p
5881             && dt[1] == vect_constant_def)
5882           {
5883             for (unsigned i = 0;
5884                  i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
5885               {
5886                 SLP_TREE_SCALAR_OPS (slp_op1)[i]
5887                     = fold_convert (TREE_TYPE (vectype),
5888                                         SLP_TREE_SCALAR_OPS (slp_op1)[i]);
5889                 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
5890                                  == INTEGER_CST));
5891               }
5892           }
5893       STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5894       DUMP_VECT_SCOPE ("vectorizable_shift");
5895       vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
5896                                     scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
5897       return true;
5898     }
5899 
5900   /* Transform.  */
5901 
5902   if (dump_enabled_p ())
5903     dump_printf_loc (MSG_NOTE, vect_location,
5904                      "transform binary/unary operation.\n");
5905 
5906   if (incompatible_op1_vectype_p && !slp_node)
5907     {
5908       gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
5909       op1 = fold_convert (TREE_TYPE (vectype), op1);
5910       if (dt[1] != vect_constant_def)
5911           op1 = vect_init_vector (vinfo, stmt_info, op1,
5912                                         TREE_TYPE (vectype), NULL);
5913     }
5914 
5915   /* Handle def.  */
5916   vec_dest = vect_create_destination_var (scalar_dest, vectype);
5917 
5918   if (scalar_shift_arg && dt[1] != vect_internal_def)
5919     {
5920       /* Vector shl and shr insn patterns can be defined with scalar
5921            operand 2 (shift operand).  In this case, use constant or loop
5922            invariant op1 directly, without extending it to vector mode
5923            first.  */
5924       optab_op2_mode = insn_data[icode].operand[2].mode;
5925       if (!VECTOR_MODE_P (optab_op2_mode))
5926           {
5927             if (dump_enabled_p ())
5928               dump_printf_loc (MSG_NOTE, vect_location,
5929                                    "operand 1 using scalar mode.\n");
5930             vec_oprnd1 = op1;
5931             vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
5932             vec_oprnds1.quick_push (vec_oprnd1);
5933                 /* Store vec_oprnd1 for every vector stmt to be created.
5934                      We check during the analysis that all the shift arguments
5935                      are the same.
5936                      TODO: Allow different constants for different vector
5937                      stmts generated for an SLP instance.  */
5938             for (k = 0;
5939                  k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
5940               vec_oprnds1.quick_push (vec_oprnd1);
5941           }
5942     }
5943   else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
5944     {
5945       if (was_scalar_shift_arg)
5946           {
5947             /* If the argument was the same in all lanes create
5948                the correctly typed vector shift amount directly.  */
5949             op1 = fold_convert (TREE_TYPE (vectype), op1);
5950             op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
5951                                           !loop_vinfo ? gsi : NULL);
5952             vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
5953                                                    !loop_vinfo ? gsi : NULL);
5954             vec_oprnds1.create (slp_node->vec_stmts_size);
5955             for (k = 0; k < slp_node->vec_stmts_size; k++)
5956               vec_oprnds1.quick_push (vec_oprnd1);
5957           }
5958       else if (dt[1] == vect_constant_def)
5959           /* The constant shift amount has been adjusted in place.  */
5960           ;
5961       else
5962           gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5963     }
5964 
5965   /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5966      (a special case for certain kind of vector shifts); otherwise,
5967      operand 1 should be of a vector type (the usual case).  */
5968   vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5969                          op0, &vec_oprnds0,
5970                          vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
5971 
5972   /* Arguments are ready.  Create the new vector stmt.  */
5973   FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5974     {
5975       /* For internal defs where we need to use a scalar shift arg
5976            extract the first lane.  */
5977       if (scalar_shift_arg && dt[1] == vect_internal_def)
5978           {
5979             vop1 = vec_oprnds1[0];
5980             new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
5981             gassign *new_stmt
5982               = gimple_build_assign (new_temp,
5983                                            build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
5984                                                      vop1,
5985                                                      TYPE_SIZE (TREE_TYPE (new_temp)),
5986                                                      bitsize_zero_node));
5987             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5988             vop1 = new_temp;
5989           }
5990       else
5991           vop1 = vec_oprnds1[i];
5992       gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5993       new_temp = make_ssa_name (vec_dest, new_stmt);
5994       gimple_assign_set_lhs (new_stmt, new_temp);
5995       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5996       if (slp_node)
5997           SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5998       else
5999           STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6000     }
6001 
6002   if (!slp_node)
6003     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6004 
6005   vec_oprnds0.release ();
6006   vec_oprnds1.release ();
6007 
6008   return true;
6009 }
6010 
6011 
6012 /* Function vectorizable_operation.
6013 
6014    Check if STMT_INFO performs a binary, unary or ternary operation that can
6015    be vectorized.
6016    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6017    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6018    Return true if STMT_INFO is vectorizable in this way.  */
6019 
6020 static bool
vectorizable_operation(vec_info * vinfo,stmt_vec_info stmt_info,gimple_stmt_iterator * gsi,gimple ** vec_stmt,slp_tree slp_node,stmt_vector_for_cost * cost_vec)6021 vectorizable_operation (vec_info *vinfo,
6022                               stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6023                               gimple **vec_stmt, slp_tree slp_node,
6024                               stmt_vector_for_cost *cost_vec)
6025 {
6026   tree vec_dest;
6027   tree scalar_dest;
6028   tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
6029   tree vectype;
6030   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6031   enum tree_code code, orig_code;
6032   machine_mode vec_mode;
6033   tree new_temp;
6034   int op_type;
6035   optab optab;
6036   bool target_support_p;
6037   enum vect_def_type dt[3]
6038     = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
6039   int ndts = 3;
6040   poly_uint64 nunits_in;
6041   poly_uint64 nunits_out;
6042   tree vectype_out;
6043   int ncopies, vec_num;
6044   int i;
6045   vec<tree> vec_oprnds0 = vNULL;
6046   vec<tree> vec_oprnds1 = vNULL;
6047   vec<tree> vec_oprnds2 = vNULL;
6048   tree vop0, vop1, vop2;
6049   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6050 
6051   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6052     return false;
6053 
6054   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6055       && ! vec_stmt)
6056     return false;
6057 
6058   /* Is STMT a vectorizable binary/unary operation?   */
6059   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6060   if (!stmt)
6061     return false;
6062 
6063   /* Loads and stores are handled in vectorizable_{load,store}.  */
6064   if (STMT_VINFO_DATA_REF (stmt_info))
6065     return false;
6066 
6067   orig_code = code = gimple_assign_rhs_code (stmt);
6068 
6069   /* Shifts are handled in vectorizable_shift.  */
6070   if (code == LSHIFT_EXPR
6071       || code == RSHIFT_EXPR
6072       || code == LROTATE_EXPR
6073       || code == RROTATE_EXPR)
6074    return false;
6075 
6076   /* Comparisons are handled in vectorizable_comparison.  */
6077   if (TREE_CODE_CLASS (code) == tcc_comparison)
6078     return false;
6079 
6080   /* Conditions are handled in vectorizable_condition.  */
6081   if (code == COND_EXPR)
6082     return false;
6083 
6084   /* For pointer addition and subtraction, we should use the normal
6085      plus and minus for the vector operation.  */
6086   if (code == POINTER_PLUS_EXPR)
6087     code = PLUS_EXPR;
6088   if (code == POINTER_DIFF_EXPR)
6089     code = MINUS_EXPR;
6090 
6091   /* Support only unary or binary operations.  */
6092   op_type = TREE_CODE_LENGTH (code);
6093   if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6094     {
6095       if (dump_enabled_p ())
6096         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6097                          "num. args = %d (not unary/binary/ternary op).\n",
6098                          op_type);
6099       return false;
6100     }
6101 
6102   scalar_dest = gimple_assign_lhs (stmt);
6103   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6104 
6105   /* Most operations cannot handle bit-precision types without extra
6106      truncations.  */
6107   bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6108   if (!mask_op_p
6109       && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6110       /* Exception are bitwise binary operations.  */
6111       && code != BIT_IOR_EXPR
6112       && code != BIT_XOR_EXPR
6113       && code != BIT_AND_EXPR)
6114     {
6115       if (dump_enabled_p ())
6116         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6117                          "bit-precision arithmetic not supported.\n");
6118       return false;
6119     }
6120 
6121   slp_tree slp_op0;
6122   if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6123                                  0, &op0, &slp_op0, &dt[0], &vectype))
6124     {
6125       if (dump_enabled_p ())
6126         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6127                          "use not simple.\n");
6128       return false;
6129     }
6130   bool is_invariant = (dt[0] == vect_external_def
6131                            || dt[0] == vect_constant_def);
6132   /* If op0 is an external or constant def, infer the vector type
6133      from the scalar type.  */
6134   if (!vectype)
6135     {
6136       /* For boolean type we cannot determine vectype by
6137            invariant value (don't know whether it is a vector
6138            of booleans or vector of integers).  We use output
6139            vectype because operations on boolean don't change
6140            type.  */
6141       if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6142           {
6143             if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6144               {
6145                 if (dump_enabled_p ())
6146                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6147                                          "not supported operation on bool value.\n");
6148                 return false;
6149               }
6150             vectype = vectype_out;
6151           }
6152       else
6153           vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6154                                                          slp_node);
6155     }
6156   if (vec_stmt)
6157     gcc_assert (vectype);
6158   if (!vectype)
6159     {
6160       if (dump_enabled_p ())
6161           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6162                                "no vectype for scalar type %T\n",
6163                                TREE_TYPE (op0));
6164 
6165       return false;
6166     }
6167 
6168   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6169   nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6170   if (maybe_ne (nunits_out, nunits_in))
6171     return false;
6172 
6173   tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6174   slp_tree slp_op1 = NULL, slp_op2 = NULL;
6175   if (op_type == binary_op || op_type == ternary_op)
6176     {
6177       if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6178                                      1, &op1, &slp_op1, &dt[1], &vectype2))
6179           {
6180             if (dump_enabled_p ())
6181               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6182                              "use not simple.\n");
6183             return false;
6184           }
6185       is_invariant &= (dt[1] == vect_external_def
6186                            || dt[1] == vect_constant_def);
6187       if (vectype2
6188             && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2)))
6189           return false;
6190     }
6191   if (op_type == ternary_op)
6192     {
6193       if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6194                                      2, &op2, &slp_op2, &dt[2], &vectype3))
6195           {
6196             if (dump_enabled_p ())
6197               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6198                              "use not simple.\n");
6199             return false;
6200           }
6201       is_invariant &= (dt[2] == vect_external_def
6202                            || dt[2] == vect_constant_def);
6203       if (vectype3
6204             && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3)))
6205           return false;
6206     }
6207 
6208   /* Multiple types in SLP are handled by creating the appropriate number of
6209      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
6210      case of SLP.  */
6211   if (slp_node)
6212     {
6213       ncopies = 1;
6214       vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6215     }
6216   else
6217     {
6218       ncopies = vect_get_num_copies (loop_vinfo, vectype);
6219       vec_num = 1;
6220     }
6221 
6222   gcc_assert (ncopies >= 1);
6223 
6224   /* Reject attempts to combine mask types with nonmask types, e.g. if
6225      we have an AND between a (nonmask) boolean loaded from memory and
6226      a (mask) boolean result of a comparison.
6227 
6228      TODO: We could easily fix these cases up using pattern statements.  */
6229   if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6230       || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6231       || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6232     {
6233       if (dump_enabled_p ())
6234           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6235                                "mixed mask and nonmask vector types\n");
6236       return false;
6237     }
6238 
6239   /* Supportable by target?  */
6240 
6241   vec_mode = TYPE_MODE (vectype);
6242   if (code == MULT_HIGHPART_EXPR)
6243     target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6244   else
6245     {
6246       optab = optab_for_tree_code (code, vectype, optab_default);
6247       if (!optab)
6248           {
6249           if (dump_enabled_p ())
6250             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6251                              "no optab.\n");
6252             return false;
6253           }
6254       target_support_p = (optab_handler (optab, vec_mode)
6255                                 != CODE_FOR_nothing);
6256     }
6257 
6258   bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6259   if (!target_support_p)
6260     {
6261       if (dump_enabled_p ())
6262           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6263                          "op not supported by target.\n");
6264       /* Check only during analysis.  */
6265       if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6266             || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
6267         return false;
6268       if (dump_enabled_p ())
6269           dump_printf_loc (MSG_NOTE, vect_location,
6270                          "proceeding using word mode.\n");
6271       using_emulated_vectors_p = true;
6272     }
6273 
6274   if (using_emulated_vectors_p
6275       && !vect_can_vectorize_without_simd_p (code))
6276     {
6277       if (dump_enabled_p ())
6278           dump_printf (MSG_NOTE, "using word mode not possible.\n");
6279       return false;
6280     }
6281 
6282   /* ???  We should instead expand the operations here, instead of
6283      relying on vector lowering which has this hard cap on the number
6284      of vector elements below it performs elementwise operations.  */
6285   if (using_emulated_vectors_p
6286       && (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
6287       && ((BITS_PER_WORD / vector_element_bits (vectype)) < 4
6288             || maybe_lt (nunits_out, 4U)))
6289     {
6290       if (dump_enabled_p ())
6291           dump_printf (MSG_NOTE, "not using word mode for +- and less than "
6292                          "four vector elements\n");
6293       return false;
6294     }
6295 
6296   int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6297   vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6298   internal_fn cond_fn = get_conditional_internal_fn (code);
6299 
6300   /* If operating on inactive elements could generate spurious traps,
6301      we need to restrict the operation to active lanes.  Note that this
6302      specifically doesn't apply to unhoisted invariants, since they
6303      operate on the same value for every lane.
6304 
6305      Similarly, if this operation is part of a reduction, a fully-masked
6306      loop should only change the active lanes of the reduction chain,
6307      keeping the inactive lanes as-is.  */
6308   bool mask_out_inactive = ((!is_invariant && gimple_could_trap_p (stmt))
6309                                   || reduc_idx >= 0);
6310 
6311   if (!vec_stmt) /* transformation not required.  */
6312     {
6313       if (loop_vinfo
6314             && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6315             && mask_out_inactive)
6316           {
6317             if (cond_fn == IFN_LAST
6318                 || !direct_internal_fn_supported_p (cond_fn, vectype,
6319                                                               OPTIMIZE_FOR_SPEED))
6320               {
6321                 if (dump_enabled_p ())
6322                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6323                                          "can't use a fully-masked loop because no"
6324                                          " conditional operation is available.\n");
6325                 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6326               }
6327             else
6328               vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6329                                            vectype, NULL);
6330           }
6331 
6332       /* Put types on constant and invariant SLP children.  */
6333       if (slp_node
6334             && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6335                 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6336                 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6337           {
6338             if (dump_enabled_p ())
6339               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6340                                    "incompatible vector types for invariants\n");
6341             return false;
6342           }
6343 
6344       STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6345       DUMP_VECT_SCOPE ("vectorizable_operation");
6346       vect_model_simple_cost (vinfo, stmt_info,
6347                                     ncopies, dt, ndts, slp_node, cost_vec);
6348       if (using_emulated_vectors_p)
6349           {
6350             /* The above vect_model_simple_cost call handles constants
6351                in the prologue and (mis-)costs one of the stmts as
6352                vector stmt.  See tree-vect-generic.cc:do_plus_minus/do_negate
6353                for the actual lowering that will be applied.  */
6354             unsigned n
6355               = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
6356             switch (code)
6357               {
6358               case PLUS_EXPR:
6359                 n *= 5;
6360                 break;
6361               case MINUS_EXPR:
6362                 n *= 6;
6363                 break;
6364               case NEGATE_EXPR:
6365                 n *= 4;
6366                 break;
6367               default:;
6368               }
6369             record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info, 0, vect_body);
6370           }
6371       return true;
6372     }
6373 
6374   /* Transform.  */
6375 
6376   if (dump_enabled_p ())
6377     dump_printf_loc (MSG_NOTE, vect_location,
6378                      "transform binary/unary operation.\n");
6379 
6380   bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6381 
6382   /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6383      vectors with unsigned elements, but the result is signed.  So, we
6384      need to compute the MINUS_EXPR into vectype temporary and
6385      VIEW_CONVERT_EXPR it into the final vectype_out result.  */
6386   tree vec_cvt_dest = NULL_TREE;
6387   if (orig_code == POINTER_DIFF_EXPR)
6388     {
6389       vec_dest = vect_create_destination_var (scalar_dest, vectype);
6390       vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6391     }
6392   /* Handle def.  */
6393   else
6394     vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6395 
6396   /* In case the vectorization factor (VF) is bigger than the number
6397      of elements that we can fit in a vectype (nunits), we have to generate
6398      more than one vector stmt - i.e - we need to "unroll" the
6399      vector stmt by a factor VF/nunits.  In doing so, we record a pointer
6400      from one copy of the vector stmt to the next, in the field
6401      STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
6402      stages to find the correct vector defs to be used when vectorizing
6403      stmts that use the defs of the current stmt.  The example below
6404      illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6405      we need to create 4 vectorized stmts):
6406 
6407      before vectorization:
6408                                 RELATED_STMT    VEC_STMT
6409         S1:     x = memref      -               -
6410         S2:     z = x + 1       -               -
6411 
6412      step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6413              there):
6414                                 RELATED_STMT    VEC_STMT
6415         VS1_0:  vx0 = memref0   VS1_1           -
6416         VS1_1:  vx1 = memref1   VS1_2           -
6417         VS1_2:  vx2 = memref2   VS1_3           -
6418         VS1_3:  vx3 = memref3   -               -
6419         S1:     x = load        -               VS1_0
6420         S2:     z = x + 1       -               -
6421 
6422      step2: vectorize stmt S2 (done here):
6423         To vectorize stmt S2 we first need to find the relevant vector
6424         def for the first operand 'x'.  This is, as usual, obtained from
6425         the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6426         that defines 'x' (S1).  This way we find the stmt VS1_0, and the
6427         relevant vector def 'vx0'.  Having found 'vx0' we can generate
6428         the vector stmt VS2_0, and as usual, record it in the
6429         STMT_VINFO_VEC_STMT of stmt S2.
6430         When creating the second copy (VS2_1), we obtain the relevant vector
6431         def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6432         stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
6433         vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
6434         pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6435         Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
6436         chain of stmts and pointers:
6437                                 RELATED_STMT    VEC_STMT
6438         VS1_0:  vx0 = memref0   VS1_1           -
6439         VS1_1:  vx1 = memref1   VS1_2           -
6440         VS1_2:  vx2 = memref2   VS1_3           -
6441         VS1_3:  vx3 = memref3   -               -
6442         S1:     x = load        -               VS1_0
6443         VS2_0:  vz0 = vx0 + v1  VS2_1           -
6444         VS2_1:  vz1 = vx1 + v1  VS2_2           -
6445         VS2_2:  vz2 = vx2 + v1  VS2_3           -
6446         VS2_3:  vz3 = vx3 + v1  -               -
6447         S2:     z = x + 1       -               VS2_0  */
6448 
6449   vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6450                          op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6451   /* Arguments are ready.  Create the new vector stmt.  */
6452   FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6453     {
6454       gimple *new_stmt = NULL;
6455       vop1 = ((op_type == binary_op || op_type == ternary_op)
6456                 ? vec_oprnds1[i] : NULL_TREE);
6457       vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6458       if (masked_loop_p && mask_out_inactive)
6459           {
6460             tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6461                                                     vectype, i);
6462             auto_vec<tree> vops (5);
6463             vops.quick_push (mask);
6464             vops.quick_push (vop0);
6465             if (vop1)
6466               vops.quick_push (vop1);
6467             if (vop2)
6468               vops.quick_push (vop2);
6469             if (reduc_idx >= 0)
6470               {
6471                 /* Perform the operation on active elements only and take
6472                      inactive elements from the reduction chain input.  */
6473                 gcc_assert (!vop2);
6474                 vops.quick_push (reduc_idx == 1 ? vop1 : vop0);
6475               }
6476             else
6477               {
6478                 auto else_value = targetm.preferred_else_value
6479                     (cond_fn, vectype, vops.length () - 1, &vops[1]);
6480                 vops.quick_push (else_value);
6481               }
6482             gcall *call = gimple_build_call_internal_vec (cond_fn, vops);
6483             new_temp = make_ssa_name (vec_dest, call);
6484             gimple_call_set_lhs (call, new_temp);
6485             gimple_call_set_nothrow (call, true);
6486             vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6487             new_stmt = call;
6488           }
6489       else
6490           {
6491             tree mask = NULL_TREE;
6492             /* When combining two masks check if either of them is elsewhere
6493                combined with a loop mask, if that's the case we can mark that the
6494                new combined mask doesn't need to be combined with a loop mask.  */
6495             if (masked_loop_p
6496                 && code == BIT_AND_EXPR
6497                 && VECTOR_BOOLEAN_TYPE_P (vectype))
6498               {
6499                 if (loop_vinfo->scalar_cond_masked_set.contains ({ op0,
6500                                                                                  ncopies}))
6501                     {
6502                       mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6503                                                        vectype, i);
6504 
6505                       vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
6506                                                      vop0, gsi);
6507                     }
6508 
6509                 if (loop_vinfo->scalar_cond_masked_set.contains ({ op1,
6510                                                                                  ncopies }))
6511                     {
6512                       mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6513                                                        vectype, i);
6514 
6515                       vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
6516                                                      vop1, gsi);
6517                     }
6518               }
6519 
6520             new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
6521             new_temp = make_ssa_name (vec_dest, new_stmt);
6522             gimple_assign_set_lhs (new_stmt, new_temp);
6523             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6524             if (using_emulated_vectors_p)
6525               suppress_warning (new_stmt, OPT_Wvector_operation_performance);
6526 
6527             /* Enter the combined value into the vector cond hash so we don't
6528                AND it with a loop mask again.  */
6529             if (mask)
6530               loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });
6531 
6532             if (vec_cvt_dest)
6533               {
6534                 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6535                 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6536                                                         new_temp);
6537                 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6538                 gimple_assign_set_lhs (new_stmt, new_temp);
6539                 vect_finish_stmt_generation (vinfo, stmt_info,
6540                                                      new_stmt, gsi);
6541               }
6542           }
6543       if (slp_node)
6544           SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6545       else
6546           STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6547     }
6548 
6549   if (!slp_node)
6550     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6551 
6552   vec_oprnds0.release ();
6553   vec_oprnds1.release ();
6554   vec_oprnds2.release ();
6555 
6556   return true;
6557 }
6558 
6559 /* A helper function to ensure data reference DR_INFO's base alignment.  */
6560 
6561 static void
ensure_base_align(dr_vec_info * dr_info)6562 ensure_base_align (dr_vec_info *dr_info)
6563 {
6564   /* Alignment is only analyzed for the first element of a DR group,
6565      use that to look at base alignment we need to enforce.  */
6566   if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
6567     dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
6568 
6569   gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);
6570 
6571   if (dr_info->base_misaligned)
6572     {
6573       tree base_decl = dr_info->base_decl;
6574 
6575       // We should only be able to increase the alignment of a base object if
6576       // we know what its new alignment should be at compile time.
6577       unsigned HOST_WIDE_INT align_base_to =
6578           DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6579 
6580       if (decl_in_symtab_p (base_decl))
6581           symtab_node::get (base_decl)->increase_alignment (align_base_to);
6582       else if (DECL_ALIGN (base_decl) < align_base_to)
6583           {
6584             SET_DECL_ALIGN (base_decl, align_base_to);
6585           DECL_USER_ALIGN (base_decl) = 1;
6586           }
6587       dr_info->base_misaligned = false;
6588     }
6589 }
6590 
6591 
6592 /* Function get_group_alias_ptr_type.
6593 
6594    Return the alias type for the group starting at FIRST_STMT_INFO.  */
6595 
6596 static tree
get_group_alias_ptr_type(stmt_vec_info first_stmt_info)6597 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6598 {
6599   struct data_reference *first_dr, *next_dr;
6600 
6601   first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6602   stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6603   while (next_stmt_info)
6604     {
6605       next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6606       if (get_alias_set (DR_REF (first_dr))
6607             != get_alias_set (DR_REF (next_dr)))
6608           {
6609             if (dump_enabled_p ())
6610               dump_printf_loc (MSG_NOTE, vect_location,
6611                                    "conflicting alias set types.\n");
6612             return ptr_type_node;
6613           }
6614       next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6615     }
6616   return reference_alias_ptr_type (DR_REF (first_dr));
6617 }
6618 
6619 
6620 /* Function scan_operand_equal_p.
6621 
6622    Helper function for check_scan_store.  Compare two references
6623    with .GOMP_SIMD_LANE bases.  */
6624 
6625 static bool
scan_operand_equal_p(tree ref1,tree ref2)6626 scan_operand_equal_p (tree ref1, tree ref2)
6627 {
6628   tree ref[2] = { ref1, ref2 };
6629   poly_int64 bitsize[2], bitpos[2];
6630   tree offset[2], base[2];
6631   for (int i = 0; i < 2; ++i)
6632     {
6633       machine_mode mode;
6634       int unsignedp, reversep, volatilep = 0;
6635       base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6636                                              &offset[i], &mode, &unsignedp,
6637                                              &reversep, &volatilep);
6638       if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6639           return false;
6640       if (TREE_CODE (base[i]) == MEM_REF
6641             && offset[i] == NULL_TREE
6642             && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6643           {
6644             gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6645             if (is_gimple_assign (def_stmt)
6646                 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6647                 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6648                 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6649               {
6650                 if (maybe_ne (mem_ref_offset (base[i]), 0))
6651                     return false;
6652                 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6653                 offset[i] = gimple_assign_rhs2 (def_stmt);
6654               }
6655           }
6656     }
6657 
6658   if (!operand_equal_p (base[0], base[1], 0))
6659     return false;
6660   if (maybe_ne (bitsize[0], bitsize[1]))
6661     return false;
6662   if (offset[0] != offset[1])
6663     {
6664       if (!offset[0] || !offset[1])
6665           return false;
6666       if (!operand_equal_p (offset[0], offset[1], 0))
6667           {
6668             tree step[2];
6669             for (int i = 0; i < 2; ++i)
6670               {
6671                 step[i] = integer_one_node;
6672                 if (TREE_CODE (offset[i]) == SSA_NAME)
6673                     {
6674                       gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6675                       if (is_gimple_assign (def_stmt)
6676                           && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6677                           && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6678                                 == INTEGER_CST))
6679                         {
6680                           step[i] = gimple_assign_rhs2 (def_stmt);
6681                           offset[i] = gimple_assign_rhs1 (def_stmt);
6682                         }
6683                     }
6684                 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6685                     {
6686                       step[i] = TREE_OPERAND (offset[i], 1);
6687                       offset[i] = TREE_OPERAND (offset[i], 0);
6688                     }
6689                 tree rhs1 = NULL_TREE;
6690                 if (TREE_CODE (offset[i]) == SSA_NAME)
6691                     {
6692                       gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6693                       if (gimple_assign_cast_p (def_stmt))
6694                         rhs1 = gimple_assign_rhs1 (def_stmt);
6695                     }
6696                 else if (CONVERT_EXPR_P (offset[i]))
6697                     rhs1 = TREE_OPERAND (offset[i], 0);
6698                 if (rhs1
6699                       && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6700                       && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6701                       && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6702                           >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6703                     offset[i] = rhs1;
6704               }
6705             if (!operand_equal_p (offset[0], offset[1], 0)
6706                 || !operand_equal_p (step[0], step[1], 0))
6707               return false;
6708           }
6709     }
6710   return true;
6711 }
6712 
6713 
/* The operation used for one step of a vectorized scan store, as
   computed by scan_store_can_perm_p.  */
enum scan_store_kind
{
  /* Normal permutation.  This is the zero value, so the zero-cleared
     entries that scan_store_can_perm_p creates with safe_grow_cleared
     read as this kind.  */
  scan_store_kind_perm = 0,

  /* Whole vector left shift permutation with zero init: the zeros
     shifted in already are the initializer.  */
  scan_store_kind_lshift_zero,

  /* Whole vector left shift permutation followed by a VEC_COND_EXPR,
     used when the initializer is not a zero constant.  */
  scan_store_kind_lshift_cond
};
6724 
6725 /* Function check_scan_store.
6726 
6727    Verify if we can perform the needed permutations or whole vector shifts.
6728    Return -1 on failure, otherwise exact log2 of vectype's nunits.
6729    USE_WHOLE_VECTOR is a vector of enum scan_store_kind which operation
6730    to do at each step.  */
6731 
6732 static int
scan_store_can_perm_p(tree vectype,tree init,vec<enum scan_store_kind> * use_whole_vector=NULL)6733 scan_store_can_perm_p (tree vectype, tree init,
6734                            vec<enum scan_store_kind> *use_whole_vector = NULL)
6735 {
6736   enum machine_mode vec_mode = TYPE_MODE (vectype);
6737   unsigned HOST_WIDE_INT nunits;
6738   if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6739     return -1;
6740   int units_log2 = exact_log2 (nunits);
6741   if (units_log2 <= 0)
6742     return -1;
6743 
6744   int i;
6745   enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6746   for (i = 0; i <= units_log2; ++i)
6747     {
6748       unsigned HOST_WIDE_INT j, k;
6749       enum scan_store_kind kind = scan_store_kind_perm;
6750       vec_perm_builder sel (nunits, nunits, 1);
6751       sel.quick_grow (nunits);
6752       if (i == units_log2)
6753           {
6754             for (j = 0; j < nunits; ++j)
6755               sel[j] = nunits - 1;
6756           }
6757       else
6758           {
6759             for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6760               sel[j] = j;
6761             for (k = 0; j < nunits; ++j, ++k)
6762               sel[j] = nunits + k;
6763           }
6764       vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6765       if (!can_vec_perm_const_p (vec_mode, indices))
6766           {
6767             if (i == units_log2)
6768               return -1;
6769 
6770             if (whole_vector_shift_kind == scan_store_kind_perm)
6771               {
6772                 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6773                     return -1;
6774                 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6775                 /* Whole vector shifts shift in zeros, so if init is all zero
6776                      constant, there is no need to do anything further.  */
6777                 if ((TREE_CODE (init) != INTEGER_CST
6778                        && TREE_CODE (init) != REAL_CST)
6779                       || !initializer_zerop (init))
6780                     {
6781                       tree masktype = truth_type_for (vectype);
6782                       if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6783                         return -1;
6784                       whole_vector_shift_kind = scan_store_kind_lshift_cond;
6785                     }
6786               }
6787             kind = whole_vector_shift_kind;
6788           }
6789       if (use_whole_vector)
6790           {
6791             if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6792               use_whole_vector->safe_grow_cleared (i, true);
6793             if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6794               use_whole_vector->safe_push (kind);
6795           }
6796     }
6797 
6798   return units_log2;
6799 }
6800 
6801 
6802 /* Function check_scan_store.
6803 
6804    Check magic stores for #pragma omp scan {in,ex}clusive reductions.  */
6805 
6806 static bool
check_scan_store(vec_info * vinfo,stmt_vec_info stmt_info,tree vectype,enum vect_def_type rhs_dt,bool slp,tree mask,vect_memory_access_type memory_access_type)6807 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
6808                       enum vect_def_type rhs_dt, bool slp, tree mask,
6809                       vect_memory_access_type memory_access_type)
6810 {
6811   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6812   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6813   tree ref_type;
6814 
6815   gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6816   if (slp
6817       || mask
6818       || memory_access_type != VMAT_CONTIGUOUS
6819       || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6820       || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6821       || loop_vinfo == NULL
6822       || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6823       || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6824       || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
6825       || !integer_zerop (DR_INIT (dr_info->dr))
6826       || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6827       || !alias_sets_conflict_p (get_alias_set (vectype),
6828                                          get_alias_set (TREE_TYPE (ref_type))))
6829     {
6830       if (dump_enabled_p ())
6831           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6832                                "unsupported OpenMP scan store.\n");
6833       return false;
6834     }
6835 
6836   /* We need to pattern match code built by OpenMP lowering and simplified
6837      by following optimizations into something we can handle.
6838      #pragma omp simd reduction(inscan,+:r)
6839      for (...)
6840        {
6841            r += something ();
6842            #pragma omp scan inclusive (r)
6843            use (r);
6844        }
6845      shall have body with:
6846        // Initialization for input phase, store the reduction initializer:
6847        _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6848        _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6849        D.2042[_21] = 0;
6850        // Actual input phase:
6851        ...
6852        r.0_5 = D.2042[_20];
6853        _6 = _4 + r.0_5;
6854        D.2042[_20] = _6;
6855        // Initialization for scan phase:
6856        _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6857        _26 = D.2043[_25];
6858        _27 = D.2042[_25];
6859        _28 = _26 + _27;
6860        D.2043[_25] = _28;
6861        D.2042[_25] = _28;
6862        // Actual scan phase:
6863        ...
6864        r.1_8 = D.2042[_20];
6865        ...
6866      The "omp simd array" variable D.2042 holds the privatized copy used
6867      inside of the loop and D.2043 is another one that holds copies of
6868      the current original list item.  The separate GOMP_SIMD_LANE ifn
6869      kinds are there in order to allow optimizing the initializer store
6870      and combiner sequence, e.g. if it is originally some C++ish user
6871      defined reduction, but allow the vectorizer to pattern recognize it
6872      and turn into the appropriate vectorized scan.
6873 
6874      For exclusive scan, this is slightly different:
6875      #pragma omp simd reduction(inscan,+:r)
6876      for (...)
6877        {
6878            use (r);
6879            #pragma omp scan exclusive (r)
6880            r += something ();
6881        }
6882      shall have body with:
6883        // Initialization for input phase, store the reduction initializer:
6884        _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6885        _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6886        D.2042[_21] = 0;
6887        // Actual input phase:
6888        ...
6889        r.0_5 = D.2042[_20];
6890        _6 = _4 + r.0_5;
6891        D.2042[_20] = _6;
6892        // Initialization for scan phase:
6893        _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6894        _26 = D.2043[_25];
6895        D.2044[_25] = _26;
6896        _27 = D.2042[_25];
6897        _28 = _26 + _27;
6898        D.2043[_25] = _28;
6899        // Actual scan phase:
6900        ...
6901        r.1_8 = D.2044[_20];
6902        ...  */
6903 
6904   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6905     {
6906       /* Match the D.2042[_21] = 0; store above.  Just require that
6907            it is a constant or external definition store.  */
6908       if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6909           {
6910            fail_init:
6911             if (dump_enabled_p ())
6912               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6913                                    "unsupported OpenMP scan initializer store.\n");
6914             return false;
6915           }
6916 
6917       if (! loop_vinfo->scan_map)
6918           loop_vinfo->scan_map = new hash_map<tree, tree>;
6919       tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6920       tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6921       if (cached)
6922           goto fail_init;
6923       cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6924 
6925       /* These stores can be vectorized normally.  */
6926       return true;
6927     }
6928 
6929   if (rhs_dt != vect_internal_def)
6930     {
6931      fail:
6932       if (dump_enabled_p ())
6933           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6934                                "unsupported OpenMP scan combiner pattern.\n");
6935       return false;
6936     }
6937 
6938   gimple *stmt = STMT_VINFO_STMT (stmt_info);
6939   tree rhs = gimple_assign_rhs1 (stmt);
6940   if (TREE_CODE (rhs) != SSA_NAME)
6941     goto fail;
6942 
6943   gimple *other_store_stmt = NULL;
6944   tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6945   bool inscan_var_store
6946     = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6947 
6948   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6949     {
6950       if (!inscan_var_store)
6951           {
6952             use_operand_p use_p;
6953             imm_use_iterator iter;
6954             FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6955               {
6956                 gimple *use_stmt = USE_STMT (use_p);
6957                 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6958                     continue;
6959                 if (gimple_bb (use_stmt) != gimple_bb (stmt)
6960                       || !is_gimple_assign (use_stmt)
6961                       || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6962                       || other_store_stmt
6963                       || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6964                     goto fail;
6965                 other_store_stmt = use_stmt;
6966               }
6967             if (other_store_stmt == NULL)
6968               goto fail;
6969             rhs = gimple_assign_lhs (other_store_stmt);
6970             if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6971               goto fail;
6972           }
6973     }
6974   else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6975     {
6976       use_operand_p use_p;
6977       imm_use_iterator iter;
6978       FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6979           {
6980             gimple *use_stmt = USE_STMT (use_p);
6981             if (use_stmt == stmt || is_gimple_debug (use_stmt))
6982               continue;
6983             if (other_store_stmt)
6984               goto fail;
6985             other_store_stmt = use_stmt;
6986           }
6987     }
6988   else
6989     goto fail;
6990 
6991   gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6992   if (gimple_bb (def_stmt) != gimple_bb (stmt)
6993       || !is_gimple_assign (def_stmt)
6994       || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6995     goto fail;
6996 
6997   enum tree_code code = gimple_assign_rhs_code (def_stmt);
6998   /* For pointer addition, we should use the normal plus for the vector
6999      operation.  */
7000   switch (code)
7001     {
7002     case POINTER_PLUS_EXPR:
7003       code = PLUS_EXPR;
7004       break;
7005     case MULT_HIGHPART_EXPR:
7006       goto fail;
7007     default:
7008       break;
7009     }
7010   if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
7011     goto fail;
7012 
7013   tree rhs1 = gimple_assign_rhs1 (def_stmt);
7014   tree rhs2 = gimple_assign_rhs2 (def_stmt);
7015   if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
7016     goto fail;
7017 
7018   gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7019   gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7020   if (gimple_bb (load1_stmt) != gimple_bb (stmt)
7021       || !gimple_assign_load_p (load1_stmt)
7022       || gimple_bb (load2_stmt) != gimple_bb (stmt)
7023       || !gimple_assign_load_p (load2_stmt))
7024     goto fail;
7025 
7026   stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7027   stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7028   if (load1_stmt_info == NULL
7029       || load2_stmt_info == NULL
7030       || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
7031             != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
7032       || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
7033             != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7034     goto fail;
7035 
7036   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
7037     {
7038       dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7039       if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
7040             || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
7041           goto fail;
7042       tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7043       tree lrhs;
7044       if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7045           lrhs = rhs1;
7046       else
7047           lrhs = rhs2;
7048       use_operand_p use_p;
7049       imm_use_iterator iter;
7050       FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
7051           {
7052             gimple *use_stmt = USE_STMT (use_p);
7053             if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
7054               continue;
7055             if (other_store_stmt)
7056               goto fail;
7057             other_store_stmt = use_stmt;
7058           }
7059     }
7060 
7061   if (other_store_stmt == NULL)
7062     goto fail;
7063   if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
7064       || !gimple_store_p (other_store_stmt))
7065     goto fail;
7066 
7067   stmt_vec_info other_store_stmt_info
7068     = loop_vinfo->lookup_stmt (other_store_stmt);
7069   if (other_store_stmt_info == NULL
7070       || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
7071             != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7072     goto fail;
7073 
7074   gimple *stmt1 = stmt;
7075   gimple *stmt2 = other_store_stmt;
7076   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7077     std::swap (stmt1, stmt2);
7078   if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
7079                                   gimple_assign_rhs1 (load2_stmt)))
7080     {
7081       std::swap (rhs1, rhs2);
7082       std::swap (load1_stmt, load2_stmt);
7083       std::swap (load1_stmt_info, load2_stmt_info);
7084     }
7085   if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
7086                                    gimple_assign_rhs1 (load1_stmt)))
7087     goto fail;
7088 
7089   tree var3 = NULL_TREE;
7090   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
7091       && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
7092                                         gimple_assign_rhs1 (load2_stmt)))
7093     goto fail;
7094   else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7095     {
7096       dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7097       if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
7098             || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
7099           goto fail;
7100       var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7101       if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
7102             || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
7103             || lookup_attribute ("omp simd inscan exclusive",
7104                                      DECL_ATTRIBUTES (var3)))
7105           goto fail;
7106     }
7107 
7108   dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
7109   if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
7110       || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
7111     goto fail;
7112 
7113   tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7114   tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
7115   if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
7116       || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
7117       || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7118            == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
7119     goto fail;
7120 
7121   if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7122     std::swap (var1, var2);
7123 
7124   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7125     {
7126       if (!lookup_attribute ("omp simd inscan exclusive",
7127                                    DECL_ATTRIBUTES (var1)))
7128           goto fail;
7129       var1 = var3;
7130     }
7131 
7132   if (loop_vinfo->scan_map == NULL)
7133     goto fail;
7134   tree *init = loop_vinfo->scan_map->get (var1);
7135   if (init == NULL)
7136     goto fail;
7137 
7138   /* The IL is as expected, now check if we can actually vectorize it.
7139      Inclusive scan:
7140        _26 = D.2043[_25];
7141        _27 = D.2042[_25];
7142        _28 = _26 + _27;
7143        D.2043[_25] = _28;
7144        D.2042[_25] = _28;
7145      should be vectorized as (where _40 is the vectorized rhs
7146      from the D.2042[_21] = 0; store):
7147        _30 = MEM <vector(8) int> [(int *)&D.2043];
7148        _31 = MEM <vector(8) int> [(int *)&D.2042];
7149        _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7150        _33 = _31 + _32;
7151        // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7152        _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7153        _35 = _33 + _34;
7154        // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7155        //         _31[1]+.._31[4], ... _31[4]+.._31[7] };
7156        _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7157        _37 = _35 + _36;
7158        // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7159        //         _31[0]+.._31[4], ... _31[0]+.._31[7] };
7160        _38 = _30 + _37;
7161        _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7162        MEM <vector(8) int> [(int *)&D.2043] = _39;
7163        MEM <vector(8) int> [(int *)&D.2042] = _38;
7164      Exclusive scan:
7165        _26 = D.2043[_25];
7166        D.2044[_25] = _26;
7167        _27 = D.2042[_25];
7168        _28 = _26 + _27;
7169        D.2043[_25] = _28;
7170      should be vectorized as (where _40 is the vectorized rhs
7171      from the D.2042[_21] = 0; store):
7172        _30 = MEM <vector(8) int> [(int *)&D.2043];
7173        _31 = MEM <vector(8) int> [(int *)&D.2042];
7174        _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7175        _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7176        _34 = _32 + _33;
7177        // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7178        //         _31[3]+_31[4], ... _31[5]+.._31[6] };
7179        _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7180        _36 = _34 + _35;
7181        // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7182        //         _31[1]+.._31[4], ... _31[3]+.._31[6] };
7183        _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7184        _38 = _36 + _37;
7185        // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7186        //         _31[0]+.._31[4], ... _31[0]+.._31[6] };
7187        _39 = _30 + _38;
7188        _50 = _31 + _39;
7189        _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7190        MEM <vector(8) int> [(int *)&D.2044] = _39;
7191        MEM <vector(8) int> [(int *)&D.2042] = _51;  */
7192   enum machine_mode vec_mode = TYPE_MODE (vectype);
7193   optab optab = optab_for_tree_code (code, vectype, optab_default);
7194   if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7195     goto fail;
7196 
7197   int units_log2 = scan_store_can_perm_p (vectype, *init);
7198   if (units_log2 == -1)
7199     goto fail;
7200 
7201   return true;
7202 }
7203 
7204 
7205 /* Function vectorizable_scan_store.
7206 
7207    Helper of vectorizable_score, arguments like on vectorizable_store.
7208    Handle only the transformation, checking is done in check_scan_store.  */
7209 
7210 static bool
vectorizable_scan_store(vec_info * vinfo,stmt_vec_info stmt_info,gimple_stmt_iterator * gsi,gimple ** vec_stmt,int ncopies)7211 vectorizable_scan_store (vec_info *vinfo,
7212                                stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7213                                gimple **vec_stmt, int ncopies)
7214 {
7215   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7216   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7217   tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7218   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7219 
7220   if (dump_enabled_p ())
7221     dump_printf_loc (MSG_NOTE, vect_location,
7222                          "transform scan store. ncopies = %d\n", ncopies);
7223 
7224   gimple *stmt = STMT_VINFO_STMT (stmt_info);
7225   tree rhs = gimple_assign_rhs1 (stmt);
7226   gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7227 
7228   tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7229   bool inscan_var_store
7230     = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7231 
7232   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7233     {
7234       use_operand_p use_p;
7235       imm_use_iterator iter;
7236       FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7237           {
7238             gimple *use_stmt = USE_STMT (use_p);
7239             if (use_stmt == stmt || is_gimple_debug (use_stmt))
7240               continue;
7241             rhs = gimple_assign_lhs (use_stmt);
7242             break;
7243           }
7244     }
7245 
7246   gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7247   enum tree_code code = gimple_assign_rhs_code (def_stmt);
7248   if (code == POINTER_PLUS_EXPR)
7249     code = PLUS_EXPR;
7250   gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7251                 && commutative_tree_code (code));
7252   tree rhs1 = gimple_assign_rhs1 (def_stmt);
7253   tree rhs2 = gimple_assign_rhs2 (def_stmt);
7254   gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7255   gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7256   gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7257   stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7258   stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7259   dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7260   dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7261   tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7262   tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7263 
7264   if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7265     {
7266       std::swap (rhs1, rhs2);
7267       std::swap (var1, var2);
7268       std::swap (load1_dr_info, load2_dr_info);
7269     }
7270 
7271   tree *init = loop_vinfo->scan_map->get (var1);
7272   gcc_assert (init);
7273 
7274   unsigned HOST_WIDE_INT nunits;
7275   if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7276     gcc_unreachable ();
7277   auto_vec<enum scan_store_kind, 16> use_whole_vector;
7278   int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7279   gcc_assert (units_log2 > 0);
7280   auto_vec<tree, 16> perms;
7281   perms.quick_grow (units_log2 + 1);
7282   tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7283   for (int i = 0; i <= units_log2; ++i)
7284     {
7285       unsigned HOST_WIDE_INT j, k;
7286       vec_perm_builder sel (nunits, nunits, 1);
7287       sel.quick_grow (nunits);
7288       if (i == units_log2)
7289           for (j = 0; j < nunits; ++j)
7290             sel[j] = nunits - 1;
7291       else
7292           {
7293             for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7294               sel[j] = j;
7295             for (k = 0; j < nunits; ++j, ++k)
7296               sel[j] = nunits + k;
7297           }
7298       vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7299       if (!use_whole_vector.is_empty ()
7300             && use_whole_vector[i] != scan_store_kind_perm)
7301           {
7302             if (zero_vec == NULL_TREE)
7303               zero_vec = build_zero_cst (vectype);
7304             if (masktype == NULL_TREE
7305                 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7306               masktype = truth_type_for (vectype);
7307             perms[i] = vect_gen_perm_mask_any (vectype, indices);
7308           }
7309       else
7310           perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7311     }
7312 
7313   tree vec_oprnd1 = NULL_TREE;
7314   tree vec_oprnd2 = NULL_TREE;
7315   tree vec_oprnd3 = NULL_TREE;
7316   tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7317   tree dataref_offset = build_int_cst (ref_type, 0);
7318   tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
7319                                                      vectype, VMAT_CONTIGUOUS);
7320   tree ldataref_ptr = NULL_TREE;
7321   tree orig = NULL_TREE;
7322   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7323     ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7324   auto_vec<tree> vec_oprnds1;
7325   auto_vec<tree> vec_oprnds2;
7326   auto_vec<tree> vec_oprnds3;
7327   vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
7328                          *init, &vec_oprnds1,
7329                          ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
7330                          rhs2, &vec_oprnds3);
7331   for (int j = 0; j < ncopies; j++)
7332     {
7333       vec_oprnd1 = vec_oprnds1[j];
7334       if (ldataref_ptr == NULL)
7335           vec_oprnd2 = vec_oprnds2[j];
7336       vec_oprnd3 = vec_oprnds3[j];
7337       if (j == 0)
7338           orig = vec_oprnd3;
7339       else if (!inscan_var_store)
7340           dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7341 
7342       if (ldataref_ptr)
7343           {
7344             vec_oprnd2 = make_ssa_name (vectype);
7345             tree data_ref = fold_build2 (MEM_REF, vectype,
7346                                                unshare_expr (ldataref_ptr),
7347                                                dataref_offset);
7348             vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7349             gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7350             vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7351             STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7352             *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7353           }
7354 
7355       tree v = vec_oprnd2;
7356       for (int i = 0; i < units_log2; ++i)
7357           {
7358             tree new_temp = make_ssa_name (vectype);
7359             gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7360                                                      (zero_vec
7361                                                       && (use_whole_vector[i]
7362                                                             != scan_store_kind_perm))
7363                                                      ? zero_vec : vec_oprnd1, v,
7364                                                      perms[i]);
7365             vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7366             STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7367             *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7368 
7369             if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7370               {
7371                 /* Whole vector shift shifted in zero bits, but if *init
7372                      is not initializer_zerop, we need to replace those elements
7373                      with elements from vec_oprnd1.  */
7374                 tree_vector_builder vb (masktype, nunits, 1);
7375                 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7376                     vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7377                                      ? boolean_false_node : boolean_true_node);
7378 
7379                 tree new_temp2 = make_ssa_name (vectype);
7380                 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7381                                                new_temp, vec_oprnd1);
7382                 vect_finish_stmt_generation (vinfo, stmt_info,
7383                                                                          g, gsi);
7384                 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7385                 new_temp = new_temp2;
7386               }
7387 
7388             /* For exclusive scan, perform the perms[i] permutation once
7389                more.  */
7390             if (i == 0
7391                 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7392                 && v == vec_oprnd2)
7393               {
7394                 v = new_temp;
7395                 --i;
7396                 continue;
7397               }
7398 
7399             tree new_temp2 = make_ssa_name (vectype);
7400             g = gimple_build_assign (new_temp2, code, v, new_temp);
7401             vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7402             STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7403 
7404             v = new_temp2;
7405           }
7406 
7407       tree new_temp = make_ssa_name (vectype);
7408       gimple *g = gimple_build_assign (new_temp, code, orig, v);
7409       vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7410       STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7411 
7412       tree last_perm_arg = new_temp;
7413       /* For exclusive scan, new_temp computed above is the exclusive scan
7414            prefix sum.  Turn it into inclusive prefix sum for the broadcast
7415            of the last element into orig.  */
7416       if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7417           {
7418             last_perm_arg = make_ssa_name (vectype);
7419             g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7420             vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7421             STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7422           }
7423 
7424       orig = make_ssa_name (vectype);
7425       g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7426                                      last_perm_arg, perms[units_log2]);
7427       vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7428       STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7429 
7430       if (!inscan_var_store)
7431           {
7432             tree data_ref = fold_build2 (MEM_REF, vectype,
7433                                                unshare_expr (dataref_ptr),
7434                                                dataref_offset);
7435             vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7436             g = gimple_build_assign (data_ref, new_temp);
7437             vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7438             STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7439           }
7440     }
7441 
7442   if (inscan_var_store)
7443     for (int j = 0; j < ncopies; j++)
7444       {
7445           if (j != 0)
7446             dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7447 
7448           tree data_ref = fold_build2 (MEM_REF, vectype,
7449                                              unshare_expr (dataref_ptr),
7450                                              dataref_offset);
7451           vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7452           gimple *g = gimple_build_assign (data_ref, orig);
7453           vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7454           STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7455       }
7456   return true;
7457 }
7458 
7459 
7460 /* Function vectorizable_store.
7461 
7462    Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
7463    that can be vectorized.
7464    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7465    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7466    Return true if STMT_INFO is vectorizable in this way.  */
7467 
7468 static bool
vectorizable_store(vec_info * vinfo,stmt_vec_info stmt_info,gimple_stmt_iterator * gsi,gimple ** vec_stmt,slp_tree slp_node,stmt_vector_for_cost * cost_vec)7469 vectorizable_store (vec_info *vinfo,
7470                         stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7471                         gimple **vec_stmt, slp_tree slp_node,
7472                         stmt_vector_for_cost *cost_vec)
7473 {
7474   tree data_ref;
7475   tree op;
7476   tree vec_oprnd = NULL_TREE;
7477   tree elem_type;
7478   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7479   class loop *loop = NULL;
7480   machine_mode vec_mode;
7481   tree dummy;
7482   enum vect_def_type rhs_dt = vect_unknown_def_type;
7483   enum vect_def_type mask_dt = vect_unknown_def_type;
7484   tree dataref_ptr = NULL_TREE;
7485   tree dataref_offset = NULL_TREE;
7486   gimple *ptr_incr = NULL;
7487   int ncopies;
7488   int j;
7489   stmt_vec_info first_stmt_info;
7490   bool grouped_store;
7491   unsigned int group_size, i;
7492   vec<tree> oprnds = vNULL;
7493   vec<tree> result_chain = vNULL;
7494   vec<tree> vec_oprnds = vNULL;
7495   bool slp = (slp_node != NULL);
7496   unsigned int vec_num;
7497   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7498   tree aggr_type;
7499   gather_scatter_info gs_info;
7500   poly_uint64 vf;
7501   vec_load_store_type vls_type;
7502   tree ref_type;
7503 
7504   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7505     return false;
7506 
7507   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7508       && ! vec_stmt)
7509     return false;
7510 
7511   /* Is vectorizable store? */
7512 
7513   tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7514   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7515     {
7516       tree scalar_dest = gimple_assign_lhs (assign);
7517       if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7518             && is_pattern_stmt_p (stmt_info))
7519           scalar_dest = TREE_OPERAND (scalar_dest, 0);
7520       if (TREE_CODE (scalar_dest) != ARRAY_REF
7521             && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7522             && TREE_CODE (scalar_dest) != INDIRECT_REF
7523             && TREE_CODE (scalar_dest) != COMPONENT_REF
7524             && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7525             && TREE_CODE (scalar_dest) != REALPART_EXPR
7526             && TREE_CODE (scalar_dest) != MEM_REF)
7527           return false;
7528     }
7529   else
7530     {
7531       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7532       if (!call || !gimple_call_internal_p (call))
7533           return false;
7534 
7535       internal_fn ifn = gimple_call_internal_fn (call);
7536       if (!internal_store_fn_p (ifn))
7537           return false;
7538 
7539       if (slp_node != NULL)
7540           {
7541             if (dump_enabled_p ())
7542               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7543                                    "SLP of masked stores not supported.\n");
7544             return false;
7545           }
7546 
7547       int mask_index = internal_fn_mask_index (ifn);
7548       if (mask_index >= 0
7549             && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
7550                                               &mask, NULL, &mask_dt, &mask_vectype))
7551           return false;
7552     }
7553 
7554   op = vect_get_store_rhs (stmt_info);
7555 
7556   /* Cannot have hybrid store SLP -- that would mean storing to the
7557      same location twice.  */
7558   gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7559 
7560   tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7561   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7562 
7563   if (loop_vinfo)
7564     {
7565       loop = LOOP_VINFO_LOOP (loop_vinfo);
7566       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7567     }
7568   else
7569     vf = 1;
7570 
7571   /* Multiple types in SLP are handled by creating the appropriate number of
7572      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
7573      case of SLP.  */
7574   if (slp)
7575     ncopies = 1;
7576   else
7577     ncopies = vect_get_num_copies (loop_vinfo, vectype);
7578 
7579   gcc_assert (ncopies >= 1);
7580 
7581   /* FORNOW.  This restriction should be relaxed.  */
7582   if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7583     {
7584       if (dump_enabled_p ())
7585           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7586                                "multiple types in nested loop.\n");
7587       return false;
7588     }
7589 
7590   if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
7591                                    op, &rhs_dt, &rhs_vectype, &vls_type))
7592     return false;
7593 
7594   elem_type = TREE_TYPE (vectype);
7595   vec_mode = TYPE_MODE (vectype);
7596 
7597   if (!STMT_VINFO_DATA_REF (stmt_info))
7598     return false;
7599 
7600   vect_memory_access_type memory_access_type;
7601   enum dr_alignment_support alignment_support_scheme;
7602   int misalignment;
7603   poly_int64 poffset;
7604   if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
7605                                   ncopies, &memory_access_type, &poffset,
7606                                   &alignment_support_scheme, &misalignment, &gs_info))
7607     return false;
7608 
7609   if (mask)
7610     {
7611       if (memory_access_type == VMAT_CONTIGUOUS)
7612           {
7613             if (!VECTOR_MODE_P (vec_mode)
7614                 || !can_vec_mask_load_store_p (vec_mode,
7615                                                        TYPE_MODE (mask_vectype), false))
7616               return false;
7617           }
7618       else if (memory_access_type != VMAT_LOAD_STORE_LANES
7619                  && (memory_access_type != VMAT_GATHER_SCATTER
7620                        || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7621           {
7622             if (dump_enabled_p ())
7623               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7624                                    "unsupported access type for masked store.\n");
7625             return false;
7626           }
7627     }
7628   else
7629     {
7630       /* FORNOW. In some cases can vectorize even if data-type not supported
7631            (e.g. - array initialization with 0).  */
7632       if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7633           return false;
7634     }
7635 
7636   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7637   grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7638                        && memory_access_type != VMAT_GATHER_SCATTER
7639                        && (slp || memory_access_type != VMAT_CONTIGUOUS));
7640   if (grouped_store)
7641     {
7642       first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7643       first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7644       group_size = DR_GROUP_SIZE (first_stmt_info);
7645     }
7646   else
7647     {
7648       first_stmt_info = stmt_info;
7649       first_dr_info = dr_info;
7650       group_size = vec_num = 1;
7651     }
7652 
7653   if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7654     {
7655       if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
7656                                    memory_access_type))
7657           return false;
7658     }
7659 
7660   if (!vec_stmt) /* transformation not required.  */
7661     {
7662       STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7663 
7664       if (loop_vinfo
7665             && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
7666           check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
7667                                                         vls_type, group_size,
7668                                                         memory_access_type, &gs_info,
7669                                                         mask);
7670 
7671       if (slp_node
7672             && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
7673                                                             vectype))
7674           {
7675             if (dump_enabled_p ())
7676               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7677                                    "incompatible vector types for invariants\n");
7678             return false;
7679           }
7680 
7681       if (dump_enabled_p ()
7682             && memory_access_type != VMAT_ELEMENTWISE
7683             && memory_access_type != VMAT_GATHER_SCATTER
7684             && alignment_support_scheme != dr_aligned)
7685           dump_printf_loc (MSG_NOTE, vect_location,
7686                                "Vectorizing an unaligned access.\n");
7687 
7688       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7689       vect_model_store_cost (vinfo, stmt_info, ncopies,
7690                                    memory_access_type, alignment_support_scheme,
7691                                    misalignment, vls_type, slp_node, cost_vec);
7692       return true;
7693     }
7694   gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7695 
7696   /* Transform.  */
7697 
7698   ensure_base_align (dr_info);
7699 
7700   if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7701     {
7702       tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7703       tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7704       tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7705       tree ptr, var, scale, vec_mask;
7706       tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7707       tree mask_halfvectype = mask_vectype;
7708       edge pe = loop_preheader_edge (loop);
7709       gimple_seq seq;
7710       basic_block new_bb;
7711       enum { NARROW, NONE, WIDEN } modifier;
7712       poly_uint64 scatter_off_nunits
7713           = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7714 
7715       if (known_eq (nunits, scatter_off_nunits))
7716           modifier = NONE;
7717       else if (known_eq (nunits * 2, scatter_off_nunits))
7718           {
7719             modifier = WIDEN;
7720 
7721             /* Currently gathers and scatters are only supported for
7722                fixed-length vectors.  */
7723             unsigned int count = scatter_off_nunits.to_constant ();
7724             vec_perm_builder sel (count, count, 1);
7725             for (i = 0; i < (unsigned int) count; ++i)
7726               sel.quick_push (i | (count / 2));
7727 
7728             vec_perm_indices indices (sel, 1, count);
7729             perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7730                                                               indices);
7731             gcc_assert (perm_mask != NULL_TREE);
7732           }
7733       else if (known_eq (nunits, scatter_off_nunits * 2))
7734           {
7735             modifier = NARROW;
7736 
7737             /* Currently gathers and scatters are only supported for
7738                fixed-length vectors.  */
7739             unsigned int count = nunits.to_constant ();
7740             vec_perm_builder sel (count, count, 1);
7741             for (i = 0; i < (unsigned int) count; ++i)
7742               sel.quick_push (i | (count / 2));
7743 
7744             vec_perm_indices indices (sel, 2, count);
7745             perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7746             gcc_assert (perm_mask != NULL_TREE);
7747             ncopies *= 2;
7748 
7749             if (mask)
7750               mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7751           }
7752       else
7753           gcc_unreachable ();
7754 
7755       rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7756       ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7757       masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7758       idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7759       srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7760       scaletype = TREE_VALUE (arglist);
7761 
7762       gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7763                                  && TREE_CODE (rettype) == VOID_TYPE);
7764 
7765       ptr = fold_convert (ptrtype, gs_info.base);
7766       if (!is_gimple_min_invariant (ptr))
7767           {
7768             ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7769             new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7770             gcc_assert (!new_bb);
7771           }
7772 
7773       if (mask == NULL_TREE)
7774           {
7775             mask_arg = build_int_cst (masktype, -1);
7776             mask_arg = vect_init_vector (vinfo, stmt_info,
7777                                                mask_arg, masktype, NULL);
7778           }
7779 
7780       scale = build_int_cst (scaletype, gs_info.scale);
7781 
7782       auto_vec<tree> vec_oprnds0;
7783       auto_vec<tree> vec_oprnds1;
7784       auto_vec<tree> vec_masks;
7785       if (mask)
7786           {
7787             tree mask_vectype = truth_type_for (vectype);
7788             vect_get_vec_defs_for_operand (vinfo, stmt_info,
7789                                                    modifier == NARROW
7790                                                    ? ncopies / 2 : ncopies,
7791                                                    mask, &vec_masks, mask_vectype);
7792           }
7793       vect_get_vec_defs_for_operand (vinfo, stmt_info,
7794                                              modifier == WIDEN
7795                                              ? ncopies / 2 : ncopies,
7796                                              gs_info.offset, &vec_oprnds0);
7797       vect_get_vec_defs_for_operand (vinfo, stmt_info,
7798                                              modifier == NARROW
7799                                              ? ncopies / 2 : ncopies,
7800                                              op, &vec_oprnds1);
7801       for (j = 0; j < ncopies; ++j)
7802           {
7803             if (modifier == WIDEN)
7804               {
7805                 if (j & 1)
7806                     op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
7807                                                      perm_mask, stmt_info, gsi);
7808                 else
7809                     op = vec_oprnd0 = vec_oprnds0[j / 2];
7810                 src = vec_oprnd1 = vec_oprnds1[j];
7811                 if (mask)
7812                     mask_op = vec_mask = vec_masks[j];
7813               }
7814             else if (modifier == NARROW)
7815               {
7816                 if (j & 1)
7817                     src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
7818                                                       perm_mask, stmt_info, gsi);
7819                 else
7820                     src = vec_oprnd1 = vec_oprnds1[j / 2];
7821                 op = vec_oprnd0 = vec_oprnds0[j];
7822                 if (mask)
7823                     mask_op = vec_mask = vec_masks[j / 2];
7824               }
7825             else
7826               {
7827                 op = vec_oprnd0 = vec_oprnds0[j];
7828                 src = vec_oprnd1 = vec_oprnds1[j];
7829                 if (mask)
7830                     mask_op = vec_mask = vec_masks[j];
7831               }
7832 
7833             if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7834               {
7835                 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7836                                             TYPE_VECTOR_SUBPARTS (srctype)));
7837                 var = vect_get_new_ssa_name (srctype, vect_simple_var);
7838                 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7839                 gassign *new_stmt
7840                     = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7841                 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7842                 src = var;
7843               }
7844 
7845             if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7846               {
7847                 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7848                                             TYPE_VECTOR_SUBPARTS (idxtype)));
7849                 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7850                 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7851                 gassign *new_stmt
7852                     = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7853                 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7854                 op = var;
7855               }
7856 
7857             if (mask)
7858               {
7859                 tree utype;
7860                 mask_arg = mask_op;
7861                 if (modifier == NARROW)
7862                     {
7863                       var = vect_get_new_ssa_name (mask_halfvectype,
7864                                                          vect_simple_var);
7865                       gassign *new_stmt
7866                         = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7867                                                                       : VEC_UNPACK_LO_EXPR,
7868                                                      mask_op);
7869                       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7870                       mask_arg = var;
7871                     }
7872                 tree optype = TREE_TYPE (mask_arg);
7873                 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7874                     utype = masktype;
7875                 else
7876                     utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7877                 var = vect_get_new_ssa_name (utype, vect_scalar_var);
7878                 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7879                 gassign *new_stmt
7880                     = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7881                 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7882                 mask_arg = var;
7883                 if (!useless_type_conversion_p (masktype, utype))
7884                     {
7885                       gcc_assert (TYPE_PRECISION (utype)
7886                                     <= TYPE_PRECISION (masktype));
7887                       var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7888                       new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7889                       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7890                       mask_arg = var;
7891                     }
7892               }
7893 
7894             gcall *new_stmt
7895               = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7896              vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7897 
7898             STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7899           }
7900       *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7901       return true;
7902     }
7903   else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7904     return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
7905 
7906   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7907     DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7908 
7909   if (grouped_store)
7910     {
7911       /* FORNOW */
7912       gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7913 
7914       /* We vectorize all the stmts of the interleaving group when we
7915            reach the last stmt in the group.  */
7916       if (DR_GROUP_STORE_COUNT (first_stmt_info)
7917             < DR_GROUP_SIZE (first_stmt_info)
7918             && !slp)
7919           {
7920             *vec_stmt = NULL;
7921             return true;
7922           }
7923 
7924       if (slp)
7925         {
7926           grouped_store = false;
7927           /* VEC_NUM is the number of vect stmts to be created for this
7928              group.  */
7929           vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7930             first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7931             gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7932                           == first_stmt_info);
7933             first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7934             op = vect_get_store_rhs (first_stmt_info);
7935         }
7936       else
7937         /* VEC_NUM is the number of vect stmts to be created for this
7938            group.  */
7939           vec_num = group_size;
7940 
7941       ref_type = get_group_alias_ptr_type (first_stmt_info);
7942     }
7943   else
7944     ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7945 
7946   if (dump_enabled_p ())
7947     dump_printf_loc (MSG_NOTE, vect_location,
7948                      "transform store. ncopies = %d\n", ncopies);
7949 
7950   if (memory_access_type == VMAT_ELEMENTWISE
7951       || memory_access_type == VMAT_STRIDED_SLP)
7952     {
7953       gimple_stmt_iterator incr_gsi;
7954       bool insert_after;
7955       gimple *incr;
7956       tree offvar;
7957       tree ivstep;
7958       tree running_off;
7959       tree stride_base, stride_step, alias_off;
7960       tree vec_oprnd;
7961       tree dr_offset;
7962       unsigned int g;
7963       /* Checked by get_load_store_type.  */
7964       unsigned int const_nunits = nunits.to_constant ();
7965 
7966       gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7967       gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7968 
7969       dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
7970       stride_base
7971           = fold_build_pointer_plus
7972               (DR_BASE_ADDRESS (first_dr_info->dr),
7973                size_binop (PLUS_EXPR,
7974                                convert_to_ptrofftype (dr_offset),
7975                                convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7976       stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7977 
7978       /* For a store with loop-invariant (but other than power-of-2)
7979          stride (i.e. not a grouped access) like so:
7980 
7981              for (i = 0; i < n; i += stride)
7982                array[i] = ...;
7983 
7984            we generate a new induction variable and new stores from
7985            the components of the (vectorized) rhs:
7986 
7987              for (j = 0; ; j += VF*stride)
7988                vectemp = ...;
7989                tmp1 = vectemp[0];
7990                array[j] = tmp1;
7991                tmp2 = vectemp[1];
7992                array[j + stride] = tmp2;
7993                ...
7994          */
7995 
7996       unsigned nstores = const_nunits;
7997       unsigned lnel = 1;
7998       tree ltype = elem_type;
7999       tree lvectype = vectype;
8000       if (slp)
8001           {
8002             if (group_size < const_nunits
8003                 && const_nunits % group_size == 0)
8004               {
8005                 nstores = const_nunits / group_size;
8006                 lnel = group_size;
8007                 ltype = build_vector_type (elem_type, group_size);
8008                 lvectype = vectype;
8009 
8010                 /* First check if vec_extract optab doesn't support extraction
8011                      of vector elts directly.  */
8012                 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
8013                 machine_mode vmode;
8014                 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8015                       || !related_vector_mode (TYPE_MODE (vectype), elmode,
8016                                                      group_size).exists (&vmode)
8017                       || (convert_optab_handler (vec_extract_optab,
8018                                                        TYPE_MODE (vectype), vmode)
8019                           == CODE_FOR_nothing))
8020                     {
8021                       /* Try to avoid emitting an extract of vector elements
8022                          by performing the extracts using an integer type of the
8023                          same size, extracting from a vector of those and then
8024                          re-interpreting it as the original vector type if
8025                          supported.  */
8026                       unsigned lsize
8027                         = group_size * GET_MODE_BITSIZE (elmode);
8028                       unsigned int lnunits = const_nunits / group_size;
8029                       /* If we can't construct such a vector fall back to
8030                          element extracts from the original vector type and
8031                          element size stores.  */
8032                       if (int_mode_for_size (lsize, 0).exists (&elmode)
8033                           && VECTOR_MODE_P (TYPE_MODE (vectype))
8034                           && related_vector_mode (TYPE_MODE (vectype), elmode,
8035                                                         lnunits).exists (&vmode)
8036                           && (convert_optab_handler (vec_extract_optab,
8037                                                              vmode, elmode)
8038                                 != CODE_FOR_nothing))
8039                         {
8040                           nstores = lnunits;
8041                           lnel = group_size;
8042                           ltype = build_nonstandard_integer_type (lsize, 1);
8043                           lvectype = build_vector_type (ltype, nstores);
8044                         }
8045                       /* Else fall back to vector extraction anyway.
8046                          Fewer stores are more important than avoiding spilling
8047                          of the vector we extract from.  Compared to the
8048                          construction case in vectorizable_load no store-forwarding
8049                          issue exists here for reasonable archs.  */
8050                     }
8051               }
8052             else if (group_size >= const_nunits
8053                        && group_size % const_nunits == 0)
8054               {
8055                 nstores = 1;
8056                 lnel = const_nunits;
8057                 ltype = vectype;
8058                 lvectype = vectype;
8059               }
8060             ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
8061             ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8062           }
8063 
8064       ivstep = stride_step;
8065       ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
8066                                   build_int_cst (TREE_TYPE (ivstep), vf));
8067 
8068       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8069 
8070       stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8071       ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8072       create_iv (stride_base, ivstep, NULL,
8073                      loop, &incr_gsi, insert_after,
8074                      &offvar, NULL);
8075       incr = gsi_stmt (incr_gsi);
8076 
8077       stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8078 
8079       alias_off = build_int_cst (ref_type, 0);
8080       stmt_vec_info next_stmt_info = first_stmt_info;
8081       for (g = 0; g < group_size; g++)
8082           {
8083             running_off = offvar;
8084             if (g)
8085               {
8086                 tree size = TYPE_SIZE_UNIT (ltype);
8087                 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
8088                                               size);
8089                 tree newoff = copy_ssa_name (running_off, NULL);
8090                 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8091                                                     running_off, pos);
8092                 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8093                 running_off = newoff;
8094               }
8095             if (!slp)
8096               op = vect_get_store_rhs (next_stmt_info);
8097             vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
8098                                    op, &vec_oprnds);
8099             unsigned int group_el = 0;
8100             unsigned HOST_WIDE_INT
8101               elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8102             for (j = 0; j < ncopies; j++)
8103               {
8104                 vec_oprnd = vec_oprnds[j];
8105                 /* Pun the vector to extract from if necessary.  */
8106                 if (lvectype != vectype)
8107                     {
8108                       tree tem = make_ssa_name (lvectype);
8109                       gimple *pun
8110                         = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
8111                                                                       lvectype, vec_oprnd));
8112                       vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
8113                       vec_oprnd = tem;
8114                     }
8115                 for (i = 0; i < nstores; i++)
8116                     {
8117                       tree newref, newoff;
8118                       gimple *incr, *assign;
8119                       tree size = TYPE_SIZE (ltype);
8120                       /* Extract the i'th component.  */
8121                       tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8122                                                     bitsize_int (i), size);
8123                       tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8124                                                      size, pos);
8125 
8126                       elem = force_gimple_operand_gsi (gsi, elem, true,
8127                                                                NULL_TREE, true,
8128                                                                GSI_SAME_STMT);
8129 
8130                       tree this_off = build_int_cst (TREE_TYPE (alias_off),
8131                                                              group_el * elsz);
8132                       newref = build2 (MEM_REF, ltype,
8133                                            running_off, this_off);
8134                       vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8135 
8136                       /* And store it to *running_off.  */
8137                       assign = gimple_build_assign (newref, elem);
8138                       vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
8139 
8140                       group_el += lnel;
8141                       if (! slp
8142                           || group_el == group_size)
8143                         {
8144                           newoff = copy_ssa_name (running_off, NULL);
8145                           incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8146                                                               running_off, stride_step);
8147                           vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8148 
8149                           running_off = newoff;
8150                           group_el = 0;
8151                         }
8152                       if (g == group_size - 1
8153                           && !slp)
8154                         {
8155                           if (j == 0 && i == 0)
8156                               *vec_stmt = assign;
8157                           STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
8158                         }
8159                     }
8160               }
8161             next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8162             vec_oprnds.release ();
8163             if (slp)
8164               break;
8165           }
8166 
8167       return true;
8168     }
8169 
8170   auto_vec<tree> dr_chain (group_size);
8171   oprnds.create (group_size);
8172 
8173   gcc_assert (alignment_support_scheme);
8174   vec_loop_masks *loop_masks
8175     = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8176        ? &LOOP_VINFO_MASKS (loop_vinfo)
8177        : NULL);
8178   vec_loop_lens *loop_lens
8179     = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8180        ? &LOOP_VINFO_LENS (loop_vinfo)
8181        : NULL);
8182 
8183   /* Shouldn't go with length-based approach if fully masked.  */
8184   gcc_assert (!loop_lens || !loop_masks);
8185 
8186   /* Targets with store-lane instructions must not require explicit
8187      realignment.  vect_supportable_dr_alignment always returns either
8188      dr_aligned or dr_unaligned_supported for masked operations.  */
8189   gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8190                  && !mask
8191                  && !loop_masks)
8192                 || alignment_support_scheme == dr_aligned
8193                 || alignment_support_scheme == dr_unaligned_supported);
8194 
8195   tree offset = NULL_TREE;
8196   if (!known_eq (poffset, 0))
8197     offset = size_int (poffset);
8198 
8199   tree bump;
8200   tree vec_offset = NULL_TREE;
8201   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8202     {
8203       aggr_type = NULL_TREE;
8204       bump = NULL_TREE;
8205     }
8206   else if (memory_access_type == VMAT_GATHER_SCATTER)
8207     {
8208       aggr_type = elem_type;
8209       vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8210                                                &bump, &vec_offset);
8211     }
8212   else
8213     {
8214       if (memory_access_type == VMAT_LOAD_STORE_LANES)
8215           aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8216       else
8217           aggr_type = vectype;
8218       bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
8219                                                     memory_access_type);
8220     }
8221 
8222   if (mask)
8223     LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8224 
8225   /* In case the vectorization factor (VF) is bigger than the number
8226      of elements that we can fit in a vectype (nunits), we have to generate
8227      more than one vector stmt, i.e. we need to "unroll" the
8228      vector stmt by a factor VF/nunits.  */
8229 
8230   /* In case of interleaving (non-unit grouped access):
8231 
8232         S1:  &base + 2 = x2
8233         S2:  &base = x0
8234         S3:  &base + 1 = x1
8235         S4:  &base + 3 = x3
8236 
8237      We create vectorized stores starting from base address (the access of the
8238      first stmt in the chain, S2 in the above example) when the last store stmt
8239      of the chain (S4) is reached:
8240 
8241         VS1: &base = vx2
8242           VS2: &base + vec_size*1 = vx0
8243           VS3: &base + vec_size*2 = vx1
8244           VS4: &base + vec_size*3 = vx3
8245 
8246      Then permutation statements are generated:
8247 
8248           VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8249           VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8250           ...
8251 
8252      And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8253      (the order of the data-refs in the output of vect_permute_store_chain
8254      corresponds to the order of scalar stmts in the interleaving chain - see
8255      the documentation of vect_permute_store_chain()).
8256 
8257      In case of both multiple types and interleaving, above vector stores and
8258      permutation stmts are created for every copy.  The result vector stmts are
8259      put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8260      STMT_VINFO_RELATED_STMT for the next copies.
8261   */
8262 
8263   auto_vec<tree> vec_masks;
8264   tree vec_mask = NULL;
8265   auto_vec<tree> vec_offsets;
8266   auto_vec<vec<tree> > gvec_oprnds;
8267   gvec_oprnds.safe_grow_cleared (group_size, true);
8268   for (j = 0; j < ncopies; j++)
8269     {
8270       gimple *new_stmt;
8271       if (j == 0)
8272           {
8273           if (slp)
8274             {
8275                 /* Get vectorized arguments for SLP_NODE.  */
8276                 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
8277                                          op, &vec_oprnds);
8278               vec_oprnd = vec_oprnds[0];
8279             }
8280           else
8281             {
8282                 /* For interleaved stores we collect vectorized defs for all the
8283                      stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8284                      used as an input to vect_permute_store_chain().
8285 
8286                      If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
8287                      and OPRNDS are of size 1.  */
8288                 stmt_vec_info next_stmt_info = first_stmt_info;
8289                 for (i = 0; i < group_size; i++)
8290                     {
8291                       /* Since gaps are not supported for interleaved stores,
8292                          DR_GROUP_SIZE is the exact number of stmts in the chain.
8293                          Therefore, NEXT_STMT_INFO can't be NULL.  In case
8294                          that there is no interleaving, DR_GROUP_SIZE is 1,
8295                          and only one iteration of the loop will be executed.  */
8296                       op = vect_get_store_rhs (next_stmt_info);
8297                       vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
8298                                                              ncopies, op, &gvec_oprnds[i]);
8299                       vec_oprnd = gvec_oprnds[i][0];
8300                       dr_chain.quick_push (gvec_oprnds[i][0]);
8301                       oprnds.quick_push (gvec_oprnds[i][0]);
8302                       next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8303                     }
8304                 if (mask)
8305                     {
8306                       vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
8307                                                              mask, &vec_masks, mask_vectype);
8308                       vec_mask = vec_masks[0];
8309                     }
8310               }
8311 
8312             /* We should have caught mismatched types earlier.  */
8313             gcc_assert (useless_type_conversion_p (vectype,
8314                                                              TREE_TYPE (vec_oprnd)));
8315             bool simd_lane_access_p
8316               = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
8317             if (simd_lane_access_p
8318                 && !loop_masks
8319                 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8320                 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8321                 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
8322                 && integer_zerop (DR_INIT (first_dr_info->dr))
8323                 && alias_sets_conflict_p (get_alias_set (aggr_type),
8324                                                   get_alias_set (TREE_TYPE (ref_type))))
8325               {
8326                 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8327                 dataref_offset = build_int_cst (ref_type, 0);
8328               }
8329             else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8330               {
8331                 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
8332                                                      slp_node, &gs_info, &dataref_ptr,
8333                                                      &vec_offsets);
8334                 vec_offset = vec_offsets[0];
8335               }
8336             else
8337               dataref_ptr
8338                 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
8339                                                     simd_lane_access_p ? loop : NULL,
8340                                                     offset, &dummy, gsi, &ptr_incr,
8341                                                     simd_lane_access_p, bump);
8342           }
8343       else
8344           {
8345             /* For interleaved stores we created vectorized defs for all the
8346                defs stored in OPRNDS in the previous iteration (previous copy).
8347                DR_CHAIN is then used as an input to vect_permute_store_chain().
8348                If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8349                OPRNDS are of size 1.  */
8350             for (i = 0; i < group_size; i++)
8351               {
8352                 vec_oprnd = gvec_oprnds[i][j];
8353                 dr_chain[i] = gvec_oprnds[i][j];
8354                 oprnds[i] = gvec_oprnds[i][j];
8355               }
8356             if (mask)
8357               vec_mask = vec_masks[j];
8358             if (dataref_offset)
8359               dataref_offset
8360                 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8361             else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8362               vec_offset = vec_offsets[j];
8363             else
8364               dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8365                                                      stmt_info, bump);
8366           }
8367 
8368       if (memory_access_type == VMAT_LOAD_STORE_LANES)
8369           {
8370             tree vec_array;
8371 
8372             /* Get an array into which we can store the individual vectors.  */
8373             vec_array = create_vector_array (vectype, vec_num);
8374 
8375             /* Invalidate the current contents of VEC_ARRAY.  This should
8376                become an RTL clobber too, which prevents the vector registers
8377                from being upward-exposed.  */
8378             vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8379 
8380             /* Store the individual vectors into the array.  */
8381             for (i = 0; i < vec_num; i++)
8382               {
8383                 vec_oprnd = dr_chain[i];
8384                 write_vector_array (vinfo, stmt_info,
8385                                           gsi, vec_oprnd, vec_array, i);
8386               }
8387 
8388             tree final_mask = NULL;
8389             if (loop_masks)
8390               final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8391                                                        vectype, j);
8392             if (vec_mask)
8393               final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8394                                                      final_mask, vec_mask, gsi);
8395 
8396             gcall *call;
8397             if (final_mask)
8398               {
8399                 /* Emit:
8400                        MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8401                                              VEC_ARRAY).  */
8402                 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8403                 tree alias_ptr = build_int_cst (ref_type, align);
8404                 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8405                                                              dataref_ptr, alias_ptr,
8406                                                              final_mask, vec_array);
8407               }
8408             else
8409               {
8410                 /* Emit:
8411                        MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
8412                 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8413                 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8414                                                              vec_array);
8415                 gimple_call_set_lhs (call, data_ref);
8416               }
8417             gimple_call_set_nothrow (call, true);
8418             vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8419             new_stmt = call;
8420 
8421             /* Record that VEC_ARRAY is now dead.  */
8422             vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8423           }
8424       else
8425           {
8426             new_stmt = NULL;
8427             if (grouped_store)
8428               {
8429                 if (j == 0)
8430                     result_chain.create (group_size);
8431                 /* Permute.  */
8432                 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
8433                                                   gsi, &result_chain);
8434               }
8435 
8436             stmt_vec_info next_stmt_info = first_stmt_info;
8437             for (i = 0; i < vec_num; i++)
8438               {
8439                 unsigned misalign;
8440                 unsigned HOST_WIDE_INT align;
8441 
8442                 tree final_mask = NULL_TREE;
8443                 if (loop_masks)
8444                     final_mask = vect_get_loop_mask (gsi, loop_masks,
8445                                                              vec_num * ncopies,
8446                                                              vectype, vec_num * j + i);
8447                 if (vec_mask)
8448                     final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8449                                                          final_mask, vec_mask, gsi);
8450 
8451                 if (memory_access_type == VMAT_GATHER_SCATTER)
8452                     {
8453                       tree scale = size_int (gs_info.scale);
8454                       gcall *call;
8455                       if (final_mask)
8456                         call = gimple_build_call_internal
8457                           (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8458                            scale, vec_oprnd, final_mask);
8459                       else
8460                         call = gimple_build_call_internal
8461                           (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8462                            scale, vec_oprnd);
8463                       gimple_call_set_nothrow (call, true);
8464                       vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8465                       new_stmt = call;
8466                       break;
8467                     }
8468 
8469                 if (i > 0)
8470                     /* Bump the vector pointer.  */
8471                     dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8472                                                          gsi, stmt_info, bump);
8473 
8474                 if (slp)
8475                     vec_oprnd = vec_oprnds[i];
8476                 else if (grouped_store)
8477                     /* For grouped stores vectorized defs are interleaved in
8478                        vect_permute_store_chain().  */
8479                     vec_oprnd = result_chain[i];
8480 
8481                 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8482                 if (alignment_support_scheme == dr_aligned)
8483                     misalign = 0;
8484                 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
8485                     {
8486                       align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
8487                       misalign = 0;
8488                     }
8489                 else
8490                     misalign = misalignment;
8491                 if (dataref_offset == NULL_TREE
8492                       && TREE_CODE (dataref_ptr) == SSA_NAME)
8493                     set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8494                                                   misalign);
8495                 align = least_bit_hwi (misalign | align);
8496 
8497                 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8498                     {
8499                       tree perm_mask = perm_mask_for_reverse (vectype);
8500                       tree perm_dest = vect_create_destination_var
8501                         (vect_get_store_rhs (stmt_info), vectype);
8502                       tree new_temp = make_ssa_name (perm_dest);
8503 
8504                       /* Generate the permute statement.  */
8505                       gimple *perm_stmt
8506                         = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8507                                                      vec_oprnd, perm_mask);
8508                       vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8509 
8510                       perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8511                       vec_oprnd = new_temp;
8512                     }
8513 
8514                 /* Arguments are ready.  Create the new vector stmt.  */
8515                 if (final_mask)
8516                     {
8517                       tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8518                       gcall *call
8519                         = gimple_build_call_internal (IFN_MASK_STORE, 4,
8520                                                               dataref_ptr, ptr,
8521                                                               final_mask, vec_oprnd);
8522                       gimple_call_set_nothrow (call, true);
8523                       vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8524                       new_stmt = call;
8525                     }
8526                 else if (loop_lens)
8527                     {
8528                       tree final_len
8529                         = vect_get_loop_len (loop_vinfo, loop_lens,
8530                                                    vec_num * ncopies, vec_num * j + i);
8531                       tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8532                       machine_mode vmode = TYPE_MODE (vectype);
8533                       opt_machine_mode new_ovmode
8534                         = get_len_load_store_mode (vmode, false);
8535                       machine_mode new_vmode = new_ovmode.require ();
8536                       /* Need conversion if it's wrapped with VnQI.  */
8537                       if (vmode != new_vmode)
8538                         {
8539                           tree new_vtype
8540                               = build_vector_type_for_mode (unsigned_intQI_type_node,
8541                                                                   new_vmode);
8542                           tree var
8543                               = vect_get_new_ssa_name (new_vtype, vect_simple_var);
8544                           vec_oprnd
8545                               = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
8546                           gassign *new_stmt
8547                               = gimple_build_assign (var, VIEW_CONVERT_EXPR,
8548                                                          vec_oprnd);
8549                           vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
8550                                                                gsi);
8551                           vec_oprnd = var;
8552                         }
8553 
8554                       signed char biasval =
8555                         LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
8556 
8557                       tree bias = build_int_cst (intQI_type_node, biasval);
8558                       gcall *call
8559                         = gimple_build_call_internal (IFN_LEN_STORE, 5, dataref_ptr,
8560                                                               ptr, final_len, vec_oprnd,
8561                                                               bias);
8562                       gimple_call_set_nothrow (call, true);
8563                       vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8564                       new_stmt = call;
8565                     }
8566                 else
8567                     {
8568                       data_ref = fold_build2 (MEM_REF, vectype,
8569                                                     dataref_ptr,
8570                                                     dataref_offset
8571                                                     ? dataref_offset
8572                                                     : build_int_cst (ref_type, 0));
8573                       if (alignment_support_scheme == dr_aligned)
8574                         ;
8575                       else
8576                         TREE_TYPE (data_ref)
8577                           = build_aligned_type (TREE_TYPE (data_ref),
8578                                                       align * BITS_PER_UNIT);
8579                       vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8580                       new_stmt = gimple_build_assign (data_ref, vec_oprnd);
8581                       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8582                     }
8583 
8584                 if (slp)
8585                     continue;
8586 
8587                 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8588                 if (!next_stmt_info)
8589                     break;
8590               }
8591           }
8592       if (!slp)
8593           {
8594             if (j == 0)
8595               *vec_stmt = new_stmt;
8596             STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8597           }
8598     }
8599 
8600   for (i = 0; i < group_size; ++i)
8601     {
8602       vec<tree> oprndsi = gvec_oprnds[i];
8603       oprndsi.release ();
8604     }
8605   oprnds.release ();
8606   result_chain.release ();
8607   vec_oprnds.release ();
8608 
8609   return true;
8610 }
8611 
8612 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8613    VECTOR_CST mask.  No checks are made that the target platform supports the
8614    mask, so callers may wish to test can_vec_perm_const_p separately, or use
8615    vect_gen_perm_mask_checked.  */
8616 
8617 tree
vect_gen_perm_mask_any(tree vectype,const vec_perm_indices & sel)8618 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8619 {
8620   tree mask_type;
8621 
8622   poly_uint64 nunits = sel.length ();
8623   gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8624 
8625   mask_type = build_vector_type (ssizetype, nunits);
8626   return vec_perm_indices_to_tree (mask_type, sel);
8627 }
8628 
8629 /* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_const_p,
8630    i.e. that the target supports the pattern _for arbitrary input vectors_.  */
8631 
8632 tree
vect_gen_perm_mask_checked(tree vectype,const vec_perm_indices & sel)8633 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8634 {
8635   gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8636   return vect_gen_perm_mask_any (vectype, sel);
8637 }
8638 
8639 /* Given a vector variable X and Y, that was generated for the scalar
8640    STMT_INFO, generate instructions to permute the vector elements of X and Y
8641    using permutation mask MASK_VEC, insert them at *GSI and return the
8642    permuted vector variable.  */
8643 
8644 static tree
permute_vec_elements(vec_info * vinfo,tree x,tree y,tree mask_vec,stmt_vec_info stmt_info,gimple_stmt_iterator * gsi)8645 permute_vec_elements (vec_info *vinfo,
8646                           tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8647                           gimple_stmt_iterator *gsi)
8648 {
8649   tree vectype = TREE_TYPE (x);
8650   tree perm_dest, data_ref;
8651   gimple *perm_stmt;
8652 
8653   tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8654   if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8655     perm_dest = vect_create_destination_var (scalar_dest, vectype);
8656   else
8657     perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8658   data_ref = make_ssa_name (perm_dest);
8659 
8660   /* Generate the permute statement.  */
8661   perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8662   vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8663 
8664   return data_ref;
8665 }
8666 
8667 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8668    inserting them on the loops preheader edge.  Returns true if we
8669    were successful in doing so (and thus STMT_INFO can be moved then),
8670    otherwise returns false.  */
8671 
8672 static bool
hoist_defs_of_uses(stmt_vec_info stmt_info,class loop * loop)8673 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8674 {
8675   ssa_op_iter i;
8676   tree op;
8677   bool any = false;
8678 
8679   FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8680     {
8681       gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8682       if (!gimple_nop_p (def_stmt)
8683             && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8684           {
8685             /* Make sure we don't need to recurse.  While we could do
8686                so in simple cases when there are more complex use webs
8687                we don't have an easy way to preserve stmt order to fulfil
8688                dependencies within them.  */
8689             tree op2;
8690             ssa_op_iter i2;
8691             if (gimple_code (def_stmt) == GIMPLE_PHI)
8692               return false;
8693             FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8694               {
8695                 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8696                 if (!gimple_nop_p (def_stmt2)
8697                       && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8698                     return false;
8699               }
8700             any = true;
8701           }
8702     }
8703 
8704   if (!any)
8705     return true;
8706 
8707   FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8708     {
8709       gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8710       if (!gimple_nop_p (def_stmt)
8711             && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8712           {
8713             gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8714             gsi_remove (&gsi, false);
8715             gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8716           }
8717     }
8718 
8719   return true;
8720 }
8721 
8722 /* vectorizable_load.
8723 
8724    Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
8725    that can be vectorized.
8726    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8727    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8728    Return true if STMT_INFO is vectorizable in this way.  */
8729 
8730 static bool
vectorizable_load(vec_info * vinfo,stmt_vec_info stmt_info,gimple_stmt_iterator * gsi,gimple ** vec_stmt,slp_tree slp_node,stmt_vector_for_cost * cost_vec)8731 vectorizable_load (vec_info *vinfo,
8732                        stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8733                        gimple **vec_stmt, slp_tree slp_node,
8734                        stmt_vector_for_cost *cost_vec)
8735 {
8736   tree scalar_dest;
8737   tree vec_dest = NULL;
8738   tree data_ref = NULL;
8739   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8740   class loop *loop = NULL;
8741   class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8742   bool nested_in_vect_loop = false;
8743   tree elem_type;
8744   tree new_temp;
8745   machine_mode mode;
8746   tree dummy;
8747   tree dataref_ptr = NULL_TREE;
8748   tree dataref_offset = NULL_TREE;
8749   gimple *ptr_incr = NULL;
8750   int ncopies;
8751   int i, j;
8752   unsigned int group_size;
8753   poly_uint64 group_gap_adj;
8754   tree msq = NULL_TREE, lsq;
8755   tree realignment_token = NULL_TREE;
8756   gphi *phi = NULL;
8757   vec<tree> dr_chain = vNULL;
8758   bool grouped_load = false;
8759   stmt_vec_info first_stmt_info;
8760   stmt_vec_info first_stmt_info_for_drptr = NULL;
8761   bool compute_in_loop = false;
8762   class loop *at_loop;
8763   int vec_num;
8764   bool slp = (slp_node != NULL);
8765   bool slp_perm = false;
8766   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8767   poly_uint64 vf;
8768   tree aggr_type;
8769   gather_scatter_info gs_info;
8770   tree ref_type;
8771   enum vect_def_type mask_dt = vect_unknown_def_type;
8772 
8773   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8774     return false;
8775 
8776   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8777       && ! vec_stmt)
8778     return false;
8779 
8780   if (!STMT_VINFO_DATA_REF (stmt_info))
8781     return false;
8782 
8783   tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8784   int mask_index = -1;
8785   if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8786     {
8787       scalar_dest = gimple_assign_lhs (assign);
8788       if (TREE_CODE (scalar_dest) != SSA_NAME)
8789           return false;
8790 
8791       tree_code code = gimple_assign_rhs_code (assign);
8792       if (code != ARRAY_REF
8793             && code != BIT_FIELD_REF
8794             && code != INDIRECT_REF
8795             && code != COMPONENT_REF
8796             && code != IMAGPART_EXPR
8797             && code != REALPART_EXPR
8798             && code != MEM_REF
8799             && TREE_CODE_CLASS (code) != tcc_declaration)
8800           return false;
8801     }
8802   else
8803     {
8804       gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8805       if (!call || !gimple_call_internal_p (call))
8806           return false;
8807 
8808       internal_fn ifn = gimple_call_internal_fn (call);
8809       if (!internal_load_fn_p (ifn))
8810           return false;
8811 
8812       scalar_dest = gimple_call_lhs (call);
8813       if (!scalar_dest)
8814           return false;
8815 
8816       mask_index = internal_fn_mask_index (ifn);
8817       /* ??? For SLP the mask operand is always last.  */
8818       if (mask_index >= 0 && slp_node)
8819           mask_index = SLP_TREE_CHILDREN (slp_node).length () - 1;
8820       if (mask_index >= 0
8821             && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
8822                                               &mask, NULL, &mask_dt, &mask_vectype))
8823           return false;
8824     }
8825 
8826   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8827   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8828 
8829   if (loop_vinfo)
8830     {
8831       loop = LOOP_VINFO_LOOP (loop_vinfo);
8832       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8833       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8834     }
8835   else
8836     vf = 1;
8837 
8838   /* Multiple types in SLP are handled by creating the appropriate number of
8839      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
8840      case of SLP.  */
8841   if (slp)
8842     ncopies = 1;
8843   else
8844     ncopies = vect_get_num_copies (loop_vinfo, vectype);
8845 
8846   gcc_assert (ncopies >= 1);
8847 
8848   /* FORNOW. This restriction should be relaxed.  */
8849   if (nested_in_vect_loop && ncopies > 1)
8850     {
8851       if (dump_enabled_p ())
8852         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8853                          "multiple types in nested loop.\n");
8854       return false;
8855     }
8856 
8857   /* Invalidate assumptions made by dependence analysis when vectorization
8858      on the unrolled body effectively re-orders stmts.  */
8859   if (ncopies > 1
8860       && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8861       && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8862                        STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8863     {
8864       if (dump_enabled_p ())
8865           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8866                                "cannot perform implicit CSE when unrolling "
8867                                "with negative dependence distance\n");
8868       return false;
8869     }
8870 
8871   elem_type = TREE_TYPE (vectype);
8872   mode = TYPE_MODE (vectype);
8873 
8874   /* FORNOW. In some cases can vectorize even if data-type not supported
8875     (e.g. - data copies).  */
8876   if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8877     {
8878       if (dump_enabled_p ())
8879         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8880                          "Aligned load, but unsupported type.\n");
8881       return false;
8882     }
8883 
8884   /* Check if the load is a part of an interleaving chain.  */
8885   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8886     {
8887       grouped_load = true;
8888       /* FORNOW */
8889       gcc_assert (!nested_in_vect_loop);
8890       gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8891 
8892       first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8893       group_size = DR_GROUP_SIZE (first_stmt_info);
8894 
8895       /* Refuse non-SLP vectorization of SLP-only groups.  */
8896       if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8897           {
8898             if (dump_enabled_p ())
8899               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8900                                    "cannot vectorize load in non-SLP mode.\n");
8901             return false;
8902           }
8903 
8904       if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8905           {
8906             slp_perm = true;
8907 
8908             if (!loop_vinfo)
8909               {
8910                 /* In BB vectorization we may not actually use a loaded vector
8911                      accessing elements in excess of DR_GROUP_SIZE.  */
8912                 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8913                 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
8914                 unsigned HOST_WIDE_INT nunits;
8915                 unsigned j, k, maxk = 0;
8916                 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
8917                     if (k > maxk)
8918                       maxk = k;
8919                 tree vectype = SLP_TREE_VECTYPE (slp_node);
8920                 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
8921                       || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
8922                     {
8923                       if (dump_enabled_p ())
8924                         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8925                                              "BB vectorization with gaps at the end of "
8926                                              "a load is not supported\n");
8927                       return false;
8928                     }
8929               }
8930 
8931             auto_vec<tree> tem;
8932             unsigned n_perms;
8933             if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
8934                                                        true, &n_perms))
8935               {
8936                 if (dump_enabled_p ())
8937                     dump_printf_loc (MSG_MISSED_OPTIMIZATION,
8938                                          vect_location,
8939                                          "unsupported load permutation\n");
8940                 return false;
8941               }
8942           }
8943 
8944       /* Invalidate assumptions made by dependence analysis when vectorization
8945            on the unrolled body effectively re-orders stmts.  */
8946       if (!PURE_SLP_STMT (stmt_info)
8947             && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8948             && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8949                            STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8950           {
8951             if (dump_enabled_p ())
8952               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8953                                    "cannot perform implicit CSE when performing "
8954                                    "group loads with negative dependence distance\n");
8955             return false;
8956           }
8957     }
8958   else
8959     group_size = 1;
8960 
8961   vect_memory_access_type memory_access_type;
8962   enum dr_alignment_support alignment_support_scheme;
8963   int misalignment;
8964   poly_int64 poffset;
8965   if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
8966                                   ncopies, &memory_access_type, &poffset,
8967                                   &alignment_support_scheme, &misalignment, &gs_info))
8968     return false;
8969 
8970   if (mask)
8971     {
8972       if (memory_access_type == VMAT_CONTIGUOUS)
8973           {
8974             machine_mode vec_mode = TYPE_MODE (vectype);
8975             if (!VECTOR_MODE_P (vec_mode)
8976                 || !can_vec_mask_load_store_p (vec_mode,
8977                                                        TYPE_MODE (mask_vectype), true))
8978               return false;
8979           }
8980       else if (memory_access_type != VMAT_LOAD_STORE_LANES
8981                  && memory_access_type != VMAT_GATHER_SCATTER)
8982           {
8983             if (dump_enabled_p ())
8984               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8985                                    "unsupported access type for masked load.\n");
8986             return false;
8987           }
8988       else if (memory_access_type == VMAT_GATHER_SCATTER
8989                  && gs_info.ifn == IFN_LAST
8990                  && !gs_info.decl)
8991           {
8992             if (dump_enabled_p ())
8993               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8994                                    "unsupported masked emulated gather.\n");
8995             return false;
8996           }
8997       else if (memory_access_type == VMAT_ELEMENTWISE
8998                  || memory_access_type == VMAT_STRIDED_SLP)
8999           {
9000             if (dump_enabled_p ())
9001               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9002                                    "unsupported masked strided access.\n");
9003             return false;
9004           }
9005     }
9006 
9007   if (!vec_stmt) /* transformation not required.  */
9008     {
9009       if (slp_node
9010             && mask
9011             && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
9012                                                             mask_vectype))
9013           {
9014             if (dump_enabled_p ())
9015               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9016                                    "incompatible vector types for invariants\n");
9017             return false;
9018           }
9019 
9020       if (!slp)
9021           STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
9022 
9023       if (loop_vinfo
9024             && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
9025           check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
9026                                                         VLS_LOAD, group_size,
9027                                                         memory_access_type, &gs_info,
9028                                                         mask);
9029 
9030       if (dump_enabled_p ()
9031             && memory_access_type != VMAT_ELEMENTWISE
9032             && memory_access_type != VMAT_GATHER_SCATTER
9033             && alignment_support_scheme != dr_aligned)
9034           dump_printf_loc (MSG_NOTE, vect_location,
9035                                "Vectorizing an unaligned access.\n");
9036 
9037       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
9038       vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
9039                                   alignment_support_scheme, misalignment,
9040                                   &gs_info, slp_node, cost_vec);
9041       return true;
9042     }
9043 
9044   if (!slp)
9045     gcc_assert (memory_access_type
9046                     == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
9047 
9048   if (dump_enabled_p ())
9049     dump_printf_loc (MSG_NOTE, vect_location,
9050                      "transform load. ncopies = %d\n", ncopies);
9051 
9052   /* Transform.  */
9053 
9054   dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
9055   ensure_base_align (dr_info);
9056 
9057   if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
9058     {
9059       vect_build_gather_load_calls (vinfo,
9060                                             stmt_info, gsi, vec_stmt, &gs_info, mask);
9061       return true;
9062     }
9063 
9064   if (memory_access_type == VMAT_INVARIANT)
9065     {
9066       gcc_assert (!grouped_load && !mask && !bb_vinfo);
9067       /* If we have versioned for aliasing or the loop doesn't
9068            have any data dependencies that would preclude this,
9069            then we are sure this is a loop invariant load and
9070            thus we can insert it on the preheader edge.  */
9071       bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
9072                           && !nested_in_vect_loop
9073                           && hoist_defs_of_uses (stmt_info, loop));
9074       if (hoist_p)
9075           {
9076             gassign *stmt = as_a <gassign *> (stmt_info->stmt);
9077             if (dump_enabled_p ())
9078               dump_printf_loc (MSG_NOTE, vect_location,
9079                                    "hoisting out of the vectorized loop: %G", stmt);
9080             scalar_dest = copy_ssa_name (scalar_dest);
9081             tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
9082             gsi_insert_on_edge_immediate
9083               (loop_preheader_edge (loop),
9084                gimple_build_assign (scalar_dest, rhs));
9085           }
9086       /* These copies are all equivalent, but currently the representation
9087            requires a separate STMT_VINFO_VEC_STMT for each one.  */
9088       gimple_stmt_iterator gsi2 = *gsi;
9089       gsi_next (&gsi2);
9090       for (j = 0; j < ncopies; j++)
9091           {
9092             if (hoist_p)
9093               new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
9094                                                    vectype, NULL);
9095             else
9096               new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
9097                                                    vectype, &gsi2);
9098             gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
9099             if (slp)
9100               SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9101             else
9102               {
9103                 if (j == 0)
9104                     *vec_stmt = new_stmt;
9105                 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9106               }
9107           }
9108       return true;
9109     }
9110 
9111   if (memory_access_type == VMAT_ELEMENTWISE
9112       || memory_access_type == VMAT_STRIDED_SLP)
9113     {
9114       gimple_stmt_iterator incr_gsi;
9115       bool insert_after;
9116       tree offvar;
9117       tree ivstep;
9118       tree running_off;
9119       vec<constructor_elt, va_gc> *v = NULL;
9120       tree stride_base, stride_step, alias_off;
9121       /* Checked by get_load_store_type.  */
9122       unsigned int const_nunits = nunits.to_constant ();
9123       unsigned HOST_WIDE_INT cst_offset = 0;
9124       tree dr_offset;
9125 
9126       gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
9127       gcc_assert (!nested_in_vect_loop);
9128 
9129       if (grouped_load)
9130           {
9131             first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9132             first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9133           }
9134       else
9135           {
9136             first_stmt_info = stmt_info;
9137             first_dr_info = dr_info;
9138           }
9139       if (slp && grouped_load)
9140           {
9141             group_size = DR_GROUP_SIZE (first_stmt_info);
9142             ref_type = get_group_alias_ptr_type (first_stmt_info);
9143           }
9144       else
9145           {
9146             if (grouped_load)
9147               cst_offset
9148                 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
9149                      * vect_get_place_in_interleaving_chain (stmt_info,
9150                                                                        first_stmt_info));
9151             group_size = 1;
9152             ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
9153           }
9154 
9155       dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
9156       stride_base
9157           = fold_build_pointer_plus
9158               (DR_BASE_ADDRESS (first_dr_info->dr),
9159                size_binop (PLUS_EXPR,
9160                                convert_to_ptrofftype (dr_offset),
9161                                convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
9162       stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
9163 
9164       /* For a load with loop-invariant (but other than power-of-2)
9165          stride (i.e. not a grouped access) like so:
9166 
9167              for (i = 0; i < n; i += stride)
9168                ... = array[i];
9169 
9170            we generate a new induction variable and new accesses to
9171            form a new vector (or vectors, depending on ncopies):
9172 
9173              for (j = 0; ; j += VF*stride)
9174                tmp1 = array[j];
9175                tmp2 = array[j + stride];
9176                ...
9177                vectemp = {tmp1, tmp2, ...}
9178          */
9179 
9180       ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
9181                                   build_int_cst (TREE_TYPE (stride_step), vf));
9182 
9183       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
9184 
9185       stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
9186       ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
9187       create_iv (stride_base, ivstep, NULL,
9188                      loop, &incr_gsi, insert_after,
9189                      &offvar, NULL);
9190 
9191       stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
9192 
9193       running_off = offvar;
9194       alias_off = build_int_cst (ref_type, 0);
9195       int nloads = const_nunits;
9196       int lnel = 1;
9197       tree ltype = TREE_TYPE (vectype);
9198       tree lvectype = vectype;
9199       auto_vec<tree> dr_chain;
9200       if (memory_access_type == VMAT_STRIDED_SLP)
9201           {
9202             if (group_size < const_nunits)
9203               {
9204                 /* First check if vec_init optab supports construction from vector
9205                      elts directly.  Otherwise avoid emitting a constructor of
9206                      vector elements by performing the loads using an integer type
9207                      of the same size, constructing a vector of those and then
9208                      re-interpreting it as the original vector type.  This avoids a
9209                      huge runtime penalty due to the general inability to perform
9210                      store forwarding from smaller stores to a larger load.  */
9211                 tree ptype;
9212                 tree vtype
9213                     = vector_vector_composition_type (vectype,
9214                                                               const_nunits / group_size,
9215                                                               &ptype);
9216                 if (vtype != NULL_TREE)
9217                     {
9218                       nloads = const_nunits / group_size;
9219                       lnel = group_size;
9220                       lvectype = vtype;
9221                       ltype = ptype;
9222                     }
9223               }
9224             else
9225               {
9226                 nloads = 1;
9227                 lnel = const_nunits;
9228                 ltype = vectype;
9229               }
9230             ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
9231           }
9232       /* Load vector(1) scalar_type if it's 1 element-wise vectype.  */
9233       else if (nloads == 1)
9234           ltype = vectype;
9235 
9236       if (slp)
9237           {
9238             /* For SLP permutation support we need to load the whole group,
9239                not only the number of vector stmts the permutation result
9240                fits in.  */
9241             if (slp_perm)
9242               {
9243                 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9244                      variable VF.  */
9245                 unsigned int const_vf = vf.to_constant ();
9246                 ncopies = CEIL (group_size * const_vf, const_nunits);
9247                 dr_chain.create (ncopies);
9248               }
9249             else
9250               ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9251           }
9252       unsigned int group_el = 0;
9253       unsigned HOST_WIDE_INT
9254           elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
9255       unsigned int n_groups = 0;
9256       for (j = 0; j < ncopies; j++)
9257           {
9258             if (nloads > 1)
9259               vec_alloc (v, nloads);
9260             gimple *new_stmt = NULL;
9261             for (i = 0; i < nloads; i++)
9262               {
9263                 tree this_off = build_int_cst (TREE_TYPE (alias_off),
9264                                                        group_el * elsz + cst_offset);
9265                 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
9266                 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9267                 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
9268                 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9269                 if (nloads > 1)
9270                     CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9271                                                   gimple_assign_lhs (new_stmt));
9272 
9273                 group_el += lnel;
9274                 if (! slp
9275                       || group_el == group_size)
9276                     {
9277                       n_groups++;
9278                       /* When doing SLP make sure to not load elements from
9279                          the next vector iteration, those will not be accessed
9280                          so just use the last element again.  See PR107451.  */
9281                       if (!slp || known_lt (n_groups, vf))
9282                         {
9283                           tree newoff = copy_ssa_name (running_off);
9284                           gimple *incr
9285                               = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
9286                                                          running_off, stride_step);
9287                           vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
9288                           running_off = newoff;
9289                         }
9290                       group_el = 0;
9291                     }
9292               }
9293             if (nloads > 1)
9294               {
9295                 tree vec_inv = build_constructor (lvectype, v);
9296                 new_temp = vect_init_vector (vinfo, stmt_info,
9297                                                      vec_inv, lvectype, gsi);
9298                 new_stmt = SSA_NAME_DEF_STMT (new_temp);
9299                 if (lvectype != vectype)
9300                     {
9301                       new_stmt = gimple_build_assign (make_ssa_name (vectype),
9302                                                               VIEW_CONVERT_EXPR,
9303                                                               build1 (VIEW_CONVERT_EXPR,
9304                                                                         vectype, new_temp));
9305                       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9306                     }
9307               }
9308 
9309             if (slp)
9310               {
9311                 if (slp_perm)
9312                     dr_chain.quick_push (gimple_assign_lhs (new_stmt));
9313                 else
9314                     SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9315               }
9316             else
9317               {
9318                 if (j == 0)
9319                     *vec_stmt = new_stmt;
9320                 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9321               }
9322           }
9323       if (slp_perm)
9324           {
9325             unsigned n_perms;
9326             vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
9327                                                   false, &n_perms);
9328           }
9329       return true;
9330     }
9331 
9332   if (memory_access_type == VMAT_GATHER_SCATTER
9333       || (!slp && memory_access_type == VMAT_CONTIGUOUS))
9334     grouped_load = false;
9335 
9336   if (grouped_load)
9337     {
9338       first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9339       group_size = DR_GROUP_SIZE (first_stmt_info);
9340       /* For SLP vectorization we directly vectorize a subchain
9341          without permutation.  */
9342       if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9343           first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9344       /* For BB vectorization always use the first stmt to base
9345            the data ref pointer on.  */
9346       if (bb_vinfo)
9347           first_stmt_info_for_drptr
9348             = vect_find_first_scalar_stmt_in_slp (slp_node);
9349 
9350       /* Check if the chain of loads is already vectorized.  */
9351       if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
9352             /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9353                ???  But we can only do so if there is exactly one
9354                as we have no way to get at the rest.  Leave the CSE
9355                opportunity alone.
9356                ???  With the group load eventually participating
9357                in multiple different permutations (having multiple
9358                slp nodes which refer to the same group) the CSE
9359                is even wrong code.  See PR56270.  */
9360             && !slp)
9361           {
9362             *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9363             return true;
9364           }
9365       first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9366       group_gap_adj = 0;
9367 
9368       /* VEC_NUM is the number of vect stmts to be created for this group.  */
9369       if (slp)
9370           {
9371             grouped_load = false;
9372             /* If an SLP permutation is from N elements to N elements,
9373                and if one vector holds a whole number of N, we can load
9374                the inputs to the permutation in the same way as an
9375                unpermuted sequence.  In other cases we need to load the
9376                whole group, not only the number of vector stmts the
9377                permutation result fits in.  */
9378             unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
9379             if (slp_perm
9380                 && (group_size != scalar_lanes
9381                       || !multiple_p (nunits, group_size)))
9382               {
9383                 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9384                      variable VF; see vect_transform_slp_perm_load.  */
9385                 unsigned int const_vf = vf.to_constant ();
9386                 unsigned int const_nunits = nunits.to_constant ();
9387                 vec_num = CEIL (group_size * const_vf, const_nunits);
9388                 group_gap_adj = vf * group_size - nunits * vec_num;
9389               }
9390             else
9391               {
9392                 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9393                 group_gap_adj
9394                     = group_size - scalar_lanes;
9395               }
9396           }
9397       else
9398           vec_num = group_size;
9399 
9400       ref_type = get_group_alias_ptr_type (first_stmt_info);
9401     }
9402   else
9403     {
9404       first_stmt_info = stmt_info;
9405       first_dr_info = dr_info;
9406       group_size = vec_num = 1;
9407       group_gap_adj = 0;
9408       ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
9409       if (slp)
9410           vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9411     }
9412 
9413   gcc_assert (alignment_support_scheme);
9414   vec_loop_masks *loop_masks
9415     = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
9416        ? &LOOP_VINFO_MASKS (loop_vinfo)
9417        : NULL);
9418   vec_loop_lens *loop_lens
9419     = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
9420        ? &LOOP_VINFO_LENS (loop_vinfo)
9421        : NULL);
9422 
9423   /* Shouldn't go with length-based approach if fully masked.  */
9424   gcc_assert (!loop_lens || !loop_masks);
9425 
9426   /* Targets with load-lane instructions must not require explicit
9427      realignment.  vect_supportable_dr_alignment always returns either
9428      dr_aligned or dr_unaligned_supported for masked operations.  */
9429   gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
9430                  && !mask
9431                  && !loop_masks)
9432                 || alignment_support_scheme == dr_aligned
9433                 || alignment_support_scheme == dr_unaligned_supported);
9434 
9435   /* In case the vectorization factor (VF) is bigger than the number
9436      of elements that we can fit in a vectype (nunits), we have to generate
9437      more than one vector stmt, i.e., we need to "unroll" the
9438      vector stmt by a factor VF/nunits.  In doing so, we record a pointer
9439      from one copy of the vector stmt to the next, in the field
9440      STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
9441      stages to find the correct vector defs to be used when vectorizing
9442      stmts that use the defs of the current stmt.  The example below
9443      illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9444      need to create 4 vectorized stmts):
9445 
9446      before vectorization:
9447                                 RELATED_STMT    VEC_STMT
9448         S1:     x = memref      -               -
9449         S2:     z = x + 1       -               -
9450 
9451      step 1: vectorize stmt S1:
9452         We first create the vector stmt VS1_0, and, as usual, record a
9453         pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9454         Next, we create the vector stmt VS1_1, and record a pointer to
9455         it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9456         Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
9457         stmts and pointers:
9458                                 RELATED_STMT    VEC_STMT
9459         VS1_0:  vx0 = memref0   VS1_1           -
9460         VS1_1:  vx1 = memref1   VS1_2           -
9461         VS1_2:  vx2 = memref2   VS1_3           -
9462         VS1_3:  vx3 = memref3   -               -
9463         S1:     x = load        -               VS1_0
9464         S2:     z = x + 1       -               -
9465   */
9466 
9467   /* In case of interleaving (non-unit grouped access):
9468 
9469      S1:  x2 = &base + 2
9470      S2:  x0 = &base
9471      S3:  x1 = &base + 1
9472      S4:  x3 = &base + 3
9473 
9474      Vectorized loads are created in the order of memory accesses
9475      starting from the access of the first stmt of the chain:
9476 
9477      VS1: vx0 = &base
9478      VS2: vx1 = &base + vec_size*1
9479      VS3: vx2 = &base + vec_size*2
9480      VS4: vx3 = &base + vec_size*3
9481 
9482      Then permutation statements are generated:
9483 
9484      VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9485      VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9486        ...
9487 
9488      And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9489      (the order of the data-refs in the output of vect_permute_load_chain
9490      corresponds to the order of scalar stmts in the interleaving chain - see
9491      the documentation of vect_permute_load_chain()).
9492      The generation of permutation stmts and recording them in
9493      STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9494 
9495      In case of both multiple types and interleaving, the vector loads and
9496      permutation stmts above are created for every copy.  The result vector
9497      stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9498      corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
9499 
9500   /* If the data reference is aligned (dr_aligned) or potentially unaligned
9501      on a target that supports unaligned accesses (dr_unaligned_supported)
9502      we generate the following code:
9503          p = initial_addr;
9504          indx = 0;
9505          loop {
9506            p = p + indx * vectype_size;
9507            vec_dest = *(p);
9508            indx = indx + 1;
9509          }
9510 
9511      Otherwise, the data reference is potentially unaligned on a target that
9512      does not support unaligned accesses (dr_explicit_realign_optimized) -
9513      then generate the following code, in which the data in each iteration is
9514      obtained by two vector loads, one from the previous iteration, and one
9515      from the current iteration:
9516          p1 = initial_addr;
9517          msq_init = *(floor(p1))
9518          p2 = initial_addr + VS - 1;
9519          realignment_token = call target_builtin;
9520          indx = 0;
9521          loop {
9522            p2 = p2 + indx * vectype_size
9523            lsq = *(floor(p2))
9524            vec_dest = realign_load (msq, lsq, realignment_token)
9525            indx = indx + 1;
9526            msq = lsq;
9527          }   */
9528 
9529   /* If the misalignment remains the same throughout the execution of the
9530      loop, we can create the init_addr and permutation mask at the loop
9531      preheader.  Otherwise, it needs to be created inside the loop.
9532      This can only occur when vectorizing memory accesses in the inner-loop
9533      nested within an outer-loop that is being vectorized.  */
9534 
9535   if (nested_in_vect_loop
9536       && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9537                           GET_MODE_SIZE (TYPE_MODE (vectype))))
9538     {
9539       gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9540       compute_in_loop = true;
9541     }
9542 
9543   bool diff_first_stmt_info
9544     = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9545 
9546   tree offset = NULL_TREE;
9547   if ((alignment_support_scheme == dr_explicit_realign_optimized
9548        || alignment_support_scheme == dr_explicit_realign)
9549       && !compute_in_loop)
9550     {
9551       /* If we have different first_stmt_info, we can't set up realignment
9552            here, since we can't guarantee first_stmt_info DR has been
9553            initialized yet, use first_stmt_info_for_drptr DR by bumping the
9554            distance from first_stmt_info DR instead as below.  */
9555       if (!diff_first_stmt_info)
9556           msq = vect_setup_realignment (vinfo,
9557                                               first_stmt_info, gsi, &realignment_token,
9558                                               alignment_support_scheme, NULL_TREE,
9559                                               &at_loop);
9560       if (alignment_support_scheme == dr_explicit_realign_optimized)
9561           {
9562             phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9563             offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9564                                      size_one_node);
9565             gcc_assert (!first_stmt_info_for_drptr);
9566           }
9567     }
9568   else
9569     at_loop = loop;
9570 
9571   if (!known_eq (poffset, 0))
9572     offset = (offset
9573                 ? size_binop (PLUS_EXPR, offset, size_int (poffset))
9574                 : size_int (poffset));
9575 
9576   tree bump;
9577   tree vec_offset = NULL_TREE;
9578   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9579     {
9580       aggr_type = NULL_TREE;
9581       bump = NULL_TREE;
9582     }
9583   else if (memory_access_type == VMAT_GATHER_SCATTER)
9584     {
9585       aggr_type = elem_type;
9586       vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9587                                                &bump, &vec_offset);
9588     }
9589   else
9590     {
9591       if (memory_access_type == VMAT_LOAD_STORE_LANES)
9592           aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9593       else
9594           aggr_type = vectype;
9595       bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
9596                                                     memory_access_type);
9597     }
9598 
9599   auto_vec<tree> vec_offsets;
9600   auto_vec<tree> vec_masks;
9601   if (mask)
9602     {
9603       if (slp_node)
9604           vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
9605                                  &vec_masks);
9606       else
9607           vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
9608                                                &vec_masks, mask_vectype);
9609     }
9610   tree vec_mask = NULL_TREE;
9611   poly_uint64 group_elt = 0;
9612   for (j = 0; j < ncopies; j++)
9613     {
9614       /* 1. Create the vector or array pointer update chain.  */
9615       if (j == 0)
9616           {
9617             bool simd_lane_access_p
9618               = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9619             if (simd_lane_access_p
9620                 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9621                 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9622                 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9623                 && integer_zerop (DR_INIT (first_dr_info->dr))
9624                 && alias_sets_conflict_p (get_alias_set (aggr_type),
9625                                                   get_alias_set (TREE_TYPE (ref_type)))
9626                 && (alignment_support_scheme == dr_aligned
9627                       || alignment_support_scheme == dr_unaligned_supported))
9628               {
9629                 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9630                 dataref_offset = build_int_cst (ref_type, 0);
9631               }
9632             else if (diff_first_stmt_info)
9633               {
9634                 dataref_ptr
9635                     = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
9636                                                       aggr_type, at_loop, offset, &dummy,
9637                                                       gsi, &ptr_incr, simd_lane_access_p,
9638                                                       bump);
9639                 /* Adjust the pointer by the difference to first_stmt.  */
9640                 data_reference_p ptrdr
9641                     = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9642                 tree diff
9643                     = fold_convert (sizetype,
9644                                         size_binop (MINUS_EXPR,
9645                                                       DR_INIT (first_dr_info->dr),
9646                                                       DR_INIT (ptrdr)));
9647                 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9648                                                        stmt_info, diff);
9649                 if (alignment_support_scheme == dr_explicit_realign)
9650                     {
9651                       msq = vect_setup_realignment (vinfo,
9652                                                             first_stmt_info_for_drptr, gsi,
9653                                                             &realignment_token,
9654                                                             alignment_support_scheme,
9655                                                             dataref_ptr, &at_loop);
9656                       gcc_assert (!compute_in_loop);
9657                     }
9658               }
9659             else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9660               {
9661                 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
9662                                                      slp_node, &gs_info, &dataref_ptr,
9663                                                      &vec_offsets);
9664               }
9665             else
9666               dataref_ptr
9667                 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9668                                                     at_loop,
9669                                                     offset, &dummy, gsi, &ptr_incr,
9670                                                     simd_lane_access_p, bump);
9671             if (mask)
9672               vec_mask = vec_masks[0];
9673           }
9674       else
9675           {
9676             if (dataref_offset)
9677               dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9678                                                         bump);
9679             else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9680               dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9681                                                      stmt_info, bump);
9682             if (mask)
9683               vec_mask = vec_masks[j];
9684           }
9685 
9686       if (grouped_load || slp_perm)
9687           dr_chain.create (vec_num);
9688 
9689       gimple *new_stmt = NULL;
9690       if (memory_access_type == VMAT_LOAD_STORE_LANES)
9691           {
9692             tree vec_array;
9693 
9694             vec_array = create_vector_array (vectype, vec_num);
9695 
9696             tree final_mask = NULL_TREE;
9697             if (loop_masks)
9698               final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9699                                                        vectype, j);
9700             if (vec_mask)
9701               final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
9702                                                      final_mask, vec_mask, gsi);
9703 
9704             gcall *call;
9705             if (final_mask)
9706               {
9707                 /* Emit:
9708                        VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9709                                                     VEC_MASK).  */
9710                 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
9711                 tree alias_ptr = build_int_cst (ref_type, align);
9712                 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9713                                                              dataref_ptr, alias_ptr,
9714                                                              final_mask);
9715               }
9716             else
9717               {
9718                 /* Emit:
9719                        VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
9720                 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9721                 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9722               }
9723             gimple_call_set_lhs (call, vec_array);
9724             gimple_call_set_nothrow (call, true);
9725             vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9726             new_stmt = call;
9727 
9728             /* Extract each vector into an SSA_NAME.  */
9729             for (i = 0; i < vec_num; i++)
9730               {
9731                 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
9732                                                       vec_array, i);
9733                 dr_chain.quick_push (new_temp);
9734               }
9735 
9736             /* Record the mapping between SSA_NAMEs and statements.  */
9737             vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
9738 
9739             /* Record that VEC_ARRAY is now dead.  */
9740             vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9741           }
9742       else
9743           {
9744             for (i = 0; i < vec_num; i++)
9745               {
9746                 tree final_mask = NULL_TREE;
9747                 if (loop_masks
9748                       && memory_access_type != VMAT_INVARIANT)
9749                     final_mask = vect_get_loop_mask (gsi, loop_masks,
9750                                                              vec_num * ncopies,
9751                                                              vectype, vec_num * j + i);
9752                 if (vec_mask)
9753                     final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
9754                                                          final_mask, vec_mask, gsi);
9755 
9756                 if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9757                     dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9758                                                          gsi, stmt_info, bump);
9759 
9760                 /* 2. Create the vector-load in the loop.  */
9761                 switch (alignment_support_scheme)
9762                     {
9763                     case dr_aligned:
9764                     case dr_unaligned_supported:
9765                       {
9766                         unsigned int misalign;
9767                         unsigned HOST_WIDE_INT align;
9768 
9769                         if (memory_access_type == VMAT_GATHER_SCATTER
9770                               && gs_info.ifn != IFN_LAST)
9771                           {
9772                               if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9773                                 vec_offset = vec_offsets[vec_num * j + i];
9774                               tree zero = build_zero_cst (vectype);
9775                               tree scale = size_int (gs_info.scale);
9776                               gcall *call;
9777                               if (final_mask)
9778                                 call = gimple_build_call_internal
9779                                   (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9780                                    vec_offset, scale, zero, final_mask);
9781                               else
9782                                 call = gimple_build_call_internal
9783                                   (IFN_GATHER_LOAD, 4, dataref_ptr,
9784                                    vec_offset, scale, zero);
9785                               gimple_call_set_nothrow (call, true);
9786                               new_stmt = call;
9787                               data_ref = NULL_TREE;
9788                               break;
9789                           }
9790                         else if (memory_access_type == VMAT_GATHER_SCATTER)
9791                           {
9792                               /* Emulated gather-scatter.  */
9793                               gcc_assert (!final_mask);
9794                               unsigned HOST_WIDE_INT const_nunits
9795                                 = nunits.to_constant ();
9796                               unsigned HOST_WIDE_INT const_offset_nunits
9797                                 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
9798                                     .to_constant ();
9799                               vec<constructor_elt, va_gc> *ctor_elts;
9800                               vec_alloc (ctor_elts, const_nunits);
9801                               gimple_seq stmts = NULL;
9802                               /* We support offset vectors with more elements
9803                                  than the data vector for now.  */
9804                               unsigned HOST_WIDE_INT factor
9805                                 = const_offset_nunits / const_nunits;
9806                               vec_offset = vec_offsets[j / factor];
9807                               unsigned elt_offset = (j % factor) * const_nunits;
9808                               tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
9809                               tree scale = size_int (gs_info.scale);
9810                               align
9811                                 = get_object_alignment (DR_REF (first_dr_info->dr));
9812                               tree ltype = build_aligned_type (TREE_TYPE (vectype),
9813                                                                        align);
9814                               for (unsigned k = 0; k < const_nunits; ++k)
9815                                 {
9816                                   tree boff = size_binop (MULT_EXPR,
9817                                                                 TYPE_SIZE (idx_type),
9818                                                                 bitsize_int
9819                                                                   (k + elt_offset));
9820                                   tree idx = gimple_build (&stmts, BIT_FIELD_REF,
9821                                                                  idx_type, vec_offset,
9822                                                                  TYPE_SIZE (idx_type),
9823                                                                  boff);
9824                                   idx = gimple_convert (&stmts, sizetype, idx);
9825                                   idx = gimple_build (&stmts, MULT_EXPR,
9826                                                             sizetype, idx, scale);
9827                                   tree ptr = gimple_build (&stmts, PLUS_EXPR,
9828                                                                  TREE_TYPE (dataref_ptr),
9829                                                                  dataref_ptr, idx);
9830                                   ptr = gimple_convert (&stmts, ptr_type_node, ptr);
9831                                   tree elt = make_ssa_name (TREE_TYPE (vectype));
9832                                   tree ref = build2 (MEM_REF, ltype, ptr,
9833                                                          build_int_cst (ref_type, 0));
9834                                   new_stmt = gimple_build_assign (elt, ref);
9835                                   gimple_seq_add_stmt (&stmts, new_stmt);
9836                                   CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
9837                                 }
9838                               gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
9839                               new_stmt = gimple_build_assign (NULL_TREE,
9840                                                                       build_constructor
9841                                                                         (vectype, ctor_elts));
9842                               data_ref = NULL_TREE;
9843                               break;
9844                           }
9845 
9846                         align =
9847                           known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9848                         if (alignment_support_scheme == dr_aligned)
9849                           misalign = 0;
9850                         else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
9851                           {
9852                               align = dr_alignment
9853                                 (vect_dr_behavior (vinfo, first_dr_info));
9854                               misalign = 0;
9855                           }
9856                         else
9857                           misalign = misalignment;
9858                         if (dataref_offset == NULL_TREE
9859                               && TREE_CODE (dataref_ptr) == SSA_NAME)
9860                           set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9861                                                         align, misalign);
9862                         align = least_bit_hwi (misalign | align);
9863 
9864                         if (final_mask)
9865                           {
9866                               tree ptr = build_int_cst (ref_type,
9867                                                               align * BITS_PER_UNIT);
9868                               gcall *call
9869                                 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9870                                                                       dataref_ptr, ptr,
9871                                                                       final_mask);
9872                               gimple_call_set_nothrow (call, true);
9873                               new_stmt = call;
9874                               data_ref = NULL_TREE;
9875                           }
9876                         else if (loop_lens && memory_access_type != VMAT_INVARIANT)
9877                           {
9878                               tree final_len
9879                                 = vect_get_loop_len (loop_vinfo, loop_lens,
9880                                                          vec_num * ncopies,
9881                                                          vec_num * j + i);
9882                               tree ptr = build_int_cst (ref_type,
9883                                                               align * BITS_PER_UNIT);
9884 
9885                               machine_mode vmode = TYPE_MODE (vectype);
9886                               opt_machine_mode new_ovmode
9887                                 = get_len_load_store_mode (vmode, true);
9888                               machine_mode new_vmode = new_ovmode.require ();
9889                               tree qi_type = unsigned_intQI_type_node;
9890 
9891                               signed char biasval =
9892                                 LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9893 
9894                               tree bias = build_int_cst (intQI_type_node, biasval);
9895 
9896                               gcall *call
9897                                 = gimple_build_call_internal (IFN_LEN_LOAD, 4,
9898                                                                       dataref_ptr, ptr,
9899                                                                       final_len, bias);
9900                               gimple_call_set_nothrow (call, true);
9901                               new_stmt = call;
9902                               data_ref = NULL_TREE;
9903 
9904                               /* Need conversion if it's wrapped with VnQI.  */
9905                               if (vmode != new_vmode)
9906                                 {
9907                                   tree new_vtype
9908                                     = build_vector_type_for_mode (qi_type, new_vmode);
9909                                   tree var = vect_get_new_ssa_name (new_vtype,
9910                                                                             vect_simple_var);
9911                                   gimple_set_lhs (call, var);
9912                                   vect_finish_stmt_generation (vinfo, stmt_info, call,
9913                                                                        gsi);
9914                                   tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
9915                                   new_stmt
9916                                     = gimple_build_assign (vec_dest,
9917                                                                  VIEW_CONVERT_EXPR, op);
9918                                 }
9919                           }
9920                         else
9921                           {
9922                               tree ltype = vectype;
9923                               tree new_vtype = NULL_TREE;
9924                               unsigned HOST_WIDE_INT gap
9925                                 = DR_GROUP_GAP (first_stmt_info);
9926                               unsigned int vect_align
9927                                 = vect_known_alignment_in_bytes (first_dr_info,
9928                                                                          vectype);
9929                               unsigned int scalar_dr_size
9930                                 = vect_get_scalar_dr_size (first_dr_info);
9931                               /* If there's no peeling for gaps but we have a gap
9932                                  with slp loads then load the lower half of the
9933                                  vector only.  See get_group_load_store_type for
9934                                  when we apply this optimization.  */
9935                               if (slp
9936                                   && loop_vinfo
9937                                   && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9938                                   && gap != 0
9939                                   && known_eq (nunits, (group_size - gap) * 2)
9940                                   && known_eq (nunits, group_size)
9941                                   && gap >= (vect_align / scalar_dr_size))
9942                                 {
9943                                   tree half_vtype;
9944                                   new_vtype
9945                                     = vector_vector_composition_type (vectype, 2,
9946                                                                                 &half_vtype);
9947                                   if (new_vtype != NULL_TREE)
9948                                     ltype = half_vtype;
9949                                 }
9950                               tree offset
9951                                 = (dataref_offset ? dataref_offset
9952                                                       : build_int_cst (ref_type, 0));
9953                               if (ltype != vectype
9954                                   && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9955                                 {
9956                                   unsigned HOST_WIDE_INT gap_offset
9957                                     = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
9958                                   tree gapcst = build_int_cst (ref_type, gap_offset);
9959                                   offset = size_binop (PLUS_EXPR, offset, gapcst);
9960                                 }
9961                               data_ref
9962                                 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
9963                               if (alignment_support_scheme == dr_aligned)
9964                                 ;
9965                               else
9966                                 TREE_TYPE (data_ref)
9967                                   = build_aligned_type (TREE_TYPE (data_ref),
9968                                                               align * BITS_PER_UNIT);
9969                               if (ltype != vectype)
9970                                 {
9971                                   vect_copy_ref_info (data_ref,
9972                                                             DR_REF (first_dr_info->dr));
9973                                   tree tem = make_ssa_name (ltype);
9974                                   new_stmt = gimple_build_assign (tem, data_ref);
9975                                   vect_finish_stmt_generation (vinfo, stmt_info,
9976                                                                        new_stmt, gsi);
9977                                   data_ref = NULL;
9978                                   vec<constructor_elt, va_gc> *v;
9979                                   vec_alloc (v, 2);
9980                                   if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9981                                     {
9982                                         CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9983                                                                       build_zero_cst (ltype));
9984                                         CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9985                                     }
9986                                   else
9987                                     {
9988                                         CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9989                                         CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9990                                                                       build_zero_cst (ltype));
9991                                     }
9992                                   gcc_assert (new_vtype != NULL_TREE);
9993                                   if (new_vtype == vectype)
9994                                     new_stmt = gimple_build_assign (
9995                                         vec_dest, build_constructor (vectype, v));
9996                                   else
9997                                     {
9998                                         tree new_vname = make_ssa_name (new_vtype);
9999                                         new_stmt = gimple_build_assign (
10000                                           new_vname, build_constructor (new_vtype, v));
10001                                         vect_finish_stmt_generation (vinfo, stmt_info,
10002                                                                            new_stmt, gsi);
10003                                         new_stmt = gimple_build_assign (
10004                                           vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
10005                                                                 new_vname));
10006                                     }
10007                                 }
10008                           }
10009                         break;
10010                       }
10011                     case dr_explicit_realign:
10012                       {
10013                         tree ptr, bump;
10014 
10015                         tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
10016 
10017                         if (compute_in_loop)
10018                           msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
10019                                                                 &realignment_token,
10020                                                                 dr_explicit_realign,
10021                                                                 dataref_ptr, NULL);
10022 
10023                         if (TREE_CODE (dataref_ptr) == SSA_NAME)
10024                           ptr = copy_ssa_name (dataref_ptr);
10025                         else
10026                           ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
10027                         // For explicit realign the target alignment should be
10028                         // known at compile time.
10029                         unsigned HOST_WIDE_INT align =
10030                           DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
10031                         new_stmt = gimple_build_assign
10032                                          (ptr, BIT_AND_EXPR, dataref_ptr,
10033                                           build_int_cst
10034                                           (TREE_TYPE (dataref_ptr),
10035                                            -(HOST_WIDE_INT) align));
10036                         vect_finish_stmt_generation (vinfo, stmt_info,
10037                                                              new_stmt, gsi);
10038                         data_ref
10039                           = build2 (MEM_REF, vectype, ptr,
10040                                         build_int_cst (ref_type, 0));
10041                         vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10042                         vec_dest = vect_create_destination_var (scalar_dest,
10043                                                                           vectype);
10044                         new_stmt = gimple_build_assign (vec_dest, data_ref);
10045                         new_temp = make_ssa_name (vec_dest, new_stmt);
10046                         gimple_assign_set_lhs (new_stmt, new_temp);
10047                         gimple_move_vops (new_stmt, stmt_info->stmt);
10048                         vect_finish_stmt_generation (vinfo, stmt_info,
10049                                                              new_stmt, gsi);
10050                         msq = new_temp;
10051 
10052                         bump = size_binop (MULT_EXPR, vs,
10053                                                TYPE_SIZE_UNIT (elem_type));
10054                         bump = size_binop (MINUS_EXPR, bump, size_one_node);
10055                         ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
10056                                                      stmt_info, bump);
10057                         new_stmt = gimple_build_assign
10058                                          (NULL_TREE, BIT_AND_EXPR, ptr,
10059                                           build_int_cst
10060                                           (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
10061                         ptr = copy_ssa_name (ptr, new_stmt);
10062                         gimple_assign_set_lhs (new_stmt, ptr);
10063                         vect_finish_stmt_generation (vinfo, stmt_info,
10064                                                              new_stmt, gsi);
10065                         data_ref
10066                           = build2 (MEM_REF, vectype, ptr,
10067                                         build_int_cst (ref_type, 0));
10068                         break;
10069                       }
10070                     case dr_explicit_realign_optimized:
10071                       {
10072                         if (TREE_CODE (dataref_ptr) == SSA_NAME)
10073                           new_temp = copy_ssa_name (dataref_ptr);
10074                         else
10075                           new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
10076                         // We should only be doing this if we know the target
10077                         // alignment at compile time.
10078                         unsigned HOST_WIDE_INT align =
10079                           DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
10080                         new_stmt = gimple_build_assign
10081                           (new_temp, BIT_AND_EXPR, dataref_ptr,
10082                            build_int_cst (TREE_TYPE (dataref_ptr),
10083                                              -(HOST_WIDE_INT) align));
10084                         vect_finish_stmt_generation (vinfo, stmt_info,
10085                                                              new_stmt, gsi);
10086                         data_ref
10087                           = build2 (MEM_REF, vectype, new_temp,
10088                                         build_int_cst (ref_type, 0));
10089                         break;
10090                       }
10091                     default:
10092                       gcc_unreachable ();
10093                     }
10094                 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10095                 /* DATA_REF is null if we've already built the statement.  */
10096                 if (data_ref)
10097                     {
10098                       vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10099                       new_stmt = gimple_build_assign (vec_dest, data_ref);
10100                     }
10101                 new_temp = make_ssa_name (vec_dest, new_stmt);
10102                 gimple_set_lhs (new_stmt, new_temp);
10103                 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10104 
10105                 /* 3. Handle explicit realignment if necessary/supported.
10106                      Create in loop:
10107                        vec_dest = realign_load (msq, lsq, realignment_token)  */
10108                 if (alignment_support_scheme == dr_explicit_realign_optimized
10109                       || alignment_support_scheme == dr_explicit_realign)
10110                     {
10111                       lsq = gimple_assign_lhs (new_stmt);
10112                       if (!realignment_token)
10113                         realignment_token = dataref_ptr;
10114                       vec_dest = vect_create_destination_var (scalar_dest, vectype);
10115                       new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
10116                                                               msq, lsq, realignment_token);
10117                       new_temp = make_ssa_name (vec_dest, new_stmt);
10118                       gimple_assign_set_lhs (new_stmt, new_temp);
10119                       vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10120 
10121                       if (alignment_support_scheme == dr_explicit_realign_optimized)
10122                         {
10123                           gcc_assert (phi);
10124                           if (i == vec_num - 1 && j == ncopies - 1)
10125                               add_phi_arg (phi, lsq,
10126                                              loop_latch_edge (containing_loop),
10127                                              UNKNOWN_LOCATION);
10128                           msq = lsq;
10129                         }
10130                     }
10131 
10132                 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
10133                     {
10134                       tree perm_mask = perm_mask_for_reverse (vectype);
10135                       new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
10136                                                                perm_mask, stmt_info, gsi);
10137                       new_stmt = SSA_NAME_DEF_STMT (new_temp);
10138                     }
10139 
10140                 /* Collect vector loads and later create their permutation in
10141                      vect_transform_grouped_load ().  */
10142                 if (grouped_load || slp_perm)
10143                     dr_chain.quick_push (new_temp);
10144 
10145                 /* Store vector loads in the corresponding SLP_NODE.  */
10146                 if (slp && !slp_perm)
10147                     SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10148 
10149                 /* With SLP permutation we load the gaps as well, without
10150                    we need to skip the gaps after we manage to fully load
10151                      all elements.  group_gap_adj is DR_GROUP_SIZE here.  */
10152                 group_elt += nunits;
10153                 if (maybe_ne (group_gap_adj, 0U)
10154                       && !slp_perm
10155                       && known_eq (group_elt, group_size - group_gap_adj))
10156                     {
10157                       poly_wide_int bump_val
10158                         = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
10159                            * group_gap_adj);
10160                       if (tree_int_cst_sgn
10161                               (vect_dr_behavior (vinfo, dr_info)->step) == -1)
10162                         bump_val = -bump_val;
10163                       tree bump = wide_int_to_tree (sizetype, bump_val);
10164                       dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10165                                                              gsi, stmt_info, bump);
10166                       group_elt = 0;
10167                     }
10168               }
10169             /* Bump the vector pointer to account for a gap or for excess
10170                elements loaded for a permuted SLP load.  */
10171             if (maybe_ne (group_gap_adj, 0U) && slp_perm)
10172               {
10173                 poly_wide_int bump_val
10174                     = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
10175                        * group_gap_adj);
10176                 if (tree_int_cst_sgn
10177                         (vect_dr_behavior (vinfo, dr_info)->step) == -1)
10178                     bump_val = -bump_val;
10179                 tree bump = wide_int_to_tree (sizetype, bump_val);
10180                 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10181                                                        stmt_info, bump);
10182               }
10183           }
10184 
10185       if (slp && !slp_perm)
10186           continue;
10187 
10188       if (slp_perm)
10189         {
10190             unsigned n_perms;
10191             /* For SLP we know we've seen all possible uses of dr_chain so
10192                direct vect_transform_slp_perm_load to DCE the unused parts.
10193                ???  This is a hack to prevent compile-time issues as seen
10194                in PR101120 and friends.  */
10195             bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
10196                                                               gsi, vf, false, &n_perms,
10197                                                               nullptr, true);
10198             gcc_assert (ok);
10199         }
10200       else
10201         {
10202           if (grouped_load)
10203               {
10204                 if (memory_access_type != VMAT_LOAD_STORE_LANES)
10205                     vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
10206                                                        group_size, gsi);
10207                 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10208               }
10209           else
10210               {
10211                 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10212               }
10213         }
10214       dr_chain.release ();
10215     }
10216   if (!slp)
10217     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10218 
10219   return true;
10220 }
10221 
10222 /* Function vect_is_simple_cond.
10223 
10224    Input:
10225    LOOP - the loop that is being vectorized.
10226    COND - Condition that is checked for simple use.
10227 
10228    Output:
10229    *COMP_VECTYPE - the vector type for the comparison.
10230    *DTS - The def types for the arguments of the comparison
10231 
10232    Returns whether a COND can be vectorized.  Checks whether
10233    condition operands are supportable using vec_is_simple_use.  */
10234 
10235 static bool
vect_is_simple_cond(tree cond,vec_info * vinfo,stmt_vec_info stmt_info,slp_tree slp_node,tree * comp_vectype,enum vect_def_type * dts,tree vectype)10236 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
10237                          slp_tree slp_node, tree *comp_vectype,
10238                          enum vect_def_type *dts, tree vectype)
10239 {
10240   tree lhs, rhs;
10241   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10242   slp_tree slp_op;
10243 
10244   /* Mask case.  */
10245   if (TREE_CODE (cond) == SSA_NAME
10246       && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
10247     {
10248       if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
10249                                      &slp_op, &dts[0], comp_vectype)
10250             || !*comp_vectype
10251             || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
10252           return false;
10253       return true;
10254     }
10255 
10256   if (!COMPARISON_CLASS_P (cond))
10257     return false;
10258 
10259   lhs = TREE_OPERAND (cond, 0);
10260   rhs = TREE_OPERAND (cond, 1);
10261 
10262   if (TREE_CODE (lhs) == SSA_NAME)
10263     {
10264       if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
10265                                      &lhs, &slp_op, &dts[0], &vectype1))
10266           return false;
10267     }
10268   else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
10269              || TREE_CODE (lhs) == FIXED_CST)
10270     dts[0] = vect_constant_def;
10271   else
10272     return false;
10273 
10274   if (TREE_CODE (rhs) == SSA_NAME)
10275     {
10276       if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
10277                                      &rhs, &slp_op, &dts[1], &vectype2))
10278           return false;
10279     }
10280   else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
10281              || TREE_CODE (rhs) == FIXED_CST)
10282     dts[1] = vect_constant_def;
10283   else
10284     return false;
10285 
10286   if (vectype1 && vectype2
10287       && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10288                        TYPE_VECTOR_SUBPARTS (vectype2)))
10289     return false;
10290 
10291   *comp_vectype = vectype1 ? vectype1 : vectype2;
10292   /* Invariant comparison.  */
10293   if (! *comp_vectype)
10294     {
10295       tree scalar_type = TREE_TYPE (lhs);
10296       if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
10297           *comp_vectype = truth_type_for (vectype);
10298       else
10299           {
10300             /* If we can widen the comparison to match vectype do so.  */
10301             if (INTEGRAL_TYPE_P (scalar_type)
10302                 && !slp_node
10303                 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
10304                                           TYPE_SIZE (TREE_TYPE (vectype))))
10305               scalar_type = build_nonstandard_integer_type
10306                 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
10307             *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
10308                                                                    slp_node);
10309           }
10310     }
10311 
10312   return true;
10313 }
10314 
10315 /* vectorizable_condition.
10316 
10317    Check if STMT_INFO is conditional modify expression that can be vectorized.
10318    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10319    stmt using VEC_COND_EXPR  to replace it, put it in VEC_STMT, and insert it
10320    at GSI.
10321 
10322    When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
10323 
10324    Return true if STMT_INFO is vectorizable in this way.  */
10325 
10326 static bool
vectorizable_condition(vec_info * vinfo,stmt_vec_info stmt_info,gimple_stmt_iterator * gsi,gimple ** vec_stmt,slp_tree slp_node,stmt_vector_for_cost * cost_vec)10327 vectorizable_condition (vec_info *vinfo,
10328                               stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10329                               gimple **vec_stmt,
10330                               slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10331 {
10332   tree scalar_dest = NULL_TREE;
10333   tree vec_dest = NULL_TREE;
10334   tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
10335   tree then_clause, else_clause;
10336   tree comp_vectype = NULL_TREE;
10337   tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
10338   tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
10339   tree vec_compare;
10340   tree new_temp;
10341   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10342   enum vect_def_type dts[4]
10343     = {vect_unknown_def_type, vect_unknown_def_type,
10344        vect_unknown_def_type, vect_unknown_def_type};
10345   int ndts = 4;
10346   int ncopies;
10347   int vec_num;
10348   enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10349   int i;
10350   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10351   vec<tree> vec_oprnds0 = vNULL;
10352   vec<tree> vec_oprnds1 = vNULL;
10353   vec<tree> vec_oprnds2 = vNULL;
10354   vec<tree> vec_oprnds3 = vNULL;
10355   tree vec_cmp_type;
10356   bool masked = false;
10357 
10358   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10359     return false;
10360 
10361   /* Is vectorizable conditional operation?  */
10362   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10363   if (!stmt)
10364     return false;
10365 
10366   code = gimple_assign_rhs_code (stmt);
10367   if (code != COND_EXPR)
10368     return false;
10369 
10370   stmt_vec_info reduc_info = NULL;
10371   int reduc_index = -1;
10372   vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
10373   bool for_reduction
10374     = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
10375   if (for_reduction)
10376     {
10377       if (slp_node)
10378           return false;
10379       reduc_info = info_for_reduction (vinfo, stmt_info);
10380       reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
10381       reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
10382       gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
10383                       || reduc_index != -1);
10384     }
10385   else
10386     {
10387       if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10388           return false;
10389     }
10390 
10391   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10392   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10393 
10394   if (slp_node)
10395     {
10396       ncopies = 1;
10397       vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10398     }
10399   else
10400     {
10401       ncopies = vect_get_num_copies (loop_vinfo, vectype);
10402       vec_num = 1;
10403     }
10404 
10405   gcc_assert (ncopies >= 1);
10406   if (for_reduction && ncopies > 1)
10407     return false; /* FORNOW */
10408 
10409   cond_expr = gimple_assign_rhs1 (stmt);
10410 
10411   if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
10412                                   &comp_vectype, &dts[0], vectype)
10413       || !comp_vectype)
10414     return false;
10415 
10416   unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
10417   slp_tree then_slp_node, else_slp_node;
10418   if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
10419                                  &then_clause, &then_slp_node, &dts[2], &vectype1))
10420     return false;
10421   if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
10422                                  &else_clause, &else_slp_node, &dts[3], &vectype2))
10423     return false;
10424 
10425   if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
10426     return false;
10427 
10428   if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
10429     return false;
10430 
10431   masked = !COMPARISON_CLASS_P (cond_expr);
10432   vec_cmp_type = truth_type_for (comp_vectype);
10433 
10434   if (vec_cmp_type == NULL_TREE)
10435     return false;
10436 
10437   cond_code = TREE_CODE (cond_expr);
10438   if (!masked)
10439     {
10440       cond_expr0 = TREE_OPERAND (cond_expr, 0);
10441       cond_expr1 = TREE_OPERAND (cond_expr, 1);
10442     }
10443 
10444   /* For conditional reductions, the "then" value needs to be the candidate
10445      value calculated by this iteration while the "else" value needs to be
10446      the result carried over from previous iterations.  If the COND_EXPR
10447      is the other way around, we need to swap it.  */
10448   bool must_invert_cmp_result = false;
10449   if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
10450     {
10451       if (masked)
10452           must_invert_cmp_result = true;
10453       else
10454           {
10455             bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
10456             tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
10457             if (new_code == ERROR_MARK)
10458               must_invert_cmp_result = true;
10459             else
10460               {
10461                 cond_code = new_code;
10462                 /* Make sure we don't accidentally use the old condition.  */
10463                 cond_expr = NULL_TREE;
10464               }
10465           }
10466       std::swap (then_clause, else_clause);
10467     }
10468 
10469   if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
10470     {
10471       /* Boolean values may have another representation in vectors
10472            and therefore we prefer bit operations over comparison for
10473            them (which also works for scalar masks).  We store opcodes
10474            to use in bitop1 and bitop2.  Statement is vectorized as
10475            BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10476            depending on bitop1 and bitop2 arity.  */
10477       switch (cond_code)
10478           {
10479           case GT_EXPR:
10480             bitop1 = BIT_NOT_EXPR;
10481             bitop2 = BIT_AND_EXPR;
10482             break;
10483           case GE_EXPR:
10484             bitop1 = BIT_NOT_EXPR;
10485             bitop2 = BIT_IOR_EXPR;
10486             break;
10487           case LT_EXPR:
10488             bitop1 = BIT_NOT_EXPR;
10489             bitop2 = BIT_AND_EXPR;
10490             std::swap (cond_expr0, cond_expr1);
10491             break;
10492           case LE_EXPR:
10493             bitop1 = BIT_NOT_EXPR;
10494             bitop2 = BIT_IOR_EXPR;
10495             std::swap (cond_expr0, cond_expr1);
10496             break;
10497           case NE_EXPR:
10498             bitop1 = BIT_XOR_EXPR;
10499             break;
10500           case EQ_EXPR:
10501             bitop1 = BIT_XOR_EXPR;
10502             bitop2 = BIT_NOT_EXPR;
10503             break;
10504           default:
10505             return false;
10506           }
10507       cond_code = SSA_NAME;
10508     }
10509 
10510   if (TREE_CODE_CLASS (cond_code) == tcc_comparison
10511       && reduction_type == EXTRACT_LAST_REDUCTION
10512       && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
10513     {
10514       if (dump_enabled_p ())
10515           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10516                                "reduction comparison operation not supported.\n");
10517       return false;
10518     }
10519 
10520   if (!vec_stmt)
10521     {
10522       if (bitop1 != NOP_EXPR)
10523           {
10524             machine_mode mode = TYPE_MODE (comp_vectype);
10525             optab optab;
10526 
10527             optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
10528             if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10529               return false;
10530 
10531             if (bitop2 != NOP_EXPR)
10532               {
10533                 optab = optab_for_tree_code (bitop2, comp_vectype,
10534                                                      optab_default);
10535                 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10536                     return false;
10537               }
10538           }
10539 
10540       vect_cost_for_stmt kind = vector_stmt;
10541       if (reduction_type == EXTRACT_LAST_REDUCTION)
10542           /* Count one reduction-like operation per vector.  */
10543           kind = vec_to_scalar;
10544       else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
10545           return false;
10546 
10547       if (slp_node
10548             && (!vect_maybe_update_slp_op_vectype
10549                      (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
10550                 || (op_adjust == 1
10551                       && !vect_maybe_update_slp_op_vectype
10552                               (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
10553                 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
10554                 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
10555           {
10556             if (dump_enabled_p ())
10557               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10558                                    "incompatible vector types for invariants\n");
10559             return false;
10560           }
10561 
10562       if (loop_vinfo && for_reduction
10563             && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10564           {
10565             if (reduction_type == EXTRACT_LAST_REDUCTION)
10566               vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10567                                            ncopies * vec_num, vectype, NULL);
10568             /* Extra inactive lanes should be safe for vect_nested_cycle.  */
10569             else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
10570               {
10571                 if (dump_enabled_p ())
10572                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10573                                          "conditional reduction prevents the use"
10574                                          " of partial vectors.\n");
10575                 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
10576               }
10577           }
10578 
10579       STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10580       vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
10581                                     cost_vec, kind);
10582       return true;
10583     }
10584 
10585   /* Transform.  */
10586 
10587   /* Handle def.  */
10588   scalar_dest = gimple_assign_lhs (stmt);
10589   if (reduction_type != EXTRACT_LAST_REDUCTION)
10590     vec_dest = vect_create_destination_var (scalar_dest, vectype);
10591 
10592   bool swap_cond_operands = false;
10593 
10594   /* See whether another part of the vectorized code applies a loop
10595      mask to the condition, or to its inverse.  */
10596 
10597   vec_loop_masks *masks = NULL;
10598   if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10599     {
10600       if (reduction_type == EXTRACT_LAST_REDUCTION)
10601           masks = &LOOP_VINFO_MASKS (loop_vinfo);
10602       else
10603           {
10604             scalar_cond_masked_key cond (cond_expr, ncopies);
10605             if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10606               masks = &LOOP_VINFO_MASKS (loop_vinfo);
10607             else
10608               {
10609                 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10610                 tree_code orig_code = cond.code;
10611                 cond.code = invert_tree_comparison (cond.code, honor_nans);
10612                 if (!masked && loop_vinfo->scalar_cond_masked_set.contains (cond))
10613                     {
10614                       masks = &LOOP_VINFO_MASKS (loop_vinfo);
10615                       cond_code = cond.code;
10616                       swap_cond_operands = true;
10617                     }
10618                 else
10619                     {
10620                       /* Try the inverse of the current mask.  We check if the
10621                          inverse mask is live and if so we generate a negate of
10622                          the current mask such that we still honor NaNs.  */
10623                       cond.inverted_p = true;
10624                       cond.code = orig_code;
10625                       if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10626                         {
10627                           masks = &LOOP_VINFO_MASKS (loop_vinfo);
10628                           cond_code = cond.code;
10629                           swap_cond_operands = true;
10630                           must_invert_cmp_result = true;
10631                         }
10632                     }
10633               }
10634           }
10635     }
10636 
10637   /* Handle cond expr.  */
10638   if (masked)
10639     vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10640                            cond_expr, &vec_oprnds0, comp_vectype,
10641                            then_clause, &vec_oprnds2, vectype,
10642                            reduction_type != EXTRACT_LAST_REDUCTION
10643                            ? else_clause : NULL, &vec_oprnds3, vectype);
10644   else
10645     vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10646                            cond_expr0, &vec_oprnds0, comp_vectype,
10647                            cond_expr1, &vec_oprnds1, comp_vectype,
10648                            then_clause, &vec_oprnds2, vectype,
10649                            reduction_type != EXTRACT_LAST_REDUCTION
10650                            ? else_clause : NULL, &vec_oprnds3, vectype);
10651 
10652   /* Arguments are ready.  Create the new vector stmt.  */
10653   FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10654     {
10655       vec_then_clause = vec_oprnds2[i];
10656       if (reduction_type != EXTRACT_LAST_REDUCTION)
10657           vec_else_clause = vec_oprnds3[i];
10658 
10659       if (swap_cond_operands)
10660           std::swap (vec_then_clause, vec_else_clause);
10661 
10662       if (masked)
10663           vec_compare = vec_cond_lhs;
10664       else
10665           {
10666             vec_cond_rhs = vec_oprnds1[i];
10667             if (bitop1 == NOP_EXPR)
10668               {
10669                 gimple_seq stmts = NULL;
10670                 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
10671                                                      vec_cond_lhs, vec_cond_rhs);
10672                 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
10673               }
10674             else
10675               {
10676                 new_temp = make_ssa_name (vec_cmp_type);
10677                 gassign *new_stmt;
10678                 if (bitop1 == BIT_NOT_EXPR)
10679                     new_stmt = gimple_build_assign (new_temp, bitop1,
10680                                                             vec_cond_rhs);
10681                 else
10682                     new_stmt
10683                       = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10684                                                    vec_cond_rhs);
10685                 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10686                 if (bitop2 == NOP_EXPR)
10687                     vec_compare = new_temp;
10688                 else if (bitop2 == BIT_NOT_EXPR)
10689                     {
10690                       /* Instead of doing ~x ? y : z do x ? z : y.  */
10691                       vec_compare = new_temp;
10692                       std::swap (vec_then_clause, vec_else_clause);
10693                     }
10694                 else
10695                     {
10696                       vec_compare = make_ssa_name (vec_cmp_type);
10697                       new_stmt
10698                         = gimple_build_assign (vec_compare, bitop2,
10699                                                      vec_cond_lhs, new_temp);
10700                       vect_finish_stmt_generation (vinfo, stmt_info,
10701                                                          new_stmt, gsi);
10702                     }
10703               }
10704           }
10705 
10706       /* If we decided to apply a loop mask to the result of the vector
10707            comparison, AND the comparison with the mask now.  Later passes
10708            should then be able to reuse the AND results between multiple
10709            vector statements.
10710 
10711            For example:
10712            for (int i = 0; i < 100; ++i)
10713            x[i] = y[i] ? z[i] : 10;
10714 
10715            results in following optimized GIMPLE:
10716 
10717            mask__35.8_43 = vect__4.7_41 != { 0, ... };
10718            vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10719            _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10720            vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10721            vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10722            vect_iftmp.11_47, { 10, ... }>;
10723 
10724            instead of using a masked and unmasked forms of
10725            vec != { 0, ... } (masked in the MASK_LOAD,
10726            unmasked in the VEC_COND_EXPR).  */
10727 
10728       /* Force vec_compare to be an SSA_NAME rather than a comparison,
10729            in cases where that's necessary.  */
10730 
10731       if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10732           {
10733             if (!is_gimple_val (vec_compare))
10734               {
10735                 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10736                 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10737                                                                    vec_compare);
10738                 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10739                 vec_compare = vec_compare_name;
10740               }
10741 
10742             if (must_invert_cmp_result)
10743               {
10744                 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10745                 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10746                                                                    BIT_NOT_EXPR,
10747                                                                    vec_compare);
10748                 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10749                 vec_compare = vec_compare_name;
10750               }
10751 
10752             if (masks)
10753               {
10754                 tree loop_mask
10755                     = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10756                                               vectype, i);
10757                 tree tmp2 = make_ssa_name (vec_cmp_type);
10758                 gassign *g
10759                     = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10760                                                loop_mask);
10761                 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
10762                 vec_compare = tmp2;
10763               }
10764           }
10765 
10766       gimple *new_stmt;
10767       if (reduction_type == EXTRACT_LAST_REDUCTION)
10768           {
10769             gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10770             tree lhs = gimple_get_lhs (old_stmt);
10771             new_stmt = gimple_build_call_internal
10772                 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10773                  vec_then_clause);
10774             gimple_call_set_lhs (new_stmt, lhs);
10775             SSA_NAME_DEF_STMT (lhs) = new_stmt;
10776             if (old_stmt == gsi_stmt (*gsi))
10777               vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
10778             else
10779               {
10780                 /* In this case we're moving the definition to later in the
10781                      block.  That doesn't matter because the only uses of the
10782                      lhs are in phi statements.  */
10783                 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10784                 gsi_remove (&old_gsi, true);
10785                 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10786               }
10787           }
10788       else
10789           {
10790             new_temp = make_ssa_name (vec_dest);
10791             new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10792                                                     vec_then_clause, vec_else_clause);
10793             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10794           }
10795       if (slp_node)
10796           SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10797       else
10798           STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10799     }
10800 
10801   if (!slp_node)
10802     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10803 
10804   vec_oprnds0.release ();
10805   vec_oprnds1.release ();
10806   vec_oprnds2.release ();
10807   vec_oprnds3.release ();
10808 
10809   return true;
10810 }
10811 
10812 /* vectorizable_comparison.
10813 
10814    Check if STMT_INFO is comparison expression that can be vectorized.
10815    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10816    comparison, put it in VEC_STMT, and insert it at GSI.
10817 
10818    Return true if STMT_INFO is vectorizable in this way.  */
10819 
10820 static bool
vectorizable_comparison(vec_info * vinfo,stmt_vec_info stmt_info,gimple_stmt_iterator * gsi,gimple ** vec_stmt,slp_tree slp_node,stmt_vector_for_cost * cost_vec)10821 vectorizable_comparison (vec_info *vinfo,
10822                                stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10823                                gimple **vec_stmt,
10824                                slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10825 {
10826   tree lhs, rhs1, rhs2;
10827   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10828   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10829   tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10830   tree new_temp;
10831   loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10832   enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10833   int ndts = 2;
10834   poly_uint64 nunits;
10835   int ncopies;
10836   enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10837   int i;
10838   bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10839   vec<tree> vec_oprnds0 = vNULL;
10840   vec<tree> vec_oprnds1 = vNULL;
10841   tree mask_type;
10842   tree mask;
10843 
10844   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10845     return false;
10846 
10847   if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10848     return false;
10849 
10850   mask_type = vectype;
10851   nunits = TYPE_VECTOR_SUBPARTS (vectype);
10852 
10853   if (slp_node)
10854     ncopies = 1;
10855   else
10856     ncopies = vect_get_num_copies (loop_vinfo, vectype);
10857 
10858   gcc_assert (ncopies >= 1);
10859   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10860     return false;
10861 
10862   gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10863   if (!stmt)
10864     return false;
10865 
10866   code = gimple_assign_rhs_code (stmt);
10867 
10868   if (TREE_CODE_CLASS (code) != tcc_comparison)
10869     return false;
10870 
10871   slp_tree slp_rhs1, slp_rhs2;
10872   if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10873                                  0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
10874     return false;
10875 
10876   if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10877                                  1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
10878     return false;
10879 
10880   if (vectype1 && vectype2
10881       && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10882                        TYPE_VECTOR_SUBPARTS (vectype2)))
10883     return false;
10884 
10885   vectype = vectype1 ? vectype1 : vectype2;
10886 
10887   /* Invariant comparison.  */
10888   if (!vectype)
10889     {
10890       if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10891           vectype = mask_type;
10892       else
10893           vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
10894                                                          slp_node);
10895       if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10896           return false;
10897     }
10898   else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10899     return false;
10900 
10901   /* Can't compare mask and non-mask types.  */
10902   if (vectype1 && vectype2
10903       && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10904     return false;
10905 
10906   /* Boolean values may have another representation in vectors
10907      and therefore we prefer bit operations over comparison for
10908      them (which also works for scalar masks).  We store opcodes
10909      to use in bitop1 and bitop2.  Statement is vectorized as
10910        BITOP2 (rhs1 BITOP1 rhs2) or
10911        rhs1 BITOP2 (BITOP1 rhs2)
10912      depending on bitop1 and bitop2 arity.  */
10913   bool swap_p = false;
10914   if (VECTOR_BOOLEAN_TYPE_P (vectype))
10915     {
10916       if (code == GT_EXPR)
10917           {
10918             bitop1 = BIT_NOT_EXPR;
10919             bitop2 = BIT_AND_EXPR;
10920           }
10921       else if (code == GE_EXPR)
10922           {
10923             bitop1 = BIT_NOT_EXPR;
10924             bitop2 = BIT_IOR_EXPR;
10925           }
10926       else if (code == LT_EXPR)
10927           {
10928             bitop1 = BIT_NOT_EXPR;
10929             bitop2 = BIT_AND_EXPR;
10930             swap_p = true;
10931           }
10932       else if (code == LE_EXPR)
10933           {
10934             bitop1 = BIT_NOT_EXPR;
10935             bitop2 = BIT_IOR_EXPR;
10936             swap_p = true;
10937           }
10938       else
10939           {
10940             bitop1 = BIT_XOR_EXPR;
10941             if (code == EQ_EXPR)
10942               bitop2 = BIT_NOT_EXPR;
10943           }
10944     }
10945 
10946   if (!vec_stmt)
10947     {
10948       if (bitop1 == NOP_EXPR)
10949           {
10950             if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10951               return false;
10952           }
10953       else
10954           {
10955             machine_mode mode = TYPE_MODE (vectype);
10956             optab optab;
10957 
10958             optab = optab_for_tree_code (bitop1, vectype, optab_default);
10959             if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10960               return false;
10961 
10962             if (bitop2 != NOP_EXPR)
10963               {
10964                 optab = optab_for_tree_code (bitop2, vectype, optab_default);
10965                 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10966                     return false;
10967               }
10968           }
10969 
10970       /* Put types on constant and invariant SLP children.  */
10971       if (slp_node
10972             && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
10973                 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
10974           {
10975             if (dump_enabled_p ())
10976               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10977                                    "incompatible vector types for invariants\n");
10978             return false;
10979           }
10980 
10981       STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10982       vect_model_simple_cost (vinfo, stmt_info,
10983                                     ncopies * (1 + (bitop2 != NOP_EXPR)),
10984                                     dts, ndts, slp_node, cost_vec);
10985       return true;
10986     }
10987 
10988   /* Transform.  */
10989 
10990   /* Handle def.  */
10991   lhs = gimple_assign_lhs (stmt);
10992   mask = vect_create_destination_var (lhs, mask_type);
10993 
10994   vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10995                          rhs1, &vec_oprnds0, vectype,
10996                          rhs2, &vec_oprnds1, vectype);
10997   if (swap_p)
10998     std::swap (vec_oprnds0, vec_oprnds1);
10999 
11000   /* Arguments are ready.  Create the new vector stmt.  */
11001   FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
11002     {
11003       gimple *new_stmt;
11004       vec_rhs2 = vec_oprnds1[i];
11005 
11006       new_temp = make_ssa_name (mask);
11007       if (bitop1 == NOP_EXPR)
11008           {
11009             new_stmt = gimple_build_assign (new_temp, code,
11010                                                     vec_rhs1, vec_rhs2);
11011             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11012           }
11013       else
11014           {
11015             if (bitop1 == BIT_NOT_EXPR)
11016               new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
11017             else
11018               new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
11019                                                       vec_rhs2);
11020             vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11021             if (bitop2 != NOP_EXPR)
11022               {
11023                 tree res = make_ssa_name (mask);
11024                 if (bitop2 == BIT_NOT_EXPR)
11025                     new_stmt = gimple_build_assign (res, bitop2, new_temp);
11026                 else
11027                     new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
11028                                                             new_temp);
11029                 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11030               }
11031           }
11032       if (slp_node)
11033           SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
11034       else
11035           STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11036     }
11037 
11038   if (!slp_node)
11039     *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11040 
11041   vec_oprnds0.release ();
11042   vec_oprnds1.release ();
11043 
11044   return true;
11045 }
11046 
11047 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
11048    can handle all live statements in the node.  Otherwise return true
11049    if STMT_INFO is not live or if vectorizable_live_operation can handle it.
11050    GSI and VEC_STMT_P are as for vectorizable_live_operation.  */
11051 
11052 static bool
can_vectorize_live_stmts(vec_info * vinfo,stmt_vec_info stmt_info,gimple_stmt_iterator * gsi,slp_tree slp_node,slp_instance slp_node_instance,bool vec_stmt_p,stmt_vector_for_cost * cost_vec)11053 can_vectorize_live_stmts (vec_info *vinfo,
11054                                 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11055                                 slp_tree slp_node, slp_instance slp_node_instance,
11056                                 bool vec_stmt_p,
11057                                 stmt_vector_for_cost *cost_vec)
11058 {
11059   if (slp_node)
11060     {
11061       stmt_vec_info slp_stmt_info;
11062       unsigned int i;
11063       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
11064           {
11065             if (STMT_VINFO_LIVE_P (slp_stmt_info)
11066                 && !vectorizable_live_operation (vinfo,
11067                                                          slp_stmt_info, gsi, slp_node,
11068                                                          slp_node_instance, i,
11069                                                          vec_stmt_p, cost_vec))
11070               return false;
11071           }
11072     }
11073   else if (STMT_VINFO_LIVE_P (stmt_info)
11074              && !vectorizable_live_operation (vinfo, stmt_info, gsi,
11075                                                       slp_node, slp_node_instance, -1,
11076                                                       vec_stmt_p, cost_vec))
11077     return false;
11078 
11079   return true;
11080 }
11081 
/* Make sure the statement is vectorizable.

   Analyze STMT_INFO within VINFO and decide whether one of the
   vectorizable_* routines supports it.  NODE and NODE_INSTANCE are the SLP
   node and instance on whose behalf the stmt is analyzed; NODE is null when
   the stmt is not analyzed through an SLP node.  COST_VEC is handed on to
   the vectorizable_* routines.  *NEED_TO_VECTORIZE is set to true when the
   stmt that ends up being analyzed is relevant; it is never cleared here.

   Return opt_result::success () if the stmt is supported or does not need
   to be vectorized, otherwise a failure that carries the reason.  */

opt_result
vect_analyze_stmt (vec_info *vinfo,
                       stmt_vec_info stmt_info, bool *need_to_vectorize,
                       slp_tree node, slp_instance node_instance,
                       stmt_vector_for_cost *cost_vec)
{
  /* Null when VINFO is not a bb_vec_info; used below to tell basic-block
     analysis apart from loop analysis.  */
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
  /* Read from the incoming STMT_INFO; not refreshed if STMT_INFO is switched
     to its pattern stmt further down.  */
  enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
  bool ok;
  gimple_seq pattern_def_seq;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
                         stmt_info->stmt);

  if (gimple_has_volatile_ops (stmt_info->stmt))
    return opt_result::failure_at (stmt_info->stmt,
                                           "not vectorized:"
                                           " stmt has volatile operands: %G\n",
                                           stmt_info->stmt);

  /* When not analyzing through an SLP node, first analyze every relevant or
     live stmt of the pattern definition sequence attached to STMT_INFO.  The
     first failure is propagated to the caller.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info)
      && node == NULL
      && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
    {
      gimple_stmt_iterator si;

      for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
          {
            stmt_vec_info pattern_def_stmt_info
              = vinfo->lookup_stmt (gsi_stmt (si));
            if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
                || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
              {
                /* Analyze def stmt of STMT if it's a pattern stmt.  */
                if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location,
                                         "==> examining pattern def statement: %G",
                                         pattern_def_stmt_info->stmt);

                opt_result res
                    = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
                                             need_to_vectorize, node, node_instance,
                                             cost_vec);
                if (!res)
                    return res;
              }
          }
    }

  /* Skip stmts that do not need to be vectorized.  In loops this is expected
     to include:
     - the COND_EXPR which is the loop exit condition
     - any LABEL_EXPRs in the loop
     - computations that are used only for array indexing or loop control.
     In basic blocks we only analyze statements that are a part of some SLP
     instance, therefore, all the statements are relevant.

     The pattern statement needs to be analyzed instead of the original
     statement if the original statement is not relevant.  Otherwise, we
     analyze both statements.  In basic blocks we are called from some SLP
     instance traversal; don't analyze pattern stmts in addition there, the
     pattern stmts will already be part of the SLP instance.  */

  stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!STMT_VINFO_RELEVANT_P (stmt_info)
      && !STMT_VINFO_LIVE_P (stmt_info))
    {
      if (STMT_VINFO_IN_PATTERN_P (stmt_info)
            && pattern_stmt_info
            && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
                || STMT_VINFO_LIVE_P (pattern_stmt_info)))
        {
          /* Analyze PATTERN_STMT instead of the original stmt.  From here on
             STMT_INFO refers to the pattern stmt.  */
            stmt_info = pattern_stmt_info;
          if (dump_enabled_p ())
              dump_printf_loc (MSG_NOTE, vect_location,
                                   "==> examining pattern statement: %G",
                                   stmt_info->stmt);
        }
      else
        {
          /* Neither the stmt nor a pattern replacement is relevant or live:
             nothing to vectorize, which counts as success.  */
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");

          return opt_result::success ();
        }
    }
  else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
             && node == NULL
             && pattern_stmt_info
             && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
                 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
    {
      /* Analyze PATTERN_STMT too.  The original stmt is still analyzed
         below once the recursive call has succeeded.  */
      if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                               "==> examining pattern statement: %G",
                               pattern_stmt_info->stmt);

      opt_result res
          = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
                                   node_instance, cost_vec);
      if (!res)
          return res;
   }

  /* Sanity-check the kind of definition.  Reductions, nested cycles and
     inductions are only expected when VINFO is not a bb_vec_info; constant,
     external and unknown definitions must never reach this point.  */
  switch (STMT_VINFO_DEF_TYPE (stmt_info))
    {
      case vect_internal_def:
        break;

      case vect_reduction_def:
      case vect_nested_cycle:
         gcc_assert (!bb_vinfo
                         && (relevance == vect_used_in_outer
                               || relevance == vect_used_in_outer_by_reduction
                               || relevance == vect_used_by_reduction
                               || relevance == vect_unused_in_scope
                               || relevance == vect_used_only_live));
         break;

      case vect_induction_def:
          gcc_assert (!bb_vinfo);
          break;

      case vect_constant_def:
      case vect_external_def:
      case vect_unknown_def_type:
      default:
        gcc_unreachable ();
    }

  /* While analyzing on behalf of an SLP node, temporarily substitute the
     node's vector type for the one recorded on the stmt.  It is put back
     after the vectorizable_* routines have run.  */
  tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
  if (node)
    STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);

  if (STMT_VINFO_RELEVANT_P (stmt_info))
    {
      /* A relevant stmt must have a vector type, unless it is a call
         without a LHS.  */
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
                      || (call && gimple_call_lhs (call) == NULL_TREE));
      *need_to_vectorize = true;
    }

  /* NODE is null on this path, so no vector type was substituted above and
     nothing needs restoring before returning.  */
  if (PURE_SLP_STMT (stmt_info) && !node)
    {
      if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                               "handled only by SLP analysis\n");
      return opt_result::success ();
    }

  /* OK stays true when neither branch below performs a check, i.e. when
     VINFO is not a bb_vec_info and the stmt is neither relevant nor a
     reduction def.  The vectorizable_* routines are tried in order and the
     chain stops at the first one that accepts the stmt.  */
  ok = true;
  if (!bb_vinfo
      && (STMT_VINFO_RELEVANT_P (stmt_info)
            || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
    /* Prefer vectorizable_call over vectorizable_simd_clone_call so
       -mveclibabi= takes preference over library functions with
       the simd attribute.  */
    ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
            || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
                                                     cost_vec)
            || vectorizable_conversion (vinfo, stmt_info,
                                              NULL, NULL, node, cost_vec)
            || vectorizable_operation (vinfo, stmt_info,
                                             NULL, NULL, node, cost_vec)
            || vectorizable_assignment (vinfo, stmt_info,
                                              NULL, NULL, node, cost_vec)
            || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
            || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
            || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
                                             node, node_instance, cost_vec)
            || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
                                             NULL, node, cost_vec)
            || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
            || vectorizable_condition (vinfo, stmt_info,
                                             NULL, NULL, node, cost_vec)
            || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
                                              cost_vec)
            || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
                                          stmt_info, NULL, node));
  else
    {
      /* Basic-block analysis: the reduction, induction and loop-closed PHI
         routines are not tried; vectorizable_phi is tried instead.  */
      if (bb_vinfo)
          ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
                || vectorizable_simd_clone_call (vinfo, stmt_info,
                                                         NULL, NULL, node, cost_vec)
                || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
                                                    cost_vec)
                || vectorizable_shift (vinfo, stmt_info,
                                             NULL, NULL, node, cost_vec)
                || vectorizable_operation (vinfo, stmt_info,
                                                   NULL, NULL, node, cost_vec)
                || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
                                                    cost_vec)
                || vectorizable_load (vinfo, stmt_info,
                                            NULL, NULL, node, cost_vec)
                || vectorizable_store (vinfo, stmt_info,
                                             NULL, NULL, node, cost_vec)
                || vectorizable_condition (vinfo, stmt_info,
                                                   NULL, NULL, node, cost_vec)
                || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
                                                    cost_vec)
                || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
    }

  /* Undo the temporary vector type substitution before any further
     return.  */
  if (node)
    STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;

  if (!ok)
    return opt_result::failure_at (stmt_info->stmt,
                                           "not vectorized:"
                                           " relevant stmt not supported: %G",
                                           stmt_info->stmt);

  /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
      need extra handling, except for vectorizable reductions.  Stmts
      recorded as lc_phi_info_type are excluded as well, and the check is
      skipped entirely for basic-block analysis.  */
  if (!bb_vinfo
      && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
      && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
      && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
                                            stmt_info, NULL, node, node_instance,
                                            false, cost_vec))
    return opt_result::failure_at (stmt_info->stmt,
                                           "not vectorized:"
                                           " live stmt not supported: %G",
                                           stmt_info->stmt);

  return opt_result::success ();
}
11315 
11316 
11317 /* Function vect_transform_stmt.
11318 
11319    Create a vectorized stmt to replace STMT_INFO, and insert it at GSI.  */
11320 
11321 bool
vect_transform_stmt(vec_info * vinfo,stmt_vec_info stmt_info,gimple_stmt_iterator * gsi,slp_tree slp_node,slp_instance slp_node_instance)11322 vect_transform_stmt (vec_info *vinfo,
11323                          stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11324                          slp_tree slp_node, slp_instance slp_node_instance)
11325 {
11326   bool is_store = false;
11327   gimple *vec_stmt = NULL;
11328   bool done;
11329 
11330   gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
11331 
11332   tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
11333   if (slp_node)
11334     STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
11335 
11336   switch (STMT_VINFO_TYPE (stmt_info))
11337     {
11338     case type_demotion_vec_info_type:
11339     case type_promotion_vec_info_type:
11340     case type_conversion_vec_info_type:
11341       done = vectorizable_conversion (vinfo, stmt_info,
11342                                               gsi, &vec_stmt, slp_node, NULL);
11343       gcc_assert (done);
11344       break;
11345 
11346     case induc_vec_info_type:
11347       done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
11348                                              stmt_info, &vec_stmt, slp_node,
11349                                              NULL);
11350       gcc_assert (done);
11351       break;
11352 
11353     case shift_vec_info_type:
11354       done = vectorizable_shift (vinfo, stmt_info,
11355                                          gsi, &vec_stmt, slp_node, NULL);
11356       gcc_assert (done);
11357       break;
11358 
11359     case op_vec_info_type:
11360       done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11361                                              NULL);
11362       gcc_assert (done);
11363       break;
11364 
11365     case assignment_vec_info_type:
11366       done = vectorizable_assignment (vinfo, stmt_info,
11367                                               gsi, &vec_stmt, slp_node, NULL);
11368       gcc_assert (done);
11369       break;
11370 
11371     case load_vec_info_type:
11372       done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11373                                         NULL);
11374       gcc_assert (done);
11375       break;
11376 
11377     case store_vec_info_type:
11378       done = vectorizable_store (vinfo, stmt_info,
11379                                          gsi, &vec_stmt, slp_node, NULL);
11380       gcc_assert (done);
11381       if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
11382           {
11383             /* In case of interleaving, the whole chain is vectorized when the
11384                last store in the chain is reached.  Store stmts before the last
11385                one are skipped, and there vec_stmt_info shouldn't be freed
11386                meanwhile.  */
11387             stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
11388             if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
11389               is_store = true;
11390           }
11391       else
11392           is_store = true;
11393       break;
11394 
11395     case condition_vec_info_type:
11396       done = vectorizable_condition (vinfo, stmt_info,
11397                                              gsi, &vec_stmt, slp_node, NULL);
11398       gcc_assert (done);
11399       break;
11400 
11401     case comparison_vec_info_type:
11402       done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
11403                                               slp_node, NULL);
11404       gcc_assert (done);
11405       break;
11406 
11407     case call_vec_info_type:
11408       done = vectorizable_call (vinfo, stmt_info,
11409                                         gsi, &vec_stmt, slp_node, NULL);
11410       break;
11411 
11412     case call_simd_clone_vec_info_type:
11413       done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
11414                                                      slp_node, NULL);
11415       break;
11416 
11417     case reduc_vec_info_type:
11418       done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11419                                                gsi, &vec_stmt, slp_node);
11420       gcc_assert (done);
11421       break;
11422 
11423     case cycle_phi_info_type:
11424       done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
11425                                                &vec_stmt, slp_node, slp_node_instance);
11426       gcc_assert (done);
11427       break;
11428 
11429     case lc_phi_info_type:
11430       done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11431                                           stmt_info, &vec_stmt, slp_node);
11432       gcc_assert (done);
11433       break;
11434 
11435     case phi_info_type:
11436       done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
11437       gcc_assert (done);
11438       break;
11439 
11440     default:
11441       if (!STMT_VINFO_LIVE_P (stmt_info))
11442           {
11443             if (dump_enabled_p ())
11444               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11445                              "stmt not supported.\n");
11446             gcc_unreachable ();
11447           }
11448       done = true;
11449     }
11450 
11451   if (!slp_node && vec_stmt)
11452     gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
11453 
11454   if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
11455     {
11456       /* Handle stmts whose DEF is used outside the loop-nest that is
11457            being vectorized.  */
11458       done = can_vectorize_live_stmts (vinfo, stmt_info, gsi, slp_node,
11459                                                slp_node_instance, true, NULL);
11460       gcc_assert (done);
11461     }
11462 
11463   if (slp_node)
11464     STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
11465 
11466   return is_store;
11467 }
11468 
11469 
11470 /* Remove a group of stores (for SLP or interleaving), free their
11471    stmt_vec_info.  */
11472 
11473 void
vect_remove_stores(vec_info * vinfo,stmt_vec_info first_stmt_info)11474 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
11475 {
11476   stmt_vec_info next_stmt_info = first_stmt_info;
11477 
11478   while (next_stmt_info)
11479     {
11480       stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
11481       next_stmt_info = vect_orig_stmt (next_stmt_info);
11482       /* Free the attached stmt_vec_info and remove the stmt.  */
11483       vinfo->remove_stmt (next_stmt_info);
11484       next_stmt_info = tmp;
11485     }
11486 }
11487 
11488 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11489    elements of type SCALAR_TYPE, or null if the target doesn't support
11490    such a type.
11491 
11492    If NUNITS is zero, return a vector type that contains elements of
11493    type SCALAR_TYPE, choosing whichever vector size the target prefers.
11494 
11495    If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11496    for this vectorization region and want to "autodetect" the best choice.
11497    Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11498    and we want the new type to be interoperable with it.   PREVAILING_MODE
11499    in this case can be a scalar integer mode or a vector mode; when it
11500    is a vector mode, the function acts like a tree-level version of
11501    related_vector_mode.  */
11502 
11503 tree
get_related_vectype_for_scalar_type(machine_mode prevailing_mode,tree scalar_type,poly_uint64 nunits)11504 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
11505                                              tree scalar_type, poly_uint64 nunits)
11506 {
11507   tree orig_scalar_type = scalar_type;
11508   scalar_mode inner_mode;
11509   machine_mode simd_mode;
11510   tree vectype;
11511 
11512   if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
11513       && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
11514     return NULL_TREE;
11515 
11516   unsigned int nbytes = GET_MODE_SIZE (inner_mode);
11517 
11518   /* For vector types of elements whose mode precision doesn't
11519      match their types precision we use a element type of mode
11520      precision.  The vectorization routines will have to make sure
11521      they support the proper result truncation/extension.
11522      We also make sure to build vector types with INTEGER_TYPE
11523      component type only.  */
11524   if (INTEGRAL_TYPE_P (scalar_type)
11525       && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11526             || TREE_CODE (scalar_type) != INTEGER_TYPE))
11527     scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11528                                                               TYPE_UNSIGNED (scalar_type));
11529 
11530   /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11531      When the component mode passes the above test simply use a type
11532      corresponding to that mode.  The theory is that any use that
11533      would cause problems with this will disable vectorization anyway.  */
11534   else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11535              && !INTEGRAL_TYPE_P (scalar_type))
11536     scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11537 
11538   /* We can't build a vector type of elements with alignment bigger than
11539      their size.  */
11540   else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11541     scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11542                                                               TYPE_UNSIGNED (scalar_type));
11543 
11544   /* If we felt back to using the mode fail if there was
11545      no scalar type for it.  */
11546   if (scalar_type == NULL_TREE)
11547     return NULL_TREE;
11548 
11549   /* If no prevailing mode was supplied, use the mode the target prefers.
11550      Otherwise lookup a vector mode based on the prevailing mode.  */
11551   if (prevailing_mode == VOIDmode)
11552     {
11553       gcc_assert (known_eq (nunits, 0U));
11554       simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11555       if (SCALAR_INT_MODE_P (simd_mode))
11556           {
11557             /* Traditional behavior is not to take the integer mode
11558                literally, but simply to use it as a way of determining
11559                the vector size.  It is up to mode_for_vector to decide
11560                what the TYPE_MODE should be.
11561 
11562                Note that nunits == 1 is allowed in order to support single
11563                element vector types.  */
11564             if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11565                 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11566               return NULL_TREE;
11567           }
11568     }
11569   else if (SCALAR_INT_MODE_P (prevailing_mode)
11570              || !related_vector_mode (prevailing_mode,
11571                                             inner_mode, nunits).exists (&simd_mode))
11572     {
11573       /* Fall back to using mode_for_vector, mostly in the hope of being
11574            able to use an integer mode.  */
11575       if (known_eq (nunits, 0U)
11576             && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11577           return NULL_TREE;
11578 
11579       if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11580           return NULL_TREE;
11581     }
11582 
11583   vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11584 
11585   /* In cases where the mode was chosen by mode_for_vector, check that
11586      the target actually supports the chosen mode, or that it at least
11587      allows the vector mode to be replaced by a like-sized integer.  */
11588   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11589       && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11590     return NULL_TREE;
11591 
11592   /* Re-attach the address-space qualifier if we canonicalized the scalar
11593      type.  */
11594   if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11595     return build_qualified_type
11596                (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11597 
11598   return vectype;
11599 }
11600 
11601 /* Function get_vectype_for_scalar_type.
11602 
11603    Returns the vector type corresponding to SCALAR_TYPE as supported
11604    by the target.  If GROUP_SIZE is nonzero and we're performing BB
11605    vectorization, make sure that the number of elements in the vector
11606    is no bigger than GROUP_SIZE.  */
11607 
11608 tree
get_vectype_for_scalar_type(vec_info * vinfo,tree scalar_type,unsigned int group_size)11609 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11610                                    unsigned int group_size)
11611 {
11612   /* For BB vectorization, we should always have a group size once we've
11613      constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11614      are tentative requests during things like early data reference
11615      analysis and pattern recognition.  */
11616   if (is_a <bb_vec_info> (vinfo))
11617     gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11618   else
11619     group_size = 0;
11620 
11621   tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11622                                                                   scalar_type);
11623   if (vectype && vinfo->vector_mode == VOIDmode)
11624     vinfo->vector_mode = TYPE_MODE (vectype);
11625 
11626   /* Register the natural choice of vector type, before the group size
11627      has been applied.  */
11628   if (vectype)
11629     vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11630 
11631   /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11632      try again with an explicit number of elements.  */
11633   if (vectype
11634       && group_size
11635       && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11636     {
11637       /* Start with the biggest number of units that fits within
11638            GROUP_SIZE and halve it until we find a valid vector type.
11639            Usually either the first attempt will succeed or all will
11640            fail (in the latter case because GROUP_SIZE is too small
11641            for the target), but it's possible that a target could have
11642            a hole between supported vector types.
11643 
11644            If GROUP_SIZE is not a power of 2, this has the effect of
11645            trying the largest power of 2 that fits within the group,
11646            even though the group is not a multiple of that vector size.
11647            The BB vectorizer will then try to carve up the group into
11648            smaller pieces.  */
11649       unsigned int nunits = 1 << floor_log2 (group_size);
11650       do
11651           {
11652             vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11653                                                                        scalar_type, nunits);
11654             nunits /= 2;
11655           }
11656       while (nunits > 1 && !vectype);
11657     }
11658 
11659   return vectype;
11660 }
11661 
11662 /* Return the vector type corresponding to SCALAR_TYPE as supported
11663    by the target.  NODE, if nonnull, is the SLP tree node that will
11664    use the returned vector type.  */
11665 
11666 tree
get_vectype_for_scalar_type(vec_info * vinfo,tree scalar_type,slp_tree node)11667 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11668 {
11669   unsigned int group_size = 0;
11670   if (node)
11671     group_size = SLP_TREE_LANES (node);
11672   return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11673 }
11674 
11675 /* Function get_mask_type_for_scalar_type.
11676 
11677    Returns the mask type corresponding to a result of comparison
11678    of vectors of specified SCALAR_TYPE as supported by target.
11679    If GROUP_SIZE is nonzero and we're performing BB vectorization,
11680    make sure that the number of elements in the vector is no bigger
11681    than GROUP_SIZE.  */
11682 
11683 tree
get_mask_type_for_scalar_type(vec_info * vinfo,tree scalar_type,unsigned int group_size)11684 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11685                                      unsigned int group_size)
11686 {
11687   tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11688 
11689   if (!vectype)
11690     return NULL;
11691 
11692   return truth_type_for (vectype);
11693 }
11694 
11695 /* Function get_same_sized_vectype
11696 
11697    Returns a vector type corresponding to SCALAR_TYPE of size
11698    VECTOR_TYPE if supported by the target.  */
11699 
11700 tree
get_same_sized_vectype(tree scalar_type,tree vector_type)11701 get_same_sized_vectype (tree scalar_type, tree vector_type)
11702 {
11703   if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11704     return truth_type_for (vector_type);
11705 
11706   poly_uint64 nunits;
11707   if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11708                        GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11709     return NULL_TREE;
11710 
11711   return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11712                                                         scalar_type, nunits);
11713 }
11714 
11715 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11716    would not change the chosen vector modes.  */
11717 
11718 bool
vect_chooses_same_modes_p(vec_info * vinfo,machine_mode vector_mode)11719 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11720 {
11721   for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11722        i != vinfo->used_vector_modes.end (); ++i)
11723     if (!VECTOR_MODE_P (*i)
11724           || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11725       return false;
11726   return true;
11727 }
11728 
11729 /* Function vect_is_simple_use.
11730 
11731    Input:
11732    VINFO - the vect info of the loop or basic block that is being vectorized.
11733    OPERAND - operand in the loop or bb.
11734    Output:
11735    DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11736      case OPERAND is an SSA_NAME that is defined in the vectorizable region
11737    DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11738      the definition could be anywhere in the function
11739    DT - the type of definition
11740 
11741    Returns whether a stmt with OPERAND can be vectorized.
11742    For loops, supportable operands are constants, loop invariants, and operands
11743    that are defined by the current iteration of the loop.  Unsupportable
11744    operands are those that are defined by a previous iteration of the loop (as
11745    is the case in reduction/induction computations).
11746    For basic blocks, supportable operands are constants and bb invariants.
11747    For now, operands defined outside the basic block are not supported.  */
11748 
11749 bool
vect_is_simple_use(tree operand,vec_info * vinfo,enum vect_def_type * dt,stmt_vec_info * def_stmt_info_out,gimple ** def_stmt_out)11750 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11751                         stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11752 {
11753   if (def_stmt_info_out)
11754     *def_stmt_info_out = NULL;
11755   if (def_stmt_out)
11756     *def_stmt_out = NULL;
11757   *dt = vect_unknown_def_type;
11758 
11759   if (dump_enabled_p ())
11760     {
11761       dump_printf_loc (MSG_NOTE, vect_location,
11762                        "vect_is_simple_use: operand ");
11763       if (TREE_CODE (operand) == SSA_NAME
11764             && !SSA_NAME_IS_DEFAULT_DEF (operand))
11765           dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11766       else
11767           dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11768     }
11769 
11770   if (CONSTANT_CLASS_P (operand))
11771     *dt = vect_constant_def;
11772   else if (is_gimple_min_invariant (operand))
11773     *dt = vect_external_def;
11774   else if (TREE_CODE (operand) != SSA_NAME)
11775     *dt = vect_unknown_def_type;
11776   else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11777     *dt = vect_external_def;
11778   else
11779     {
11780       gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11781       stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11782       if (!stmt_vinfo)
11783           *dt = vect_external_def;
11784       else
11785           {
11786             stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11787             def_stmt = stmt_vinfo->stmt;
11788             *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11789             if (def_stmt_info_out)
11790               *def_stmt_info_out = stmt_vinfo;
11791           }
11792       if (def_stmt_out)
11793           *def_stmt_out = def_stmt;
11794     }
11795 
11796   if (dump_enabled_p ())
11797     {
11798       dump_printf (MSG_NOTE, ", type of def: ");
11799       switch (*dt)
11800           {
11801           case vect_uninitialized_def:
11802             dump_printf (MSG_NOTE, "uninitialized\n");
11803             break;
11804           case vect_constant_def:
11805             dump_printf (MSG_NOTE, "constant\n");
11806             break;
11807           case vect_external_def:
11808             dump_printf (MSG_NOTE, "external\n");
11809             break;
11810           case vect_internal_def:
11811             dump_printf (MSG_NOTE, "internal\n");
11812             break;
11813           case vect_induction_def:
11814             dump_printf (MSG_NOTE, "induction\n");
11815             break;
11816           case vect_reduction_def:
11817             dump_printf (MSG_NOTE, "reduction\n");
11818             break;
11819           case vect_double_reduction_def:
11820             dump_printf (MSG_NOTE, "double reduction\n");
11821             break;
11822           case vect_nested_cycle:
11823             dump_printf (MSG_NOTE, "nested cycle\n");
11824             break;
11825           case vect_unknown_def_type:
11826             dump_printf (MSG_NOTE, "unknown\n");
11827             break;
11828           }
11829     }
11830 
11831   if (*dt == vect_unknown_def_type)
11832     {
11833       if (dump_enabled_p ())
11834         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11835                          "Unsupported pattern.\n");
11836       return false;
11837     }
11838 
11839   return true;
11840 }
11841 
11842 /* Function vect_is_simple_use.
11843 
11844    Same as vect_is_simple_use but also determines the vector operand
11845    type of OPERAND and stores it to *VECTYPE.  If the definition of
11846    OPERAND is vect_uninitialized_def, vect_constant_def or
11847    vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11848    is responsible to compute the best suited vector type for the
11849    scalar operand.  */
11850 
11851 bool
vect_is_simple_use(tree operand,vec_info * vinfo,enum vect_def_type * dt,tree * vectype,stmt_vec_info * def_stmt_info_out,gimple ** def_stmt_out)11852 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11853                         tree *vectype, stmt_vec_info *def_stmt_info_out,
11854                         gimple **def_stmt_out)
11855 {
11856   stmt_vec_info def_stmt_info;
11857   gimple *def_stmt;
11858   if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11859     return false;
11860 
11861   if (def_stmt_out)
11862     *def_stmt_out = def_stmt;
11863   if (def_stmt_info_out)
11864     *def_stmt_info_out = def_stmt_info;
11865 
11866   /* Now get a vector type if the def is internal, otherwise supply
11867      NULL_TREE and leave it up to the caller to figure out a proper
11868      type for the use stmt.  */
11869   if (*dt == vect_internal_def
11870       || *dt == vect_induction_def
11871       || *dt == vect_reduction_def
11872       || *dt == vect_double_reduction_def
11873       || *dt == vect_nested_cycle)
11874     {
11875       *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11876       gcc_assert (*vectype != NULL_TREE);
11877       if (dump_enabled_p ())
11878           dump_printf_loc (MSG_NOTE, vect_location,
11879                                "vect_is_simple_use: vectype %T\n", *vectype);
11880     }
11881   else if (*dt == vect_uninitialized_def
11882              || *dt == vect_constant_def
11883              || *dt == vect_external_def)
11884     *vectype = NULL_TREE;
11885   else
11886     gcc_unreachable ();
11887 
11888   return true;
11889 }
11890 
11891 /* Function vect_is_simple_use.
11892 
11893    Same as vect_is_simple_use but determines the operand by operand
11894    position OPERAND from either STMT or SLP_NODE, filling in *OP
11895    and *SLP_DEF (when SLP_NODE is not NULL).  */
11896 
11897 bool
vect_is_simple_use(vec_info * vinfo,stmt_vec_info stmt,slp_tree slp_node,unsigned operand,tree * op,slp_tree * slp_def,enum vect_def_type * dt,tree * vectype,stmt_vec_info * def_stmt_info_out)11898 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
11899                         unsigned operand, tree *op, slp_tree *slp_def,
11900                         enum vect_def_type *dt,
11901                         tree *vectype, stmt_vec_info *def_stmt_info_out)
11902 {
11903   if (slp_node)
11904     {
11905       slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
11906       *slp_def = child;
11907       *vectype = SLP_TREE_VECTYPE (child);
11908       if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
11909           {
11910             *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
11911             return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
11912           }
11913       else
11914           {
11915             if (def_stmt_info_out)
11916               *def_stmt_info_out = NULL;
11917             *op = SLP_TREE_SCALAR_OPS (child)[0];
11918             *dt = SLP_TREE_DEF_TYPE (child);
11919             return true;
11920           }
11921     }
11922   else
11923     {
11924       *slp_def = NULL;
11925       if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
11926           {
11927             if (gimple_assign_rhs_code (ass) == COND_EXPR
11928                 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
11929               {
11930                 if (operand < 2)
11931                     *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
11932                 else
11933                     *op = gimple_op (ass, operand);
11934               }
11935             else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
11936               *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
11937             else
11938               *op = gimple_op (ass, operand + 1);
11939           }
11940       else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
11941           *op = gimple_call_arg (call, operand);
11942       else
11943           gcc_unreachable ();
11944       return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
11945     }
11946 }
11947 
11948 /* If OP is not NULL and is external or constant update its vector
11949    type with VECTYPE.  Returns true if successful or false if not,
11950    for example when conflicting vector types are present.  */
11951 
11952 bool
vect_maybe_update_slp_op_vectype(slp_tree op,tree vectype)11953 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
11954 {
11955   if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
11956     return true;
11957   if (SLP_TREE_VECTYPE (op))
11958     return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
11959   SLP_TREE_VECTYPE (op) = vectype;
11960   return true;
11961 }
11962 
11963 /* Function supportable_widening_operation
11964 
11965    Check whether an operation represented by the code CODE is a
11966    widening operation that is supported by the target platform in
11967    vector form (i.e., when operating on arguments of type VECTYPE_IN
11968    producing a result of type VECTYPE_OUT).
11969 
11970    Widening operations we currently support are NOP (CONVERT), FLOAT,
11971    FIX_TRUNC and WIDEN_MULT.  This function checks if these operations
11972    are supported by the target platform either directly (via vector
11973    tree-codes), or via target builtins.
11974 
11975    Output:
11976    - CODE1 and CODE2 are codes of vector operations to be used when
11977    vectorizing the operation, if available.
11978    - MULTI_STEP_CVT determines the number of required intermediate steps in
11979    case of multi-step conversion (like char->short->int - in that case
11980    MULTI_STEP_CVT will be 1).
11981    - INTERM_TYPES contains the intermediate type required to perform the
11982    widening operation (short in the above example).  */
11983 
11984 bool
supportable_widening_operation(vec_info * vinfo,enum tree_code code,stmt_vec_info stmt_info,tree vectype_out,tree vectype_in,enum tree_code * code1,enum tree_code * code2,int * multi_step_cvt,vec<tree> * interm_types)11985 supportable_widening_operation (vec_info *vinfo,
11986                                         enum tree_code code, stmt_vec_info stmt_info,
11987                                         tree vectype_out, tree vectype_in,
11988                                 enum tree_code *code1, enum tree_code *code2,
11989                                 int *multi_step_cvt,
11990                                 vec<tree> *interm_types)
11991 {
11992   loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
11993   class loop *vect_loop = NULL;
11994   machine_mode vec_mode;
11995   enum insn_code icode1, icode2;
11996   optab optab1, optab2;
11997   tree vectype = vectype_in;
11998   tree wide_vectype = vectype_out;
11999   enum tree_code c1, c2;
12000   int i;
12001   tree prev_type, intermediate_type;
12002   machine_mode intermediate_mode, prev_mode;
12003   optab optab3, optab4;
12004 
12005   *multi_step_cvt = 0;
12006   if (loop_info)
12007     vect_loop = LOOP_VINFO_LOOP (loop_info);
12008 
12009   switch (code)
12010     {
12011     case WIDEN_MULT_EXPR:
12012       /* The result of a vectorized widening operation usually requires
12013            two vectors (because the widened results do not fit into one vector).
12014            The generated vector results would normally be expected to be
12015            generated in the same order as in the original scalar computation,
12016            i.e. if 8 results are generated in each vector iteration, they are
12017            to be organized as follows:
12018                     vect1: [res1,res2,res3,res4],
12019                     vect2: [res5,res6,res7,res8].
12020 
12021            However, in the special case that the result of the widening
12022            operation is used in a reduction computation only, the order doesn't
12023            matter (because when vectorizing a reduction we change the order of
12024            the computation).  Some targets can take advantage of this and
12025            generate more efficient code.  For example, targets like Altivec,
12026            that support widen_mult using a sequence of {mult_even,mult_odd}
12027            generate the following vectors:
12028                     vect1: [res1,res3,res5,res7],
12029                     vect2: [res2,res4,res6,res8].
12030 
12031            When vectorizing outer-loops, we execute the inner-loop sequentially
12032            (each vectorized inner-loop iteration contributes to VF outer-loop
12033            iterations in parallel).  We therefore don't allow to change the
12034            order of the computation in the inner-loop during outer-loop
12035            vectorization.  */
12036       /* TODO: Another case in which order doesn't *really* matter is when we
12037            widen and then contract again, e.g. (short)((int)x * y >> 8).
12038            Normally, pack_trunc performs an even/odd permute, whereas the
12039            repack from an even/odd expansion would be an interleave, which
12040            would be significantly simpler for e.g. AVX2.  */
12041       /* In any case, in order to avoid duplicating the code below, recurse
12042            on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
12043            are properly set up for the caller.  If we fail, we'll continue with
12044            a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
12045       if (vect_loop
12046             && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
12047             && !nested_in_vect_loop_p (vect_loop, stmt_info)
12048             && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
12049                                                        stmt_info, vectype_out,
12050                                                        vectype_in, code1, code2,
12051                                                        multi_step_cvt, interm_types))
12052         {
12053           /* Elements in a vector with vect_used_by_reduction property cannot
12054              be reordered if the use chain with this property does not have the
12055              same operation.  One such an example is s += a * b, where elements
12056              in a and b cannot be reordered.  Here we check if the vector defined
12057              by STMT is only directly used in the reduction statement.  */
12058             tree lhs = gimple_assign_lhs (stmt_info->stmt);
12059             stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
12060             if (use_stmt_info
12061                 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
12062               return true;
12063         }
12064       c1 = VEC_WIDEN_MULT_LO_EXPR;
12065       c2 = VEC_WIDEN_MULT_HI_EXPR;
12066       break;
12067 
12068     case DOT_PROD_EXPR:
12069       c1 = DOT_PROD_EXPR;
12070       c2 = DOT_PROD_EXPR;
12071       break;
12072 
12073     case SAD_EXPR:
12074       c1 = SAD_EXPR;
12075       c2 = SAD_EXPR;
12076       break;
12077 
12078     case VEC_WIDEN_MULT_EVEN_EXPR:
12079       /* Support the recursion induced just above.  */
12080       c1 = VEC_WIDEN_MULT_EVEN_EXPR;
12081       c2 = VEC_WIDEN_MULT_ODD_EXPR;
12082       break;
12083 
12084     case WIDEN_LSHIFT_EXPR:
12085       c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
12086       c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
12087       break;
12088 
12089     case WIDEN_PLUS_EXPR:
12090       c1 = VEC_WIDEN_PLUS_LO_EXPR;
12091       c2 = VEC_WIDEN_PLUS_HI_EXPR;
12092       break;
12093 
12094     case WIDEN_MINUS_EXPR:
12095       c1 = VEC_WIDEN_MINUS_LO_EXPR;
12096       c2 = VEC_WIDEN_MINUS_HI_EXPR;
12097       break;
12098 
12099     CASE_CONVERT:
12100       c1 = VEC_UNPACK_LO_EXPR;
12101       c2 = VEC_UNPACK_HI_EXPR;
12102       break;
12103 
12104     case FLOAT_EXPR:
12105       c1 = VEC_UNPACK_FLOAT_LO_EXPR;
12106       c2 = VEC_UNPACK_FLOAT_HI_EXPR;
12107       break;
12108 
12109     case FIX_TRUNC_EXPR:
12110       c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
12111       c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
12112       break;
12113 
12114     default:
12115       gcc_unreachable ();
12116     }
12117 
12118   if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
12119     std::swap (c1, c2);
12120 
12121   if (code == FIX_TRUNC_EXPR)
12122     {
12123       /* The signedness is determined from output operand.  */
12124       optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12125       optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
12126     }
12127   else if (CONVERT_EXPR_CODE_P (code)
12128              && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
12129              && VECTOR_BOOLEAN_TYPE_P (vectype)
12130              && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
12131              && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
12132     {
12133       /* If the input and result modes are the same, a different optab
12134            is needed where we pass in the number of units in vectype.  */
12135       optab1 = vec_unpacks_sbool_lo_optab;
12136       optab2 = vec_unpacks_sbool_hi_optab;
12137     }
12138   else
12139     {
12140       optab1 = optab_for_tree_code (c1, vectype, optab_default);
12141       optab2 = optab_for_tree_code (c2, vectype, optab_default);
12142     }
12143 
12144   if (!optab1 || !optab2)
12145     return false;
12146 
12147   vec_mode = TYPE_MODE (vectype);
12148   if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
12149        || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
12150     return false;
12151 
12152   *code1 = c1;
12153   *code2 = c2;
12154 
12155   if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
12156       && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
12157     {
12158       if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12159           return true;
12160       /* For scalar masks we may have different boolean
12161            vector types having the same QImode.  Thus we
12162            add additional check for elements number.  */
12163       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
12164                         TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
12165           return true;
12166     }
12167 
12168   /* Check if it's a multi-step conversion that can be done using intermediate
12169      types.  */
12170 
12171   prev_type = vectype;
12172   prev_mode = vec_mode;
12173 
12174   if (!CONVERT_EXPR_CODE_P (code))
12175     return false;
12176 
12177   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12178      intermediate steps in promotion sequence.  We try
12179      MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
12180      not.  */
12181   interm_types->create (MAX_INTERM_CVT_STEPS);
12182   for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12183     {
12184       intermediate_mode = insn_data[icode1].operand[0].mode;
12185       if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12186           intermediate_type
12187             = vect_halve_mask_nunits (prev_type, intermediate_mode);
12188       else
12189           intermediate_type
12190             = lang_hooks.types.type_for_mode (intermediate_mode,
12191                                                       TYPE_UNSIGNED (prev_type));
12192 
12193       if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12194             && VECTOR_BOOLEAN_TYPE_P (prev_type)
12195             && intermediate_mode == prev_mode
12196             && SCALAR_INT_MODE_P (prev_mode))
12197           {
12198             /* If the input and result modes are the same, a different optab
12199                is needed where we pass in the number of units in vectype.  */
12200             optab3 = vec_unpacks_sbool_lo_optab;
12201             optab4 = vec_unpacks_sbool_hi_optab;
12202           }
12203       else
12204           {
12205             optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
12206             optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
12207           }
12208 
12209       if (!optab3 || !optab4
12210           || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
12211             || insn_data[icode1].operand[0].mode != intermediate_mode
12212             || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
12213             || insn_data[icode2].operand[0].mode != intermediate_mode
12214             || ((icode1 = optab_handler (optab3, intermediate_mode))
12215                 == CODE_FOR_nothing)
12216             || ((icode2 = optab_handler (optab4, intermediate_mode))
12217                 == CODE_FOR_nothing))
12218           break;
12219 
12220       interm_types->quick_push (intermediate_type);
12221       (*multi_step_cvt)++;
12222 
12223       if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
12224             && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
12225           {
12226             if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12227               return true;
12228             if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
12229                               TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
12230               return true;
12231           }
12232 
12233       prev_type = intermediate_type;
12234       prev_mode = intermediate_mode;
12235     }
12236 
12237   interm_types->release ();
12238   return false;
12239 }
12240 
12241 
12242 /* Function supportable_narrowing_operation
12243 
12244    Check whether an operation represented by the code CODE is a
12245    narrowing operation that is supported by the target platform in
12246    vector form (i.e., when operating on arguments of type VECTYPE_IN
12247    and producing a result of type VECTYPE_OUT).
12248 
12249    Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
12250    and FLOAT.  This function checks if these operations are supported by
12251    the target platform directly via vector tree-codes.
12252 
12253    Output:
12254    - CODE1 is the code of a vector operation to be used when
12255    vectorizing the operation, if available.
12256    - MULTI_STEP_CVT determines the number of required intermediate steps in
12257    case of multi-step conversion (like int->short->char - in that case
12258    MULTI_STEP_CVT will be 1).
12259    - INTERM_TYPES contains the intermediate type required to perform the
12260    narrowing operation (short in the above example).   */
12261 
12262 bool
supportable_narrowing_operation(enum tree_code code,tree vectype_out,tree vectype_in,enum tree_code * code1,int * multi_step_cvt,vec<tree> * interm_types)12263 supportable_narrowing_operation (enum tree_code code,
12264                                          tree vectype_out, tree vectype_in,
12265                                          enum tree_code *code1, int *multi_step_cvt,
12266                                  vec<tree> *interm_types)
12267 {
12268   machine_mode vec_mode;
12269   enum insn_code icode1;
12270   optab optab1, interm_optab;
12271   tree vectype = vectype_in;
12272   tree narrow_vectype = vectype_out;
12273   enum tree_code c1;
12274   tree intermediate_type, prev_type;
12275   machine_mode intermediate_mode, prev_mode;
12276   int i;
12277   unsigned HOST_WIDE_INT n_elts;
12278   bool uns;
12279 
12280   *multi_step_cvt = 0;
12281   switch (code)
12282     {
12283     CASE_CONVERT:
12284       c1 = VEC_PACK_TRUNC_EXPR;
12285       if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
12286             && VECTOR_BOOLEAN_TYPE_P (vectype)
12287             && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
12288             && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
12289             && n_elts < BITS_PER_UNIT)
12290           optab1 = vec_pack_sbool_trunc_optab;
12291       else
12292           optab1 = optab_for_tree_code (c1, vectype, optab_default);
12293       break;
12294 
12295     case FIX_TRUNC_EXPR:
12296       c1 = VEC_PACK_FIX_TRUNC_EXPR;
12297       /* The signedness is determined from output operand.  */
12298       optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12299       break;
12300 
12301     case FLOAT_EXPR:
12302       c1 = VEC_PACK_FLOAT_EXPR;
12303       optab1 = optab_for_tree_code (c1, vectype, optab_default);
12304       break;
12305 
12306     default:
12307       gcc_unreachable ();
12308     }
12309 
12310   if (!optab1)
12311     return false;
12312 
12313   vec_mode = TYPE_MODE (vectype);
12314   if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
12315     return false;
12316 
12317   *code1 = c1;
12318 
12319   if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12320     {
12321       if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12322           return true;
12323       /* For scalar masks we may have different boolean
12324            vector types having the same QImode.  Thus we
12325            add additional check for elements number.  */
12326       if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
12327                         TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12328           return true;
12329     }
12330 
12331   if (code == FLOAT_EXPR)
12332     return false;
12333 
12334   /* Check if it's a multi-step conversion that can be done using intermediate
12335      types.  */
12336   prev_mode = vec_mode;
12337   prev_type = vectype;
12338   if (code == FIX_TRUNC_EXPR)
12339     uns = TYPE_UNSIGNED (vectype_out);
12340   else
12341     uns = TYPE_UNSIGNED (vectype);
12342 
12343   /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
12344      conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
12345      costly than signed.  */
12346   if (code == FIX_TRUNC_EXPR && uns)
12347     {
12348       enum insn_code icode2;
12349 
12350       intermediate_type
12351           = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
12352       interm_optab
12353           = optab_for_tree_code (c1, intermediate_type, optab_default);
12354       if (interm_optab != unknown_optab
12355             && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
12356             && insn_data[icode1].operand[0].mode
12357                == insn_data[icode2].operand[0].mode)
12358           {
12359             uns = false;
12360             optab1 = interm_optab;
12361             icode1 = icode2;
12362           }
12363     }
12364 
12365   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12366      intermediate steps in promotion sequence.  We try
12367      MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
12368   interm_types->create (MAX_INTERM_CVT_STEPS);
12369   for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12370     {
12371       intermediate_mode = insn_data[icode1].operand[0].mode;
12372       if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12373           intermediate_type
12374             = vect_double_mask_nunits (prev_type, intermediate_mode);
12375       else
12376           intermediate_type
12377             = lang_hooks.types.type_for_mode (intermediate_mode, uns);
12378       if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12379             && VECTOR_BOOLEAN_TYPE_P (prev_type)
12380             && SCALAR_INT_MODE_P (prev_mode)
12381             && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
12382             && n_elts < BITS_PER_UNIT)
12383           interm_optab = vec_pack_sbool_trunc_optab;
12384       else
12385           interm_optab
12386             = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
12387                                          optab_default);
12388       if (!interm_optab
12389             || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
12390             || insn_data[icode1].operand[0].mode != intermediate_mode
12391             || ((icode1 = optab_handler (interm_optab, intermediate_mode))
12392                 == CODE_FOR_nothing))
12393           break;
12394 
12395       interm_types->quick_push (intermediate_type);
12396       (*multi_step_cvt)++;
12397 
12398       if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12399           {
12400             if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12401               return true;
12402             if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
12403                               TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12404               return true;
12405           }
12406 
12407       prev_mode = intermediate_mode;
12408       prev_type = intermediate_type;
12409       optab1 = interm_optab;
12410     }
12411 
12412   interm_types->release ();
12413   return false;
12414 }
12415 
12416 /* Generate and return a vector mask of MASK_TYPE such that
12417    mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
12418    Add the statements to SEQ.  */
12419 
12420 tree
vect_gen_while(gimple_seq * seq,tree mask_type,tree start_index,tree end_index,const char * name)12421 vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
12422                     tree end_index, const char *name)
12423 {
12424   tree cmp_type = TREE_TYPE (start_index);
12425   gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
12426                                                                    cmp_type, mask_type,
12427                                                                    OPTIMIZE_FOR_SPEED));
12428   gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
12429                                                       start_index, end_index,
12430                                                       build_zero_cst (mask_type));
12431   tree tmp;
12432   if (name)
12433     tmp = make_temp_ssa_name (mask_type, NULL, name);
12434   else
12435     tmp = make_ssa_name (mask_type);
12436   gimple_call_set_lhs (call, tmp);
12437   gimple_seq_add_stmt (seq, call);
12438   return tmp;
12439 }
12440 
12441 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
12442    J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */
12443 
12444 tree
vect_gen_while_not(gimple_seq * seq,tree mask_type,tree start_index,tree end_index)12445 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
12446                         tree end_index)
12447 {
12448   tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
12449   return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
12450 }
12451 
12452 /* Try to compute the vector types required to vectorize STMT_INFO,
12453    returning true on success and false if vectorization isn't possible.
12454    If GROUP_SIZE is nonzero and we're performing BB vectorization,
12455    make sure that the number of elements in the vectors is no bigger
12456    than GROUP_SIZE.
12457 
12458    On success:
12459 
12460    - Set *STMT_VECTYPE_OUT to:
12461      - NULL_TREE if the statement doesn't need to be vectorized;
12462      - the equivalent of STMT_VINFO_VECTYPE otherwise.
12463 
12464    - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12465      number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12466      statement does not help to determine the overall number of units.  */
12467 
12468 opt_result
vect_get_vector_types_for_stmt(vec_info * vinfo,stmt_vec_info stmt_info,tree * stmt_vectype_out,tree * nunits_vectype_out,unsigned int group_size)12469 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
12470                                         tree *stmt_vectype_out,
12471                                         tree *nunits_vectype_out,
12472                                         unsigned int group_size)
12473 {
12474   gimple *stmt = stmt_info->stmt;
12475 
12476   /* For BB vectorization, we should always have a group size once we've
12477      constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12478      are tentative requests during things like early data reference
12479      analysis and pattern recognition.  */
12480   if (is_a <bb_vec_info> (vinfo))
12481     gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
12482   else
12483     group_size = 0;
12484 
12485   *stmt_vectype_out = NULL_TREE;
12486   *nunits_vectype_out = NULL_TREE;
12487 
12488   if (gimple_get_lhs (stmt) == NULL_TREE
12489       /* MASK_STORE has no lhs, but is ok.  */
12490       && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
12491     {
12492       if (is_a <gcall *> (stmt))
12493           {
12494             /* Ignore calls with no lhs.  These must be calls to
12495                #pragma omp simd functions, and what vectorization factor
12496                it really needs can't be determined until
12497                vectorizable_simd_clone_call.  */
12498             if (dump_enabled_p ())
12499               dump_printf_loc (MSG_NOTE, vect_location,
12500                                    "defer to SIMD clone analysis.\n");
12501             return opt_result::success ();
12502           }
12503 
12504       return opt_result::failure_at (stmt,
12505                                              "not vectorized: irregular stmt.%G", stmt);
12506     }
12507 
12508   tree vectype;
12509   tree scalar_type = NULL_TREE;
12510   if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
12511     {
12512       vectype = STMT_VINFO_VECTYPE (stmt_info);
12513       if (dump_enabled_p ())
12514           dump_printf_loc (MSG_NOTE, vect_location,
12515                                "precomputed vectype: %T\n", vectype);
12516     }
12517   else if (vect_use_mask_type_p (stmt_info))
12518     {
12519       unsigned int precision = stmt_info->mask_precision;
12520       scalar_type = build_nonstandard_integer_type (precision, 1);
12521       vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
12522       if (!vectype)
12523           return opt_result::failure_at (stmt, "not vectorized: unsupported"
12524                                                " data-type %T\n", scalar_type);
12525       if (dump_enabled_p ())
12526           dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12527     }
12528   else
12529     {
12530       if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
12531           scalar_type = TREE_TYPE (DR_REF (dr));
12532       else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
12533           scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
12534       else
12535           scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
12536 
12537       if (dump_enabled_p ())
12538           {
12539             if (group_size)
12540               dump_printf_loc (MSG_NOTE, vect_location,
12541                                    "get vectype for scalar type (group size %d):"
12542                                    " %T\n", group_size, scalar_type);
12543             else
12544               dump_printf_loc (MSG_NOTE, vect_location,
12545                                    "get vectype for scalar type: %T\n", scalar_type);
12546           }
12547       vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12548       if (!vectype)
12549           return opt_result::failure_at (stmt,
12550                                                "not vectorized:"
12551                                                " unsupported data-type %T\n",
12552                                                scalar_type);
12553 
12554       if (dump_enabled_p ())
12555           dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12556     }
12557 
12558   if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
12559     return opt_result::failure_at (stmt,
12560                                            "not vectorized: vector stmt in loop:%G",
12561                                            stmt);
12562 
12563   *stmt_vectype_out = vectype;
12564 
12565   /* Don't try to compute scalar types if the stmt produces a boolean
12566      vector; use the existing vector type instead.  */
12567   tree nunits_vectype = vectype;
12568   if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12569     {
12570       /* The number of units is set according to the smallest scalar
12571            type (or the largest vector size, but we only support one
12572            vector size per vectorization).  */
12573       scalar_type = vect_get_smallest_scalar_type (stmt_info,
12574                                                                TREE_TYPE (vectype));
12575       if (scalar_type != TREE_TYPE (vectype))
12576           {
12577             if (dump_enabled_p ())
12578               dump_printf_loc (MSG_NOTE, vect_location,
12579                                    "get vectype for smallest scalar type: %T\n",
12580                                    scalar_type);
12581             nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12582                                                                       group_size);
12583             if (!nunits_vectype)
12584               return opt_result::failure_at
12585                 (stmt, "not vectorized: unsupported data-type %T\n",
12586                  scalar_type);
12587             if (dump_enabled_p ())
12588               dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12589                                    nunits_vectype);
12590           }
12591     }
12592 
12593   if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12594                        TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
12595     return opt_result::failure_at (stmt,
12596                                            "Not vectorized: Incompatible number "
12597                                            "of vector subparts between %T and %T\n",
12598                                            nunits_vectype, *stmt_vectype_out);
12599 
12600   if (dump_enabled_p ())
12601     {
12602       dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12603       dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12604       dump_printf (MSG_NOTE, "\n");
12605     }
12606 
12607   *nunits_vectype_out = nunits_vectype;
12608   return opt_result::success ();
12609 }
12610 
12611 /* Generate and return statement sequence that sets vector length LEN that is:
12612 
12613    min_of_start_and_end = min (START_INDEX, END_INDEX);
12614    left_len = END_INDEX - min_of_start_and_end;
12615    rhs = min (left_len, LEN_LIMIT);
12616    LEN = rhs;
12617 
12618    Note: the cost of the code generated by this function is modeled
12619    by vect_estimate_min_profitable_iters, so changes here may need
12620    corresponding changes there.  */
12621 
12622 gimple_seq
vect_gen_len(tree len,tree start_index,tree end_index,tree len_limit)12623 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
12624 {
12625   gimple_seq stmts = NULL;
12626   tree len_type = TREE_TYPE (len);
12627   gcc_assert (TREE_TYPE (start_index) == len_type);
12628 
12629   tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
12630   tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
12631   tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
12632   gimple* stmt = gimple_build_assign (len, rhs);
12633   gimple_seq_add_stmt (&stmts, stmt);
12634 
12635   return stmts;
12636 }
12637 
12638