xref: /dragonfly/contrib/gcc-8.0/gcc/tree-vect-stmts.c (revision 95059079af47f9a66a175f374f2da1a5020e3255)
1 /* Statement Analysis and Transformation for Vectorization
2    Copyright (C) 2003-2018 Free Software Foundation, Inc.
3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
4    and Ira Rosen <irar@il.ibm.com>
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12 
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h"            /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
51 #include "tree-vector-builder.h"
52 #include "vec-perm-indices.h"
53 #include "tree-ssa-loop-niter.h"
54 #include "gimple-fold.h"
55 
56 /* For lang_hooks.types.type_for_mode.  */
57 #include "langhooks.h"
58 
59 /* Return the vectorized type for the given statement.  */
60 
61 tree
stmt_vectype(struct _stmt_vec_info * stmt_info)62 stmt_vectype (struct _stmt_vec_info *stmt_info)
63 {
64   return STMT_VINFO_VECTYPE (stmt_info);
65 }
66 
67 /* Return TRUE iff the given statement is in an inner loop relative to
68    the loop being vectorized.  */
69 bool
stmt_in_inner_loop_p(struct _stmt_vec_info * stmt_info)70 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
71 {
72   gimple *stmt = STMT_VINFO_STMT (stmt_info);
73   basic_block bb = gimple_bb (stmt);
74   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
75   struct loop* loop;
76 
77   if (!loop_vinfo)
78     return false;
79 
80   loop = LOOP_VINFO_LOOP (loop_vinfo);
81 
82   return (bb->loop_father == loop->inner);
83 }
84 
85 /* Record the cost of a statement, either by directly informing the
86    target model or by saving it in a vector for later processing.
87    Return a preliminary estimate of the statement's cost.  */
88 
89 unsigned
record_stmt_cost(stmt_vector_for_cost * body_cost_vec,int count,enum vect_cost_for_stmt kind,stmt_vec_info stmt_info,int misalign,enum vect_cost_model_location where)90 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
91                       enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
92                       int misalign, enum vect_cost_model_location where)
93 {
94   if ((kind == vector_load || kind == unaligned_load)
95       && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
96     kind = vector_gather_load;
97   if ((kind == vector_store || kind == unaligned_store)
98       && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
99     kind = vector_scatter_store;
100   if (body_cost_vec)
101     {
102       tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
103       stmt_info_for_cost si = { count, kind,
104                                       stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
105                                         misalign };
106       body_cost_vec->safe_push (si);
107       return (unsigned)
108           (builtin_vectorization_cost (kind, vectype, misalign) * count);
109     }
110   else
111     return add_stmt_cost (stmt_info->vinfo->target_cost_data,
112                                 count, kind, stmt_info, misalign, where);
113 }
114 
115 /* Return a variable of type ELEM_TYPE[NELEMS].  */
116 
117 static tree
create_vector_array(tree elem_type,unsigned HOST_WIDE_INT nelems)118 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
119 {
120   return create_tmp_var (build_array_type_nelts (elem_type, nelems),
121                                "vect_array");
122 }
123 
124 /* ARRAY is an array of vectors created by create_vector_array.
125    Return an SSA_NAME for the vector in index N.  The reference
126    is part of the vectorization of STMT and the vector is associated
127    with scalar destination SCALAR_DEST.  */
128 
129 static tree
read_vector_array(gimple * stmt,gimple_stmt_iterator * gsi,tree scalar_dest,tree array,unsigned HOST_WIDE_INT n)130 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
131                        tree array, unsigned HOST_WIDE_INT n)
132 {
133   tree vect_type, vect, vect_name, array_ref;
134   gimple *new_stmt;
135 
136   gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
137   vect_type = TREE_TYPE (TREE_TYPE (array));
138   vect = vect_create_destination_var (scalar_dest, vect_type);
139   array_ref = build4 (ARRAY_REF, vect_type, array,
140                           build_int_cst (size_type_node, n),
141                           NULL_TREE, NULL_TREE);
142 
143   new_stmt = gimple_build_assign (vect, array_ref);
144   vect_name = make_ssa_name (vect, new_stmt);
145   gimple_assign_set_lhs (new_stmt, vect_name);
146   vect_finish_stmt_generation (stmt, new_stmt, gsi);
147 
148   return vect_name;
149 }
150 
151 /* ARRAY is an array of vectors created by create_vector_array.
152    Emit code to store SSA_NAME VECT in index N of the array.
153    The store is part of the vectorization of STMT.  */
154 
155 static void
write_vector_array(gimple * stmt,gimple_stmt_iterator * gsi,tree vect,tree array,unsigned HOST_WIDE_INT n)156 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
157                         tree array, unsigned HOST_WIDE_INT n)
158 {
159   tree array_ref;
160   gimple *new_stmt;
161 
162   array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
163                           build_int_cst (size_type_node, n),
164                           NULL_TREE, NULL_TREE);
165 
166   new_stmt = gimple_build_assign (array_ref, vect);
167   vect_finish_stmt_generation (stmt, new_stmt, gsi);
168 }
169 
170 /* PTR is a pointer to an array of type TYPE.  Return a representation
171    of *PTR.  The memory reference replaces those in FIRST_DR
172    (and its group).  */
173 
174 static tree
create_array_ref(tree type,tree ptr,tree alias_ptr_type)175 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
176 {
177   tree mem_ref;
178 
179   mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
180   /* Arrays have the same alignment as their type.  */
181   set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
182   return mem_ref;
183 }
184 
185 /* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
186 
187 /* Function vect_mark_relevant.
188 
189    Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */
190 
191 static void
vect_mark_relevant(vec<gimple * > * worklist,gimple * stmt,enum vect_relevant relevant,bool live_p)192 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
193                         enum vect_relevant relevant, bool live_p)
194 {
195   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
196   enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
197   bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
198   gimple *pattern_stmt;
199 
200   if (dump_enabled_p ())
201     {
202       dump_printf_loc (MSG_NOTE, vect_location,
203                            "mark relevant %d, live %d: ", relevant, live_p);
204       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
205     }
206 
207   /* If this stmt is an original stmt in a pattern, we might need to mark its
208      related pattern stmt instead of the original stmt.  However, such stmts
209      may have their own uses that are not in any pattern, in such cases the
210      stmt itself should be marked.  */
211   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
212     {
213       /* This is the last stmt in a sequence that was detected as a
214            pattern that can potentially be vectorized.  Don't mark the stmt
215            as relevant/live because it's not going to be vectorized.
216            Instead mark the pattern-stmt that replaces it.  */
217 
218       pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
219 
220       if (dump_enabled_p ())
221           dump_printf_loc (MSG_NOTE, vect_location,
222                                "last stmt in pattern. don't mark"
223                                " relevant/live.\n");
224       stmt_info = vinfo_for_stmt (pattern_stmt);
225       gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
226       save_relevant = STMT_VINFO_RELEVANT (stmt_info);
227       save_live_p = STMT_VINFO_LIVE_P (stmt_info);
228       stmt = pattern_stmt;
229     }
230 
231   STMT_VINFO_LIVE_P (stmt_info) |= live_p;
232   if (relevant > STMT_VINFO_RELEVANT (stmt_info))
233     STMT_VINFO_RELEVANT (stmt_info) = relevant;
234 
235   if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
236       && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
237     {
238       if (dump_enabled_p ())
239         dump_printf_loc (MSG_NOTE, vect_location,
240                          "already marked relevant/live.\n");
241       return;
242     }
243 
244   worklist->safe_push (stmt);
245 }
246 
247 
248 /* Function is_simple_and_all_uses_invariant
249 
250    Return true if STMT is simple and all uses of it are invariant.  */
251 
252 bool
is_simple_and_all_uses_invariant(gimple * stmt,loop_vec_info loop_vinfo)253 is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
254 {
255   tree op;
256   gimple *def_stmt;
257   ssa_op_iter iter;
258 
259   if (!is_gimple_assign (stmt))
260     return false;
261 
262   FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
263     {
264       enum vect_def_type dt = vect_uninitialized_def;
265 
266       if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
267           {
268             if (dump_enabled_p ())
269               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
270                                    "use not simple.\n");
271             return false;
272           }
273 
274       if (dt != vect_external_def && dt != vect_constant_def)
275           return false;
276     }
277   return true;
278 }
279 
280 /* Function vect_stmt_relevant_p.
281 
282    Return true if STMT in loop that is represented by LOOP_VINFO is
283    "relevant for vectorization".
284 
285    A stmt is considered "relevant for vectorization" if:
286    - it has uses outside the loop.
287    - it has vdefs (it alters memory).
288    - control stmts in the loop (except for the exit condition).
289 
290    CHECKME: what other side effects would the vectorizer allow?  */
291 
292 static bool
vect_stmt_relevant_p(gimple * stmt,loop_vec_info loop_vinfo,enum vect_relevant * relevant,bool * live_p)293 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
294                           enum vect_relevant *relevant, bool *live_p)
295 {
296   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
297   ssa_op_iter op_iter;
298   imm_use_iterator imm_iter;
299   use_operand_p use_p;
300   def_operand_p def_p;
301 
302   *relevant = vect_unused_in_scope;
303   *live_p = false;
304 
305   /* cond stmt other than loop exit cond.  */
306   if (is_ctrl_stmt (stmt)
307       && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
308          != loop_exit_ctrl_vec_info_type)
309     *relevant = vect_used_in_scope;
310 
311   /* changing memory.  */
312   if (gimple_code (stmt) != GIMPLE_PHI)
313     if (gimple_vdef (stmt)
314           && !gimple_clobber_p (stmt))
315       {
316           if (dump_enabled_p ())
317             dump_printf_loc (MSG_NOTE, vect_location,
318                            "vec_stmt_relevant_p: stmt has vdefs.\n");
319           *relevant = vect_used_in_scope;
320       }
321 
322   /* uses outside the loop.  */
323   FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
324     {
325       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
326           {
327             basic_block bb = gimple_bb (USE_STMT (use_p));
328             if (!flow_bb_inside_loop_p (loop, bb))
329               {
330                 if (dump_enabled_p ())
331                     dump_printf_loc (MSG_NOTE, vect_location,
332                                  "vec_stmt_relevant_p: used out of loop.\n");
333 
334                 if (is_gimple_debug (USE_STMT (use_p)))
335                     continue;
336 
337                 /* We expect all such uses to be in the loop exit phis
338                      (because of loop closed form)   */
339                 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
340                 gcc_assert (bb == single_exit (loop)->dest);
341 
342               *live_p = true;
343               }
344           }
345     }
346 
347   if (*live_p && *relevant == vect_unused_in_scope
348       && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
349     {
350       if (dump_enabled_p ())
351           dump_printf_loc (MSG_NOTE, vect_location,
352                                "vec_stmt_relevant_p: stmt live but not relevant.\n");
353       *relevant = vect_used_only_live;
354     }
355 
356   return (*live_p || *relevant);
357 }
358 
359 
360 /* Function exist_non_indexing_operands_for_use_p
361 
362    USE is one of the uses attached to STMT.  Check if USE is
363    used in STMT for anything other than indexing an array.  */
364 
365 static bool
exist_non_indexing_operands_for_use_p(tree use,gimple * stmt)366 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
367 {
368   tree operand;
369   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
370 
371   /* USE corresponds to some operand in STMT.  If there is no data
372      reference in STMT, then any operand that corresponds to USE
373      is not indexing an array.  */
374   if (!STMT_VINFO_DATA_REF (stmt_info))
375     return true;
376 
377   /* STMT has a data_ref. FORNOW this means that its of one of
378      the following forms:
379      -1- ARRAY_REF = var
380      -2- var = ARRAY_REF
381      (This should have been verified in analyze_data_refs).
382 
383      'var' in the second case corresponds to a def, not a use,
384      so USE cannot correspond to any operands that are not used
385      for array indexing.
386 
387      Therefore, all we need to check is if STMT falls into the
388      first case, and whether var corresponds to USE.  */
389 
390   if (!gimple_assign_copy_p (stmt))
391     {
392       if (is_gimple_call (stmt)
393             && gimple_call_internal_p (stmt))
394           {
395             internal_fn ifn = gimple_call_internal_fn (stmt);
396             int mask_index = internal_fn_mask_index (ifn);
397             if (mask_index >= 0
398                 && use == gimple_call_arg (stmt, mask_index))
399               return true;
400             int stored_value_index = internal_fn_stored_value_index (ifn);
401             if (stored_value_index >= 0
402                 && use == gimple_call_arg (stmt, stored_value_index))
403               return true;
404             if (internal_gather_scatter_fn_p (ifn)
405                 && use == gimple_call_arg (stmt, 1))
406               return true;
407           }
408       return false;
409     }
410 
411   if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
412     return false;
413   operand = gimple_assign_rhs1 (stmt);
414   if (TREE_CODE (operand) != SSA_NAME)
415     return false;
416 
417   if (operand == use)
418     return true;
419 
420   return false;
421 }
422 
423 
424 /*
425    Function process_use.
426 
427    Inputs:
428    - a USE in STMT in a loop represented by LOOP_VINFO
429    - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
430      that defined USE.  This is done by calling mark_relevant and passing it
431      the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
432    - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
433      be performed.
434 
435    Outputs:
436    Generally, LIVE_P and RELEVANT are used to define the liveness and
437    relevance info of the DEF_STMT of this USE:
438        STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
439        STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
440    Exceptions:
441    - case 1: If USE is used only for address computations (e.g. array indexing),
442    which does not need to be directly vectorized, then the liveness/relevance
443    of the respective DEF_STMT is left unchanged.
444    - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
445    skip DEF_STMT cause it had already been processed.
446    - case 3: If DEF_STMT and STMT are in different nests, then  "relevant" will
447    be modified accordingly.
448 
449    Return true if everything is as expected. Return false otherwise.  */
450 
451 static bool
process_use(gimple * stmt,tree use,loop_vec_info loop_vinfo,enum vect_relevant relevant,vec<gimple * > * worklist,bool force)452 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
453                enum vect_relevant relevant, vec<gimple *> *worklist,
454                bool force)
455 {
456   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
457   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
458   stmt_vec_info dstmt_vinfo;
459   basic_block bb, def_bb;
460   gimple *def_stmt;
461   enum vect_def_type dt;
462 
463   /* case 1: we are only interested in uses that need to be vectorized.  Uses
464      that are used for address computation are not considered relevant.  */
465   if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
466      return true;
467 
468   if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
469     {
470       if (dump_enabled_p ())
471         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
472                          "not vectorized: unsupported use in stmt.\n");
473       return false;
474     }
475 
476   if (!def_stmt || gimple_nop_p (def_stmt))
477     return true;
478 
479   def_bb = gimple_bb (def_stmt);
480   if (!flow_bb_inside_loop_p (loop, def_bb))
481     {
482       if (dump_enabled_p ())
483           dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
484       return true;
485     }
486 
487   /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
488      DEF_STMT must have already been processed, because this should be the
489      only way that STMT, which is a reduction-phi, was put in the worklist,
490      as there should be no other uses for DEF_STMT in the loop.  So we just
491      check that everything is as expected, and we are done.  */
492   dstmt_vinfo = vinfo_for_stmt (def_stmt);
493   bb = gimple_bb (stmt);
494   if (gimple_code (stmt) == GIMPLE_PHI
495       && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
496       && gimple_code (def_stmt) != GIMPLE_PHI
497       && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
498       && bb->loop_father == def_bb->loop_father)
499     {
500       if (dump_enabled_p ())
501           dump_printf_loc (MSG_NOTE, vect_location,
502                          "reduc-stmt defining reduc-phi in the same nest.\n");
503       if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
504           dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
505       gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
506       gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
507                       || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
508       return true;
509     }
510 
511   /* case 3a: outer-loop stmt defining an inner-loop stmt:
512           outer-loop-header-bb:
513                     d = def_stmt
514           inner-loop:
515                     stmt # use (d)
516           outer-loop-tail-bb:
517                     ...                   */
518   if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
519     {
520       if (dump_enabled_p ())
521           dump_printf_loc (MSG_NOTE, vect_location,
522                          "outer-loop def-stmt defining inner-loop stmt.\n");
523 
524       switch (relevant)
525           {
526           case vect_unused_in_scope:
527             relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
528                           vect_used_in_scope : vect_unused_in_scope;
529             break;
530 
531           case vect_used_in_outer_by_reduction:
532           gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
533             relevant = vect_used_by_reduction;
534             break;
535 
536           case vect_used_in_outer:
537           gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
538             relevant = vect_used_in_scope;
539             break;
540 
541           case vect_used_in_scope:
542             break;
543 
544           default:
545             gcc_unreachable ();
546           }
547     }
548 
549   /* case 3b: inner-loop stmt defining an outer-loop stmt:
550           outer-loop-header-bb:
551                     ...
552           inner-loop:
553                     d = def_stmt
554           outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
555                     stmt # use (d)                */
556   else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
557     {
558       if (dump_enabled_p ())
559           dump_printf_loc (MSG_NOTE, vect_location,
560                          "inner-loop def-stmt defining outer-loop stmt.\n");
561 
562       switch (relevant)
563         {
564         case vect_unused_in_scope:
565           relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
566             || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
567                       vect_used_in_outer_by_reduction : vect_unused_in_scope;
568           break;
569 
570         case vect_used_by_reduction:
571           case vect_used_only_live:
572           relevant = vect_used_in_outer_by_reduction;
573           break;
574 
575         case vect_used_in_scope:
576           relevant = vect_used_in_outer;
577           break;
578 
579         default:
580           gcc_unreachable ();
581         }
582     }
583   /* We are also not interested in uses on loop PHI backedges that are
584      inductions.  Otherwise we'll needlessly vectorize the IV increment
585      and cause hybrid SLP for SLP inductions.  Unless the PHI is live
586      of course.  */
587   else if (gimple_code (stmt) == GIMPLE_PHI
588              && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
589              && ! STMT_VINFO_LIVE_P (stmt_vinfo)
590              && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
591                  == use))
592     {
593       if (dump_enabled_p ())
594           dump_printf_loc (MSG_NOTE, vect_location,
595                          "induction value on backedge.\n");
596       return true;
597     }
598 
599 
600   vect_mark_relevant (worklist, def_stmt, relevant, false);
601   return true;
602 }
603 
604 
605 /* Function vect_mark_stmts_to_be_vectorized.
606 
607    Not all stmts in the loop need to be vectorized. For example:
608 
609      for i...
610        for j...
611    1.    T0 = i + j
612    2.      T1 = a[T0]
613 
614    3.    j = j + 1
615 
616    Stmt 1 and 3 do not need to be vectorized, because loop control and
617    addressing of vectorized data-refs are handled differently.
618 
619    This pass detects such stmts.  */
620 
621 bool
vect_mark_stmts_to_be_vectorized(loop_vec_info loop_vinfo)622 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
623 {
624   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
625   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
626   unsigned int nbbs = loop->num_nodes;
627   gimple_stmt_iterator si;
628   gimple *stmt;
629   unsigned int i;
630   stmt_vec_info stmt_vinfo;
631   basic_block bb;
632   gimple *phi;
633   bool live_p;
634   enum vect_relevant relevant;
635 
636   if (dump_enabled_p ())
637     dump_printf_loc (MSG_NOTE, vect_location,
638                      "=== vect_mark_stmts_to_be_vectorized ===\n");
639 
640   auto_vec<gimple *, 64> worklist;
641 
642   /* 1. Init worklist.  */
643   for (i = 0; i < nbbs; i++)
644     {
645       bb = bbs[i];
646       for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
647           {
648             phi = gsi_stmt (si);
649             if (dump_enabled_p ())
650               {
651                 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
652                 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
653               }
654 
655             if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
656               vect_mark_relevant (&worklist, phi, relevant, live_p);
657           }
658       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
659           {
660             stmt = gsi_stmt (si);
661             if (dump_enabled_p ())
662               {
663                 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
664                 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
665               }
666 
667             if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
668               vect_mark_relevant (&worklist, stmt, relevant, live_p);
669           }
670     }
671 
672   /* 2. Process_worklist */
673   while (worklist.length () > 0)
674     {
675       use_operand_p use_p;
676       ssa_op_iter iter;
677 
678       stmt = worklist.pop ();
679       if (dump_enabled_p ())
680           {
681           dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
682           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
683           }
684 
685       /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
686            (DEF_STMT) as relevant/irrelevant according to the relevance property
687            of STMT.  */
688       stmt_vinfo = vinfo_for_stmt (stmt);
689       relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
690 
691       /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
692            propagated as is to the DEF_STMTs of its USEs.
693 
694            One exception is when STMT has been identified as defining a reduction
695            variable; in this case we set the relevance to vect_used_by_reduction.
696            This is because we distinguish between two kinds of relevant stmts -
697            those that are used by a reduction computation, and those that are
698            (also) used by a regular computation.  This allows us later on to
699            identify stmts that are used solely by a reduction, and therefore the
700            order of the results that they produce does not have to be kept.  */
701 
702       switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
703         {
704           case vect_reduction_def:
705               gcc_assert (relevant != vect_unused_in_scope);
706               if (relevant != vect_unused_in_scope
707                     && relevant != vect_used_in_scope
708                     && relevant != vect_used_by_reduction
709                     && relevant != vect_used_only_live)
710                 {
711                     if (dump_enabled_p ())
712                       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
713                                            "unsupported use of reduction.\n");
714                     return false;
715                 }
716               break;
717 
718           case vect_nested_cycle:
719               if (relevant != vect_unused_in_scope
720                     && relevant != vect_used_in_outer_by_reduction
721                     && relevant != vect_used_in_outer)
722               {
723                 if (dump_enabled_p ())
724                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
725                                    "unsupported use of nested cycle.\n");
726 
727                 return false;
728               }
729             break;
730 
731           case vect_double_reduction_def:
732               if (relevant != vect_unused_in_scope
733                     && relevant != vect_used_by_reduction
734                     && relevant != vect_used_only_live)
735               {
736                 if (dump_enabled_p ())
737                   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
738                                    "unsupported use of double reduction.\n");
739 
740                 return false;
741               }
742             break;
743 
744           default:
745             break;
746         }
747 
748       if (is_pattern_stmt_p (stmt_vinfo))
749         {
750           /* Pattern statements are not inserted into the code, so
751              FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
752              have to scan the RHS or function arguments instead.  */
753           if (is_gimple_assign (stmt))
754             {
755                 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
756                 tree op = gimple_assign_rhs1 (stmt);
757 
758                 i = 1;
759                 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
760                     {
761                       if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
762                                             relevant, &worklist, false)
763                           || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
764                                                relevant, &worklist, false))
765                         return false;
766                       i = 2;
767                     }
768                 for (; i < gimple_num_ops (stmt); i++)
769                 {
770                       op = gimple_op (stmt, i);
771                   if (TREE_CODE (op) == SSA_NAME
772                           && !process_use (stmt, op, loop_vinfo, relevant,
773                                                &worklist, false))
774                     return false;
775                  }
776             }
777           else if (is_gimple_call (stmt))
778             {
779               for (i = 0; i < gimple_call_num_args (stmt); i++)
780                 {
781                   tree arg = gimple_call_arg (stmt, i);
782                       if (!process_use (stmt, arg, loop_vinfo, relevant,
783                                             &worklist, false))
784                     return false;
785                 }
786             }
787         }
788       else
789         FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
790           {
791             tree op = USE_FROM_PTR (use_p);
792               if (!process_use (stmt, op, loop_vinfo, relevant,
793                                     &worklist, false))
794               return false;
795           }
796 
797       if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
798           {
799             gather_scatter_info gs_info;
800             if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
801               gcc_unreachable ();
802             if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
803                                   &worklist, true))
804               return false;
805           }
806     } /* while worklist */
807 
808   return true;
809 }
810 
811 
812 /* Function vect_model_simple_cost.
813 
814    Models cost for simple operations, i.e. those that only emit ncopies of a
815    single op.  Right now, this does not account for multiple insns that could
816    be generated for the single vector op.  We will handle that shortly.  */
817 
818 void
vect_model_simple_cost(stmt_vec_info stmt_info,int ncopies,enum vect_def_type * dt,int ndts,stmt_vector_for_cost * prologue_cost_vec,stmt_vector_for_cost * body_cost_vec)819 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
820                               enum vect_def_type *dt,
821                               int ndts,
822                               stmt_vector_for_cost *prologue_cost_vec,
823                               stmt_vector_for_cost *body_cost_vec)
824 {
825   int i;
826   int inside_cost = 0, prologue_cost = 0;
827 
828   /* The SLP costs were already calculated during SLP tree build.  */
829   gcc_assert (!PURE_SLP_STMT (stmt_info));
830 
831   /* Cost the "broadcast" of a scalar operand in to a vector operand.
832      Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
833      cost model.  */
834   for (i = 0; i < ndts; i++)
835     if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
836       prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
837                                                    stmt_info, 0, vect_prologue);
838 
839   /* Pass the inside-of-loop statements to the target-specific cost model.  */
840   inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
841                                           stmt_info, 0, vect_body);
842 
843   if (dump_enabled_p ())
844     dump_printf_loc (MSG_NOTE, vect_location,
845                      "vect_model_simple_cost: inside_cost = %d, "
846                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
847 }
848 
849 
850 /* Model cost for type demotion and promotion operations.  PWR is normally
851    zero for single-step promotions and demotions.  It will be one if
852    two-step promotion/demotion is required, and so on.  Each additional
853    step doubles the number of instructions required.  */
854 
855 static void
vect_model_promotion_demotion_cost(stmt_vec_info stmt_info,enum vect_def_type * dt,int pwr)856 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
857                                             enum vect_def_type *dt, int pwr)
858 {
859   int i, tmp;
860   int inside_cost = 0, prologue_cost = 0;
861   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
862   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
863   void *target_cost_data;
864 
865   /* The SLP costs were already calculated during SLP tree build.  */
866   gcc_assert (!PURE_SLP_STMT (stmt_info));
867 
868   if (loop_vinfo)
869     target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
870   else
871     target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
872 
873   for (i = 0; i < pwr + 1; i++)
874     {
875       tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
876           (i + 1) : i;
877       inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
878                                             vec_promote_demote, stmt_info, 0,
879                                             vect_body);
880     }
881 
882   /* FORNOW: Assuming maximum 2 args per stmts.  */
883   for (i = 0; i < 2; i++)
884     if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
885       prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
886                                               stmt_info, 0, vect_prologue);
887 
888   if (dump_enabled_p ())
889     dump_printf_loc (MSG_NOTE, vect_location,
890                      "vect_model_promotion_demotion_cost: inside_cost = %d, "
891                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
892 }
893 
894 /* Function vect_model_store_cost
895 
896    Models cost for stores.  In the case of grouped accesses, one access
897    has the overhead of the grouped access attributed to it.  */
898 
899 void
vect_model_store_cost(stmt_vec_info stmt_info,int ncopies,vect_memory_access_type memory_access_type,vec_load_store_type vls_type,slp_tree slp_node,stmt_vector_for_cost * prologue_cost_vec,stmt_vector_for_cost * body_cost_vec)900 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
901                            vect_memory_access_type memory_access_type,
902                            vec_load_store_type vls_type, slp_tree slp_node,
903                            stmt_vector_for_cost *prologue_cost_vec,
904                            stmt_vector_for_cost *body_cost_vec)
905 {
906   unsigned int inside_cost = 0, prologue_cost = 0;
907   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
908   gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
909   bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
910 
911   if (vls_type == VLS_STORE_INVARIANT)
912     prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
913                                                stmt_info, 0, vect_prologue);
914 
915   /* Grouped stores update all elements in the group at once,
916      so we want the DR for the first statement.  */
917   if (!slp_node && grouped_access_p)
918     {
919       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
920       dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
921     }
922 
923   /* True if we should include any once-per-group costs as well as
924      the cost of the statement itself.  For SLP we only get called
925      once per group anyhow.  */
926   bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
927 
928   /* We assume that the cost of a single store-lanes instruction is
929      equivalent to the cost of GROUP_SIZE separate stores.  If a grouped
930      access is instead being provided by a permute-and-store operation,
931      include the cost of the permutes.  */
932   if (first_stmt_p
933       && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
934     {
935       /* Uses a high and low interleave or shuffle operations for each
936            needed permute.  */
937       int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
938       int nstmts = ncopies * ceil_log2 (group_size) * group_size;
939       inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
940                                               stmt_info, 0, vect_body);
941 
942       if (dump_enabled_p ())
943         dump_printf_loc (MSG_NOTE, vect_location,
944                          "vect_model_store_cost: strided group_size = %d .\n",
945                          group_size);
946     }
947 
948   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
949   /* Costs of the stores.  */
950   if (memory_access_type == VMAT_ELEMENTWISE
951       || memory_access_type == VMAT_GATHER_SCATTER)
952     {
953       /* N scalar stores plus extracting the elements.  */
954       unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
955       inside_cost += record_stmt_cost (body_cost_vec,
956                                                ncopies * assumed_nunits,
957                                                scalar_store, stmt_info, 0, vect_body);
958     }
959   else
960     vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
961 
962   if (memory_access_type == VMAT_ELEMENTWISE
963       || memory_access_type == VMAT_STRIDED_SLP)
964     {
965       /* N scalar stores plus extracting the elements.  */
966       unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
967       inside_cost += record_stmt_cost (body_cost_vec,
968                                                ncopies * assumed_nunits,
969                                                vec_to_scalar, stmt_info, 0, vect_body);
970     }
971 
972   if (dump_enabled_p ())
973     dump_printf_loc (MSG_NOTE, vect_location,
974                      "vect_model_store_cost: inside_cost = %d, "
975                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
976 }
977 
978 
979 /* Calculate cost of DR's memory access.  */
980 void
vect_get_store_cost(struct data_reference * dr,int ncopies,unsigned int * inside_cost,stmt_vector_for_cost * body_cost_vec)981 vect_get_store_cost (struct data_reference *dr, int ncopies,
982                          unsigned int *inside_cost,
983                          stmt_vector_for_cost *body_cost_vec)
984 {
985   int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
986   gimple *stmt = DR_STMT (dr);
987   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
988 
989   switch (alignment_support_scheme)
990     {
991     case dr_aligned:
992       {
993           *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
994                                                     vector_store, stmt_info, 0,
995                                                     vect_body);
996 
997         if (dump_enabled_p ())
998           dump_printf_loc (MSG_NOTE, vect_location,
999                            "vect_model_store_cost: aligned.\n");
1000         break;
1001       }
1002 
1003     case dr_unaligned_supported:
1004       {
1005         /* Here, we assign an additional cost for the unaligned store.  */
1006           *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1007                                                     unaligned_store, stmt_info,
1008                                                     DR_MISALIGNMENT (dr), vect_body);
1009         if (dump_enabled_p ())
1010           dump_printf_loc (MSG_NOTE, vect_location,
1011                            "vect_model_store_cost: unaligned supported by "
1012                            "hardware.\n");
1013         break;
1014       }
1015 
1016     case dr_unaligned_unsupported:
1017       {
1018         *inside_cost = VECT_MAX_COST;
1019 
1020         if (dump_enabled_p ())
1021           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1022                            "vect_model_store_cost: unsupported access.\n");
1023         break;
1024       }
1025 
1026     default:
1027       gcc_unreachable ();
1028     }
1029 }
1030 
1031 
1032 /* Function vect_model_load_cost
1033 
1034    Models cost for loads.  In the case of grouped accesses, one access has
1035    the overhead of the grouped access attributed to it.  Since unaligned
1036    accesses are supported for loads, we also account for the costs of the
1037    access scheme chosen.  */
1038 
1039 void
vect_model_load_cost(stmt_vec_info stmt_info,int ncopies,vect_memory_access_type memory_access_type,slp_tree slp_node,stmt_vector_for_cost * prologue_cost_vec,stmt_vector_for_cost * body_cost_vec)1040 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1041                           vect_memory_access_type memory_access_type,
1042                           slp_tree slp_node,
1043                           stmt_vector_for_cost *prologue_cost_vec,
1044                           stmt_vector_for_cost *body_cost_vec)
1045 {
1046   gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
1047   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1048   unsigned int inside_cost = 0, prologue_cost = 0;
1049   bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1050 
1051   /* Grouped loads read all elements in the group at once,
1052      so we want the DR for the first statement.  */
1053   if (!slp_node && grouped_access_p)
1054     {
1055       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1056       dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1057     }
1058 
1059   /* True if we should include any once-per-group costs as well as
1060      the cost of the statement itself.  For SLP we only get called
1061      once per group anyhow.  */
1062   bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1063 
1064   /* We assume that the cost of a single load-lanes instruction is
1065      equivalent to the cost of GROUP_SIZE separate loads.  If a grouped
1066      access is instead being provided by a load-and-permute operation,
1067      include the cost of the permutes.  */
1068   if (first_stmt_p
1069       && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1070     {
1071       /* Uses an even and odd extract operations or shuffle operations
1072            for each needed permute.  */
1073       int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1074       int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1075       inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1076                                               stmt_info, 0, vect_body);
1077 
1078       if (dump_enabled_p ())
1079         dump_printf_loc (MSG_NOTE, vect_location,
1080                          "vect_model_load_cost: strided group_size = %d .\n",
1081                          group_size);
1082     }
1083 
1084   /* The loads themselves.  */
1085   if (memory_access_type == VMAT_ELEMENTWISE
1086       || memory_access_type == VMAT_GATHER_SCATTER)
1087     {
1088       /* N scalar loads plus gathering them into a vector.  */
1089       tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1090       unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1091       inside_cost += record_stmt_cost (body_cost_vec,
1092                                                ncopies * assumed_nunits,
1093                                                scalar_load, stmt_info, 0, vect_body);
1094     }
1095   else
1096     vect_get_load_cost (dr, ncopies, first_stmt_p,
1097                               &inside_cost, &prologue_cost,
1098                               prologue_cost_vec, body_cost_vec, true);
1099   if (memory_access_type == VMAT_ELEMENTWISE
1100       || memory_access_type == VMAT_STRIDED_SLP)
1101     inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1102                                              stmt_info, 0, vect_body);
1103 
1104   if (dump_enabled_p ())
1105     dump_printf_loc (MSG_NOTE, vect_location,
1106                      "vect_model_load_cost: inside_cost = %d, "
1107                      "prologue_cost = %d .\n", inside_cost, prologue_cost);
1108 }
1109 
1110 
1111 /* Calculate cost of DR's memory access.  */
1112 void
vect_get_load_cost(struct data_reference * dr,int ncopies,bool add_realign_cost,unsigned int * inside_cost,unsigned int * prologue_cost,stmt_vector_for_cost * prologue_cost_vec,stmt_vector_for_cost * body_cost_vec,bool record_prologue_costs)1113 vect_get_load_cost (struct data_reference *dr, int ncopies,
1114                         bool add_realign_cost, unsigned int *inside_cost,
1115                         unsigned int *prologue_cost,
1116                         stmt_vector_for_cost *prologue_cost_vec,
1117                         stmt_vector_for_cost *body_cost_vec,
1118                         bool record_prologue_costs)
1119 {
1120   int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1121   gimple *stmt = DR_STMT (dr);
1122   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1123 
1124   switch (alignment_support_scheme)
1125     {
1126     case dr_aligned:
1127       {
1128           *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1129                                                     stmt_info, 0, vect_body);
1130 
1131         if (dump_enabled_p ())
1132           dump_printf_loc (MSG_NOTE, vect_location,
1133                            "vect_model_load_cost: aligned.\n");
1134 
1135         break;
1136       }
1137     case dr_unaligned_supported:
1138       {
1139         /* Here, we assign an additional cost for the unaligned load.  */
1140           *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1141                                                     unaligned_load, stmt_info,
1142                                                     DR_MISALIGNMENT (dr), vect_body);
1143 
1144         if (dump_enabled_p ())
1145           dump_printf_loc (MSG_NOTE, vect_location,
1146                            "vect_model_load_cost: unaligned supported by "
1147                            "hardware.\n");
1148 
1149         break;
1150       }
1151     case dr_explicit_realign:
1152       {
1153           *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1154                                                     vector_load, stmt_info, 0, vect_body);
1155           *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1156                                                     vec_perm, stmt_info, 0, vect_body);
1157 
1158         /* FIXME: If the misalignment remains fixed across the iterations of
1159            the containing loop, the following cost should be added to the
1160            prologue costs.  */
1161         if (targetm.vectorize.builtin_mask_for_load)
1162             *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1163                                                       stmt_info, 0, vect_body);
1164 
1165         if (dump_enabled_p ())
1166           dump_printf_loc (MSG_NOTE, vect_location,
1167                            "vect_model_load_cost: explicit realign\n");
1168 
1169         break;
1170       }
1171     case dr_explicit_realign_optimized:
1172       {
1173         if (dump_enabled_p ())
1174           dump_printf_loc (MSG_NOTE, vect_location,
1175                            "vect_model_load_cost: unaligned software "
1176                            "pipelined.\n");
1177 
1178         /* Unaligned software pipeline has a load of an address, an initial
1179            load, and possibly a mask operation to "prime" the loop.  However,
1180            if this is an access in a group of loads, which provide grouped
1181            access, then the above cost should only be considered for one
1182            access in the group.  Inside the loop, there is a load op
1183            and a realignment op.  */
1184 
1185         if (add_realign_cost && record_prologue_costs)
1186           {
1187               *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1188                                                             vector_stmt, stmt_info,
1189                                                             0, vect_prologue);
1190             if (targetm.vectorize.builtin_mask_for_load)
1191                 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1192                                                               vector_stmt, stmt_info,
1193                                                               0, vect_prologue);
1194           }
1195 
1196           *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1197                                                     stmt_info, 0, vect_body);
1198           *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1199                                                     stmt_info, 0, vect_body);
1200 
1201         if (dump_enabled_p ())
1202           dump_printf_loc (MSG_NOTE, vect_location,
1203                            "vect_model_load_cost: explicit realign optimized"
1204                            "\n");
1205 
1206         break;
1207       }
1208 
1209     case dr_unaligned_unsupported:
1210       {
1211         *inside_cost = VECT_MAX_COST;
1212 
1213         if (dump_enabled_p ())
1214           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1215                            "vect_model_load_cost: unsupported access.\n");
1216         break;
1217       }
1218 
1219     default:
1220       gcc_unreachable ();
1221     }
1222 }
1223 
1224 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1225    the loop preheader for the vectorized stmt STMT.  */
1226 
1227 static void
vect_init_vector_1(gimple * stmt,gimple * new_stmt,gimple_stmt_iterator * gsi)1228 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1229 {
1230   if (gsi)
1231     vect_finish_stmt_generation (stmt, new_stmt, gsi);
1232   else
1233     {
1234       stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1235       loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1236 
1237       if (loop_vinfo)
1238         {
1239           struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1240             basic_block new_bb;
1241             edge pe;
1242 
1243           if (nested_in_vect_loop_p (loop, stmt))
1244             loop = loop->inner;
1245 
1246             pe = loop_preheader_edge (loop);
1247           new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1248           gcc_assert (!new_bb);
1249           }
1250       else
1251        {
1252           bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1253           basic_block bb;
1254           gimple_stmt_iterator gsi_bb_start;
1255 
1256           gcc_assert (bb_vinfo);
1257           bb = BB_VINFO_BB (bb_vinfo);
1258           gsi_bb_start = gsi_after_labels (bb);
1259           gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1260        }
1261     }
1262 
1263   if (dump_enabled_p ())
1264     {
1265       dump_printf_loc (MSG_NOTE, vect_location,
1266                        "created new init_stmt: ");
1267       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1268     }
1269 }
1270 
1271 /* Function vect_init_vector.
1272 
1273    Insert a new stmt (INIT_STMT) that initializes a new variable of type
1274    TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
1275    vector type a vector with all elements equal to VAL is created first.
1276    Place the initialization at BSI if it is not NULL.  Otherwise, place the
1277    initialization at the loop preheader.
1278    Return the DEF of INIT_STMT.
1279    It will be used in the vectorization of STMT.  */
1280 
1281 tree
vect_init_vector(gimple * stmt,tree val,tree type,gimple_stmt_iterator * gsi)1282 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1283 {
1284   gimple *init_stmt;
1285   tree new_temp;
1286 
1287   /* We abuse this function to push sth to a SSA name with initial 'val'.  */
1288   if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1289     {
1290       gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1291       if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1292           {
1293             /* Scalar boolean value should be transformed into
1294                all zeros or all ones value before building a vector.  */
1295             if (VECTOR_BOOLEAN_TYPE_P (type))
1296               {
1297                 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1298                 tree false_val = build_zero_cst (TREE_TYPE (type));
1299 
1300                 if (CONSTANT_CLASS_P (val))
1301                     val = integer_zerop (val) ? false_val : true_val;
1302                 else
1303                     {
1304                       new_temp = make_ssa_name (TREE_TYPE (type));
1305                       init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1306                                                                val, true_val, false_val);
1307                       vect_init_vector_1 (stmt, init_stmt, gsi);
1308                       val = new_temp;
1309                     }
1310               }
1311             else if (CONSTANT_CLASS_P (val))
1312               val = fold_convert (TREE_TYPE (type), val);
1313             else
1314               {
1315                 new_temp = make_ssa_name (TREE_TYPE (type));
1316                 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1317                     init_stmt = gimple_build_assign (new_temp,
1318                                                              fold_build1 (VIEW_CONVERT_EXPR,
1319                                                                             TREE_TYPE (type),
1320                                                                             val));
1321                 else
1322                     init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1323                 vect_init_vector_1 (stmt, init_stmt, gsi);
1324                 val = new_temp;
1325               }
1326           }
1327       val = build_vector_from_val (type, val);
1328     }
1329 
1330   new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1331   init_stmt = gimple_build_assign  (new_temp, val);
1332   vect_init_vector_1 (stmt, init_stmt, gsi);
1333   return new_temp;
1334 }
1335 
1336 /* Function vect_get_vec_def_for_operand_1.
1337 
1338    For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1339    DT that will be used in the vectorized stmt.  */
1340 
1341 tree
vect_get_vec_def_for_operand_1(gimple * def_stmt,enum vect_def_type dt)1342 vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
1343 {
1344   tree vec_oprnd;
1345   gimple *vec_stmt;
1346   stmt_vec_info def_stmt_info = NULL;
1347 
1348   switch (dt)
1349     {
1350     /* operand is a constant or a loop invariant.  */
1351     case vect_constant_def:
1352     case vect_external_def:
1353       /* Code should use vect_get_vec_def_for_operand.  */
1354       gcc_unreachable ();
1355 
1356     /* operand is defined inside the loop.  */
1357     case vect_internal_def:
1358       {
1359         /* Get the def from the vectorized stmt.  */
1360         def_stmt_info = vinfo_for_stmt (def_stmt);
1361 
1362         vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1363         /* Get vectorized pattern statement.  */
1364         if (!vec_stmt
1365             && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1366             && !STMT_VINFO_RELEVANT (def_stmt_info))
1367           vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1368                        STMT_VINFO_RELATED_STMT (def_stmt_info)));
1369         gcc_assert (vec_stmt);
1370           if (gimple_code (vec_stmt) == GIMPLE_PHI)
1371             vec_oprnd = PHI_RESULT (vec_stmt);
1372           else if (is_gimple_call (vec_stmt))
1373             vec_oprnd = gimple_call_lhs (vec_stmt);
1374           else
1375             vec_oprnd = gimple_assign_lhs (vec_stmt);
1376         return vec_oprnd;
1377       }
1378 
1379     /* operand is defined by a loop header phi.  */
1380     case vect_reduction_def:
1381     case vect_double_reduction_def:
1382     case vect_nested_cycle:
1383     case vect_induction_def:
1384       {
1385           gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1386 
1387         /* Get the def from the vectorized stmt.  */
1388         def_stmt_info = vinfo_for_stmt (def_stmt);
1389         vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1390           if (gimple_code (vec_stmt) == GIMPLE_PHI)
1391             vec_oprnd = PHI_RESULT (vec_stmt);
1392           else
1393             vec_oprnd = gimple_get_lhs (vec_stmt);
1394         return vec_oprnd;
1395       }
1396 
1397     default:
1398       gcc_unreachable ();
1399     }
1400 }
1401 
1402 
1403 /* Function vect_get_vec_def_for_operand.
1404 
1405    OP is an operand in STMT.  This function returns a (vector) def that will be
1406    used in the vectorized stmt for STMT.
1407 
1408    In the case that OP is an SSA_NAME which is defined in the loop, then
1409    STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1410 
1411    In case OP is an invariant or constant, a new stmt that creates a vector def
1412    needs to be introduced.  VECTYPE may be used to specify a required type for
1413    vector invariant.  */
1414 
1415 tree
vect_get_vec_def_for_operand(tree op,gimple * stmt,tree vectype)1416 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1417 {
1418   gimple *def_stmt;
1419   enum vect_def_type dt;
1420   bool is_simple_use;
1421   stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1422   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1423 
1424   if (dump_enabled_p ())
1425     {
1426       dump_printf_loc (MSG_NOTE, vect_location,
1427                        "vect_get_vec_def_for_operand: ");
1428       dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1429       dump_printf (MSG_NOTE, "\n");
1430     }
1431 
1432   is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1433   gcc_assert (is_simple_use);
1434   if (def_stmt && dump_enabled_p ())
1435     {
1436       dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  ");
1437       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1438     }
1439 
1440   if (dt == vect_constant_def || dt == vect_external_def)
1441     {
1442       tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1443       tree vector_type;
1444 
1445       if (vectype)
1446           vector_type = vectype;
1447       else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1448                  && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1449           vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1450       else
1451           vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1452 
1453       gcc_assert (vector_type);
1454       return vect_init_vector (stmt, op, vector_type, NULL);
1455     }
1456   else
1457     return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1458 }
1459 
1460 
1461 /* Function vect_get_vec_def_for_stmt_copy
1462 
1463    Return a vector-def for an operand.  This function is used when the
1464    vectorized stmt to be created (by the caller to this function) is a "copy"
1465    created in case the vectorized result cannot fit in one vector, and several
1466    copies of the vector-stmt are required.  In this case the vector-def is
1467    retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1468    of the stmt that defines VEC_OPRND.
1469    DT is the type of the vector def VEC_OPRND.
1470 
1471    Context:
1472         In case the vectorization factor (VF) is bigger than the number
1473    of elements that can fit in a vectype (nunits), we have to generate
1474    more than one vector stmt to vectorize the scalar stmt.  This situation
1475    arises when there are multiple data-types operated upon in the loop; the
1476    smallest data-type determines the VF, and as a result, when vectorizing
1477    stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1478    vector stmt (each computing a vector of 'nunits' results, and together
1479    computing 'VF' results in each iteration).  This function is called when
1480    vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1481    which VF=16 and nunits=4, so the number of copies required is 4):
1482 
1483    scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT
1484 
1485    S1: x = load         VS1.0:  vx.0 = memref0      VS1.1
1486                         VS1.1:  vx.1 = memref1      VS1.2
1487                         VS1.2:  vx.2 = memref2      VS1.3
1488                         VS1.3:  vx.3 = memref3
1489 
1490    S2: z = x + ...      VSnew.0:  vz0 = vx.0 + ...  VSnew.1
1491                         VSnew.1:  vz1 = vx.1 + ...  VSnew.2
1492                         VSnew.2:  vz2 = vx.2 + ...  VSnew.3
1493                         VSnew.3:  vz3 = vx.3 + ...
1494 
1495    The vectorization of S1 is explained in vectorizable_load.
1496    The vectorization of S2:
1497         To create the first vector-stmt out of the 4 copies - VSnew.0 -
1498    the function 'vect_get_vec_def_for_operand' is called to
1499    get the relevant vector-def for each operand of S2.  For operand x it
1500    returns  the vector-def 'vx.0'.
1501 
1502         To create the remaining copies of the vector-stmt (VSnew.j), this
1503    function is called to get the relevant vector-def for each operand.  It is
1504    obtained from the respective VS1.j stmt, which is recorded in the
1505    STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1506 
1507         For example, to obtain the vector-def 'vx.1' in order to create the
1508    vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1509    Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1510    STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1511    and return its def ('vx.1').
1512    Overall, to create the above sequence this function will be called 3 times:
1513         vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1514         vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1515         vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);  */
1516 
1517 tree
vect_get_vec_def_for_stmt_copy(enum vect_def_type dt,tree vec_oprnd)1518 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1519 {
1520   gimple *vec_stmt_for_operand;
1521   stmt_vec_info def_stmt_info;
1522 
1523   /* Do nothing; can reuse same def.  */
1524   if (dt == vect_external_def || dt == vect_constant_def )
1525     return vec_oprnd;
1526 
1527   vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1528   def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1529   gcc_assert (def_stmt_info);
1530   vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1531   gcc_assert (vec_stmt_for_operand);
1532   if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1533     vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1534   else
1535     vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1536   return vec_oprnd;
1537 }
1538 
1539 
1540 /* Get vectorized definitions for the operands to create a copy of an original
1541    stmt.  See vect_get_vec_def_for_stmt_copy () for details.  */
1542 
1543 void
vect_get_vec_defs_for_stmt_copy(enum vect_def_type * dt,vec<tree> * vec_oprnds0,vec<tree> * vec_oprnds1)1544 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1545                                          vec<tree> *vec_oprnds0,
1546                                          vec<tree> *vec_oprnds1)
1547 {
1548   tree vec_oprnd = vec_oprnds0->pop ();
1549 
1550   vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1551   vec_oprnds0->quick_push (vec_oprnd);
1552 
1553   if (vec_oprnds1 && vec_oprnds1->length ())
1554     {
1555       vec_oprnd = vec_oprnds1->pop ();
1556       vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1557       vec_oprnds1->quick_push (vec_oprnd);
1558     }
1559 }
1560 
1561 
1562 /* Get vectorized definitions for OP0 and OP1.  */
1563 
1564 void
vect_get_vec_defs(tree op0,tree op1,gimple * stmt,vec<tree> * vec_oprnds0,vec<tree> * vec_oprnds1,slp_tree slp_node)1565 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1566                        vec<tree> *vec_oprnds0,
1567                        vec<tree> *vec_oprnds1,
1568                        slp_tree slp_node)
1569 {
1570   if (slp_node)
1571     {
1572       int nops = (op1 == NULL_TREE) ? 1 : 2;
1573       auto_vec<tree> ops (nops);
1574       auto_vec<vec<tree> > vec_defs (nops);
1575 
1576       ops.quick_push (op0);
1577       if (op1)
1578         ops.quick_push (op1);
1579 
1580       vect_get_slp_defs (ops, slp_node, &vec_defs);
1581 
1582       *vec_oprnds0 = vec_defs[0];
1583       if (op1)
1584           *vec_oprnds1 = vec_defs[1];
1585     }
1586   else
1587     {
1588       tree vec_oprnd;
1589 
1590       vec_oprnds0->create (1);
1591       vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1592       vec_oprnds0->quick_push (vec_oprnd);
1593 
1594       if (op1)
1595           {
1596             vec_oprnds1->create (1);
1597             vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1598             vec_oprnds1->quick_push (vec_oprnd);
1599           }
1600     }
1601 }
1602 
1603 /* Helper function called by vect_finish_replace_stmt and
1604    vect_finish_stmt_generation.  Set the location of the new
1605    statement and create a stmt_vec_info for it.  */
1606 
1607 static void
vect_finish_stmt_generation_1(gimple * stmt,gimple * vec_stmt)1608 vect_finish_stmt_generation_1 (gimple *stmt, gimple *vec_stmt)
1609 {
1610   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1611   vec_info *vinfo = stmt_info->vinfo;
1612 
1613   set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1614 
1615   if (dump_enabled_p ())
1616     {
1617       dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1618       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1619     }
1620 
1621   gimple_set_location (vec_stmt, gimple_location (stmt));
1622 
1623   /* While EH edges will generally prevent vectorization, stmt might
1624      e.g. be in a must-not-throw region.  Ensure newly created stmts
1625      that could throw are part of the same region.  */
1626   int lp_nr = lookup_stmt_eh_lp (stmt);
1627   if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1628     add_stmt_to_eh_lp (vec_stmt, lp_nr);
1629 }
1630 
1631 /* Replace the scalar statement STMT with a new vector statement VEC_STMT,
1632    which sets the same scalar result as STMT did.  */
1633 
1634 void
vect_finish_replace_stmt(gimple * stmt,gimple * vec_stmt)1635 vect_finish_replace_stmt (gimple *stmt, gimple *vec_stmt)
1636 {
1637   gcc_assert (gimple_get_lhs (stmt) == gimple_get_lhs (vec_stmt));
1638 
1639   gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1640   gsi_replace (&gsi, vec_stmt, true);
1641 
1642   vect_finish_stmt_generation_1 (stmt, vec_stmt);
1643 }
1644 
1645 /* Function vect_finish_stmt_generation.
1646 
1647    Insert a new stmt.  */
1648 
1649 void
vect_finish_stmt_generation(gimple * stmt,gimple * vec_stmt,gimple_stmt_iterator * gsi)1650 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1651                                    gimple_stmt_iterator *gsi)
1652 {
1653   gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1654 
1655   if (!gsi_end_p (*gsi)
1656       && gimple_has_mem_ops (vec_stmt))
1657     {
1658       gimple *at_stmt = gsi_stmt (*gsi);
1659       tree vuse = gimple_vuse (at_stmt);
1660       if (vuse && TREE_CODE (vuse) == SSA_NAME)
1661           {
1662             tree vdef = gimple_vdef (at_stmt);
1663             gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1664             /* If we have an SSA vuse and insert a store, update virtual
1665                SSA form to avoid triggering the renamer.  Do so only
1666                if we can easily see all uses - which is what almost always
1667                happens with the way vectorized stmts are inserted.  */
1668             if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1669                 && ((is_gimple_assign (vec_stmt)
1670                        && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1671                       || (is_gimple_call (vec_stmt)
1672                           && !(gimple_call_flags (vec_stmt)
1673                                  & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1674               {
1675                 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1676                 gimple_set_vdef (vec_stmt, new_vdef);
1677                 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1678               }
1679           }
1680     }
1681   gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1682   vect_finish_stmt_generation_1 (stmt, vec_stmt);
1683 }
1684 
1685 /* We want to vectorize a call to combined function CFN with function
1686    decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1687    as the types of all inputs.  Check whether this is possible using
1688    an internal function, returning its code if so or IFN_LAST if not.  */
1689 
1690 static internal_fn
vectorizable_internal_function(combined_fn cfn,tree fndecl,tree vectype_out,tree vectype_in)1691 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1692                                         tree vectype_out, tree vectype_in)
1693 {
1694   internal_fn ifn;
1695   if (internal_fn_p (cfn))
1696     ifn = as_internal_fn (cfn);
1697   else
1698     ifn = associated_internal_fn (fndecl);
1699   if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1700     {
1701       const direct_internal_fn_info &info = direct_internal_fn (ifn);
1702       if (info.vectorizable)
1703           {
1704             tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1705             tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1706             if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1707                                                         OPTIMIZE_FOR_SPEED))
1708               return ifn;
1709           }
1710     }
1711   return IFN_LAST;
1712 }
1713 
1714 
1715 static tree permute_vec_elements (tree, tree, tree, gimple *,
1716                                           gimple_stmt_iterator *);
1717 
1718 /* Check whether a load or store statement in the loop described by
1719    LOOP_VINFO is possible in a fully-masked loop.  This is testing
1720    whether the vectorizer pass has the appropriate support, as well as
1721    whether the target does.
1722 
1723    VLS_TYPE says whether the statement is a load or store and VECTYPE
1724    is the type of the vector being loaded or stored.  MEMORY_ACCESS_TYPE
1725    says how the load or store is going to be implemented and GROUP_SIZE
1726    is the number of load or store statements in the containing group.
1727    If the access is a gather load or scatter store, GS_INFO describes
1728    its arguments.
1729 
1730    Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1731    supported, otherwise record the required mask types.  */
1732 
1733 static void
check_load_store_masking(loop_vec_info loop_vinfo,tree vectype,vec_load_store_type vls_type,int group_size,vect_memory_access_type memory_access_type,gather_scatter_info * gs_info)1734 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1735                                 vec_load_store_type vls_type, int group_size,
1736                                 vect_memory_access_type memory_access_type,
1737                                 gather_scatter_info *gs_info)
1738 {
1739   /* Invariant loads need no special support.  */
1740   if (memory_access_type == VMAT_INVARIANT)
1741     return;
1742 
1743   vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1744   machine_mode vecmode = TYPE_MODE (vectype);
1745   bool is_load = (vls_type == VLS_LOAD);
1746   if (memory_access_type == VMAT_LOAD_STORE_LANES)
1747     {
1748       if (is_load
1749             ? !vect_load_lanes_supported (vectype, group_size, true)
1750             : !vect_store_lanes_supported (vectype, group_size, true))
1751           {
1752             if (dump_enabled_p ())
1753               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1754                                    "can't use a fully-masked loop because the"
1755                                    " target doesn't have an appropriate masked"
1756                                    " load/store-lanes instruction.\n");
1757             LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1758             return;
1759           }
1760       unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1761       vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1762       return;
1763     }
1764 
1765   if (memory_access_type == VMAT_GATHER_SCATTER)
1766     {
1767       internal_fn ifn = (is_load
1768                                ? IFN_MASK_GATHER_LOAD
1769                                : IFN_MASK_SCATTER_STORE);
1770       tree offset_type = TREE_TYPE (gs_info->offset);
1771       if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1772                                                                gs_info->memory_type,
1773                                                                TYPE_SIGN (offset_type),
1774                                                                gs_info->scale))
1775           {
1776             if (dump_enabled_p ())
1777               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1778                                    "can't use a fully-masked loop because the"
1779                                    " target doesn't have an appropriate masked"
1780                                    " gather load or scatter store instruction.\n");
1781             LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1782             return;
1783           }
1784       unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1785       vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1786       return;
1787     }
1788 
1789   if (memory_access_type != VMAT_CONTIGUOUS
1790       && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1791     {
1792       /* Element X of the data must come from iteration i * VF + X of the
1793            scalar loop.  We need more work to support other mappings.  */
1794       if (dump_enabled_p ())
1795           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1796                                "can't use a fully-masked loop because an access"
1797                                " isn't contiguous.\n");
1798       LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1799       return;
1800     }
1801 
1802   machine_mode mask_mode;
1803   if (!(targetm.vectorize.get_mask_mode
1804           (GET_MODE_NUNITS (vecmode),
1805            GET_MODE_SIZE (vecmode)).exists (&mask_mode))
1806       || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1807     {
1808       if (dump_enabled_p ())
1809           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1810                                "can't use a fully-masked loop because the target"
1811                                " doesn't have the appropriate masked load or"
1812                                " store.\n");
1813       LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1814       return;
1815     }
1816   /* We might load more scalars than we need for permuting SLP loads.
1817      We checked in get_group_load_store_type that the extra elements
1818      don't leak into a new vector.  */
1819   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1820   poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1821   unsigned int nvectors;
1822   if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1823     vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
1824   else
1825     gcc_unreachable ();
1826 }
1827 
1828 /* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
1829    form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1830    that needs to be applied to all loads and stores in a vectorized loop.
1831    Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1832 
1833    MASK_TYPE is the type of both masks.  If new statements are needed,
1834    insert them before GSI.  */
1835 
1836 static tree
prepare_load_store_mask(tree mask_type,tree loop_mask,tree vec_mask,gimple_stmt_iterator * gsi)1837 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1838                                gimple_stmt_iterator *gsi)
1839 {
1840   gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1841   if (!loop_mask)
1842     return vec_mask;
1843 
1844   gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1845   tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1846   gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1847                                                     vec_mask, loop_mask);
1848   gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1849   return and_res;
1850 }
1851 
1852 /* Determine whether we can use a gather load or scatter store to vectorize
1853    strided load or store STMT by truncating the current offset to a smaller
1854    width.  We need to be able to construct an offset vector:
1855 
1856      { 0, X, X*2, X*3, ... }
1857 
1858    without loss of precision, where X is STMT's DR_STEP.
1859 
1860    Return true if this is possible, describing the gather load or scatter
1861    store in GS_INFO.  MASKED_P is true if the load or store is conditional.  */
1862 
1863 static bool
vect_truncate_gather_scatter_offset(gimple * stmt,loop_vec_info loop_vinfo,bool masked_p,gather_scatter_info * gs_info)1864 vect_truncate_gather_scatter_offset (gimple *stmt, loop_vec_info loop_vinfo,
1865                                              bool masked_p,
1866                                              gather_scatter_info *gs_info)
1867 {
1868   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1869   data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1870   tree step = DR_STEP (dr);
1871   if (TREE_CODE (step) != INTEGER_CST)
1872     {
1873       /* ??? Perhaps we could use range information here?  */
1874       if (dump_enabled_p ())
1875           dump_printf_loc (MSG_NOTE, vect_location,
1876                                "cannot truncate variable step.\n");
1877       return false;
1878     }
1879 
1880   /* Get the number of bits in an element.  */
1881   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1882   scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1883   unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1884 
1885   /* Set COUNT to the upper limit on the number of elements - 1.
1886      Start with the maximum vectorization factor.  */
1887   unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1888 
1889   /* Try lowering COUNT to the number of scalar latch iterations.  */
1890   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1891   widest_int max_iters;
1892   if (max_loop_iterations (loop, &max_iters)
1893       && max_iters < count)
1894     count = max_iters.to_shwi ();
1895 
1896   /* Try scales of 1 and the element size.  */
1897   int scales[] = { 1, vect_get_scalar_dr_size (dr) };
1898   bool overflow_p = false;
1899   for (int i = 0; i < 2; ++i)
1900     {
1901       int scale = scales[i];
1902       widest_int factor;
1903       if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1904           continue;
1905 
1906       /* See whether we can calculate (COUNT - 1) * STEP / SCALE
1907            in OFFSET_BITS bits.  */
1908       widest_int range = wi::mul (count, factor, SIGNED, &overflow_p);
1909       if (overflow_p)
1910           continue;
1911       signop sign = range >= 0 ? UNSIGNED : SIGNED;
1912       if (wi::min_precision (range, sign) > element_bits)
1913           {
1914             overflow_p = true;
1915             continue;
1916           }
1917 
1918       /* See whether the target supports the operation.  */
1919       tree memory_type = TREE_TYPE (DR_REF (dr));
1920       if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
1921                                              memory_type, element_bits, sign, scale,
1922                                              &gs_info->ifn, &gs_info->element_type))
1923           continue;
1924 
1925       tree offset_type = build_nonstandard_integer_type (element_bits,
1926                                                                        sign == UNSIGNED);
1927 
1928       gs_info->decl = NULL_TREE;
1929       /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1930            but we don't need to store that here.  */
1931       gs_info->base = NULL_TREE;
1932       gs_info->offset = fold_convert (offset_type, step);
1933       gs_info->offset_dt = vect_constant_def;
1934       gs_info->offset_vectype = NULL_TREE;
1935       gs_info->scale = scale;
1936       gs_info->memory_type = memory_type;
1937       return true;
1938     }
1939 
1940   if (overflow_p && dump_enabled_p ())
1941     dump_printf_loc (MSG_NOTE, vect_location,
1942                          "truncating gather/scatter offset to %d bits"
1943                          " might change its value.\n", element_bits);
1944 
1945   return false;
1946 }
1947 
1948 /* Return true if we can use gather/scatter internal functions to
1949    vectorize STMT, which is a grouped or strided load or store.
1950    MASKED_P is true if load or store is conditional.  When returning
1951    true, fill in GS_INFO with the information required to perform the
1952    operation.  */
1953 
1954 static bool
vect_use_strided_gather_scatters_p(gimple * stmt,loop_vec_info loop_vinfo,bool masked_p,gather_scatter_info * gs_info)1955 vect_use_strided_gather_scatters_p (gimple *stmt, loop_vec_info loop_vinfo,
1956                                             bool masked_p,
1957                                             gather_scatter_info *gs_info)
1958 {
1959   if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info)
1960       || gs_info->decl)
1961     return vect_truncate_gather_scatter_offset (stmt, loop_vinfo,
1962                                                             masked_p, gs_info);
1963 
1964   scalar_mode element_mode = SCALAR_TYPE_MODE (gs_info->element_type);
1965   unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1966   tree offset_type = TREE_TYPE (gs_info->offset);
1967   unsigned int offset_bits = TYPE_PRECISION (offset_type);
1968 
1969   /* Enforced by vect_check_gather_scatter.  */
1970   gcc_assert (element_bits >= offset_bits);
1971 
1972   /* If the elements are wider than the offset, convert the offset to the
1973      same width, without changing its sign.  */
1974   if (element_bits > offset_bits)
1975     {
1976       bool unsigned_p = TYPE_UNSIGNED (offset_type);
1977       offset_type = build_nonstandard_integer_type (element_bits, unsigned_p);
1978       gs_info->offset = fold_convert (offset_type, gs_info->offset);
1979     }
1980 
1981   if (dump_enabled_p ())
1982     dump_printf_loc (MSG_NOTE, vect_location,
1983                          "using gather/scatter for strided/grouped access,"
1984                          " scale = %d\n", gs_info->scale);
1985 
1986   return true;
1987 }
1988 
1989 /* STMT is a non-strided load or store, meaning that it accesses
1990    elements with a known constant step.  Return -1 if that step
1991    is negative, 0 if it is zero, and 1 if it is greater than zero.  */
1992 
1993 static int
compare_step_with_zero(gimple * stmt)1994 compare_step_with_zero (gimple *stmt)
1995 {
1996   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1997   data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1998   return tree_int_cst_compare (vect_dr_behavior (dr)->step,
1999                                      size_zero_node);
2000 }
2001 
2002 /* If the target supports a permute mask that reverses the elements in
2003    a vector of type VECTYPE, return that mask, otherwise return null.  */
2004 
2005 static tree
perm_mask_for_reverse(tree vectype)2006 perm_mask_for_reverse (tree vectype)
2007 {
2008   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2009 
2010   /* The encoding has a single stepped pattern.  */
2011   vec_perm_builder sel (nunits, 1, 3);
2012   for (int i = 0; i < 3; ++i)
2013     sel.quick_push (nunits - 1 - i);
2014 
2015   vec_perm_indices indices (sel, 1, nunits);
2016   if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
2017     return NULL_TREE;
2018   return vect_gen_perm_mask_checked (vectype, indices);
2019 }
2020 
2021 /* STMT is either a masked or unconditional store.  Return the value
2022    being stored.  */
2023 
2024 tree
vect_get_store_rhs(gimple * stmt)2025 vect_get_store_rhs (gimple *stmt)
2026 {
2027   if (gassign *assign = dyn_cast <gassign *> (stmt))
2028     {
2029       gcc_assert (gimple_assign_single_p (assign));
2030       return gimple_assign_rhs1 (assign);
2031     }
2032   if (gcall *call = dyn_cast <gcall *> (stmt))
2033     {
2034       internal_fn ifn = gimple_call_internal_fn (call);
2035       int index = internal_fn_stored_value_index (ifn);
2036       gcc_assert (index >= 0);
2037       return gimple_call_arg (stmt, index);
2038     }
2039   gcc_unreachable ();
2040 }
2041 
2042 /* A subroutine of get_load_store_type, with a subset of the same
2043    arguments.  Handle the case where STMT is part of a grouped load
2044    or store.
2045 
2046    For stores, the statements in the group are all consecutive
2047    and there is no gap at the end.  For loads, the statements in the
2048    group might not be consecutive; there can be gaps between statements
2049    as well as at the end.  */
2050 
2051 static bool
get_group_load_store_type(gimple * stmt,tree vectype,bool slp,bool masked_p,vec_load_store_type vls_type,vect_memory_access_type * memory_access_type,gather_scatter_info * gs_info)2052 get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
2053                                  bool masked_p, vec_load_store_type vls_type,
2054                                  vect_memory_access_type *memory_access_type,
2055                                  gather_scatter_info *gs_info)
2056 {
2057   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2058   vec_info *vinfo = stmt_info->vinfo;
2059   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2060   struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2061   gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
2062   data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
2063   unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
2064   bool single_element_p = (stmt == first_stmt
2065                                  && !GROUP_NEXT_ELEMENT (stmt_info));
2066   unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
2067   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2068 
2069   /* True if the vectorized statements would access beyond the last
2070      statement in the group.  */
2071   bool overrun_p = false;
2072 
2073   /* True if we can cope with such overrun by peeling for gaps, so that
2074      there is at least one final scalar iteration after the vector loop.  */
2075   bool can_overrun_p = (!masked_p
2076                               && vls_type == VLS_LOAD
2077                               && loop_vinfo
2078                               && !loop->inner);
2079 
2080   /* There can only be a gap at the end of the group if the stride is
2081      known at compile time.  */
2082   gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
2083 
2084   /* Stores can't yet have gaps.  */
2085   gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2086 
2087   if (slp)
2088     {
2089       if (STMT_VINFO_STRIDED_P (stmt_info))
2090           {
2091             /* Try to use consecutive accesses of GROUP_SIZE elements,
2092                separated by the stride, until we have a complete vector.
2093                Fall back to scalar accesses if that isn't possible.  */
2094             if (multiple_p (nunits, group_size))
2095               *memory_access_type = VMAT_STRIDED_SLP;
2096             else
2097               *memory_access_type = VMAT_ELEMENTWISE;
2098           }
2099       else
2100           {
2101             overrun_p = loop_vinfo && gap != 0;
2102             if (overrun_p && vls_type != VLS_LOAD)
2103               {
2104                 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2105                                      "Grouped store with gaps requires"
2106                                      " non-consecutive accesses\n");
2107                 return false;
2108               }
2109             /* An overrun is fine if the trailing elements are smaller
2110                than the alignment boundary B.  Every vector access will
2111                be a multiple of B and so we are guaranteed to access a
2112                non-gap element in the same B-sized block.  */
2113             if (overrun_p
2114                 && gap < (vect_known_alignment_in_bytes (first_dr)
2115                               / vect_get_scalar_dr_size (first_dr)))
2116               overrun_p = false;
2117             if (overrun_p && !can_overrun_p)
2118               {
2119                 if (dump_enabled_p ())
2120                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2121                                          "Peeling for outer loop is not supported\n");
2122                 return false;
2123               }
2124             *memory_access_type = VMAT_CONTIGUOUS;
2125           }
2126     }
2127   else
2128     {
2129       /* We can always handle this case using elementwise accesses,
2130            but see if something more efficient is available.  */
2131       *memory_access_type = VMAT_ELEMENTWISE;
2132 
2133       /* If there is a gap at the end of the group then these optimizations
2134            would access excess elements in the last iteration.  */
2135       bool would_overrun_p = (gap != 0);
2136       /* An overrun is fine if the trailing elements are smaller than the
2137            alignment boundary B.  Every vector access will be a multiple of B
2138            and so we are guaranteed to access a non-gap element in the
2139            same B-sized block.  */
2140       if (would_overrun_p
2141             && !masked_p
2142             && gap < (vect_known_alignment_in_bytes (first_dr)
2143                         / vect_get_scalar_dr_size (first_dr)))
2144           would_overrun_p = false;
2145 
2146       if (!STMT_VINFO_STRIDED_P (stmt_info)
2147             && (can_overrun_p || !would_overrun_p)
2148             && compare_step_with_zero (stmt) > 0)
2149           {
2150             /* First cope with the degenerate case of a single-element
2151                vector.  */
2152             if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2153               *memory_access_type = VMAT_CONTIGUOUS;
2154 
2155             /* Otherwise try using LOAD/STORE_LANES.  */
2156             if (*memory_access_type == VMAT_ELEMENTWISE
2157                 && (vls_type == VLS_LOAD
2158                       ? vect_load_lanes_supported (vectype, group_size, masked_p)
2159                       : vect_store_lanes_supported (vectype, group_size,
2160                                                             masked_p)))
2161               {
2162                 *memory_access_type = VMAT_LOAD_STORE_LANES;
2163                 overrun_p = would_overrun_p;
2164               }
2165 
2166             /* If that fails, try using permuting loads.  */
2167             if (*memory_access_type == VMAT_ELEMENTWISE
2168                 && (vls_type == VLS_LOAD
2169                       ? vect_grouped_load_supported (vectype, single_element_p,
2170                                                              group_size)
2171                       : vect_grouped_store_supported (vectype, group_size)))
2172               {
2173                 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2174                 overrun_p = would_overrun_p;
2175               }
2176           }
2177 
2178       /* As a last resort, trying using a gather load or scatter store.
2179 
2180            ??? Although the code can handle all group sizes correctly,
2181            it probably isn't a win to use separate strided accesses based
2182            on nearby locations.  Or, even if it's a win over scalar code,
2183            it might not be a win over vectorizing at a lower VF, if that
2184            allows us to use contiguous accesses.  */
2185       if (*memory_access_type == VMAT_ELEMENTWISE
2186             && single_element_p
2187             && loop_vinfo
2188             && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
2189                                                              masked_p, gs_info))
2190           *memory_access_type = VMAT_GATHER_SCATTER;
2191     }
2192 
2193   if (vls_type != VLS_LOAD && first_stmt == stmt)
2194     {
2195       /* STMT is the leader of the group. Check the operands of all the
2196            stmts of the group.  */
2197       gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
2198       while (next_stmt)
2199           {
2200             tree op = vect_get_store_rhs (next_stmt);
2201             gimple *def_stmt;
2202             enum vect_def_type dt;
2203             if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
2204               {
2205                 if (dump_enabled_p ())
2206                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2207                                          "use not simple.\n");
2208                 return false;
2209               }
2210             next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2211           }
2212     }
2213 
2214   if (overrun_p)
2215     {
2216       gcc_assert (can_overrun_p);
2217       if (dump_enabled_p ())
2218           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2219                                "Data access with gaps requires scalar "
2220                                "epilogue loop\n");
2221       LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2222     }
2223 
2224   return true;
2225 }
2226 
2227 /* A subroutine of get_load_store_type, with a subset of the same
2228    arguments.  Handle the case where STMT is a load or store that
2229    accesses consecutive elements with a negative step.  */
2230 
2231 static vect_memory_access_type
get_negative_load_store_type(gimple * stmt,tree vectype,vec_load_store_type vls_type,unsigned int ncopies)2232 get_negative_load_store_type (gimple *stmt, tree vectype,
2233                                     vec_load_store_type vls_type,
2234                                     unsigned int ncopies)
2235 {
2236   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2237   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2238   dr_alignment_support alignment_support_scheme;
2239 
2240   if (ncopies > 1)
2241     {
2242       if (dump_enabled_p ())
2243           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2244                                "multiple types with negative step.\n");
2245       return VMAT_ELEMENTWISE;
2246     }
2247 
2248   alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
2249   if (alignment_support_scheme != dr_aligned
2250       && alignment_support_scheme != dr_unaligned_supported)
2251     {
2252       if (dump_enabled_p ())
2253           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2254                                "negative step but alignment required.\n");
2255       return VMAT_ELEMENTWISE;
2256     }
2257 
2258   if (vls_type == VLS_STORE_INVARIANT)
2259     {
2260       if (dump_enabled_p ())
2261           dump_printf_loc (MSG_NOTE, vect_location,
2262                                "negative step with invariant source;"
2263                                " no permute needed.\n");
2264       return VMAT_CONTIGUOUS_DOWN;
2265     }
2266 
2267   if (!perm_mask_for_reverse (vectype))
2268     {
2269       if (dump_enabled_p ())
2270           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2271                                "negative step and reversing not supported.\n");
2272       return VMAT_ELEMENTWISE;
2273     }
2274 
2275   return VMAT_CONTIGUOUS_REVERSE;
2276 }
2277 
2278 /* Analyze load or store statement STMT of type VLS_TYPE.  Return true
2279    if there is a memory access type that the vectorized form can use,
2280    storing it in *MEMORY_ACCESS_TYPE if so.  If we decide to use gathers
2281    or scatters, fill in GS_INFO accordingly.
2282 
2283    SLP says whether we're performing SLP rather than loop vectorization.
2284    MASKED_P is true if the statement is conditional on a vectorized mask.
2285    VECTYPE is the vector type that the vectorized statements will use.
2286    NCOPIES is the number of vector statements that will be needed.  */
2287 
2288 static bool
get_load_store_type(gimple * stmt,tree vectype,bool slp,bool masked_p,vec_load_store_type vls_type,unsigned int ncopies,vect_memory_access_type * memory_access_type,gather_scatter_info * gs_info)2289 get_load_store_type (gimple *stmt, tree vectype, bool slp, bool masked_p,
2290                          vec_load_store_type vls_type, unsigned int ncopies,
2291                          vect_memory_access_type *memory_access_type,
2292                          gather_scatter_info *gs_info)
2293 {
2294   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2295   vec_info *vinfo = stmt_info->vinfo;
2296   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2297   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2298   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2299     {
2300       *memory_access_type = VMAT_GATHER_SCATTER;
2301       gimple *def_stmt;
2302       if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
2303           gcc_unreachable ();
2304       else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
2305                                             &gs_info->offset_dt,
2306                                             &gs_info->offset_vectype))
2307           {
2308             if (dump_enabled_p ())
2309               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2310                                    "%s index use not simple.\n",
2311                                    vls_type == VLS_LOAD ? "gather" : "scatter");
2312             return false;
2313           }
2314     }
2315   else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2316     {
2317       if (!get_group_load_store_type (stmt, vectype, slp, masked_p, vls_type,
2318                                               memory_access_type, gs_info))
2319           return false;
2320     }
2321   else if (STMT_VINFO_STRIDED_P (stmt_info))
2322     {
2323       gcc_assert (!slp);
2324       if (loop_vinfo
2325             && vect_use_strided_gather_scatters_p (stmt, loop_vinfo,
2326                                                              masked_p, gs_info))
2327           *memory_access_type = VMAT_GATHER_SCATTER;
2328       else
2329           *memory_access_type = VMAT_ELEMENTWISE;
2330     }
2331   else
2332     {
2333       int cmp = compare_step_with_zero (stmt);
2334       if (cmp < 0)
2335           *memory_access_type = get_negative_load_store_type
2336             (stmt, vectype, vls_type, ncopies);
2337       else if (cmp == 0)
2338           {
2339             gcc_assert (vls_type == VLS_LOAD);
2340             *memory_access_type = VMAT_INVARIANT;
2341           }
2342       else
2343           *memory_access_type = VMAT_CONTIGUOUS;
2344     }
2345 
2346   if ((*memory_access_type == VMAT_ELEMENTWISE
2347        || *memory_access_type == VMAT_STRIDED_SLP)
2348       && !nunits.is_constant ())
2349     {
2350       if (dump_enabled_p ())
2351           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2352                                "Not using elementwise accesses due to variable "
2353                                "vectorization factor.\n");
2354       return false;
2355     }
2356 
2357   /* FIXME: At the moment the cost model seems to underestimate the
2358      cost of using elementwise accesses.  This check preserves the
2359      traditional behavior until that can be fixed.  */
2360   if (*memory_access_type == VMAT_ELEMENTWISE
2361       && !STMT_VINFO_STRIDED_P (stmt_info)
2362       && !(stmt == GROUP_FIRST_ELEMENT (stmt_info)
2363              && !GROUP_NEXT_ELEMENT (stmt_info)
2364              && !pow2p_hwi (GROUP_SIZE (stmt_info))))
2365     {
2366       if (dump_enabled_p ())
2367           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2368                                "not falling back to elementwise accesses\n");
2369       return false;
2370     }
2371   return true;
2372 }
2373 
2374 /* Return true if boolean argument MASK is suitable for vectorizing
2375    conditional load or store STMT.  When returning true, store the type
2376    of the definition in *MASK_DT_OUT and the type of the vectorized mask
2377    in *MASK_VECTYPE_OUT.  */
2378 
2379 static bool
vect_check_load_store_mask(gimple * stmt,tree mask,vect_def_type * mask_dt_out,tree * mask_vectype_out)2380 vect_check_load_store_mask (gimple *stmt, tree mask,
2381                                   vect_def_type *mask_dt_out,
2382                                   tree *mask_vectype_out)
2383 {
2384   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2385     {
2386       if (dump_enabled_p ())
2387           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2388                                "mask argument is not a boolean.\n");
2389       return false;
2390     }
2391 
2392   if (TREE_CODE (mask) != SSA_NAME)
2393     {
2394       if (dump_enabled_p ())
2395           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2396                                "mask argument is not an SSA name.\n");
2397       return false;
2398     }
2399 
2400   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2401   gimple *def_stmt;
2402   enum vect_def_type mask_dt;
2403   tree mask_vectype;
2404   if (!vect_is_simple_use (mask, stmt_info->vinfo, &def_stmt, &mask_dt,
2405                                  &mask_vectype))
2406     {
2407       if (dump_enabled_p ())
2408           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2409                                "mask use not simple.\n");
2410       return false;
2411     }
2412 
2413   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2414   if (!mask_vectype)
2415     mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2416 
2417   if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2418     {
2419       if (dump_enabled_p ())
2420           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2421                                "could not find an appropriate vector mask type.\n");
2422       return false;
2423     }
2424 
2425   if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2426                     TYPE_VECTOR_SUBPARTS (vectype)))
2427     {
2428       if (dump_enabled_p ())
2429           {
2430             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2431                                  "vector mask type ");
2432             dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, mask_vectype);
2433             dump_printf (MSG_MISSED_OPTIMIZATION,
2434                            " does not match vector data type ");
2435             dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
2436             dump_printf (MSG_MISSED_OPTIMIZATION, ".\n");
2437           }
2438       return false;
2439     }
2440 
2441   *mask_dt_out = mask_dt;
2442   *mask_vectype_out = mask_vectype;
2443   return true;
2444 }
2445 
2446 /* Return true if stored value RHS is suitable for vectorizing store
2447    statement STMT.  When returning true, store the type of the
2448    definition in *RHS_DT_OUT, the type of the vectorized store value in
2449    *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT.  */
2450 
2451 static bool
vect_check_store_rhs(gimple * stmt,tree rhs,vect_def_type * rhs_dt_out,tree * rhs_vectype_out,vec_load_store_type * vls_type_out)2452 vect_check_store_rhs (gimple *stmt, tree rhs, vect_def_type *rhs_dt_out,
2453                           tree *rhs_vectype_out, vec_load_store_type *vls_type_out)
2454 {
2455   /* In the case this is a store from a constant make sure
2456      native_encode_expr can handle it.  */
2457   if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2458     {
2459       if (dump_enabled_p ())
2460           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2461                                "cannot encode constant as a byte sequence.\n");
2462       return false;
2463     }
2464 
2465   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2466   gimple *def_stmt;
2467   enum vect_def_type rhs_dt;
2468   tree rhs_vectype;
2469   if (!vect_is_simple_use (rhs, stmt_info->vinfo, &def_stmt, &rhs_dt,
2470                                  &rhs_vectype))
2471     {
2472       if (dump_enabled_p ())
2473           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2474                                "use not simple.\n");
2475       return false;
2476     }
2477 
2478   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2479   if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2480     {
2481       if (dump_enabled_p ())
2482           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2483                                "incompatible vector types.\n");
2484       return false;
2485     }
2486 
2487   *rhs_dt_out = rhs_dt;
2488   *rhs_vectype_out = rhs_vectype;
2489   if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2490     *vls_type_out = VLS_STORE_INVARIANT;
2491   else
2492     *vls_type_out = VLS_STORE;
2493   return true;
2494 }
2495 
2496 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT.
2497    Note that we support masks with floating-point type, in which case the
2498    floats are interpreted as a bitmask.  */
2499 
2500 static tree
vect_build_all_ones_mask(gimple * stmt,tree masktype)2501 vect_build_all_ones_mask (gimple *stmt, tree masktype)
2502 {
2503   if (TREE_CODE (masktype) == INTEGER_TYPE)
2504     return build_int_cst (masktype, -1);
2505   else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2506     {
2507       tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2508       mask = build_vector_from_val (masktype, mask);
2509       return vect_init_vector (stmt, mask, masktype, NULL);
2510     }
2511   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2512     {
2513       REAL_VALUE_TYPE r;
2514       long tmp[6];
2515       for (int j = 0; j < 6; ++j)
2516           tmp[j] = -1;
2517       real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2518       tree mask = build_real (TREE_TYPE (masktype), r);
2519       mask = build_vector_from_val (masktype, mask);
2520       return vect_init_vector (stmt, mask, masktype, NULL);
2521     }
2522   gcc_unreachable ();
2523 }
2524 
2525 /* Build an all-zero merge value of type VECTYPE while vectorizing
2526    STMT as a gather load.  */
2527 
2528 static tree
vect_build_zero_merge_argument(gimple * stmt,tree vectype)2529 vect_build_zero_merge_argument (gimple *stmt, tree vectype)
2530 {
2531   tree merge;
2532   if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2533     merge = build_int_cst (TREE_TYPE (vectype), 0);
2534   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2535     {
2536       REAL_VALUE_TYPE r;
2537       long tmp[6];
2538       for (int j = 0; j < 6; ++j)
2539           tmp[j] = 0;
2540       real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2541       merge = build_real (TREE_TYPE (vectype), r);
2542     }
2543   else
2544     gcc_unreachable ();
2545   merge = build_vector_from_val (vectype, merge);
2546   return vect_init_vector (stmt, merge, vectype, NULL);
2547 }
2548 
2549 /* Build a gather load call while vectorizing STMT.  Insert new instructions
   before GSI and add them to VEC_STMT.  GS_INFO describes the gather load
   operation.  If the load is conditional, MASK is the unvectorized
   condition and MASK_DT is its definition type, otherwise MASK is null.

   Each call is made to GS_INFO->decl with five arguments: a source
   vector, the base pointer, the offset vector, the mask vector and the
   scale.  The number of elements in the data vector type is compared
   with the number in GS_INFO->offset_vectype to pick one of three
   schemes: NONE (equal counts), WIDEN (the offset vector has twice as
   many elements) or NARROW (the data vector has twice as many elements).
   Any other relationship is treated as unreachable.  */
2553 
2554 static void
vect_build_gather_load_calls(gimple * stmt,gimple_stmt_iterator * gsi,gimple ** vec_stmt,gather_scatter_info * gs_info,tree mask,vect_def_type mask_dt)2555 vect_build_gather_load_calls (gimple *stmt, gimple_stmt_iterator *gsi,
2556                                     gimple **vec_stmt, gather_scatter_info *gs_info,
2557                                     tree mask, vect_def_type mask_dt)
2558 {
2559   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2560   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2561   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2562   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2563   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  /* NCOPIES is doubled below for the NARROW scheme.  */
2564   int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2565   edge pe = loop_preheader_edge (loop);
2566   enum { NARROW, NONE, WIDEN } modifier;
2567   poly_uint64 gather_off_nunits
2568     = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2569 
  /* Walk the argument types of the gather function in order: source,
     pointer, index, mask and scale.  */
2570   tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2571   tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2572   tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2573   tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2574   tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2575   tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2576   tree scaletype = TREE_VALUE (arglist);
  /* The source type must be compatible with the return type and, for a
     conditional load, with the mask type as well (the mask is passed as
     the source argument in that case; see the loop below).  */
2577   gcc_checking_assert (types_compatible_p (srctype, rettype)
2578                            && (!mask || types_compatible_p (srctype, masktype)));
2579 
2580   tree perm_mask = NULL_TREE;
2581   tree mask_perm_mask = NULL_TREE;
2582   if (known_eq (nunits, gather_off_nunits))
2583     modifier = NONE;
2584   else if (known_eq (nunits * 2, gather_off_nunits))
2585     {
2586       modifier = WIDEN;
2587 
2588       /* Currently widening gathers and scatters are only supported for
           fixed-length vectors.  */
2590       int count = gather_off_nunits.to_constant ();
      /* Single-input selector whose element I is I | (COUNT / 2).  It is
         applied to the offset vector on odd-numbered copies.  */
2591       vec_perm_builder sel (count, count, 1);
2592       for (int i = 0; i < count; ++i)
2593           sel.quick_push (i | (count / 2));
2594 
2595       vec_perm_indices indices (sel, 1, count);
2596       perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2597                                                         indices);
2598     }
2599   else if (known_eq (nunits, gather_off_nunits * 2))
2600     {
2601       modifier = NARROW;
2602 
2603       /* Currently narrowing gathers and scatters are only supported for
           fixed-length vectors.  */
2605       int count = nunits.to_constant ();
      /* Two-input selector: indices below COUNT / 2 are kept as they are
         and the remaining ones are advanced by COUNT / 2.  It is used to
         combine the results of each even/odd pair of calls.  */
2606       vec_perm_builder sel (count, count, 1);
2607       sel.quick_grow (count);
2608       for (int i = 0; i < count; ++i)
2609           sel[i] = i < count / 2 ? i : i + count / 2;
2610       vec_perm_indices indices (sel, 2, count);
2611       perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2612 
      /* Two gather calls are emitted for each data vector.  */
2613       ncopies *= 2;
2614 
2615       if (mask)
2616           {
            /* Reuse SEL to build the selector applied to the mask on
               odd-numbered copies: element I is I | (COUNT / 2).  */
2617             for (int i = 0; i < count; ++i)
2618               sel[i] = i | (count / 2);
2619             indices.new_vector (sel, 2, count);
2620             mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2621           }
2622     }
2623   else
2624     gcc_unreachable ();
2625 
2626   tree vec_dest = vect_create_destination_var (gimple_get_lhs (stmt),
2627                                                          vectype);
2628 
  /* Convert the base to the pointer type of the gather function.  If the
     result is not a gimple invariant, gimplify it on the loop preheader
     edge; that insertion must not create a new basic block.  */
2629   tree ptr = fold_convert (ptrtype, gs_info->base);
2630   if (!is_gimple_min_invariant (ptr))
2631     {
2632       gimple_seq seq;
2633       ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2634       basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2635       gcc_assert (!new_bb);
2636     }
2637 
2638   tree scale = build_int_cst (scaletype, gs_info->scale);
2639 
2640   tree vec_oprnd0 = NULL_TREE;
2641   tree vec_mask = NULL_TREE;
2642   tree src_op = NULL_TREE;
2643   tree mask_op = NULL_TREE;
2644   tree prev_res = NULL_TREE;
2645   stmt_vec_info prev_stmt_info = NULL;
2646 
  /* An unconditional load uses the same zero source and all-ones mask
     for every copy.  */
2647   if (!mask)
2648     {
2649       src_op = vect_build_zero_merge_argument (stmt, rettype);
2650       mask_op = vect_build_all_ones_mask (stmt, masktype);
2651     }
2652 
2653   for (int j = 0; j < ncopies; ++j)
2654     {
2655       tree op, var;
2656       gimple *new_stmt;
      /* Get the offset vector for this copy.  For WIDEN, odd copies use a
         permutation of the previous copy's offset vector instead of
         fetching a new vector definition.  */
2657       if (modifier == WIDEN && (j & 1))
2658           op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2659                                            perm_mask, stmt, gsi);
2660       else if (j == 0)
2661           op = vec_oprnd0
2662             = vect_get_vec_def_for_operand (gs_info->offset, stmt);
2663       else
2664           op = vec_oprnd0
2665             = vect_get_vec_def_for_stmt_copy (gs_info->offset_dt, vec_oprnd0);
2666 
      /* View-convert the offsets to the index type of the gather function
         when the conversion is not a no-op.  */
2667       if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2668           {
2669             gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2670                                         TYPE_VECTOR_SUBPARTS (idxtype)));
2671             var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2672             op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2673             new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2674             vect_finish_stmt_generation (stmt, new_stmt, gsi);
2675             op = var;
2676           }
2677 
2678       if (mask)
2679           {
            /* When a mask permutation exists (NARROW), odd copies permute
               the previous copy's mask operand; otherwise fetch the
               vectorized mask and view-convert it to MASKTYPE if needed.  */
2680             if (mask_perm_mask && (j & 1))
2681               mask_op = permute_vec_elements (mask_op, mask_op,
2682                                                       mask_perm_mask, stmt, gsi);
2683             else
2684               {
2685                 if (j == 0)
2686                     vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2687                 else
2688                     vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
2689 
2690                 mask_op = vec_mask;
2691                 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2692                     {
2693                       gcc_assert
2694                         (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)),
2695                                      TYPE_VECTOR_SUBPARTS (masktype)));
2696                       var = vect_get_new_ssa_name (masktype, vect_simple_var);
2697                       mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2698                       new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR,
2699                                                               mask_op);
2700                       vect_finish_stmt_generation (stmt, new_stmt, gsi);
2701                       mask_op = var;
2702                     }
2703               }
            /* For a conditional load the mask operand is also passed as
               the source argument.  */
2704             src_op = mask_op;
2705           }
2706 
2707       new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2708                                             mask_op, scale);
2709 
      /* If the function's return type is not trivially convertible to
         VECTYPE, put the call result in a temporary of the return type
         and view-convert it; otherwise the call defines the result
         directly.  */
2710       if (!useless_type_conversion_p (vectype, rettype))
2711           {
2712             gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2713                                         TYPE_VECTOR_SUBPARTS (rettype)));
2714             op = vect_get_new_ssa_name (rettype, vect_simple_var);
2715             gimple_call_set_lhs (new_stmt, op);
2716             vect_finish_stmt_generation (stmt, new_stmt, gsi);
2717             var = make_ssa_name (vec_dest);
2718             op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2719             new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2720           }
2721       else
2722           {
2723             var = make_ssa_name (vec_dest, new_stmt);
2724             gimple_call_set_lhs (new_stmt, var);
2725           }
2726 
2727       vect_finish_stmt_generation (stmt, new_stmt, gsi);
2728 
      /* For NARROW, even copies only remember their result; odd copies
         combine it with their own result using PERM_MASK, and only that
         combined statement is recorded below.  */
2729       if (modifier == NARROW)
2730           {
2731             if ((j & 1) == 0)
2732               {
2733                 prev_res = var;
2734                 continue;
2735               }
2736             var = permute_vec_elements (prev_res, var, perm_mask, stmt, gsi);
2737             new_stmt = SSA_NAME_DEF_STMT (var);
2738           }
2739 
      /* The first recorded statement becomes STMT's vectorized statement
         and is stored in *VEC_STMT; each later one is linked to its
         predecessor through STMT_VINFO_RELATED_STMT.  */
2740       if (prev_stmt_info == NULL)
2741           STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2742       else
2743           STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2744       prev_stmt_info = vinfo_for_stmt (new_stmt);
2745     }
2746 }
2747 
2748 /* Prepare the base and offset in GS_INFO for vectorization.
2749    Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2750    to the vectorized offset argument for the first copy of STMT.  STMT
2751    is the statement described by GS_INFO and LOOP is the containing loop.  */
2752 
2753 static void
vect_get_gather_scatter_ops(struct loop * loop,gimple * stmt,gather_scatter_info * gs_info,tree * dataref_ptr,tree * vec_offset)2754 vect_get_gather_scatter_ops (struct loop *loop, gimple *stmt,
2755                                    gather_scatter_info *gs_info,
2756                                    tree *dataref_ptr, tree *vec_offset)
2757 {
2758   gimple_seq stmts = NULL;
2759   *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2760   if (stmts != NULL)
2761     {
2762       basic_block new_bb;
2763       edge pe = loop_preheader_edge (loop);
2764       new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2765       gcc_assert (!new_bb);
2766     }
2767   tree offset_type = TREE_TYPE (gs_info->offset);
2768   tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2769   *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt,
2770                                                         offset_vectype);
2771 }
2772 
2773 /* Prepare to implement a grouped or strided load or store using
2774    the gather load or scatter store operation described by GS_INFO.
2775    STMT is the load or store statement.
2776 
2777    Set *DATAREF_BUMP to the amount that should be added to the base
2778    address after each copy of the vectorized statement.  Set *VEC_OFFSET
2779    to an invariant offset vector in which element I has the value
2780    I * DR_STEP / SCALE.  */
2781 
2782 static void
vect_get_strided_load_store_ops(gimple * stmt,loop_vec_info loop_vinfo,gather_scatter_info * gs_info,tree * dataref_bump,tree * vec_offset)2783 vect_get_strided_load_store_ops (gimple *stmt, loop_vec_info loop_vinfo,
2784                                          gather_scatter_info *gs_info,
2785                                          tree *dataref_bump, tree *vec_offset)
2786 {
2787   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2788   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2789   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2790   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2791   gimple_seq stmts;
2792 
2793   tree bump = size_binop (MULT_EXPR,
2794                                 fold_convert (sizetype, DR_STEP (dr)),
2795                                 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2796   *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
2797   if (stmts)
2798     gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2799 
2800   /* The offset given in GS_INFO can have pointer type, so use the element
2801      type of the vector instead.  */
2802   tree offset_type = TREE_TYPE (gs_info->offset);
2803   tree offset_vectype = get_vectype_for_scalar_type (offset_type);
2804   offset_type = TREE_TYPE (offset_vectype);
2805 
2806   /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type.  */
2807   tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr),
2808                                 ssize_int (gs_info->scale));
2809   step = fold_convert (offset_type, step);
2810   step = force_gimple_operand (step, &stmts, true, NULL_TREE);
2811 
2812   /* Create {0, X, X*2, X*3, ...}.  */
2813   *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype,
2814                                     build_zero_cst (offset_type), step);
2815   if (stmts)
2816     gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2817 }
2818 
2819 /* Return the amount that should be added to a vector pointer to move
2820    to the next or previous copy of AGGR_TYPE.  DR is the data reference
2821    being vectorized and MEMORY_ACCESS_TYPE describes the type of
2822    vectorization.  */
2823 
2824 static tree
vect_get_data_ptr_increment(data_reference * dr,tree aggr_type,vect_memory_access_type memory_access_type)2825 vect_get_data_ptr_increment (data_reference *dr, tree aggr_type,
2826                                    vect_memory_access_type memory_access_type)
2827 {
2828   if (memory_access_type == VMAT_INVARIANT)
2829     return size_zero_node;
2830 
2831   tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2832   tree step = vect_dr_behavior (dr)->step;
2833   if (tree_int_cst_sgn (step) == -1)
2834     iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2835   return iv_step;
2836 }
2837 
2838 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}.  */
2839 
2840 static bool
vectorizable_bswap(gimple * stmt,gimple_stmt_iterator * gsi,gimple ** vec_stmt,slp_tree slp_node,tree vectype_in,enum vect_def_type * dt)2841 vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2842                         gimple **vec_stmt, slp_tree slp_node,
2843                         tree vectype_in, enum vect_def_type *dt)
2844 {
2845   tree op, vectype;
2846   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2847   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2848   unsigned ncopies;
2849   unsigned HOST_WIDE_INT nunits, num_bytes;
2850 
2851   op = gimple_call_arg (stmt, 0);
2852   vectype = STMT_VINFO_VECTYPE (stmt_info);
2853 
2854   if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
2855     return false;
2856 
2857   /* Multiple types in SLP are handled by creating the appropriate number of
2858      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
2859      case of SLP.  */
2860   if (slp_node)
2861     ncopies = 1;
2862   else
2863     ncopies = vect_get_num_copies (loop_vinfo, vectype);
2864 
2865   gcc_assert (ncopies >= 1);
2866 
2867   tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2868   if (! char_vectype)
2869     return false;
2870 
2871   if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes))
2872     return false;
2873 
2874   unsigned word_bytes = num_bytes / nunits;
2875 
2876   /* The encoding uses one stepped pattern for each byte in the word.  */
2877   vec_perm_builder elts (num_bytes, word_bytes, 3);
2878   for (unsigned i = 0; i < 3; ++i)
2879     for (unsigned j = 0; j < word_bytes; ++j)
2880       elts.quick_push ((i + 1) * word_bytes - j - 1);
2881 
2882   vec_perm_indices indices (elts, 1, num_bytes);
2883   if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2884     return false;
2885 
2886   if (! vec_stmt)
2887     {
2888       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2889       if (dump_enabled_p ())
2890         dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
2891                          "\n");
2892       if (! slp_node)
2893           {
2894             add_stmt_cost (stmt_info->vinfo->target_cost_data,
2895                                1, vector_stmt, stmt_info, 0, vect_prologue);
2896             add_stmt_cost (stmt_info->vinfo->target_cost_data,
2897                                ncopies, vec_perm, stmt_info, 0, vect_body);
2898           }
2899       return true;
2900     }
2901 
2902   tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
2903 
2904   /* Transform.  */
2905   vec<tree> vec_oprnds = vNULL;
2906   gimple *new_stmt = NULL;
2907   stmt_vec_info prev_stmt_info = NULL;
2908   for (unsigned j = 0; j < ncopies; j++)
2909     {
2910       /* Handle uses.  */
2911       if (j == 0)
2912         vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
2913       else
2914         vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2915 
2916       /* Arguments are ready. create the new vector stmt.  */
2917       unsigned i;
2918       tree vop;
2919       FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2920        {
2921            tree tem = make_ssa_name (char_vectype);
2922            new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2923                                                                   char_vectype, vop));
2924            vect_finish_stmt_generation (stmt, new_stmt, gsi);
2925            tree tem2 = make_ssa_name (char_vectype);
2926            new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2927                                                    tem, tem, bswap_vconst);
2928            vect_finish_stmt_generation (stmt, new_stmt, gsi);
2929            tem = make_ssa_name (vectype);
2930            new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2931                                                                   vectype, tem2));
2932            vect_finish_stmt_generation (stmt, new_stmt, gsi);
2933          if (slp_node)
2934            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2935        }
2936 
2937       if (slp_node)
2938         continue;
2939 
2940       if (j == 0)
2941         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2942       else
2943         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2944 
2945       prev_stmt_info = vinfo_for_stmt (new_stmt);
2946     }
2947 
2948   vec_oprnds.release ();
2949   return true;
2950 }
2951 
2952 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2953    integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2954    in a single step.  On success, store the binary pack code in
2955    *CONVERT_CODE.  */
2956 
2957 static bool
simple_integer_narrowing(tree vectype_out,tree vectype_in,tree_code * convert_code)2958 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2959                                 tree_code *convert_code)
2960 {
2961   if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2962       || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2963     return false;
2964 
2965   tree_code code;
2966   int multi_step_cvt = 0;
2967   auto_vec <tree, 8> interm_types;
2968   if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2969                                                   &code, &multi_step_cvt,
2970                                                   &interm_types)
2971       || multi_step_cvt)
2972     return false;
2973 
2974   *convert_code = code;
2975   return true;
2976 }
2977 
2978 /* Function vectorizable_call.
2979 
2980    Check if GS performs a function call that can be vectorized.
2981    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2982    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2983    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
2984 
2985 static bool
vectorizable_call(gimple * gs,gimple_stmt_iterator * gsi,gimple ** vec_stmt,slp_tree slp_node)2986 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2987                        slp_tree slp_node)
2988 {
2989   gcall *stmt;
2990   tree vec_dest;
2991   tree scalar_dest;
2992   tree op, type;
2993   tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2994   stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2995   tree vectype_out, vectype_in;
2996   poly_uint64 nunits_in;
2997   poly_uint64 nunits_out;
2998   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2999   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3000   vec_info *vinfo = stmt_info->vinfo;
3001   tree fndecl, new_temp, rhs_type;
3002   gimple *def_stmt;
3003   enum vect_def_type dt[3]
3004     = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
3005   int ndts = 3;
3006   gimple *new_stmt = NULL;
3007   int ncopies, j;
3008   vec<tree> vargs = vNULL;
3009   enum { NARROW, NONE, WIDEN } modifier;
3010   size_t i, nargs;
3011   tree lhs;
3012 
3013   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3014     return false;
3015 
3016   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3017       && ! vec_stmt)
3018     return false;
3019 
3020   /* Is GS a vectorizable call?   */
3021   stmt = dyn_cast <gcall *> (gs);
3022   if (!stmt)
3023     return false;
3024 
3025   if (gimple_call_internal_p (stmt)
3026       && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3027             || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3028     /* Handled by vectorizable_load and vectorizable_store.  */
3029     return false;
3030 
3031   if (gimple_call_lhs (stmt) == NULL_TREE
3032       || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3033     return false;
3034 
3035   gcc_checking_assert (!stmt_can_throw_internal (stmt));
3036 
3037   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3038 
3039   /* Process function arguments.  */
3040   rhs_type = NULL_TREE;
3041   vectype_in = NULL_TREE;
3042   nargs = gimple_call_num_args (stmt);
3043 
3044   /* Bail out if the function has more than three arguments, we do not have
3045      interesting builtin functions to vectorize with more than two arguments
3046      except for fma.  No arguments is also not good.  */
3047   if (nargs == 0 || nargs > 3)
3048     return false;
3049 
3050   /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic.  */
3051   if (gimple_call_internal_p (stmt)
3052       && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
3053     {
3054       nargs = 0;
3055       rhs_type = unsigned_type_node;
3056     }
3057 
3058   for (i = 0; i < nargs; i++)
3059     {
3060       tree opvectype;
3061 
3062       op = gimple_call_arg (stmt, i);
3063 
3064       /* We can only handle calls with arguments of the same type.  */
3065       if (rhs_type
3066             && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3067           {
3068             if (dump_enabled_p ())
3069               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3070                              "argument types differ.\n");
3071             return false;
3072           }
3073       if (!rhs_type)
3074           rhs_type = TREE_TYPE (op);
3075 
3076       if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
3077           {
3078             if (dump_enabled_p ())
3079               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3080                              "use not simple.\n");
3081             return false;
3082           }
3083 
3084       if (!vectype_in)
3085           vectype_in = opvectype;
3086       else if (opvectype
3087                  && opvectype != vectype_in)
3088           {
3089             if (dump_enabled_p ())
3090               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3091                              "argument vector types differ.\n");
3092             return false;
3093           }
3094     }
3095   /* If all arguments are external or constant defs use a vector type with
3096      the same size as the output vector type.  */
3097   if (!vectype_in)
3098     vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3099   if (vec_stmt)
3100     gcc_assert (vectype_in);
3101   if (!vectype_in)
3102     {
3103       if (dump_enabled_p ())
3104         {
3105           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3106                            "no vectype for scalar type ");
3107           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3108           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3109         }
3110 
3111       return false;
3112     }
3113 
3114   /* FORNOW */
3115   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3116   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3117   if (known_eq (nunits_in * 2, nunits_out))
3118     modifier = NARROW;
3119   else if (known_eq (nunits_out, nunits_in))
3120     modifier = NONE;
3121   else if (known_eq (nunits_out * 2, nunits_in))
3122     modifier = WIDEN;
3123   else
3124     return false;
3125 
3126   /* We only handle functions that do not read or clobber memory.  */
3127   if (gimple_vuse (stmt))
3128     {
3129       if (dump_enabled_p ())
3130           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3131                                "function reads from or writes to memory.\n");
3132       return false;
3133     }
3134 
3135   /* For now, we only vectorize functions if a target specific builtin
3136      is available.  TODO -- in some cases, it might be profitable to
3137      insert the calls for pieces of the vector, in order to be able
3138      to vectorize other operations in the loop.  */
3139   fndecl = NULL_TREE;
3140   internal_fn ifn = IFN_LAST;
3141   combined_fn cfn = gimple_call_combined_fn (stmt);
3142   tree callee = gimple_call_fndecl (stmt);
3143 
3144   /* First try using an internal function.  */
3145   tree_code convert_code = ERROR_MARK;
3146   if (cfn != CFN_LAST
3147       && (modifier == NONE
3148             || (modifier == NARROW
3149                 && simple_integer_narrowing (vectype_out, vectype_in,
3150                                                      &convert_code))))
3151     ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3152                                                     vectype_in);
3153 
3154   /* If that fails, try asking for a target-specific built-in function.  */
3155   if (ifn == IFN_LAST)
3156     {
3157       if (cfn != CFN_LAST)
3158           fndecl = targetm.vectorize.builtin_vectorized_function
3159             (cfn, vectype_out, vectype_in);
3160       else if (callee)
3161           fndecl = targetm.vectorize.builtin_md_vectorized_function
3162             (callee, vectype_out, vectype_in);
3163     }
3164 
3165   if (ifn == IFN_LAST && !fndecl)
3166     {
3167       if (cfn == CFN_GOMP_SIMD_LANE
3168             && !slp_node
3169             && loop_vinfo
3170             && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3171             && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3172             && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3173                == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3174           {
3175             /* We can handle IFN_GOMP_SIMD_LANE by returning a
3176                { 0, 1, 2, ... vf - 1 } vector.  */
3177             gcc_assert (nargs == 0);
3178           }
3179       else if (modifier == NONE
3180                  && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3181                        || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3182                        || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3183           return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
3184                                            vectype_in, dt);
3185       else
3186           {
3187             if (dump_enabled_p ())
3188               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3189                                    "function is not vectorizable.\n");
3190             return false;
3191           }
3192     }
3193 
3194   if (slp_node)
3195     ncopies = 1;
3196   else if (modifier == NARROW && ifn == IFN_LAST)
3197     ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3198   else
3199     ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3200 
3201   /* Sanity check: make sure that at least one copy of the vectorized stmt
3202      needs to be generated.  */
3203   gcc_assert (ncopies >= 1);
3204 
3205   if (!vec_stmt) /* transformation not required.  */
3206     {
3207       STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3208       if (dump_enabled_p ())
3209         dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
3210                          "\n");
3211       if (!slp_node)
3212           {
3213             vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
3214             if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3215               add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
3216                                  vec_promote_demote, stmt_info, 0, vect_body);
3217           }
3218 
3219       return true;
3220     }
3221 
3222   /* Transform.  */
3223 
3224   if (dump_enabled_p ())
3225     dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3226 
3227   /* Handle def.  */
3228   scalar_dest = gimple_call_lhs (stmt);
3229   vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3230 
3231   prev_stmt_info = NULL;
3232   if (modifier == NONE || ifn != IFN_LAST)
3233     {
3234       tree prev_res = NULL_TREE;
3235       for (j = 0; j < ncopies; ++j)
3236           {
3237             /* Build argument list for the vectorized call.  */
3238             if (j == 0)
3239               vargs.create (nargs);
3240             else
3241               vargs.truncate (0);
3242 
3243             if (slp_node)
3244               {
3245                 auto_vec<vec<tree> > vec_defs (nargs);
3246                 vec<tree> vec_oprnds0;
3247 
3248                 for (i = 0; i < nargs; i++)
3249                     vargs.quick_push (gimple_call_arg (stmt, i));
3250                 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3251                 vec_oprnds0 = vec_defs[0];
3252 
3253                 /* Arguments are ready.  Create the new vector stmt.  */
3254                 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3255                     {
3256                       size_t k;
3257                       for (k = 0; k < nargs; k++)
3258                         {
3259                           vec<tree> vec_oprndsk = vec_defs[k];
3260                           vargs[k] = vec_oprndsk[i];
3261                         }
3262                       if (modifier == NARROW)
3263                         {
3264                           tree half_res = make_ssa_name (vectype_in);
3265                           gcall *call
3266                               = gimple_build_call_internal_vec (ifn, vargs);
3267                           gimple_call_set_lhs (call, half_res);
3268                           gimple_call_set_nothrow (call, true);
3269                           new_stmt = call;
3270                           vect_finish_stmt_generation (stmt, new_stmt, gsi);
3271                           if ((i & 1) == 0)
3272                               {
3273                                 prev_res = half_res;
3274                                 continue;
3275                               }
3276                           new_temp = make_ssa_name (vec_dest);
3277                           new_stmt = gimple_build_assign (new_temp, convert_code,
3278                                                                   prev_res, half_res);
3279                         }
3280                       else
3281                         {
3282                           gcall *call;
3283                           if (ifn != IFN_LAST)
3284                               call = gimple_build_call_internal_vec (ifn, vargs);
3285                           else
3286                               call = gimple_build_call_vec (fndecl, vargs);
3287                           new_temp = make_ssa_name (vec_dest, call);
3288                           gimple_call_set_lhs (call, new_temp);
3289                           gimple_call_set_nothrow (call, true);
3290                           new_stmt = call;
3291                         }
3292                       vect_finish_stmt_generation (stmt, new_stmt, gsi);
3293                       SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3294                     }
3295 
3296                 for (i = 0; i < nargs; i++)
3297                     {
3298                       vec<tree> vec_oprndsi = vec_defs[i];
3299                       vec_oprndsi.release ();
3300                     }
3301                 continue;
3302               }
3303 
3304             for (i = 0; i < nargs; i++)
3305               {
3306                 op = gimple_call_arg (stmt, i);
3307                 if (j == 0)
3308                     vec_oprnd0
3309                       = vect_get_vec_def_for_operand (op, stmt);
3310                 else
3311                     {
3312                       vec_oprnd0 = gimple_call_arg (new_stmt, i);
3313                       vec_oprnd0
3314                     = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3315                     }
3316 
3317                 vargs.quick_push (vec_oprnd0);
3318               }
3319 
3320             if (gimple_call_internal_p (stmt)
3321                 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
3322               {
3323                 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3324                 tree new_var
3325                     = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3326                 gimple *init_stmt = gimple_build_assign (new_var, cst);
3327                 vect_init_vector_1 (stmt, init_stmt, NULL);
3328                 new_temp = make_ssa_name (vec_dest);
3329                 new_stmt = gimple_build_assign (new_temp, new_var);
3330               }
3331             else if (modifier == NARROW)
3332               {
3333                 tree half_res = make_ssa_name (vectype_in);
3334                 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3335                 gimple_call_set_lhs (call, half_res);
3336                 gimple_call_set_nothrow (call, true);
3337                 new_stmt = call;
3338                 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3339                 if ((j & 1) == 0)
3340                     {
3341                       prev_res = half_res;
3342                       continue;
3343                     }
3344                 new_temp = make_ssa_name (vec_dest);
3345                 new_stmt = gimple_build_assign (new_temp, convert_code,
3346                                                         prev_res, half_res);
3347               }
3348             else
3349               {
3350                 gcall *call;
3351                 if (ifn != IFN_LAST)
3352                     call = gimple_build_call_internal_vec (ifn, vargs);
3353                 else
3354                     call = gimple_build_call_vec (fndecl, vargs);
3355                 new_temp = make_ssa_name (vec_dest, new_stmt);
3356                 gimple_call_set_lhs (call, new_temp);
3357                 gimple_call_set_nothrow (call, true);
3358                 new_stmt = call;
3359               }
3360             vect_finish_stmt_generation (stmt, new_stmt, gsi);
3361 
3362             if (j == (modifier == NARROW ? 1 : 0))
3363               STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3364             else
3365               STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3366 
3367             prev_stmt_info = vinfo_for_stmt (new_stmt);
3368           }
3369     }
3370   else if (modifier == NARROW)
3371     {
3372       for (j = 0; j < ncopies; ++j)
3373           {
3374             /* Build argument list for the vectorized call.  */
3375             if (j == 0)
3376               vargs.create (nargs * 2);
3377             else
3378               vargs.truncate (0);
3379 
3380             if (slp_node)
3381               {
3382                 auto_vec<vec<tree> > vec_defs (nargs);
3383                 vec<tree> vec_oprnds0;
3384 
3385                 for (i = 0; i < nargs; i++)
3386                     vargs.quick_push (gimple_call_arg (stmt, i));
3387                 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3388                 vec_oprnds0 = vec_defs[0];
3389 
3390                 /* Arguments are ready.  Create the new vector stmt.  */
3391                 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3392                     {
3393                       size_t k;
3394                       vargs.truncate (0);
3395                       for (k = 0; k < nargs; k++)
3396                         {
3397                           vec<tree> vec_oprndsk = vec_defs[k];
3398                           vargs.quick_push (vec_oprndsk[i]);
3399                           vargs.quick_push (vec_oprndsk[i + 1]);
3400                         }
3401                       gcall *call;
3402                       if (ifn != IFN_LAST)
3403                         call = gimple_build_call_internal_vec (ifn, vargs);
3404                       else
3405                         call = gimple_build_call_vec (fndecl, vargs);
3406                       new_temp = make_ssa_name (vec_dest, call);
3407                       gimple_call_set_lhs (call, new_temp);
3408                       gimple_call_set_nothrow (call, true);
3409                       new_stmt = call;
3410                       vect_finish_stmt_generation (stmt, new_stmt, gsi);
3411                       SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3412                     }
3413 
3414                 for (i = 0; i < nargs; i++)
3415                     {
3416                       vec<tree> vec_oprndsi = vec_defs[i];
3417                       vec_oprndsi.release ();
3418                     }
3419                 continue;
3420               }
3421 
3422             for (i = 0; i < nargs; i++)
3423               {
3424                 op = gimple_call_arg (stmt, i);
3425                 if (j == 0)
3426                     {
3427                       vec_oprnd0
3428                         = vect_get_vec_def_for_operand (op, stmt);
3429                       vec_oprnd1
3430                         = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3431                     }
3432                 else
3433                     {
3434                       vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
3435                       vec_oprnd0
3436                         = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
3437                       vec_oprnd1
3438                         = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3439                     }
3440 
3441                 vargs.quick_push (vec_oprnd0);
3442                 vargs.quick_push (vec_oprnd1);
3443               }
3444 
3445             new_stmt = gimple_build_call_vec (fndecl, vargs);
3446             new_temp = make_ssa_name (vec_dest, new_stmt);
3447             gimple_call_set_lhs (new_stmt, new_temp);
3448             vect_finish_stmt_generation (stmt, new_stmt, gsi);
3449 
3450             if (j == 0)
3451               STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3452             else
3453               STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3454 
3455             prev_stmt_info = vinfo_for_stmt (new_stmt);
3456           }
3457 
3458       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3459     }
3460   else
3461     /* No current target implements this case.  */
3462     return false;
3463 
3464   vargs.release ();
3465 
3466   /* The call in STMT might prevent it from being removed in dce.
3467      We however cannot remove it here, due to the way the ssa name
3468      it defines is mapped to the new definition.  So just replace
3469      rhs of the statement with something harmless.  */
3470 
3471   if (slp_node)
3472     return true;
3473 
3474   type = TREE_TYPE (scalar_dest);
3475   if (is_pattern_stmt_p (stmt_info))
3476     lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3477   else
3478     lhs = gimple_call_lhs (stmt);
3479 
3480   new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3481   set_vinfo_for_stmt (new_stmt, stmt_info);
3482   set_vinfo_for_stmt (stmt, NULL);
3483   STMT_VINFO_STMT (stmt_info) = new_stmt;
3484   gsi_replace (gsi, new_stmt, false);
3485 
3486   return true;
3487 }
3488 
3489 
3490 struct simd_call_arg_info
3491 {
3492   tree vectype;
3493   tree op;
3494   HOST_WIDE_INT linear_step;
3495   enum vect_def_type dt;
3496   unsigned int align;
3497   bool simd_lane_linear;
3498 };
3499 
3500 /* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
3501    is linear within simd lane (but not within whole loop), note it in
3502    *ARGINFO.  */
3503 
3504 static void
vect_simd_lane_linear(tree op,struct loop * loop,struct simd_call_arg_info * arginfo)3505 vect_simd_lane_linear (tree op, struct loop *loop,
3506                            struct simd_call_arg_info *arginfo)
3507 {
3508   gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3509 
3510   if (!is_gimple_assign (def_stmt)
3511       || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3512       || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3513     return;
3514 
3515   tree base = gimple_assign_rhs1 (def_stmt);
3516   HOST_WIDE_INT linear_step = 0;
3517   tree v = gimple_assign_rhs2 (def_stmt);
3518   while (TREE_CODE (v) == SSA_NAME)
3519     {
3520       tree t;
3521       def_stmt = SSA_NAME_DEF_STMT (v);
3522       if (is_gimple_assign (def_stmt))
3523           switch (gimple_assign_rhs_code (def_stmt))
3524             {
3525             case PLUS_EXPR:
3526               t = gimple_assign_rhs2 (def_stmt);
3527               if (linear_step || TREE_CODE (t) != INTEGER_CST)
3528                 return;
3529               base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3530               v = gimple_assign_rhs1 (def_stmt);
3531               continue;
3532             case MULT_EXPR:
3533               t = gimple_assign_rhs2 (def_stmt);
3534               if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3535                 return;
3536               linear_step = tree_to_shwi (t);
3537               v = gimple_assign_rhs1 (def_stmt);
3538               continue;
3539             CASE_CONVERT:
3540               t = gimple_assign_rhs1 (def_stmt);
3541               if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3542                     || (TYPE_PRECISION (TREE_TYPE (v))
3543                         < TYPE_PRECISION (TREE_TYPE (t))))
3544                 return;
3545               if (!linear_step)
3546                 linear_step = 1;
3547               v = t;
3548               continue;
3549             default:
3550               return;
3551             }
3552       else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3553                  && loop->simduid
3554                  && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3555                  && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3556                        == loop->simduid))
3557           {
3558             if (!linear_step)
3559               linear_step = 1;
3560             arginfo->linear_step = linear_step;
3561             arginfo->op = base;
3562             arginfo->simd_lane_linear = true;
3563             return;
3564           }
3565     }
3566 }
3567 
3568 /* Return the number of elements in vector type VECTYPE, which is associated
3569    with a SIMD clone.  At present these vectors always have a constant
3570    length.  */
3571 
3572 static unsigned HOST_WIDE_INT
simd_clone_subparts(tree vectype)3573 simd_clone_subparts (tree vectype)
3574 {
3575   return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3576 }
3577 
3578 /* Function vectorizable_simd_clone_call.
3579 
3580    Check if STMT performs a function call that can be vectorized
3581    by calling a simd clone of the function.
3582    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3583    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3584    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
3585 
3586 static bool
vectorizable_simd_clone_call(gimple * stmt,gimple_stmt_iterator * gsi,gimple ** vec_stmt,slp_tree slp_node)3587 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3588                                     gimple **vec_stmt, slp_tree slp_node)
3589 {
3590   tree vec_dest;
3591   tree scalar_dest;
3592   tree op, type;
3593   tree vec_oprnd0 = NULL_TREE;
3594   stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3595   tree vectype;
3596   unsigned int nunits;
3597   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3598   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3599   vec_info *vinfo = stmt_info->vinfo;
3600   struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3601   tree fndecl, new_temp;
3602   gimple *def_stmt;
3603   gimple *new_stmt = NULL;
3604   int ncopies, j;
3605   auto_vec<simd_call_arg_info> arginfo;
3606   vec<tree> vargs = vNULL;
3607   size_t i, nargs;
3608   tree lhs, rtype, ratype;
3609   vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3610 
3611   /* Is STMT a vectorizable call?   */
3612   if (!is_gimple_call (stmt))
3613     return false;
3614 
3615   fndecl = gimple_call_fndecl (stmt);
3616   if (fndecl == NULL_TREE)
3617     return false;
3618 
3619   struct cgraph_node *node = cgraph_node::get (fndecl);
3620   if (node == NULL || node->simd_clones == NULL)
3621     return false;
3622 
3623   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3624     return false;
3625 
3626   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3627       && ! vec_stmt)
3628     return false;
3629 
3630   if (gimple_call_lhs (stmt)
3631       && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3632     return false;
3633 
3634   gcc_checking_assert (!stmt_can_throw_internal (stmt));
3635 
3636   vectype = STMT_VINFO_VECTYPE (stmt_info);
3637 
3638   if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3639     return false;
3640 
3641   /* FORNOW */
3642   if (slp_node)
3643     return false;
3644 
3645   /* Process function arguments.  */
3646   nargs = gimple_call_num_args (stmt);
3647 
3648   /* Bail out if the function has zero arguments.  */
3649   if (nargs == 0)
3650     return false;
3651 
3652   arginfo.reserve (nargs, true);
3653 
3654   for (i = 0; i < nargs; i++)
3655     {
3656       simd_call_arg_info thisarginfo;
3657       affine_iv iv;
3658 
3659       thisarginfo.linear_step = 0;
3660       thisarginfo.align = 0;
3661       thisarginfo.op = NULL_TREE;
3662       thisarginfo.simd_lane_linear = false;
3663 
3664       op = gimple_call_arg (stmt, i);
3665       if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3666                                      &thisarginfo.vectype)
3667             || thisarginfo.dt == vect_uninitialized_def)
3668           {
3669             if (dump_enabled_p ())
3670               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3671                                    "use not simple.\n");
3672             return false;
3673           }
3674 
3675       if (thisarginfo.dt == vect_constant_def
3676             || thisarginfo.dt == vect_external_def)
3677           gcc_assert (thisarginfo.vectype == NULL_TREE);
3678       else
3679           gcc_assert (thisarginfo.vectype != NULL_TREE);
3680 
3681       /* For linear arguments, the analyze phase should have saved
3682            the base and step in STMT_VINFO_SIMD_CLONE_INFO.  */
3683       if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3684             && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3685           {
3686             gcc_assert (vec_stmt);
3687             thisarginfo.linear_step
3688               = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3689             thisarginfo.op
3690               = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3691             thisarginfo.simd_lane_linear
3692               = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3693                  == boolean_true_node);
3694             /* If loop has been peeled for alignment, we need to adjust it.  */
3695             tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3696             tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3697             if (n1 != n2 && !thisarginfo.simd_lane_linear)
3698               {
3699                 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3700                 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3701                 tree opt = TREE_TYPE (thisarginfo.op);
3702                 bias = fold_convert (TREE_TYPE (step), bias);
3703                 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3704                 thisarginfo.op
3705                     = fold_build2 (POINTER_TYPE_P (opt)
3706                                      ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3707                                      thisarginfo.op, bias);
3708               }
3709           }
3710       else if (!vec_stmt
3711                  && thisarginfo.dt != vect_constant_def
3712                  && thisarginfo.dt != vect_external_def
3713                  && loop_vinfo
3714                  && TREE_CODE (op) == SSA_NAME
3715                  && simple_iv (loop, loop_containing_stmt (stmt), op,
3716                                    &iv, false)
3717                  && tree_fits_shwi_p (iv.step))
3718           {
3719             thisarginfo.linear_step = tree_to_shwi (iv.step);
3720             thisarginfo.op = iv.base;
3721           }
3722       else if ((thisarginfo.dt == vect_constant_def
3723                     || thisarginfo.dt == vect_external_def)
3724                  && POINTER_TYPE_P (TREE_TYPE (op)))
3725           thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3726       /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3727            linear too.  */
3728       if (POINTER_TYPE_P (TREE_TYPE (op))
3729             && !thisarginfo.linear_step
3730             && !vec_stmt
3731             && thisarginfo.dt != vect_constant_def
3732             && thisarginfo.dt != vect_external_def
3733             && loop_vinfo
3734             && !slp_node
3735             && TREE_CODE (op) == SSA_NAME)
3736           vect_simd_lane_linear (op, loop, &thisarginfo);
3737 
3738       arginfo.quick_push (thisarginfo);
3739     }
3740 
3741   unsigned HOST_WIDE_INT vf;
3742   if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3743     {
3744       if (dump_enabled_p ())
3745           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3746                                "not considering SIMD clones; not yet supported"
3747                                " for variable-width vectors.\n");
3748       return false;
3749     }
3750 
3751   unsigned int badness = 0;
3752   struct cgraph_node *bestn = NULL;
3753   if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3754     bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3755   else
3756     for (struct cgraph_node *n = node->simd_clones; n != NULL;
3757            n = n->simdclone->next_clone)
3758       {
3759           unsigned int this_badness = 0;
3760           if (n->simdclone->simdlen > vf
3761               || n->simdclone->nargs != nargs)
3762             continue;
3763           if (n->simdclone->simdlen < vf)
3764             this_badness += (exact_log2 (vf)
3765                                  - exact_log2 (n->simdclone->simdlen)) * 1024;
3766           if (n->simdclone->inbranch)
3767             this_badness += 2048;
3768           int target_badness = targetm.simd_clone.usable (n);
3769           if (target_badness < 0)
3770             continue;
3771           this_badness += target_badness * 512;
3772           /* FORNOW: Have to add code to add the mask argument.  */
3773           if (n->simdclone->inbranch)
3774             continue;
3775           for (i = 0; i < nargs; i++)
3776             {
3777               switch (n->simdclone->args[i].arg_type)
3778                 {
3779                 case SIMD_CLONE_ARG_TYPE_VECTOR:
3780                     if (!useless_type_conversion_p
3781                               (n->simdclone->args[i].orig_type,
3782                                TREE_TYPE (gimple_call_arg (stmt, i))))
3783                       i = -1;
3784                     else if (arginfo[i].dt == vect_constant_def
3785                                || arginfo[i].dt == vect_external_def
3786                                || arginfo[i].linear_step)
3787                       this_badness += 64;
3788                     break;
3789                 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3790                     if (arginfo[i].dt != vect_constant_def
3791                         && arginfo[i].dt != vect_external_def)
3792                       i = -1;
3793                     break;
3794                 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3795                 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3796                     if (arginfo[i].dt == vect_constant_def
3797                         || arginfo[i].dt == vect_external_def
3798                         || (arginfo[i].linear_step
3799                               != n->simdclone->args[i].linear_step))
3800                       i = -1;
3801                     break;
3802                 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3803                 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3804                 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3805                 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3806                 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3807                 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3808                     /* FORNOW */
3809                     i = -1;
3810                     break;
3811                 case SIMD_CLONE_ARG_TYPE_MASK:
3812                     gcc_unreachable ();
3813                 }
3814               if (i == (size_t) -1)
3815                 break;
3816               if (n->simdclone->args[i].alignment > arginfo[i].align)
3817                 {
3818                     i = -1;
3819                     break;
3820                 }
3821               if (arginfo[i].align)
3822                 this_badness += (exact_log2 (arginfo[i].align)
3823                                      - exact_log2 (n->simdclone->args[i].alignment));
3824             }
3825           if (i == (size_t) -1)
3826             continue;
3827           if (bestn == NULL || this_badness < badness)
3828             {
3829               bestn = n;
3830               badness = this_badness;
3831             }
3832       }
3833 
3834   if (bestn == NULL)
3835     return false;
3836 
3837   for (i = 0; i < nargs; i++)
3838     if ((arginfo[i].dt == vect_constant_def
3839            || arginfo[i].dt == vect_external_def)
3840           && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3841       {
3842           arginfo[i].vectype
3843             = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3844                                                                                      i)));
3845           if (arginfo[i].vectype == NULL
3846               || (simd_clone_subparts (arginfo[i].vectype)
3847                     > bestn->simdclone->simdlen))
3848             return false;
3849       }
3850 
3851   fndecl = bestn->decl;
3852   nunits = bestn->simdclone->simdlen;
3853   ncopies = vf / nunits;
3854 
3855   /* If the function isn't const, only allow it in simd loops where user
3856      has asserted that at least nunits consecutive iterations can be
3857      performed using SIMD instructions.  */
3858   if ((loop == NULL || (unsigned) loop->safelen < nunits)
3859       && gimple_vuse (stmt))
3860     return false;
3861 
3862   /* Sanity check: make sure that at least one copy of the vectorized stmt
3863      needs to be generated.  */
3864   gcc_assert (ncopies >= 1);
3865 
3866   if (!vec_stmt) /* transformation not required.  */
3867     {
3868       STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3869       for (i = 0; i < nargs; i++)
3870           if ((bestn->simdclone->args[i].arg_type
3871                == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3872               || (bestn->simdclone->args[i].arg_type
3873                     == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3874             {
3875               STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3876                                                                                           + 1);
3877               STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3878               tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3879                            ? size_type_node : TREE_TYPE (arginfo[i].op);
3880               tree ls = build_int_cst (lst, arginfo[i].linear_step);
3881               STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3882               tree sll = arginfo[i].simd_lane_linear
3883                            ? boolean_true_node : boolean_false_node;
3884               STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3885             }
3886       STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3887       if (dump_enabled_p ())
3888           dump_printf_loc (MSG_NOTE, vect_location,
3889                                "=== vectorizable_simd_clone_call ===\n");
3890 /*      vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3891       return true;
3892     }
3893 
3894   /* Transform.  */
3895 
3896   if (dump_enabled_p ())
3897     dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3898 
3899   /* Handle def.  */
3900   scalar_dest = gimple_call_lhs (stmt);
3901   vec_dest = NULL_TREE;
3902   rtype = NULL_TREE;
3903   ratype = NULL_TREE;
3904   if (scalar_dest)
3905     {
3906       vec_dest = vect_create_destination_var (scalar_dest, vectype);
3907       rtype = TREE_TYPE (TREE_TYPE (fndecl));
3908       if (TREE_CODE (rtype) == ARRAY_TYPE)
3909           {
3910             ratype = rtype;
3911             rtype = TREE_TYPE (ratype);
3912           }
3913     }
3914 
3915   prev_stmt_info = NULL;
3916   for (j = 0; j < ncopies; ++j)
3917     {
3918       /* Build argument list for the vectorized call.  */
3919       if (j == 0)
3920           vargs.create (nargs);
3921       else
3922           vargs.truncate (0);
3923 
3924       for (i = 0; i < nargs; i++)
3925           {
3926             unsigned int k, l, m, o;
3927             tree atype;
3928             op = gimple_call_arg (stmt, i);
3929             switch (bestn->simdclone->args[i].arg_type)
3930               {
3931               case SIMD_CLONE_ARG_TYPE_VECTOR:
3932                 atype = bestn->simdclone->args[i].vector_type;
3933                 o = nunits / simd_clone_subparts (atype);
3934                 for (m = j * o; m < (j + 1) * o; m++)
3935                     {
3936                       if (simd_clone_subparts (atype)
3937                           < simd_clone_subparts (arginfo[i].vectype))
3938                         {
3939                           poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3940                           k = (simd_clone_subparts (arginfo[i].vectype)
3941                                  / simd_clone_subparts (atype));
3942                           gcc_assert ((k & (k - 1)) == 0);
3943                           if (m == 0)
3944                               vec_oprnd0
3945                                 = vect_get_vec_def_for_operand (op, stmt);
3946                           else
3947                               {
3948                                 vec_oprnd0 = arginfo[i].op;
3949                                 if ((m & (k - 1)) == 0)
3950                                   vec_oprnd0
3951                                     = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3952                                                                                 vec_oprnd0);
3953                               }
3954                           arginfo[i].op = vec_oprnd0;
3955                           vec_oprnd0
3956                               = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3957                                           bitsize_int (prec),
3958                                           bitsize_int ((m & (k - 1)) * prec));
3959                           new_stmt
3960                               = gimple_build_assign (make_ssa_name (atype),
3961                                                          vec_oprnd0);
3962                           vect_finish_stmt_generation (stmt, new_stmt, gsi);
3963                           vargs.safe_push (gimple_assign_lhs (new_stmt));
3964                         }
3965                       else
3966                         {
3967                           k = (simd_clone_subparts (atype)
3968                                  / simd_clone_subparts (arginfo[i].vectype));
3969                           gcc_assert ((k & (k - 1)) == 0);
3970                           vec<constructor_elt, va_gc> *ctor_elts;
3971                           if (k != 1)
3972                               vec_alloc (ctor_elts, k);
3973                           else
3974                               ctor_elts = NULL;
3975                           for (l = 0; l < k; l++)
3976                               {
3977                                 if (m == 0 && l == 0)
3978                                   vec_oprnd0
3979                                     = vect_get_vec_def_for_operand (op, stmt);
3980                                 else
3981                                   vec_oprnd0
3982                                     = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3983                                                                                 arginfo[i].op);
3984                                 arginfo[i].op = vec_oprnd0;
3985                                 if (k == 1)
3986                                   break;
3987                                 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3988                                                               vec_oprnd0);
3989                               }
3990                           if (k == 1)
3991                               vargs.safe_push (vec_oprnd0);
3992                           else
3993                               {
3994                                 vec_oprnd0 = build_constructor (atype, ctor_elts);
3995                                 new_stmt
3996                                   = gimple_build_assign (make_ssa_name (atype),
3997                                                                vec_oprnd0);
3998                                 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3999                                 vargs.safe_push (gimple_assign_lhs (new_stmt));
4000                               }
4001                         }
4002                     }
4003                 break;
4004               case SIMD_CLONE_ARG_TYPE_UNIFORM:
4005                 vargs.safe_push (op);
4006                 break;
4007               case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4008               case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4009                 if (j == 0)
4010                     {
4011                       gimple_seq stmts;
4012                       arginfo[i].op
4013                         = force_gimple_operand (arginfo[i].op, &stmts, true,
4014                                                       NULL_TREE);
4015                       if (stmts != NULL)
4016                         {
4017                           basic_block new_bb;
4018                           edge pe = loop_preheader_edge (loop);
4019                           new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4020                           gcc_assert (!new_bb);
4021                         }
4022                       if (arginfo[i].simd_lane_linear)
4023                         {
4024                           vargs.safe_push (arginfo[i].op);
4025                           break;
4026                         }
4027                       tree phi_res = copy_ssa_name (op);
4028                       gphi *new_phi = create_phi_node (phi_res, loop->header);
4029                       set_vinfo_for_stmt (new_phi,
4030                                               new_stmt_vec_info (new_phi, loop_vinfo));
4031                       add_phi_arg (new_phi, arginfo[i].op,
4032                                      loop_preheader_edge (loop), UNKNOWN_LOCATION);
4033                       enum tree_code code
4034                         = POINTER_TYPE_P (TREE_TYPE (op))
4035                           ? POINTER_PLUS_EXPR : PLUS_EXPR;
4036                       tree type = POINTER_TYPE_P (TREE_TYPE (op))
4037                                     ? sizetype : TREE_TYPE (op);
4038                       widest_int cst
4039                         = wi::mul (bestn->simdclone->args[i].linear_step,
4040                                      ncopies * nunits);
4041                       tree tcst = wide_int_to_tree (type, cst);
4042                       tree phi_arg = copy_ssa_name (op);
4043                       new_stmt
4044                         = gimple_build_assign (phi_arg, code, phi_res, tcst);
4045                       gimple_stmt_iterator si = gsi_after_labels (loop->header);
4046                       gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4047                       set_vinfo_for_stmt (new_stmt,
4048                                               new_stmt_vec_info (new_stmt, loop_vinfo));
4049                       add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4050                                      UNKNOWN_LOCATION);
4051                       arginfo[i].op = phi_res;
4052                       vargs.safe_push (phi_res);
4053                     }
4054                 else
4055                     {
4056                       enum tree_code code
4057                         = POINTER_TYPE_P (TREE_TYPE (op))
4058                           ? POINTER_PLUS_EXPR : PLUS_EXPR;
4059                       tree type = POINTER_TYPE_P (TREE_TYPE (op))
4060                                     ? sizetype : TREE_TYPE (op);
4061                       widest_int cst
4062                         = wi::mul (bestn->simdclone->args[i].linear_step,
4063                                      j * nunits);
4064                       tree tcst = wide_int_to_tree (type, cst);
4065                       new_temp = make_ssa_name (TREE_TYPE (op));
4066                       new_stmt = gimple_build_assign (new_temp, code,
4067                                                               arginfo[i].op, tcst);
4068                       vect_finish_stmt_generation (stmt, new_stmt, gsi);
4069                       vargs.safe_push (new_temp);
4070                     }
4071                 break;
4072               case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4073               case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4074               case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4075               case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4076               case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4077               case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4078               default:
4079                 gcc_unreachable ();
4080               }
4081           }
4082 
4083       new_stmt = gimple_build_call_vec (fndecl, vargs);
4084       if (vec_dest)
4085           {
4086             gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4087             if (ratype)
4088               new_temp = create_tmp_var (ratype);
4089             else if (simd_clone_subparts (vectype)
4090                        == simd_clone_subparts (rtype))
4091               new_temp = make_ssa_name (vec_dest, new_stmt);
4092             else
4093               new_temp = make_ssa_name (rtype, new_stmt);
4094             gimple_call_set_lhs (new_stmt, new_temp);
4095           }
4096       vect_finish_stmt_generation (stmt, new_stmt, gsi);
4097 
4098       if (vec_dest)
4099           {
4100             if (simd_clone_subparts (vectype) < nunits)
4101               {
4102                 unsigned int k, l;
4103                 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4104                 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4105                 k = nunits / simd_clone_subparts (vectype);
4106                 gcc_assert ((k & (k - 1)) == 0);
4107                 for (l = 0; l < k; l++)
4108                     {
4109                       tree t;
4110                       if (ratype)
4111                         {
4112                           t = build_fold_addr_expr (new_temp);
4113                           t = build2 (MEM_REF, vectype, t,
4114                                           build_int_cst (TREE_TYPE (t), l * bytes));
4115                         }
4116                       else
4117                         t = build3 (BIT_FIELD_REF, vectype, new_temp,
4118                                         bitsize_int (prec), bitsize_int (l * prec));
4119                       new_stmt
4120                         = gimple_build_assign (make_ssa_name (vectype), t);
4121                       vect_finish_stmt_generation (stmt, new_stmt, gsi);
4122                       if (j == 0 && l == 0)
4123                         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4124                       else
4125                         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4126 
4127                       prev_stmt_info = vinfo_for_stmt (new_stmt);
4128                     }
4129 
4130                 if (ratype)
4131                     {
4132                       tree clobber = build_constructor (ratype, NULL);
4133                       TREE_THIS_VOLATILE (clobber) = 1;
4134                       new_stmt = gimple_build_assign (new_temp, clobber);
4135                       vect_finish_stmt_generation (stmt, new_stmt, gsi);
4136                     }
4137                 continue;
4138               }
4139             else if (simd_clone_subparts (vectype) > nunits)
4140               {
4141                 unsigned int k = (simd_clone_subparts (vectype)
4142                                         / simd_clone_subparts (rtype));
4143                 gcc_assert ((k & (k - 1)) == 0);
4144                 if ((j & (k - 1)) == 0)
4145                     vec_alloc (ret_ctor_elts, k);
4146                 if (ratype)
4147                     {
4148                       unsigned int m, o = nunits / simd_clone_subparts (rtype);
4149                       for (m = 0; m < o; m++)
4150                         {
4151                           tree tem = build4 (ARRAY_REF, rtype, new_temp,
4152                                                    size_int (m), NULL_TREE, NULL_TREE);
4153                           new_stmt
4154                               = gimple_build_assign (make_ssa_name (rtype), tem);
4155                           vect_finish_stmt_generation (stmt, new_stmt, gsi);
4156                           CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4157                                                         gimple_assign_lhs (new_stmt));
4158                         }
4159                       tree clobber = build_constructor (ratype, NULL);
4160                       TREE_THIS_VOLATILE (clobber) = 1;
4161                       new_stmt = gimple_build_assign (new_temp, clobber);
4162                       vect_finish_stmt_generation (stmt, new_stmt, gsi);
4163                     }
4164                 else
4165                     CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4166                 if ((j & (k - 1)) != k - 1)
4167                     continue;
4168                 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4169                 new_stmt
4170                     = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4171                 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4172 
4173                 if ((unsigned) j == k - 1)
4174                     STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4175                 else
4176                     STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4177 
4178                 prev_stmt_info = vinfo_for_stmt (new_stmt);
4179                 continue;
4180               }
4181             else if (ratype)
4182               {
4183                 tree t = build_fold_addr_expr (new_temp);
4184                 t = build2 (MEM_REF, vectype, t,
4185                                 build_int_cst (TREE_TYPE (t), 0));
4186                 new_stmt
4187                     = gimple_build_assign (make_ssa_name (vec_dest), t);
4188                 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4189                 tree clobber = build_constructor (ratype, NULL);
4190                 TREE_THIS_VOLATILE (clobber) = 1;
4191                 vect_finish_stmt_generation (stmt,
4192                                                      gimple_build_assign (new_temp,
4193                                                                                 clobber), gsi);
4194               }
4195           }
4196 
4197       if (j == 0)
4198           STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4199       else
4200           STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4201 
4202       prev_stmt_info = vinfo_for_stmt (new_stmt);
4203     }
4204 
4205   vargs.release ();
4206 
4207   /* The call in STMT might prevent it from being removed in dce.
4208      We however cannot remove it here, due to the way the ssa name
4209      it defines is mapped to the new definition.  So just replace
4210      rhs of the statement with something harmless.  */
4211 
4212   if (slp_node)
4213     return true;
4214 
4215   if (scalar_dest)
4216     {
4217       type = TREE_TYPE (scalar_dest);
4218       if (is_pattern_stmt_p (stmt_info))
4219           lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
4220       else
4221           lhs = gimple_call_lhs (stmt);
4222       new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4223     }
4224   else
4225     new_stmt = gimple_build_nop ();
4226   set_vinfo_for_stmt (new_stmt, stmt_info);
4227   set_vinfo_for_stmt (stmt, NULL);
4228   STMT_VINFO_STMT (stmt_info) = new_stmt;
4229   gsi_replace (gsi, new_stmt, true);
4230   unlink_stmt_vdef (stmt);
4231 
4232   return true;
4233 }
4234 
4235 
4236 /* Function vect_gen_widened_results_half
4237 
4238    Create a vector stmt whose code, type, number of arguments, and result
4239    variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
4240    VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at BSI.
4241    In the case that CODE is a CALL_EXPR, this means that a call to DECL
4242    needs to be created (DECL is a function-decl of a target-builtin).
4243    STMT is the original scalar stmt that we are vectorizing.  */
4244 
4245 static gimple *
vect_gen_widened_results_half(enum tree_code code,tree decl,tree vec_oprnd0,tree vec_oprnd1,int op_type,tree vec_dest,gimple_stmt_iterator * gsi,gimple * stmt)4246 vect_gen_widened_results_half (enum tree_code code,
4247                                      tree decl,
4248                                tree vec_oprnd0, tree vec_oprnd1, int op_type,
4249                                      tree vec_dest, gimple_stmt_iterator *gsi,
4250                                      gimple *stmt)
4251 {
4252   gimple *new_stmt;
4253   tree new_temp;
4254 
4255   /* Generate half of the widened result:  */
4256   if (code == CALL_EXPR)
4257     {
4258       /* Target specific support  */
4259       if (op_type == binary_op)
4260           new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
4261       else
4262           new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
4263       new_temp = make_ssa_name (vec_dest, new_stmt);
4264       gimple_call_set_lhs (new_stmt, new_temp);
4265     }
4266   else
4267     {
4268       /* Generic support */
4269       gcc_assert (op_type == TREE_CODE_LENGTH (code));
4270       if (op_type != binary_op)
4271           vec_oprnd1 = NULL;
4272       new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4273       new_temp = make_ssa_name (vec_dest, new_stmt);
4274       gimple_assign_set_lhs (new_stmt, new_temp);
4275     }
4276   vect_finish_stmt_generation (stmt, new_stmt, gsi);
4277 
4278   return new_stmt;
4279 }
4280 
4281 
4282 /* Get vectorized definitions for loop-based vectorization.  For the first
4283    operand we call vect_get_vec_def_for_operand() (with OPRND containing
4284    scalar operand), and for the rest we get a copy with
4285    vect_get_vec_def_for_stmt_copy() using the previous vector definition
4286    (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4287    The vectors are collected into VEC_OPRNDS.  */
4288 
4289 static void
vect_get_loop_based_defs(tree * oprnd,gimple * stmt,enum vect_def_type dt,vec<tree> * vec_oprnds,int multi_step_cvt)4290 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
4291                                 vec<tree> *vec_oprnds, int multi_step_cvt)
4292 {
4293   tree vec_oprnd;
4294 
4295   /* Get first vector operand.  */
4296   /* All the vector operands except the very first one (that is scalar oprnd)
4297      are stmt copies.  */
4298   if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4299     vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
4300   else
4301     vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
4302 
4303   vec_oprnds->quick_push (vec_oprnd);
4304 
4305   /* Get second vector operand.  */
4306   vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
4307   vec_oprnds->quick_push (vec_oprnd);
4308 
4309   *oprnd = vec_oprnd;
4310 
4311   /* For conversion in multiple steps, continue to get operands
4312      recursively.  */
4313   if (multi_step_cvt)
4314     vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds,  multi_step_cvt - 1);
4315 }
4316 
4317 
4318 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4319    For multi-step conversions store the resulting vectors and call the function
4320    recursively.  */
4321 
4322 static void
vect_create_vectorized_demotion_stmts(vec<tree> * vec_oprnds,int multi_step_cvt,gimple * stmt,vec<tree> vec_dsts,gimple_stmt_iterator * gsi,slp_tree slp_node,enum tree_code code,stmt_vec_info * prev_stmt_info)4323 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4324                                                int multi_step_cvt, gimple *stmt,
4325                                                vec<tree> vec_dsts,
4326                                                gimple_stmt_iterator *gsi,
4327                                                slp_tree slp_node, enum tree_code code,
4328                                                stmt_vec_info *prev_stmt_info)
4329 {
4330   unsigned int i;
4331   tree vop0, vop1, new_tmp, vec_dest;
4332   gimple *new_stmt;
4333   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4334 
4335   vec_dest = vec_dsts.pop ();
4336 
4337   for (i = 0; i < vec_oprnds->length (); i += 2)
4338     {
4339       /* Create demotion operation.  */
4340       vop0 = (*vec_oprnds)[i];
4341       vop1 = (*vec_oprnds)[i + 1];
4342       new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4343       new_tmp = make_ssa_name (vec_dest, new_stmt);
4344       gimple_assign_set_lhs (new_stmt, new_tmp);
4345       vect_finish_stmt_generation (stmt, new_stmt, gsi);
4346 
4347       if (multi_step_cvt)
4348           /* Store the resulting vector for next recursive call.  */
4349           (*vec_oprnds)[i/2] = new_tmp;
4350       else
4351           {
4352             /* This is the last step of the conversion sequence. Store the
4353                vectors in SLP_NODE or in vector info of the scalar statement
4354                (or in STMT_VINFO_RELATED_STMT chain).  */
4355             if (slp_node)
4356               SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4357             else
4358               {
4359                 if (!*prev_stmt_info)
4360                     STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4361                 else
4362                     STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
4363 
4364                 *prev_stmt_info = vinfo_for_stmt (new_stmt);
4365               }
4366           }
4367     }
4368 
4369   /* For multi-step demotion operations we first generate demotion operations
4370      from the source type to the intermediate types, and then combine the
4371      results (stored in VEC_OPRNDS) in demotion operation to the destination
4372      type.  */
4373   if (multi_step_cvt)
4374     {
4375       /* At each level of recursion we have half of the operands we had at the
4376            previous level.  */
4377       vec_oprnds->truncate ((i+1)/2);
4378       vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4379                                                        stmt, vec_dsts, gsi, slp_node,
4380                                                        VEC_PACK_TRUNC_EXPR,
4381                                                        prev_stmt_info);
4382     }
4383 
4384   vec_dsts.quick_push (vec_dest);
4385 }
4386 
4387 
4388 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4389    and VEC_OPRNDS1 (for binary operations).  For multi-step conversions store
4390    the resulting vectors and call the function recursively.  */
4391 
4392 static void
vect_create_vectorized_promotion_stmts(vec<tree> * vec_oprnds0,vec<tree> * vec_oprnds1,gimple * stmt,tree vec_dest,gimple_stmt_iterator * gsi,enum tree_code code1,enum tree_code code2,tree decl1,tree decl2,int op_type)4393 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4394                                                   vec<tree> *vec_oprnds1,
4395                                                   gimple *stmt, tree vec_dest,
4396                                                   gimple_stmt_iterator *gsi,
4397                                                   enum tree_code code1,
4398                                                   enum tree_code code2, tree decl1,
4399                                                   tree decl2, int op_type)
4400 {
4401   int i;
4402   tree vop0, vop1, new_tmp1, new_tmp2;
4403   gimple *new_stmt1, *new_stmt2;
4404   vec<tree> vec_tmp = vNULL;
4405 
4406   vec_tmp.create (vec_oprnds0->length () * 2);
4407   FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4408     {
4409       if (op_type == binary_op)
4410           vop1 = (*vec_oprnds1)[i];
4411       else
4412           vop1 = NULL_TREE;
4413 
4414       /* Generate the two halves of promotion operation.  */
4415       new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4416                                                              op_type, vec_dest, gsi, stmt);
4417       new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4418                                                              op_type, vec_dest, gsi, stmt);
4419       if (is_gimple_call (new_stmt1))
4420           {
4421             new_tmp1 = gimple_call_lhs (new_stmt1);
4422             new_tmp2 = gimple_call_lhs (new_stmt2);
4423           }
4424       else
4425           {
4426             new_tmp1 = gimple_assign_lhs (new_stmt1);
4427             new_tmp2 = gimple_assign_lhs (new_stmt2);
4428           }
4429 
4430       /* Store the results for the next step.  */
4431       vec_tmp.quick_push (new_tmp1);
4432       vec_tmp.quick_push (new_tmp2);
4433     }
4434 
4435   vec_oprnds0->release ();
4436   *vec_oprnds0 = vec_tmp;
4437 }
4438 
4439 
4440 /* Check if STMT performs a conversion operation, that can be vectorized.
4441    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4442    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4443    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
4444 
4445 static bool
vectorizable_conversion(gimple * stmt,gimple_stmt_iterator * gsi,gimple ** vec_stmt,slp_tree slp_node)4446 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4447                                gimple **vec_stmt, slp_tree slp_node)
4448 {
4449   tree vec_dest;
4450   tree scalar_dest;
4451   tree op0, op1 = NULL_TREE;
4452   tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4453   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4454   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4455   enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4456   enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4457   tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4458   tree new_temp;
4459   gimple *def_stmt;
4460   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4461   int ndts = 2;
4462   gimple *new_stmt = NULL;
4463   stmt_vec_info prev_stmt_info;
4464   poly_uint64 nunits_in;
4465   poly_uint64 nunits_out;
4466   tree vectype_out, vectype_in;
4467   int ncopies, i, j;
4468   tree lhs_type, rhs_type;
4469   enum { NARROW, NONE, WIDEN } modifier;
4470   vec<tree> vec_oprnds0 = vNULL;
4471   vec<tree> vec_oprnds1 = vNULL;
4472   tree vop0;
4473   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4474   vec_info *vinfo = stmt_info->vinfo;
4475   int multi_step_cvt = 0;
4476   vec<tree> interm_types = vNULL;
4477   tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4478   int op_type;
4479   unsigned short fltsz;
4480 
4481   /* Is STMT a vectorizable conversion?   */
4482 
4483   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4484     return false;
4485 
4486   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4487       && ! vec_stmt)
4488     return false;
4489 
4490   if (!is_gimple_assign (stmt))
4491     return false;
4492 
4493   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4494     return false;
4495 
4496   code = gimple_assign_rhs_code (stmt);
4497   if (!CONVERT_EXPR_CODE_P (code)
4498       && code != FIX_TRUNC_EXPR
4499       && code != FLOAT_EXPR
4500       && code != WIDEN_MULT_EXPR
4501       && code != WIDEN_LSHIFT_EXPR)
4502     return false;
4503 
4504   op_type = TREE_CODE_LENGTH (code);
4505 
4506   /* Check types of lhs and rhs.  */
4507   scalar_dest = gimple_assign_lhs (stmt);
4508   lhs_type = TREE_TYPE (scalar_dest);
4509   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4510 
4511   op0 = gimple_assign_rhs1 (stmt);
4512   rhs_type = TREE_TYPE (op0);
4513 
4514   if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4515       && !((INTEGRAL_TYPE_P (lhs_type)
4516               && INTEGRAL_TYPE_P (rhs_type))
4517              || (SCALAR_FLOAT_TYPE_P (lhs_type)
4518                  && SCALAR_FLOAT_TYPE_P (rhs_type))))
4519     return false;
4520 
4521   if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4522       && ((INTEGRAL_TYPE_P (lhs_type)
4523              && !type_has_mode_precision_p (lhs_type))
4524             || (INTEGRAL_TYPE_P (rhs_type)
4525                 && !type_has_mode_precision_p (rhs_type))))
4526     {
4527       if (dump_enabled_p ())
4528           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4529                          "type conversion to/from bit-precision unsupported."
4530                          "\n");
4531       return false;
4532     }
4533 
4534   /* Check the operands of the operation.  */
4535   if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
4536     {
4537       if (dump_enabled_p ())
4538           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4539                          "use not simple.\n");
4540       return false;
4541     }
4542   if (op_type == binary_op)
4543     {
4544       bool ok;
4545 
4546       op1 = gimple_assign_rhs2 (stmt);
4547       gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4548       /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4549            OP1.  */
4550       if (CONSTANT_CLASS_P (op0))
4551           ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4552       else
4553           ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4554 
4555       if (!ok)
4556           {
4557           if (dump_enabled_p ())
4558             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4559                              "use not simple.\n");
4560             return false;
4561           }
4562     }
4563 
4564   /* If op0 is an external or constant defs use a vector type of
4565      the same size as the output vector type.  */
4566   if (!vectype_in)
4567     vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4568   if (vec_stmt)
4569     gcc_assert (vectype_in);
4570   if (!vectype_in)
4571     {
4572       if (dump_enabled_p ())
4573           {
4574             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4575                            "no vectype for scalar type ");
4576             dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4577           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4578           }
4579 
4580       return false;
4581     }
4582 
4583   if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4584       && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4585     {
4586       if (dump_enabled_p ())
4587           {
4588             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4589                            "can't convert between boolean and non "
4590                                  "boolean vectors");
4591             dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4592           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4593           }
4594 
4595       return false;
4596     }
4597 
4598   nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4599   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4600   if (known_eq (nunits_out, nunits_in))
4601     modifier = NONE;
4602   else if (multiple_p (nunits_out, nunits_in))
4603     modifier = NARROW;
4604   else
4605     {
4606       gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4607       modifier = WIDEN;
4608     }
4609 
4610   /* Multiple types in SLP are handled by creating the appropriate number of
4611      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
4612      case of SLP.  */
4613   if (slp_node)
4614     ncopies = 1;
4615   else if (modifier == NARROW)
4616     ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4617   else
4618     ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4619 
4620   /* Sanity check: make sure that at least one copy of the vectorized stmt
4621      needs to be generated.  */
4622   gcc_assert (ncopies >= 1);
4623 
4624   bool found_mode = false;
4625   scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4626   scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4627   opt_scalar_mode rhs_mode_iter;
4628 
4629   /* Supportable by target?  */
4630   switch (modifier)
4631     {
4632     case NONE:
4633       if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4634           return false;
4635       if (supportable_convert_operation (code, vectype_out, vectype_in,
4636                                                    &decl1, &code1))
4637           break;
4638       /* FALLTHRU */
4639     unsupported:
4640       if (dump_enabled_p ())
4641           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4642                          "conversion not supported by target.\n");
4643       return false;
4644 
4645     case WIDEN:
4646       if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
4647                                                     &code1, &code2, &multi_step_cvt,
4648                                                     &interm_types))
4649           {
4650             /* Binary widening operation can only be supported directly by the
4651                architecture.  */
4652             gcc_assert (!(multi_step_cvt && op_type == binary_op));
4653             break;
4654           }
4655 
4656       if (code != FLOAT_EXPR
4657             || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4658           goto unsupported;
4659 
4660       fltsz = GET_MODE_SIZE (lhs_mode);
4661       FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4662           {
4663             rhs_mode = rhs_mode_iter.require ();
4664             if (GET_MODE_SIZE (rhs_mode) > fltsz)
4665               break;
4666 
4667             cvt_type
4668               = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4669             cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4670             if (cvt_type == NULL_TREE)
4671               goto unsupported;
4672 
4673             if (GET_MODE_SIZE (rhs_mode) == fltsz)
4674               {
4675                 if (!supportable_convert_operation (code, vectype_out,
4676                                                               cvt_type, &decl1, &codecvt1))
4677                     goto unsupported;
4678               }
4679             else if (!supportable_widening_operation (code, stmt, vectype_out,
4680                                                                 cvt_type, &codecvt1,
4681                                                                 &codecvt2, &multi_step_cvt,
4682                                                                 &interm_types))
4683               continue;
4684             else
4685               gcc_assert (multi_step_cvt == 0);
4686 
4687             if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
4688                                                         vectype_in, &code1, &code2,
4689                                                         &multi_step_cvt, &interm_types))
4690               {
4691                 found_mode = true;
4692                 break;
4693               }
4694           }
4695 
4696       if (!found_mode)
4697           goto unsupported;
4698 
4699       if (GET_MODE_SIZE (rhs_mode) == fltsz)
4700           codecvt2 = ERROR_MARK;
4701       else
4702           {
4703             multi_step_cvt++;
4704             interm_types.safe_push (cvt_type);
4705             cvt_type = NULL_TREE;
4706           }
4707       break;
4708 
4709     case NARROW:
4710       gcc_assert (op_type == unary_op);
4711       if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4712                                                      &code1, &multi_step_cvt,
4713                                                      &interm_types))
4714           break;
4715 
4716       if (code != FIX_TRUNC_EXPR
4717             || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4718           goto unsupported;
4719 
4720       cvt_type
4721           = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4722       cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4723       if (cvt_type == NULL_TREE)
4724           goto unsupported;
4725       if (!supportable_convert_operation (code, cvt_type, vectype_in,
4726                                                     &decl1, &codecvt1))
4727           goto unsupported;
4728       if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4729                                                      &code1, &multi_step_cvt,
4730                                                      &interm_types))
4731           break;
4732       goto unsupported;
4733 
4734     default:
4735       gcc_unreachable ();
4736     }
4737 
4738   if (!vec_stmt)              /* transformation not required.  */
4739     {
4740       if (dump_enabled_p ())
4741           dump_printf_loc (MSG_NOTE, vect_location,
4742                          "=== vectorizable_conversion ===\n");
4743       if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4744         {
4745             STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4746             if (!slp_node)
4747               vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4748           }
4749       else if (modifier == NARROW)
4750           {
4751             STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4752             if (!slp_node)
4753               vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4754           }
4755       else
4756           {
4757             STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4758             if (!slp_node)
4759               vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4760           }
4761       interm_types.release ();
4762       return true;
4763     }
4764 
4765   /* Transform.  */
4766   if (dump_enabled_p ())
4767     dump_printf_loc (MSG_NOTE, vect_location,
4768                      "transform conversion. ncopies = %d.\n", ncopies);
4769 
4770   if (op_type == binary_op)
4771     {
4772       if (CONSTANT_CLASS_P (op0))
4773           op0 = fold_convert (TREE_TYPE (op1), op0);
4774       else if (CONSTANT_CLASS_P (op1))
4775           op1 = fold_convert (TREE_TYPE (op0), op1);
4776     }
4777 
4778   /* In case of multi-step conversion, we first generate conversion operations
4779      to the intermediate types, and then from that types to the final one.
4780      We create vector destinations for the intermediate type (TYPES) received
4781      from supportable_*_operation, and store them in the correct order
4782      for future use in vect_create_vectorized_*_stmts ().  */
4783   auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4784   vec_dest = vect_create_destination_var (scalar_dest,
4785                                                     (cvt_type && modifier == WIDEN)
4786                                                     ? cvt_type : vectype_out);
4787   vec_dsts.quick_push (vec_dest);
4788 
4789   if (multi_step_cvt)
4790     {
4791       for (i = interm_types.length () - 1;
4792              interm_types.iterate (i, &intermediate_type); i--)
4793           {
4794             vec_dest = vect_create_destination_var (scalar_dest,
4795                                                               intermediate_type);
4796             vec_dsts.quick_push (vec_dest);
4797           }
4798     }
4799 
4800   if (cvt_type)
4801     vec_dest = vect_create_destination_var (scalar_dest,
4802                                                       modifier == WIDEN
4803                                                       ? vectype_out : cvt_type);
4804 
4805   if (!slp_node)
4806     {
4807       if (modifier == WIDEN)
4808           {
4809             vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4810             if (op_type == binary_op)
4811               vec_oprnds1.create (1);
4812           }
4813       else if (modifier == NARROW)
4814           vec_oprnds0.create (
4815                        2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4816     }
4817   else if (code == WIDEN_LSHIFT_EXPR)
4818     vec_oprnds1.create (slp_node->vec_stmts_size);
4819 
4820   last_oprnd = op0;
4821   prev_stmt_info = NULL;
4822   switch (modifier)
4823     {
4824     case NONE:
4825       for (j = 0; j < ncopies; j++)
4826           {
4827             if (j == 0)
4828               vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
4829             else
4830               vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4831 
4832             FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4833               {
4834                 /* Arguments are ready, create the new vector stmt.  */
4835                 if (code1 == CALL_EXPR)
4836                     {
4837                       new_stmt = gimple_build_call (decl1, 1, vop0);
4838                       new_temp = make_ssa_name (vec_dest, new_stmt);
4839                       gimple_call_set_lhs (new_stmt, new_temp);
4840                     }
4841                 else
4842                     {
4843                       gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4844                       new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4845                       new_temp = make_ssa_name (vec_dest, new_stmt);
4846                       gimple_assign_set_lhs (new_stmt, new_temp);
4847                     }
4848 
4849                 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4850                 if (slp_node)
4851                     SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4852                 else
4853                     {
4854                       if (!prev_stmt_info)
4855                         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4856                       else
4857                         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4858                       prev_stmt_info = vinfo_for_stmt (new_stmt);
4859                     }
4860               }
4861           }
4862       break;
4863 
4864     case WIDEN:
4865       /* In case the vectorization factor (VF) is bigger than the number
4866            of elements that we can fit in a vectype (nunits), we have to
4867            generate more than one vector stmt - i.e - we need to "unroll"
4868            the vector stmt by a factor VF/nunits.  */
4869       for (j = 0; j < ncopies; j++)
4870           {
4871             /* Handle uses.  */
4872             if (j == 0)
4873               {
4874                 if (slp_node)
4875                     {
4876                       if (code == WIDEN_LSHIFT_EXPR)
4877                         {
4878                           unsigned int k;
4879 
4880                           vec_oprnd1 = op1;
4881                           /* Store vec_oprnd1 for every vector stmt to be created
4882                                for SLP_NODE.  We check during the analysis that all
4883                                the shift arguments are the same.  */
4884                           for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4885                               vec_oprnds1.quick_push (vec_oprnd1);
4886 
4887                           vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4888                                                    slp_node);
4889                         }
4890                       else
4891                         vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4892                                                &vec_oprnds1, slp_node);
4893                     }
4894                 else
4895                     {
4896                       vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4897                       vec_oprnds0.quick_push (vec_oprnd0);
4898                       if (op_type == binary_op)
4899                         {
4900                           if (code == WIDEN_LSHIFT_EXPR)
4901                               vec_oprnd1 = op1;
4902                           else
4903                               vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4904                           vec_oprnds1.quick_push (vec_oprnd1);
4905                         }
4906                     }
4907               }
4908             else
4909               {
4910                 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4911                 vec_oprnds0.truncate (0);
4912                 vec_oprnds0.quick_push (vec_oprnd0);
4913                 if (op_type == binary_op)
4914                     {
4915                       if (code == WIDEN_LSHIFT_EXPR)
4916                         vec_oprnd1 = op1;
4917                       else
4918                         vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4919                                                                                  vec_oprnd1);
4920                       vec_oprnds1.truncate (0);
4921                       vec_oprnds1.quick_push (vec_oprnd1);
4922                     }
4923               }
4924 
4925             /* Arguments are ready.  Create the new vector stmts.  */
4926             for (i = multi_step_cvt; i >= 0; i--)
4927               {
4928                 tree this_dest = vec_dsts[i];
4929                 enum tree_code c1 = code1, c2 = code2;
4930                 if (i == 0 && codecvt2 != ERROR_MARK)
4931                     {
4932                       c1 = codecvt1;
4933                       c2 = codecvt2;
4934                     }
4935                 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4936                                                                   &vec_oprnds1,
4937                                                                   stmt, this_dest, gsi,
4938                                                                   c1, c2, decl1, decl2,
4939                                                                   op_type);
4940               }
4941 
4942             FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4943               {
4944                 if (cvt_type)
4945                     {
4946                       if (codecvt1 == CALL_EXPR)
4947                         {
4948                           new_stmt = gimple_build_call (decl1, 1, vop0);
4949                           new_temp = make_ssa_name (vec_dest, new_stmt);
4950                           gimple_call_set_lhs (new_stmt, new_temp);
4951                         }
4952                       else
4953                         {
4954                           gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4955                           new_temp = make_ssa_name (vec_dest);
4956                           new_stmt = gimple_build_assign (new_temp, codecvt1,
4957                                                                   vop0);
4958                         }
4959 
4960                       vect_finish_stmt_generation (stmt, new_stmt, gsi);
4961                     }
4962                 else
4963                     new_stmt = SSA_NAME_DEF_STMT (vop0);
4964 
4965                 if (slp_node)
4966                     SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4967                 else
4968                     {
4969                       if (!prev_stmt_info)
4970                         STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4971                       else
4972                         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4973                       prev_stmt_info = vinfo_for_stmt (new_stmt);
4974                     }
4975               }
4976           }
4977 
4978       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4979       break;
4980 
4981     case NARROW:
4982       /* In case the vectorization factor (VF) is bigger than the number
4983            of elements that we can fit in a vectype (nunits), we have to
4984            generate more than one vector stmt - i.e - we need to "unroll"
4985            the vector stmt by a factor VF/nunits.  */
4986       for (j = 0; j < ncopies; j++)
4987           {
4988             /* Handle uses.  */
4989             if (slp_node)
4990               vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4991                                      slp_node);
4992             else
4993               {
4994                 vec_oprnds0.truncate (0);
4995                 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4996                                                   vect_pow2 (multi_step_cvt) - 1);
4997               }
4998 
4999             /* Arguments are ready.  Create the new vector stmts.  */
5000             if (cvt_type)
5001               FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5002                 {
5003                     if (codecvt1 == CALL_EXPR)
5004                       {
5005                         new_stmt = gimple_build_call (decl1, 1, vop0);
5006                         new_temp = make_ssa_name (vec_dest, new_stmt);
5007                         gimple_call_set_lhs (new_stmt, new_temp);
5008                       }
5009                     else
5010                       {
5011                         gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5012                         new_temp = make_ssa_name (vec_dest);
5013                         new_stmt = gimple_build_assign (new_temp, codecvt1,
5014                                                                 vop0);
5015                       }
5016 
5017                     vect_finish_stmt_generation (stmt, new_stmt, gsi);
5018                     vec_oprnds0[i] = new_temp;
5019                 }
5020 
5021             vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
5022                                                              stmt, vec_dsts, gsi,
5023                                                              slp_node, code1,
5024                                                              &prev_stmt_info);
5025           }
5026 
5027       *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5028       break;
5029     }
5030 
5031   vec_oprnds0.release ();
5032   vec_oprnds1.release ();
5033   interm_types.release ();
5034 
5035   return true;
5036 }
5037 
5038 
/* Function vectorizable_assignment.

   Check if STMT performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
5045 
5046 static bool
vectorizable_assignment(gimple * stmt,gimple_stmt_iterator * gsi,gimple ** vec_stmt,slp_tree slp_node)5047 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
5048                                gimple **vec_stmt, slp_tree slp_node)
5049 {
5050   tree vec_dest;
5051   tree scalar_dest;
5052   tree op;
5053   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5054   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5055   tree new_temp;
5056   gimple *def_stmt;
5057   enum vect_def_type dt[1] = {vect_unknown_def_type};
5058   int ndts = 1;
5059   int ncopies;
5060   int i, j;
5061   vec<tree> vec_oprnds = vNULL;
5062   tree vop;
5063   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5064   vec_info *vinfo = stmt_info->vinfo;
5065   gimple *new_stmt = NULL;
5066   stmt_vec_info prev_stmt_info = NULL;
5067   enum tree_code code;
5068   tree vectype_in;
5069 
5070   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5071     return false;
5072 
5073   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5074       && ! vec_stmt)
5075     return false;
5076 
5077   /* Is vectorizable assignment?  */
5078   if (!is_gimple_assign (stmt))
5079     return false;
5080 
5081   scalar_dest = gimple_assign_lhs (stmt);
5082   if (TREE_CODE (scalar_dest) != SSA_NAME)
5083     return false;
5084 
5085   code = gimple_assign_rhs_code (stmt);
5086   if (gimple_assign_single_p (stmt)
5087       || code == PAREN_EXPR
5088       || CONVERT_EXPR_CODE_P (code))
5089     op = gimple_assign_rhs1 (stmt);
5090   else
5091     return false;
5092 
5093   if (code == VIEW_CONVERT_EXPR)
5094     op = TREE_OPERAND (op, 0);
5095 
5096   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5097   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5098 
5099   /* Multiple types in SLP are handled by creating the appropriate number of
5100      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
5101      case of SLP.  */
5102   if (slp_node)
5103     ncopies = 1;
5104   else
5105     ncopies = vect_get_num_copies (loop_vinfo, vectype);
5106 
5107   gcc_assert (ncopies >= 1);
5108 
5109   if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
5110     {
5111       if (dump_enabled_p ())
5112         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5113                          "use not simple.\n");
5114       return false;
5115     }
5116 
5117   /* We can handle NOP_EXPR conversions that do not change the number
5118      of elements or the vector size.  */
5119   if ((CONVERT_EXPR_CODE_P (code)
5120        || code == VIEW_CONVERT_EXPR)
5121       && (!vectype_in
5122             || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5123             || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5124                            GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5125     return false;
5126 
5127   /* We do not handle bit-precision changes.  */
5128   if ((CONVERT_EXPR_CODE_P (code)
5129        || code == VIEW_CONVERT_EXPR)
5130       && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5131       && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5132             || !type_has_mode_precision_p (TREE_TYPE (op)))
5133       /* But a conversion that does not change the bit-pattern is ok.  */
5134       && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5135               > TYPE_PRECISION (TREE_TYPE (op)))
5136              && TYPE_UNSIGNED (TREE_TYPE (op)))
5137       /* Conversion between boolean types of different sizes is
5138            a simple assignment in case their vectypes are same
5139            boolean vectors.  */
5140       && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5141             || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5142     {
5143       if (dump_enabled_p ())
5144         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5145                          "type conversion to/from bit-precision "
5146                          "unsupported.\n");
5147       return false;
5148     }
5149 
5150   if (!vec_stmt) /* transformation not required.  */
5151     {
5152       STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5153       if (dump_enabled_p ())
5154         dump_printf_loc (MSG_NOTE, vect_location,
5155                          "=== vectorizable_assignment ===\n");
5156       if (!slp_node)
5157           vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5158       return true;
5159     }
5160 
5161   /* Transform.  */
5162   if (dump_enabled_p ())
5163     dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5164 
5165   /* Handle def.  */
5166   vec_dest = vect_create_destination_var (scalar_dest, vectype);
5167 
5168   /* Handle use.  */
5169   for (j = 0; j < ncopies; j++)
5170     {
5171       /* Handle uses.  */
5172       if (j == 0)
5173         vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
5174       else
5175         vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
5176 
5177       /* Arguments are ready. create the new vector stmt.  */
5178       FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5179        {
5180            if (CONVERT_EXPR_CODE_P (code)
5181                || code == VIEW_CONVERT_EXPR)
5182              vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5183          new_stmt = gimple_build_assign (vec_dest, vop);
5184          new_temp = make_ssa_name (vec_dest, new_stmt);
5185          gimple_assign_set_lhs (new_stmt, new_temp);
5186          vect_finish_stmt_generation (stmt, new_stmt, gsi);
5187          if (slp_node)
5188            SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5189        }
5190 
5191       if (slp_node)
5192         continue;
5193 
5194       if (j == 0)
5195         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5196       else
5197         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5198 
5199       prev_stmt_info = vinfo_for_stmt (new_stmt);
5200     }
5201 
5202   vec_oprnds.release ();
5203   return true;
5204 }
5205 
5206 
5207 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5208    either as shift by a scalar or by a vector.  */
5209 
5210 bool
vect_supportable_shift(enum tree_code code,tree scalar_type)5211 vect_supportable_shift (enum tree_code code, tree scalar_type)
5212 {
5213 
5214   machine_mode vec_mode;
5215   optab optab;
5216   int icode;
5217   tree vectype;
5218 
5219   vectype = get_vectype_for_scalar_type (scalar_type);
5220   if (!vectype)
5221     return false;
5222 
5223   optab = optab_for_tree_code (code, vectype, optab_scalar);
5224   if (!optab
5225       || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5226     {
5227       optab = optab_for_tree_code (code, vectype, optab_vector);
5228       if (!optab
5229           || (optab_handler (optab, TYPE_MODE (vectype))
5230                       == CODE_FOR_nothing))
5231         return false;
5232     }
5233 
5234   vec_mode = TYPE_MODE (vectype);
5235   icode = (int) optab_handler (optab, vec_mode);
5236   if (icode == CODE_FOR_nothing)
5237     return false;
5238 
5239   return true;
5240 }
5241 
5242 
5243 /* Function vectorizable_shift.

        Check if STMT performs a shift or rotate operation (LSHIFT_EXPR,
        RSHIFT_EXPR, LROTATE_EXPR or RROTATE_EXPR) that can be vectorized.

        If VEC_STMT is null only the analysis is done: on success the stmt
        info is tagged with shift_vec_info_type and, when SLP_NODE is null,
        the cost is modelled with vect_model_simple_cost.

        If VEC_STMT is also passed, vectorize the STMT: create the vectorized
        stmt(s) and insert them through GSI.  With SLP_NODE set every new
        stmt is pushed onto SLP_TREE_VEC_STMTS (SLP_NODE); otherwise the
        first copy is stored in VEC_STMT and in STMT_VINFO_VEC_STMT, and
        later copies are chained through STMT_VINFO_RELATED_STMT.

        Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
5249 
5250 static bool
vectorizable_shift(gimple * stmt,gimple_stmt_iterator * gsi,gimple ** vec_stmt,slp_tree slp_node)5251 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
5252                     gimple **vec_stmt, slp_tree slp_node)
5253 {
5254   tree vec_dest;
5255   tree scalar_dest;
5256   tree op0, op1 = NULL;
5257   tree vec_oprnd1 = NULL_TREE;
5258   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5259   tree vectype;
5260   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5261   enum tree_code code;
5262   machine_mode vec_mode;
5263   tree new_temp;
5264   optab optab;
5265   int icode;
5266   machine_mode optab_op2_mode;
5267   gimple *def_stmt;
5268   enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5269   int ndts = 2;
5270   gimple *new_stmt = NULL;
5271   stmt_vec_info prev_stmt_info;
5272   poly_uint64 nunits_in;
5273   poly_uint64 nunits_out;
5274   tree vectype_out;
5275   tree op1_vectype;
5276   int ncopies;
5277   int j, i;
5278   vec<tree> vec_oprnds0 = vNULL;
5279   vec<tree> vec_oprnds1 = vNULL;
5280   tree vop0, vop1;
5281   unsigned int k;
5282   bool scalar_shift_arg = true;
5283   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5284   vec_info *vinfo = stmt_info->vinfo;
5285 
       /* Stmts not marked relevant are skipped, unless the stmt has a
          BB vinfo.  */
5286   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5287     return false;
5288 
       /* During analysis (no VEC_STMT) only vect_internal_def stmts are
          accepted.  */
5289   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5290       && ! vec_stmt)
5291     return false;
5292 
5293   /* Is STMT a shift/rotate assignment to an SSA name?   */
5294   if (!is_gimple_assign (stmt))
5295     return false;
5296 
5297   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5298     return false;
5299 
5300   code = gimple_assign_rhs_code (stmt);
5301 
5302   if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5303       || code == RROTATE_EXPR))
5304     return false;
5305 
5306   scalar_dest = gimple_assign_lhs (stmt);
5307   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5308   if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5309     {
5310       if (dump_enabled_p ())
5311         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5312                          "bit-precision shifts not supported.\n");
5313       return false;
5314     }
5315 
       /* Operand 0 is the value being shifted; its def type goes to dt[0]
          and its vector type, if any, to VECTYPE.  */
5316   op0 = gimple_assign_rhs1 (stmt);
5317   if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5318     {
5319       if (dump_enabled_p ())
5320         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5321                          "use not simple.\n");
5322       return false;
5323     }
5324   /* If op0 is an external or constant def use a vector type with
5325      the same size as the output vector type.  */
5326   if (!vectype)
5327     vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
       /* At transform time a missing vector type is an internal error
          rather than a reason to reject the stmt.  */
5328   if (vec_stmt)
5329     gcc_assert (vectype);
5330   if (!vectype)
5331     {
5332       if (dump_enabled_p ())
5333         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5334                          "no vectype for scalar type\n");
5335       return false;
5336     }
5337 
       /* Input and output vectors must have the same number of elements.  */
5338   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5339   nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5340   if (maybe_ne (nunits_out, nunits_in))
5341     return false;
5342 
       /* Operand 1 is the shift/rotate amount; its def type goes to dt[1]
          and its vector type, if any, to OP1_VECTYPE.  */
5343   op1 = gimple_assign_rhs2 (stmt);
5344   if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
5345     {
5346       if (dump_enabled_p ())
5347         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5348                          "use not simple.\n");
5349       return false;
5350     }
5351 
5352   /* Multiple types in SLP are handled by creating the appropriate number of
5353      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
5354      case of SLP.  */
5355   if (slp_node)
5356     ncopies = 1;
5357   else
5358     ncopies = vect_get_num_copies (loop_vinfo, vectype);
5359 
5360   gcc_assert (ncopies >= 1);
5361 
5362   /* Determine whether the shift amount is a vector, or scalar.  If the
5363      shift/rotate amount is a vector, use the vector/vector shift optabs.  */
5364 
       /* Outside SLP a vect_internal_def or vect_induction_def amount always
          takes the vector/vector form.  Constant and external amounts, and
          vect_internal_def amounts under SLP, start out as scalar-shift
          candidates and are demoted by the checks below.  Any other def
          type is rejected.  */
5365   if ((dt[1] == vect_internal_def
5366        || dt[1] == vect_induction_def)
5367       && !slp_node)
5368     scalar_shift_arg = false;
5369   else if (dt[1] == vect_constant_def
5370              || dt[1] == vect_external_def
5371              || dt[1] == vect_internal_def)
5372     {
5373       /* In SLP, need to check whether the shift count is the same,
5374            in loops if it is a constant or invariant, it is always
5375            a scalar shift.  */
5376       if (slp_node)
5377           {
5378             vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5379             gimple *slpstmt;
5380 
5381             FOR_EACH_VEC_ELT (stmts, k, slpstmt)
5382               if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5383                 scalar_shift_arg = false;
5384 
5385             /* For internal SLP defs we have to make sure we see scalar stmts
5386                for all vector elements.
5387                ???  For different vectors we could resort to a different
5388                scalar shift operand but code-generation below simply always
5389                takes the first.  */
5390             if (dt[1] == vect_internal_def
5391                 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),                           stmts.length ()))
5392               scalar_shift_arg = false;
5393           }
5394 
5395       /* If the shift amount is computed by a pattern stmt we cannot
5396          use the scalar amount directly thus give up and use a vector
5397            shift.  */
5398       if (dt[1] == vect_internal_def)
5399           {
5400             gimple *def = SSA_NAME_DEF_STMT (op1);
5401             if (is_pattern_stmt_p (vinfo_for_stmt (def)))
5402               scalar_shift_arg = false;
5403           }
5404     }
5405   else
5406     {
5407       if (dump_enabled_p ())
5408         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5409                          "operand mode requires invariant argument.\n");
5410       return false;
5411     }
5412 
5413   /* Vector shifted by vector.  */
5414   if (!scalar_shift_arg)
5415     {
5416       optab = optab_for_tree_code (code, vectype, optab_vector);
5417       if (dump_enabled_p ())
5418         dump_printf_loc (MSG_NOTE, vect_location,
5419                          "vector/vector shift/rotate found.\n");
5420 
           /* The amount vector must exist and have the same mode as the
              vector being shifted.  */
5421       if (!op1_vectype)
5422           op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5423       if (op1_vectype == NULL_TREE
5424             || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
5425           {
5426             if (dump_enabled_p ())
5427               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5428                              "unusable type for last operand in"
5429                              " vector/vector shift/rotate.\n");
5430             return false;
5431           }
5432     }
5433   /* See if the machine has a vector shifted by scalar insn and if not
5434      then see if it has a vector shifted by vector insn.  */
5435   else
5436     {
5437       optab = optab_for_tree_code (code, vectype, optab_scalar);
5438       if (optab
5439           && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5440         {
5441           if (dump_enabled_p ())
5442             dump_printf_loc (MSG_NOTE, vect_location,
5443                              "vector/scalar shift/rotate found.\n");
5444         }
5445       else
5446         {
5447           optab = optab_for_tree_code (code, vectype, optab_vector);
5448           if (optab
5449                && (optab_handler (optab, TYPE_MODE (vectype))
5450                       != CODE_FOR_nothing))
5451             {
                   /* Only the vector/vector insn exists, so the amount is
                      no longer treated as a scalar shift argument.  */
5452                 scalar_shift_arg = false;
5453 
5454               if (dump_enabled_p ())
5455                 dump_printf_loc (MSG_NOTE, vect_location,
5456                                  "vector/vector shift/rotate found.\n");
5457 
5458               /* Unlike the other binary operators, shifts/rotates have
5459                  the rhs being int, instead of the same type as the lhs,
5460                  so make sure the scalar is the right type if we are
5461                      dealing with vectors of long long/long/short/char.  */
5462               if (dt[1] == vect_constant_def)
5463                 op1 = fold_convert (TREE_TYPE (vectype), op1);
5464                 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5465                                                                TREE_TYPE (op1)))
5466                     {
                           /* Under SLP a non-constant amount whose mode
                              differs from the element mode is rejected.  */
5467                       if (slp_node
5468                           && TYPE_MODE (TREE_TYPE (vectype))
5469                                != TYPE_MODE (TREE_TYPE (op1)))
5470                         {
5471                       if (dump_enabled_p ())
5472                         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5473                                          "unusable type for last operand in"
5474                                          " vector/vector shift/rotate.\n");
5475                           return false;
5476                         }
                           /* The converted amount is only materialized at
                              transform time and only outside SLP.  */
5477                       if (vec_stmt && !slp_node)
5478                         {
5479                           op1 = fold_convert (TREE_TYPE (vectype), op1);
5480                           op1 = vect_init_vector (stmt, op1,
5481                                                         TREE_TYPE (vectype), NULL);
5482                         }
5483                     }
5484             }
5485         }
5486     }
5487 
5488   /* Supportable by target?  */
5489   if (!optab)
5490     {
5491       if (dump_enabled_p ())
5492         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5493                          "no optab.\n");
5494       return false;
5495     }
5496   vec_mode = TYPE_MODE (vectype);
5497   icode = (int) optab_handler (optab, vec_mode);
5498   if (icode == CODE_FOR_nothing)
5499     {
5500       if (dump_enabled_p ())
5501         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5502                          "op not supported by target.\n");
           /* Without a handler the stmt is still accepted when the vector
              mode is exactly UNITS_PER_WORD in size; the worthwhile test
              below is applied during analysis only.  */
5503       /* Check only during analysis.  */
5504       if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5505             || (!vec_stmt
5506                 && !vect_worthwhile_without_simd_p (vinfo, code)))
5507         return false;
5508       if (dump_enabled_p ())
5509         dump_printf_loc (MSG_NOTE, vect_location,
5510                          "proceeding using word mode.\n");
5511     }
5512 
5513   /* Worthwhile without SIMD support?  Check only during analysis.  */
5514   if (!vec_stmt
5515       && !VECTOR_MODE_P (TYPE_MODE (vectype))
5516       && !vect_worthwhile_without_simd_p (vinfo, code))
5517     {
5518       if (dump_enabled_p ())
5519         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5520                          "not worthwhile without SIMD support.\n");
5521       return false;
5522     }
5523 
5524   if (!vec_stmt) /* transformation not required.  */
5525     {
5526       STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5527       if (dump_enabled_p ())
5528         dump_printf_loc (MSG_NOTE, vect_location,
5529                          "=== vectorizable_shift ===\n");
5530       if (!slp_node)
5531           vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5532       return true;
5533     }
5534 
5535   /* Transform.  */
5536 
       /* NOTE(review): the dump text below says "binary/unary operation"
          although this is the shift path; it is runtime output and is left
          unchanged here.  */
5537   if (dump_enabled_p ())
5538     dump_printf_loc (MSG_NOTE, vect_location,
5539                      "transform binary/unary operation.\n");
5540 
5541   /* Handle def.  */
5542   vec_dest = vect_create_destination_var (scalar_dest, vectype);
5543 
       /* Emit NCOPIES copies of the vector stmt (NCOPIES is 1 under SLP).  */
5544   prev_stmt_info = NULL;
5545   for (j = 0; j < ncopies; j++)
5546     {
5547       /* Handle uses.  */
5548       if (j == 0)
5549         {
5550           if (scalar_shift_arg)
5551             {
5552               /* Vector shl and shr insn patterns can be defined with scalar
5553                  operand 2 (shift operand).  In this case, use constant or loop
5554                  invariant op1 directly, without extending it to vector mode
5555                  first.  */
5556               optab_op2_mode = insn_data[icode].operand[2].mode;
5557               if (!VECTOR_MODE_P (optab_op2_mode))
5558                 {
5559                   if (dump_enabled_p ())
5560                     dump_printf_loc (MSG_NOTE, vect_location,
5561                                      "operand 1 using scalar mode.\n");
5562                   vec_oprnd1 = op1;
5563                   vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5564                   vec_oprnds1.quick_push (vec_oprnd1);
5565                   if (slp_node)
5566                     {
5567                       /* Store vec_oprnd1 for every vector stmt to be created
5568                          for SLP_NODE.  We check during the analysis that all
5569                          the shift arguments are the same.
5570                          TODO: Allow different constants for different vector
5571                          stmts generated for an SLP instance.  */
5572                       for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5573                         vec_oprnds1.quick_push (vec_oprnd1);
5574                     }
5575                 }
5576             }
5577 
5578           /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5579              (a special case for certain kind of vector shifts); otherwise,
5580              operand 1 should be of a vector type (the usual case).  */
5581           if (vec_oprnd1)
5582             vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5583                                slp_node);
5584           else
5585             vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5586                                slp_node);
5587         }
5588       else
5589         vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5590 
5591       /* Arguments are ready.  Create the new vector stmt.  */
5592       FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5593         {
               /* NOTE(review): assumes vec_oprnds1 has at least as many
                  entries as vec_oprnds0 -- confirm against the helpers
                  that fill them.  */
5594           vop1 = vec_oprnds1[i];
5595             new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5596           new_temp = make_ssa_name (vec_dest, new_stmt);
5597           gimple_assign_set_lhs (new_stmt, new_temp);
5598           vect_finish_stmt_generation (stmt, new_stmt, gsi);
5599           if (slp_node)
5600             SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5601         }
5602 
           /* Under SLP the new stmts were recorded on the node above, so the
              VEC_STMT / RELATED_STMT bookkeeping below is skipped.  */
5603       if (slp_node)
5604         continue;
5605 
5606       if (j == 0)
5607         STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5608       else
5609         STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5610       prev_stmt_info = vinfo_for_stmt (new_stmt);
5611     }
5612 
5613   vec_oprnds0.release ();
5614   vec_oprnds1.release ();
5615 
5616   return true;
5617 }
5618 
5619 
5620 /* Function vectorizable_operation.
5621 
5622    Check if STMT performs a binary, unary or ternary operation that can
5623    be vectorized.
5624    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5625    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5626    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
5627 
5628 static bool
vectorizable_operation(gimple * stmt,gimple_stmt_iterator * gsi,gimple ** vec_stmt,slp_tree slp_node)5629 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5630                               gimple **vec_stmt, slp_tree slp_node)
5631 {
5632   tree vec_dest;
5633   tree scalar_dest;
5634   tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5635   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5636   tree vectype;
5637   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5638   enum tree_code code, orig_code;
5639   machine_mode vec_mode;
5640   tree new_temp;
5641   int op_type;
5642   optab optab;
5643   bool target_support_p;
5644   gimple *def_stmt;
5645   enum vect_def_type dt[3]
5646     = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5647   int ndts = 3;
5648   gimple *new_stmt = NULL;
5649   stmt_vec_info prev_stmt_info;
5650   poly_uint64 nunits_in;
5651   poly_uint64 nunits_out;
5652   tree vectype_out;
5653   int ncopies;
5654   int j, i;
5655   vec<tree> vec_oprnds0 = vNULL;
5656   vec<tree> vec_oprnds1 = vNULL;
5657   vec<tree> vec_oprnds2 = vNULL;
5658   tree vop0, vop1, vop2;
5659   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5660   vec_info *vinfo = stmt_info->vinfo;
5661 
5662   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5663     return false;
5664 
5665   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5666       && ! vec_stmt)
5667     return false;
5668 
5669   /* Is STMT a vectorizable binary/unary operation?   */
5670   if (!is_gimple_assign (stmt))
5671     return false;
5672 
5673   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5674     return false;
5675 
5676   orig_code = code = gimple_assign_rhs_code (stmt);
5677 
5678   /* For pointer addition and subtraction, we should use the normal
5679      plus and minus for the vector operation.  */
5680   if (code == POINTER_PLUS_EXPR)
5681     code = PLUS_EXPR;
5682   if (code == POINTER_DIFF_EXPR)
5683     code = MINUS_EXPR;
5684 
5685   /* Support only unary or binary operations.  */
5686   op_type = TREE_CODE_LENGTH (code);
5687   if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5688     {
5689       if (dump_enabled_p ())
5690         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5691                          "num. args = %d (not unary/binary/ternary op).\n",
5692                          op_type);
5693       return false;
5694     }
5695 
5696   scalar_dest = gimple_assign_lhs (stmt);
5697   vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5698 
5699   /* Most operations cannot handle bit-precision types without extra
5700      truncations.  */
5701   if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5702       && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5703       /* Exception are bitwise binary operations.  */
5704       && code != BIT_IOR_EXPR
5705       && code != BIT_XOR_EXPR
5706       && code != BIT_AND_EXPR)
5707     {
5708       if (dump_enabled_p ())
5709         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5710                          "bit-precision arithmetic not supported.\n");
5711       return false;
5712     }
5713 
5714   op0 = gimple_assign_rhs1 (stmt);
5715   if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5716     {
5717       if (dump_enabled_p ())
5718         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5719                          "use not simple.\n");
5720       return false;
5721     }
5722   /* If op0 is an external or constant def use a vector type with
5723      the same size as the output vector type.  */
5724   if (!vectype)
5725     {
5726       /* For boolean type we cannot determine vectype by
5727            invariant value (don't know whether it is a vector
5728            of booleans or vector of integers).  We use output
5729            vectype because operations on boolean don't change
5730            type.  */
5731       if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5732           {
5733             if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5734               {
5735                 if (dump_enabled_p ())
5736                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5737                                          "not supported operation on bool value.\n");
5738                 return false;
5739               }
5740             vectype = vectype_out;
5741           }
5742       else
5743           vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5744     }
5745   if (vec_stmt)
5746     gcc_assert (vectype);
5747   if (!vectype)
5748     {
5749       if (dump_enabled_p ())
5750         {
5751           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5752                            "no vectype for scalar type ");
5753           dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5754                              TREE_TYPE (op0));
5755           dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5756         }
5757 
5758       return false;
5759     }
5760 
5761   nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5762   nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5763   if (maybe_ne (nunits_out, nunits_in))
5764     return false;
5765 
5766   if (op_type == binary_op || op_type == ternary_op)
5767     {
5768       op1 = gimple_assign_rhs2 (stmt);
5769       if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
5770           {
5771             if (dump_enabled_p ())
5772               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5773                              "use not simple.\n");
5774             return false;
5775           }
5776     }
5777   if (op_type == ternary_op)
5778     {
5779       op2 = gimple_assign_rhs3 (stmt);
5780       if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
5781           {
5782             if (dump_enabled_p ())
5783               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5784                              "use not simple.\n");
5785             return false;
5786           }
5787     }
5788 
5789   /* Multiple types in SLP are handled by creating the appropriate number of
5790      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
5791      case of SLP.  */
5792   if (slp_node)
5793     ncopies = 1;
5794   else
5795     ncopies = vect_get_num_copies (loop_vinfo, vectype);
5796 
5797   gcc_assert (ncopies >= 1);
5798 
5799   /* Shifts are handled in vectorizable_shift ().  */
5800   if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5801       || code == RROTATE_EXPR)
5802    return false;
5803 
5804   /* Supportable by target?  */
5805 
5806   vec_mode = TYPE_MODE (vectype);
5807   if (code == MULT_HIGHPART_EXPR)
5808     target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5809   else
5810     {
5811       optab = optab_for_tree_code (code, vectype, optab_default);
5812       if (!optab)
5813           {
5814           if (dump_enabled_p ())
5815             dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5816                              "no optab.\n");
5817             return false;
5818           }
5819       target_support_p = (optab_handler (optab, vec_mode)
5820                                 != CODE_FOR_nothing);
5821     }
5822 
5823   if (!target_support_p)
5824     {
5825       if (dump_enabled_p ())
5826           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5827                          "op not supported by target.\n");
5828       /* Check only during analysis.  */
5829       if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5830             || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5831         return false;
5832       if (dump_enabled_p ())
5833           dump_printf_loc (MSG_NOTE, vect_location,
5834                          "proceeding using word mode.\n");
5835     }
5836 
5837   /* Worthwhile without SIMD support?  Check only during analysis.  */
5838   if (!VECTOR_MODE_P (vec_mode)
5839       && !vec_stmt
5840       && !vect_worthwhile_without_simd_p (vinfo, code))
5841     {
5842       if (dump_enabled_p ())
5843         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5844                          "not worthwhile without SIMD support.\n");
5845       return false;
5846     }
5847 
5848   if (!vec_stmt) /* transformation not required.  */
5849     {
5850       STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5851       if (dump_enabled_p ())
5852         dump_printf_loc (MSG_NOTE, vect_location,
5853                          "=== vectorizable_operation ===\n");
5854       if (!slp_node)
5855           vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5856       return true;
5857     }
5858 
5859   /* Transform.  */
5860 
5861   if (dump_enabled_p ())
5862     dump_printf_loc (MSG_NOTE, vect_location,
5863                      "transform binary/unary operation.\n");
5864 
5865   /* Handle def.  */
5866   vec_dest = vect_create_destination_var (scalar_dest, vectype);
5867 
5868   /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5869      vectors with unsigned elements, but the result is signed.  So, we
5870      need to compute the MINUS_EXPR into vectype temporary and
5871      VIEW_CONVERT_EXPR it into the final vectype_out result.  */
5872   tree vec_cvt_dest = NULL_TREE;
5873   if (orig_code == POINTER_DIFF_EXPR)
5874     vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
5875 
5876   /* In case the vectorization factor (VF) is bigger than the number
5877      of elements that we can fit in a vectype (nunits), we have to generate
5878      more than one vector stmt - i.e - we need to "unroll" the
5879      vector stmt by a factor VF/nunits.  In doing so, we record a pointer
5880      from one copy of the vector stmt to the next, in the field
5881      STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
5882      stages to find the correct vector defs to be used when vectorizing
5883      stmts that use the defs of the current stmt.  The example below
5884      illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5885      we need to create 4 vectorized stmts):
5886 
5887      before vectorization:
5888                                 RELATED_STMT    VEC_STMT
5889         S1:     x = memref      -               -
5890         S2:     z = x + 1       -               -
5891 
5892      step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5893              there):
5894                                 RELATED_STMT    VEC_STMT
5895         VS1_0:  vx0 = memref0   VS1_1           -
5896         VS1_1:  vx1 = memref1   VS1_2           -
5897         VS1_2:  vx2 = memref2   VS1_3           -
5898         VS1_3:  vx3 = memref3   -               -
5899         S1:     x = load        -               VS1_0
5900         S2:     z = x + 1       -               -
5901 
5902      step2: vectorize stmt S2 (done here):
5903         To vectorize stmt S2 we first need to find the relevant vector
5904         def for the first operand 'x'.  This is, as usual, obtained from
5905         the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5906         that defines 'x' (S1).  This way we find the stmt VS1_0, and the
5907         relevant vector def 'vx0'.  Having found 'vx0' we can generate
5908         the vector stmt VS2_0, and as usual, record it in the
5909         STMT_VINFO_VEC_STMT of stmt S2.
5910         When creating the second copy (VS2_1), we obtain the relevant vector
5911         def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5912         stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
5913         vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
5914         pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5915         Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
5916         chain of stmts and pointers:
5917                                 RELATED_STMT    VEC_STMT
5918         VS1_0:  vx0 = memref0   VS1_1           -
5919         VS1_1:  vx1 = memref1   VS1_2           -
5920         VS1_2:  vx2 = memref2   VS1_3           -
5921         VS1_3:  vx3 = memref3   -               -
5922         S1:     x = load        -               VS1_0
5923         VS2_0:  vz0 = vx0 + v1  VS2_1           -
5924         VS2_1:  vz1 = vx1 + v1  VS2_2           -
5925         VS2_2:  vz2 = vx2 + v1  VS2_3           -
5926         VS2_3:  vz3 = vx3 + v1  -               -
5927         S2:     z = x + 1       -               VS2_0  */
5928 
5929   prev_stmt_info = NULL;
5930   for (j = 0; j < ncopies; j++)
5931     {
5932       /* Handle uses.  */
5933       if (j == 0)
5934           {
5935             if (op_type == binary_op)
5936               vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5937                                      slp_node);
5938             else if (op_type == ternary_op)
5939               {
5940                 if (slp_node)
5941                     {
5942                       auto_vec<tree> ops(3);
5943                       ops.quick_push (op0);
5944                       ops.quick_push (op1);
5945                       ops.quick_push (op2);
5946                       auto_vec<vec<tree> > vec_defs(3);
5947                       vect_get_slp_defs (ops, slp_node, &vec_defs);
5948                       vec_oprnds0 = vec_defs[0];
5949                       vec_oprnds1 = vec_defs[1];
5950                       vec_oprnds2 = vec_defs[2];
5951                     }
5952                 else
5953                     {
5954                       vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5955                                              NULL);
5956                       vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
5957                                              NULL);
5958                     }
5959               }
5960             else
5961               vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5962                                      slp_node);
5963           }
5964       else
5965           {
5966             vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5967             if (op_type == ternary_op)
5968               {
5969                 tree vec_oprnd = vec_oprnds2.pop ();
5970                 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5971                                                                                  vec_oprnd));
5972               }
5973           }
5974 
5975       /* Arguments are ready.  Create the new vector stmt.  */
5976       FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5977         {
5978             vop1 = ((op_type == binary_op || op_type == ternary_op)
5979                       ? vec_oprnds1[i] : NULL_TREE);
5980             vop2 = ((op_type == ternary_op)
5981                       ? vec_oprnds2[i] : NULL_TREE);
5982             new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5983             new_temp = make_ssa_name (vec_dest, new_stmt);
5984             gimple_assign_set_lhs (new_stmt, new_temp);
5985             vect_finish_stmt_generation (stmt, new_stmt, gsi);
5986             if (vec_cvt_dest)
5987               {
5988                 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
5989                 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
5990                                                         new_temp);
5991                 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
5992                 gimple_assign_set_lhs (new_stmt, new_temp);
5993                 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5994               }
5995           if (slp_node)
5996               SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5997         }
5998 
5999       if (slp_node)
6000         continue;
6001 
6002       if (j == 0)
6003           STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6004       else
6005           STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6006       prev_stmt_info = vinfo_for_stmt (new_stmt);
6007     }
6008 
6009   vec_oprnds0.release ();
6010   vec_oprnds1.release ();
6011   vec_oprnds2.release ();
6012 
6013   return true;
6014 }
6015 
6016 /* A helper function to ensure data reference DR's base alignment.  */
6017 
6018 static void
ensure_base_align(struct data_reference * dr)6019 ensure_base_align (struct data_reference *dr)
6020 {
6021   if (!dr->aux)
6022     return;
6023 
6024   if (DR_VECT_AUX (dr)->base_misaligned)
6025     {
6026       tree base_decl = DR_VECT_AUX (dr)->base_decl;
6027 
6028       unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
6029 
6030       if (decl_in_symtab_p (base_decl))
6031           symtab_node::get (base_decl)->increase_alignment (align_base_to);
6032       else
6033           {
6034             SET_DECL_ALIGN (base_decl, align_base_to);
6035           DECL_USER_ALIGN (base_decl) = 1;
6036           }
6037       DR_VECT_AUX (dr)->base_misaligned = false;
6038     }
6039 }
6040 
6041 
6042 /* Function get_group_alias_ptr_type.
6043 
6044    Return the alias type for the group starting at FIRST_STMT.  */
6045 
6046 static tree
get_group_alias_ptr_type(gimple * first_stmt)6047 get_group_alias_ptr_type (gimple *first_stmt)
6048 {
6049   struct data_reference *first_dr, *next_dr;
6050   gimple *next_stmt;
6051 
6052   first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6053   next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
6054   while (next_stmt)
6055     {
6056       next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
6057       if (get_alias_set (DR_REF (first_dr))
6058             != get_alias_set (DR_REF (next_dr)))
6059           {
6060             if (dump_enabled_p ())
6061               dump_printf_loc (MSG_NOTE, vect_location,
6062                                    "conflicting alias set types.\n");
6063             return ptr_type_node;
6064           }
6065       next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6066     }
6067   return reference_alias_ptr_type (DR_REF (first_dr));
6068 }
6069 
6070 
6071 /* Function vectorizable_store.
6072 
6073    Check if STMT defines a non scalar data-ref (array/pointer/structure) that
6074    can be vectorized.
6075    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6077    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
6078 
6079 static bool
vectorizable_store(gimple * stmt,gimple_stmt_iterator * gsi,gimple ** vec_stmt,slp_tree slp_node)6080 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6081                     slp_tree slp_node)
6082 {
6083   tree data_ref;
6084   tree op;
6085   tree vec_oprnd = NULL_TREE;
6086   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6087   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6088   tree elem_type;
6089   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6090   struct loop *loop = NULL;
6091   machine_mode vec_mode;
6092   tree dummy;
6093   enum dr_alignment_support alignment_support_scheme;
6094   gimple *def_stmt;
6095   enum vect_def_type rhs_dt = vect_unknown_def_type;
6096   enum vect_def_type mask_dt = vect_unknown_def_type;
6097   stmt_vec_info prev_stmt_info = NULL;
6098   tree dataref_ptr = NULL_TREE;
6099   tree dataref_offset = NULL_TREE;
6100   gimple *ptr_incr = NULL;
6101   int ncopies;
6102   int j;
6103   gimple *next_stmt, *first_stmt;
6104   bool grouped_store;
6105   unsigned int group_size, i;
6106   vec<tree> oprnds = vNULL;
6107   vec<tree> result_chain = vNULL;
6108   bool inv_p;
6109   tree offset = NULL_TREE;
6110   vec<tree> vec_oprnds = vNULL;
6111   bool slp = (slp_node != NULL);
6112   unsigned int vec_num;
6113   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6114   vec_info *vinfo = stmt_info->vinfo;
6115   tree aggr_type;
6116   gather_scatter_info gs_info;
6117   gimple *new_stmt;
6118   poly_uint64 vf;
6119   vec_load_store_type vls_type;
6120   tree ref_type;
6121 
6122   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6123     return false;
6124 
6125   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6126       && ! vec_stmt)
6127     return false;
6128 
6129   /* Is vectorizable store? */
6130 
6131   tree mask = NULL_TREE, mask_vectype = NULL_TREE;
6132   if (is_gimple_assign (stmt))
6133     {
6134       tree scalar_dest = gimple_assign_lhs (stmt);
6135       if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
6136             && is_pattern_stmt_p (stmt_info))
6137           scalar_dest = TREE_OPERAND (scalar_dest, 0);
6138       if (TREE_CODE (scalar_dest) != ARRAY_REF
6139             && TREE_CODE (scalar_dest) != BIT_FIELD_REF
6140             && TREE_CODE (scalar_dest) != INDIRECT_REF
6141             && TREE_CODE (scalar_dest) != COMPONENT_REF
6142             && TREE_CODE (scalar_dest) != IMAGPART_EXPR
6143             && TREE_CODE (scalar_dest) != REALPART_EXPR
6144             && TREE_CODE (scalar_dest) != MEM_REF)
6145           return false;
6146     }
6147   else
6148     {
6149       gcall *call = dyn_cast <gcall *> (stmt);
6150       if (!call || !gimple_call_internal_p (call))
6151           return false;
6152 
6153       internal_fn ifn = gimple_call_internal_fn (call);
6154       if (!internal_store_fn_p (ifn))
6155           return false;
6156 
6157       if (slp_node != NULL)
6158           {
6159             if (dump_enabled_p ())
6160               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6161                                    "SLP of masked stores not supported.\n");
6162             return false;
6163           }
6164 
6165       int mask_index = internal_fn_mask_index (ifn);
6166       if (mask_index >= 0)
6167           {
6168             mask = gimple_call_arg (call, mask_index);
6169             if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
6170                                                      &mask_vectype))
6171               return false;
6172           }
6173     }
6174 
6175   op = vect_get_store_rhs (stmt);
6176 
6177   /* Cannot have hybrid store SLP -- that would mean storing to the
6178      same location twice.  */
6179   gcc_assert (slp == PURE_SLP_STMT (stmt_info));
6180 
6181   tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
6182   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6183 
6184   if (loop_vinfo)
6185     {
6186       loop = LOOP_VINFO_LOOP (loop_vinfo);
6187       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6188     }
6189   else
6190     vf = 1;
6191 
6192   /* Multiple types in SLP are handled by creating the appropriate number of
6193      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
6194      case of SLP.  */
6195   if (slp)
6196     ncopies = 1;
6197   else
6198     ncopies = vect_get_num_copies (loop_vinfo, vectype);
6199 
6200   gcc_assert (ncopies >= 1);
6201 
6202   /* FORNOW.  This restriction should be relaxed.  */
6203   if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
6204     {
6205       if (dump_enabled_p ())
6206           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6207                                "multiple types in nested loop.\n");
6208       return false;
6209     }
6210 
6211   if (!vect_check_store_rhs (stmt, op, &rhs_dt, &rhs_vectype, &vls_type))
6212     return false;
6213 
6214   elem_type = TREE_TYPE (vectype);
6215   vec_mode = TYPE_MODE (vectype);
6216 
6217   if (!STMT_VINFO_DATA_REF (stmt_info))
6218     return false;
6219 
6220   vect_memory_access_type memory_access_type;
6221   if (!get_load_store_type (stmt, vectype, slp, mask, vls_type, ncopies,
6222                                   &memory_access_type, &gs_info))
6223     return false;
6224 
6225   if (mask)
6226     {
6227       if (memory_access_type == VMAT_CONTIGUOUS)
6228           {
6229             if (!VECTOR_MODE_P (vec_mode)
6230                 || !can_vec_mask_load_store_p (vec_mode,
6231                                                        TYPE_MODE (mask_vectype), false))
6232               return false;
6233           }
6234       else if (memory_access_type != VMAT_LOAD_STORE_LANES
6235                  && (memory_access_type != VMAT_GATHER_SCATTER || gs_info.decl))
6236           {
6237             if (dump_enabled_p ())
6238               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6239                                    "unsupported access type for masked store.\n");
6240             return false;
6241           }
6242     }
6243   else
6244     {
6245       /* FORNOW. In some cases can vectorize even if data-type not supported
6246            (e.g. - array initialization with 0).  */
6247       if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
6248           return false;
6249     }
6250 
6251   grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
6252                        && memory_access_type != VMAT_GATHER_SCATTER
6253                        && (slp || memory_access_type != VMAT_CONTIGUOUS));
6254   if (grouped_store)
6255     {
6256       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6257       first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6258       group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6259     }
6260   else
6261     {
6262       first_stmt = stmt;
6263       first_dr = dr;
6264       group_size = vec_num = 1;
6265     }
6266 
6267   if (!vec_stmt) /* transformation not required.  */
6268     {
6269       STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6270 
6271       if (loop_vinfo
6272             && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
6273           check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
6274                                           memory_access_type, &gs_info);
6275 
6276       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
6277       /* The SLP costs are calculated during SLP analysis.  */
6278       if (!slp_node)
6279           vect_model_store_cost (stmt_info, ncopies, memory_access_type,
6280                                      vls_type, NULL, NULL, NULL);
6281       return true;
6282     }
6283   gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6284 
6285   /* Transform.  */
6286 
6287   ensure_base_align (dr);
6288 
6289   if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
6290     {
6291       tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
6292       tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6293       tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6294       tree ptr, mask, var, scale, perm_mask = NULL_TREE;
6295       edge pe = loop_preheader_edge (loop);
6296       gimple_seq seq;
6297       basic_block new_bb;
6298       enum { NARROW, NONE, WIDEN } modifier;
6299       poly_uint64 scatter_off_nunits
6300           = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6301 
6302       if (known_eq (nunits, scatter_off_nunits))
6303           modifier = NONE;
6304       else if (known_eq (nunits * 2, scatter_off_nunits))
6305           {
6306             modifier = WIDEN;
6307 
6308             /* Currently gathers and scatters are only supported for
6309                fixed-length vectors.  */
6310             unsigned int count = scatter_off_nunits.to_constant ();
6311             vec_perm_builder sel (count, count, 1);
6312             for (i = 0; i < (unsigned int) count; ++i)
6313               sel.quick_push (i | (count / 2));
6314 
6315             vec_perm_indices indices (sel, 1, count);
6316             perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
6317                                                               indices);
6318             gcc_assert (perm_mask != NULL_TREE);
6319           }
6320       else if (known_eq (nunits, scatter_off_nunits * 2))
6321           {
6322             modifier = NARROW;
6323 
6324             /* Currently gathers and scatters are only supported for
6325                fixed-length vectors.  */
6326             unsigned int count = nunits.to_constant ();
6327             vec_perm_builder sel (count, count, 1);
6328             for (i = 0; i < (unsigned int) count; ++i)
6329               sel.quick_push (i | (count / 2));
6330 
6331             vec_perm_indices indices (sel, 2, count);
6332             perm_mask = vect_gen_perm_mask_checked (vectype, indices);
6333             gcc_assert (perm_mask != NULL_TREE);
6334             ncopies *= 2;
6335           }
6336       else
6337           gcc_unreachable ();
6338 
6339       rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6340       ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6341       masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6342       idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6343       srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6344       scaletype = TREE_VALUE (arglist);
6345 
6346       gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
6347                                  && TREE_CODE (rettype) == VOID_TYPE);
6348 
6349       ptr = fold_convert (ptrtype, gs_info.base);
6350       if (!is_gimple_min_invariant (ptr))
6351           {
6352             ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6353             new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6354             gcc_assert (!new_bb);
6355           }
6356 
6357       /* Currently we support only unconditional scatter stores,
6358            so mask should be all ones.  */
6359       mask = build_int_cst (masktype, -1);
6360       mask = vect_init_vector (stmt, mask, masktype, NULL);
6361 
6362       scale = build_int_cst (scaletype, gs_info.scale);
6363 
6364       prev_stmt_info = NULL;
6365       for (j = 0; j < ncopies; ++j)
6366           {
6367             if (j == 0)
6368               {
6369                 src = vec_oprnd1
6370                     = vect_get_vec_def_for_operand (op, stmt);
6371                 op = vec_oprnd0
6372                     = vect_get_vec_def_for_operand (gs_info.offset, stmt);
6373               }
6374             else if (modifier != NONE && (j & 1))
6375               {
6376                 if (modifier == WIDEN)
6377                     {
6378                       src = vec_oprnd1
6379                         = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
6380                       op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
6381                                                        stmt, gsi);
6382                     }
6383                 else if (modifier == NARROW)
6384                     {
6385                       src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
6386                                                         stmt, gsi);
6387                       op = vec_oprnd0
6388                         = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6389                                                                   vec_oprnd0);
6390                     }
6391                 else
6392                     gcc_unreachable ();
6393               }
6394             else
6395               {
6396                 src = vec_oprnd1
6397                     = vect_get_vec_def_for_stmt_copy (rhs_dt, vec_oprnd1);
6398                 op = vec_oprnd0
6399                     = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6400                                                               vec_oprnd0);
6401               }
6402 
6403             if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
6404               {
6405                 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
6406                                             TYPE_VECTOR_SUBPARTS (srctype)));
6407                 var = vect_get_new_ssa_name (srctype, vect_simple_var);
6408                 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
6409                 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
6410                 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6411                 src = var;
6412               }
6413 
6414             if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6415               {
6416                 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
6417                                             TYPE_VECTOR_SUBPARTS (idxtype)));
6418                 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6419                 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6420                 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6421                 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6422                 op = var;
6423               }
6424 
6425             new_stmt
6426               = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
6427 
6428             vect_finish_stmt_generation (stmt, new_stmt, gsi);
6429 
6430             if (prev_stmt_info == NULL)
6431               STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6432             else
6433               STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6434             prev_stmt_info = vinfo_for_stmt (new_stmt);
6435           }
6436       return true;
6437     }
6438 
6439   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6440     {
6441       gimple *group_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6442       GROUP_STORE_COUNT (vinfo_for_stmt (group_stmt))++;
6443     }
6444 
6445   if (grouped_store)
6446     {
6447       /* FORNOW */
6448       gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
6449 
6450       /* We vectorize all the stmts of the interleaving group when we
6451            reach the last stmt in the group.  */
6452       if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
6453             < GROUP_SIZE (vinfo_for_stmt (first_stmt))
6454             && !slp)
6455           {
6456             *vec_stmt = NULL;
6457             return true;
6458           }
6459 
6460       if (slp)
6461         {
6462           grouped_store = false;
6463           /* VEC_NUM is the number of vect stmts to be created for this
6464              group.  */
6465           vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6466           first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6467             gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
6468           first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6469             op = vect_get_store_rhs (first_stmt);
6470         }
6471       else
6472         /* VEC_NUM is the number of vect stmts to be created for this
6473            group.  */
6474           vec_num = group_size;
6475 
6476       ref_type = get_group_alias_ptr_type (first_stmt);
6477     }
6478   else
6479     ref_type = reference_alias_ptr_type (DR_REF (first_dr));
6480 
6481   if (dump_enabled_p ())
6482     dump_printf_loc (MSG_NOTE, vect_location,
6483                      "transform store. ncopies = %d\n", ncopies);
6484 
6485   if (memory_access_type == VMAT_ELEMENTWISE
6486       || memory_access_type == VMAT_STRIDED_SLP)
6487     {
6488       gimple_stmt_iterator incr_gsi;
6489       bool insert_after;
6490       gimple *incr;
6491       tree offvar;
6492       tree ivstep;
6493       tree running_off;
6494       tree stride_base, stride_step, alias_off;
6495       tree vec_oprnd;
6496       unsigned int g;
6497       /* Checked by get_load_store_type.  */
6498       unsigned int const_nunits = nunits.to_constant ();
6499 
6500       gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
6501       gcc_assert (!nested_in_vect_loop_p (loop, stmt));
6502 
6503       stride_base
6504           = fold_build_pointer_plus
6505               (DR_BASE_ADDRESS (first_dr),
6506                size_binop (PLUS_EXPR,
6507                                convert_to_ptrofftype (DR_OFFSET (first_dr)),
6508                                convert_to_ptrofftype (DR_INIT (first_dr))));
6509       stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6510 
6511       /* For a store with loop-invariant (but other than power-of-2)
6512          stride (i.e. not a grouped access) like so:
6513 
6514              for (i = 0; i < n; i += stride)
6515                array[i] = ...;
6516 
6517            we generate a new induction variable and new stores from
6518            the components of the (vectorized) rhs:
6519 
6520              for (j = 0; ; j += VF*stride)
6521                vectemp = ...;
6522                tmp1 = vectemp[0];
6523                array[j] = tmp1;
6524                tmp2 = vectemp[1];
6525                array[j + stride] = tmp2;
6526                ...
6527          */
6528 
6529       unsigned nstores = const_nunits;
6530       unsigned lnel = 1;
6531       tree ltype = elem_type;
6532       tree lvectype = vectype;
6533       if (slp)
6534           {
6535             if (group_size < const_nunits
6536                 && const_nunits % group_size == 0)
6537               {
6538                 nstores = const_nunits / group_size;
6539                 lnel = group_size;
6540                 ltype = build_vector_type (elem_type, group_size);
6541                 lvectype = vectype;
6542 
6543                 /* First check if vec_extract optab doesn't support extraction
6544                      of vector elts directly.  */
6545                 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
6546                 machine_mode vmode;
6547                 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6548                       || !VECTOR_MODE_P (vmode)
6549                       || !targetm.vector_mode_supported_p (vmode)
6550                       || (convert_optab_handler (vec_extract_optab,
6551                                                        TYPE_MODE (vectype), vmode)
6552                           == CODE_FOR_nothing))
6553                     {
6554                       /* Try to avoid emitting an extract of vector elements
6555                          by performing the extracts using an integer type of the
6556                          same size, extracting from a vector of those and then
6557                          re-interpreting it as the original vector type if
6558                          supported.  */
6559                       unsigned lsize
6560                         = group_size * GET_MODE_BITSIZE (elmode);
6561                       elmode = int_mode_for_size (lsize, 0).require ();
6562                       unsigned int lnunits = const_nunits / group_size;
6563                       /* If we can't construct such a vector fall back to
6564                          element extracts from the original vector type and
6565                          element size stores.  */
6566                       if (mode_for_vector (elmode, lnunits).exists (&vmode)
6567                           && VECTOR_MODE_P (vmode)
6568                           && targetm.vector_mode_supported_p (vmode)
6569                           && (convert_optab_handler (vec_extract_optab,
6570                                                              vmode, elmode)
6571                                 != CODE_FOR_nothing))
6572                         {
6573                           nstores = lnunits;
6574                           lnel = group_size;
6575                           ltype = build_nonstandard_integer_type (lsize, 1);
6576                           lvectype = build_vector_type (ltype, nstores);
6577                         }
6578                       /* Else fall back to vector extraction anyway.
6579                          Fewer stores are more important than avoiding spilling
6580                          of the vector we extract from.  Compared to the
6581                          construction case in vectorizable_load no store-forwarding
6582                          issue exists here for reasonable archs.  */
6583                     }
6584               }
6585             else if (group_size >= const_nunits
6586                        && group_size % const_nunits == 0)
6587               {
6588                 nstores = 1;
6589                 lnel = const_nunits;
6590                 ltype = vectype;
6591                 lvectype = vectype;
6592               }
6593             ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6594             ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6595           }
6596 
6597       ivstep = stride_step;
6598       ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6599                                   build_int_cst (TREE_TYPE (ivstep), vf));
6600 
6601       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6602 
6603       stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
6604       ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
6605       create_iv (stride_base, ivstep, NULL,
6606                      loop, &incr_gsi, insert_after,
6607                      &offvar, NULL);
6608       incr = gsi_stmt (incr_gsi);
6609       set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6610 
6611       stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
6612 
6613       prev_stmt_info = NULL;
6614       alias_off = build_int_cst (ref_type, 0);
6615       next_stmt = first_stmt;
6616       for (g = 0; g < group_size; g++)
6617           {
6618             running_off = offvar;
6619             if (g)
6620               {
6621                 tree size = TYPE_SIZE_UNIT (ltype);
6622                 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6623                                               size);
6624                 tree newoff = copy_ssa_name (running_off, NULL);
6625                 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6626                                                     running_off, pos);
6627                 vect_finish_stmt_generation (stmt, incr, gsi);
6628                 running_off = newoff;
6629               }
6630             unsigned int group_el = 0;
6631             unsigned HOST_WIDE_INT
6632               elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6633             for (j = 0; j < ncopies; j++)
6634               {
6635                 /* We've set op and dt above, from vect_get_store_rhs,
6636                      and first_stmt == stmt.  */
6637                 if (j == 0)
6638                     {
6639                       if (slp)
6640                         {
6641                           vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
6642                                                    slp_node);
6643                           vec_oprnd = vec_oprnds[0];
6644                         }
6645                       else
6646                         {
6647                           op = vect_get_store_rhs (next_stmt);
6648                           vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6649                         }
6650                     }
6651                 else
6652                     {
6653                       if (slp)
6654                         vec_oprnd = vec_oprnds[j];
6655                       else
6656                         {
6657                           vect_is_simple_use (op, vinfo, &def_stmt, &rhs_dt);
6658                           vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt,
6659                                                                                   vec_oprnd);
6660                         }
6661                     }
6662                 /* Pun the vector to extract from if necessary.  */
6663                 if (lvectype != vectype)
6664                     {
6665                       tree tem = make_ssa_name (lvectype);
6666                       gimple *pun
6667                         = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6668                                                                       lvectype, vec_oprnd));
6669                       vect_finish_stmt_generation (stmt, pun, gsi);
6670                       vec_oprnd = tem;
6671                     }
6672                 for (i = 0; i < nstores; i++)
6673                     {
6674                       tree newref, newoff;
6675                       gimple *incr, *assign;
6676                       tree size = TYPE_SIZE (ltype);
6677                       /* Extract the i'th component.  */
6678                       tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6679                                                     bitsize_int (i), size);
6680                       tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6681                                                      size, pos);
6682 
6683                       elem = force_gimple_operand_gsi (gsi, elem, true,
6684                                                                NULL_TREE, true,
6685                                                                GSI_SAME_STMT);
6686 
6687                       tree this_off = build_int_cst (TREE_TYPE (alias_off),
6688                                                              group_el * elsz);
6689                       newref = build2 (MEM_REF, ltype,
6690                                            running_off, this_off);
6691                       vect_copy_ref_info (newref, DR_REF (first_dr));
6692 
6693                       /* And store it to *running_off.  */
6694                       assign = gimple_build_assign (newref, elem);
6695                       vect_finish_stmt_generation (stmt, assign, gsi);
6696 
6697                       group_el += lnel;
6698                       if (! slp
6699                           || group_el == group_size)
6700                         {
6701                           newoff = copy_ssa_name (running_off, NULL);
6702                           incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6703                                                               running_off, stride_step);
6704                           vect_finish_stmt_generation (stmt, incr, gsi);
6705 
6706                           running_off = newoff;
6707                           group_el = 0;
6708                         }
6709                       if (g == group_size - 1
6710                           && !slp)
6711                         {
6712                           if (j == 0 && i == 0)
6713                               STMT_VINFO_VEC_STMT (stmt_info)
6714                                   = *vec_stmt = assign;
6715                           else
6716                               STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6717                           prev_stmt_info = vinfo_for_stmt (assign);
6718                         }
6719                     }
6720               }
6721             next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6722             if (slp)
6723               break;
6724           }
6725 
6726       vec_oprnds.release ();
6727       return true;
6728     }
6729 
6730   auto_vec<tree> dr_chain (group_size);
6731   oprnds.create (group_size);
6732 
6733   alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6734   gcc_assert (alignment_support_scheme);
6735   vec_loop_masks *loop_masks
6736     = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6737        ? &LOOP_VINFO_MASKS (loop_vinfo)
6738        : NULL);
6739   /* Targets with store-lane instructions must not require explicit
6740      realignment.  vect_supportable_dr_alignment always returns either
6741      dr_aligned or dr_unaligned_supported for masked operations.  */
6742   gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
6743                  && !mask
6744                  && !loop_masks)
6745                 || alignment_support_scheme == dr_aligned
6746                 || alignment_support_scheme == dr_unaligned_supported);
6747 
6748   if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6749       || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6750     offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6751 
6752   tree bump;
6753   tree vec_offset = NULL_TREE;
6754   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6755     {
6756       aggr_type = NULL_TREE;
6757       bump = NULL_TREE;
6758     }
6759   else if (memory_access_type == VMAT_GATHER_SCATTER)
6760     {
6761       aggr_type = elem_type;
6762       vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
6763                                                &bump, &vec_offset);
6764     }
6765   else
6766     {
6767       if (memory_access_type == VMAT_LOAD_STORE_LANES)
6768           aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6769       else
6770           aggr_type = vectype;
6771       bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
6772     }
6773 
6774   if (mask)
6775     LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
6776 
6777   /* In case the vectorization factor (VF) is bigger than the number
6778      of elements that we can fit in a vectype (nunits), we have to generate
6779      more than one vector stmt - i.e - we need to "unroll" the
6780      vector stmt by a factor VF/nunits.  For more details see documentation in
6781      vect_get_vec_def_for_stmt_copy.  */
6782 
6783   /* In case of interleaving (non-unit grouped access):
6784 
6785         S1:  &base + 2 = x2
6786         S2:  &base = x0
6787         S3:  &base + 1 = x1
6788         S4:  &base + 3 = x3
6789 
6790      We create vectorized stores starting from base address (the access of the
6791      first stmt in the chain (S2 in the above example), when the last store stmt
6792      of the chain (S4) is reached:
6793 
6794         VS1: &base = vx2
6795           VS2: &base + vec_size*1 = vx0
6796           VS3: &base + vec_size*2 = vx1
6797           VS4: &base + vec_size*3 = vx3
6798 
6799      Then permutation statements are generated:
6800 
6801           VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6802           VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6803           ...
6804 
6805      And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6806      (the order of the data-refs in the output of vect_permute_store_chain
6807      corresponds to the order of scalar stmts in the interleaving chain - see
6808      the documentation of vect_permute_store_chain()).
6809 
6810      In case of both multiple types and interleaving, above vector stores and
6811      permutation stmts are created for every copy.  The result vector stmts are
6812      put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6813      STMT_VINFO_RELATED_STMT for the next copies.
6814   */
6815 
6816   prev_stmt_info = NULL;
6817   tree vec_mask = NULL_TREE;
6818   for (j = 0; j < ncopies; j++)
6819     {
6820 
6821       if (j == 0)
6822           {
6823           if (slp)
6824             {
6825                 /* Get vectorized arguments for SLP_NODE.  */
6826               vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
6827                                  NULL, slp_node);
6828 
6829               vec_oprnd = vec_oprnds[0];
6830             }
6831           else
6832             {
6833                 /* For interleaved stores we collect vectorized defs for all the
6834                      stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6835                      used as an input to vect_permute_store_chain(), and OPRNDS as
6836                      an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6837 
6838                      If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6839                      OPRNDS are of size 1.  */
6840                 next_stmt = first_stmt;
6841                 for (i = 0; i < group_size; i++)
6842                     {
6843                       /* Since gaps are not supported for interleaved stores,
6844                          GROUP_SIZE is the exact number of stmts in the chain.
6845                          Therefore, NEXT_STMT can't be NULL_TREE.  In case that
6846                          there is no interleaving, GROUP_SIZE is 1, and only one
6847                          iteration of the loop will be executed.  */
6848                       op = vect_get_store_rhs (next_stmt);
6849                       vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6850                       dr_chain.quick_push (vec_oprnd);
6851                       oprnds.quick_push (vec_oprnd);
6852                       next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6853                     }
6854                 if (mask)
6855                     vec_mask = vect_get_vec_def_for_operand (mask, stmt,
6856                                                                        mask_vectype);
6857               }
6858 
6859             /* We should have caught mismatched types earlier.  */
6860             gcc_assert (useless_type_conversion_p (vectype,
6861                                                              TREE_TYPE (vec_oprnd)));
6862             bool simd_lane_access_p
6863               = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6864             if (simd_lane_access_p
6865                 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6866                 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6867                 && integer_zerop (DR_OFFSET (first_dr))
6868                 && integer_zerop (DR_INIT (first_dr))
6869                 && alias_sets_conflict_p (get_alias_set (aggr_type),
6870                                                   get_alias_set (TREE_TYPE (ref_type))))
6871               {
6872                 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6873                 dataref_offset = build_int_cst (ref_type, 0);
6874                 inv_p = false;
6875               }
6876             else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6877               {
6878                 vect_get_gather_scatter_ops (loop, stmt, &gs_info,
6879                                                      &dataref_ptr, &vec_offset);
6880                 inv_p = false;
6881               }
6882             else
6883               dataref_ptr
6884                 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6885                                                     simd_lane_access_p ? loop : NULL,
6886                                                     offset, &dummy, gsi, &ptr_incr,
6887                                                     simd_lane_access_p, &inv_p,
6888                                                     NULL_TREE, bump);
6889             gcc_assert (bb_vinfo || !inv_p);
6890           }
6891       else
6892           {
6893             /* For interleaved stores we created vectorized defs for all the
6894                defs stored in OPRNDS in the previous iteration (previous copy).
6895                DR_CHAIN is then used as an input to vect_permute_store_chain(),
6896                and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6897                next copy.
6898                If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6899                OPRNDS are of size 1.  */
6900             for (i = 0; i < group_size; i++)
6901               {
6902                 op = oprnds[i];
6903                 vect_is_simple_use (op, vinfo, &def_stmt, &rhs_dt);
6904                 vec_oprnd = vect_get_vec_def_for_stmt_copy (rhs_dt, op);
6905                 dr_chain[i] = vec_oprnd;
6906                 oprnds[i] = vec_oprnd;
6907               }
6908             if (mask)
6909               vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
6910             if (dataref_offset)
6911               dataref_offset
6912                 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
6913             else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6914               vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6915                                                                        vec_offset);
6916             else
6917               dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6918                                                      bump);
6919           }
6920 
6921       if (memory_access_type == VMAT_LOAD_STORE_LANES)
6922           {
6923             tree vec_array;
6924 
6925             /* Combine all the vectors into an array.  */
6926             vec_array = create_vector_array (vectype, vec_num);
6927             for (i = 0; i < vec_num; i++)
6928               {
6929                 vec_oprnd = dr_chain[i];
6930                 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
6931               }
6932 
6933             tree final_mask = NULL;
6934             if (loop_masks)
6935               final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
6936                                                        vectype, j);
6937             if (vec_mask)
6938               final_mask = prepare_load_store_mask (mask_vectype, final_mask,
6939                                                               vec_mask, gsi);
6940 
6941             gcall *call;
6942             if (final_mask)
6943               {
6944                 /* Emit:
6945                        MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
6946                                              VEC_ARRAY).  */
6947                 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
6948                 tree alias_ptr = build_int_cst (ref_type, align);
6949                 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
6950                                                              dataref_ptr, alias_ptr,
6951                                                              final_mask, vec_array);
6952               }
6953             else
6954               {
6955                 /* Emit:
6956                        MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
6957                 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
6958                 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
6959                                                              vec_array);
6960                 gimple_call_set_lhs (call, data_ref);
6961               }
6962             gimple_call_set_nothrow (call, true);
6963             new_stmt = call;
6964             vect_finish_stmt_generation (stmt, new_stmt, gsi);
6965           }
6966       else
6967           {
6968             new_stmt = NULL;
6969             if (grouped_store)
6970               {
6971                 if (j == 0)
6972                     result_chain.create (group_size);
6973                 /* Permute.  */
6974                 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
6975                                                   &result_chain);
6976               }
6977 
6978             next_stmt = first_stmt;
6979             for (i = 0; i < vec_num; i++)
6980               {
6981                 unsigned align, misalign;
6982 
6983                 tree final_mask = NULL_TREE;
6984                 if (loop_masks)
6985                     final_mask = vect_get_loop_mask (gsi, loop_masks,
6986                                                              vec_num * ncopies,
6987                                                              vectype, vec_num * j + i);
6988                 if (vec_mask)
6989                     final_mask = prepare_load_store_mask (mask_vectype, final_mask,
6990                                                                   vec_mask, gsi);
6991 
6992                 if (memory_access_type == VMAT_GATHER_SCATTER)
6993                     {
6994                       tree scale = size_int (gs_info.scale);
6995                       gcall *call;
6996                       if (loop_masks)
6997                         call = gimple_build_call_internal
6998                           (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
6999                            scale, vec_oprnd, final_mask);
7000                       else
7001                         call = gimple_build_call_internal
7002                           (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
7003                            scale, vec_oprnd);
7004                       gimple_call_set_nothrow (call, true);
7005                       new_stmt = call;
7006                       vect_finish_stmt_generation (stmt, new_stmt, gsi);
7007                       break;
7008                     }
7009 
7010                 if (i > 0)
7011                     /* Bump the vector pointer.  */
7012                     dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7013                                                          stmt, bump);
7014 
7015                 if (slp)
7016                     vec_oprnd = vec_oprnds[i];
7017                 else if (grouped_store)
7018                     /* For grouped stores vectorized defs are interleaved in
7019                        vect_permute_store_chain().  */
7020                     vec_oprnd = result_chain[i];
7021 
7022                 align = DR_TARGET_ALIGNMENT (first_dr);
7023                 if (aligned_access_p (first_dr))
7024                     misalign = 0;
7025                 else if (DR_MISALIGNMENT (first_dr) == -1)
7026                     {
7027                       align = dr_alignment (vect_dr_behavior (first_dr));
7028                       misalign = 0;
7029                     }
7030                 else
7031                     misalign = DR_MISALIGNMENT (first_dr);
7032                 if (dataref_offset == NULL_TREE
7033                       && TREE_CODE (dataref_ptr) == SSA_NAME)
7034                     set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
7035                                                   misalign);
7036 
7037                 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7038                     {
7039                       tree perm_mask = perm_mask_for_reverse (vectype);
7040                       tree perm_dest
7041                         = vect_create_destination_var (vect_get_store_rhs (stmt),
7042                                                                vectype);
7043                       tree new_temp = make_ssa_name (perm_dest);
7044 
7045                       /* Generate the permute statement.  */
7046                       gimple *perm_stmt
7047                         = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
7048                                                      vec_oprnd, perm_mask);
7049                       vect_finish_stmt_generation (stmt, perm_stmt, gsi);
7050 
7051                       perm_stmt = SSA_NAME_DEF_STMT (new_temp);
7052                       vec_oprnd = new_temp;
7053                     }
7054 
7055                 /* Arguments are ready.  Create the new vector stmt.  */
7056                 if (final_mask)
7057                     {
7058                       align = least_bit_hwi (misalign | align);
7059                       tree ptr = build_int_cst (ref_type, align);
7060                       gcall *call
7061                         = gimple_build_call_internal (IFN_MASK_STORE, 4,
7062                                                               dataref_ptr, ptr,
7063                                                               final_mask, vec_oprnd);
7064                       gimple_call_set_nothrow (call, true);
7065                       new_stmt = call;
7066                     }
7067                 else
7068                     {
7069                       data_ref = fold_build2 (MEM_REF, vectype,
7070                                                     dataref_ptr,
7071                                                     dataref_offset
7072                                                     ? dataref_offset
7073                                                     : build_int_cst (ref_type, 0));
7074                       if (aligned_access_p (first_dr))
7075                         ;
7076                       else if (DR_MISALIGNMENT (first_dr) == -1)
7077                         TREE_TYPE (data_ref)
7078                           = build_aligned_type (TREE_TYPE (data_ref),
7079                                                       align * BITS_PER_UNIT);
7080                       else
7081                         TREE_TYPE (data_ref)
7082                           = build_aligned_type (TREE_TYPE (data_ref),
7083                                                       TYPE_ALIGN (elem_type));
7084                       vect_copy_ref_info (data_ref, DR_REF (first_dr));
7085                       new_stmt = gimple_build_assign (data_ref, vec_oprnd);
7086                     }
7087                 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7088 
7089                 if (slp)
7090                     continue;
7091 
7092                 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
7093                 if (!next_stmt)
7094                     break;
7095               }
7096           }
7097       if (!slp)
7098           {
7099             if (j == 0)
7100               STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7101             else
7102               STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7103             prev_stmt_info = vinfo_for_stmt (new_stmt);
7104           }
7105     }
7106 
7107   oprnds.release ();
7108   result_chain.release ();
7109   vec_oprnds.release ();
7110 
7111   return true;
7112 }
7113 
7114 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
7115    VECTOR_CST mask.  No checks are made that the target platform supports the
7116    mask, so callers may wish to test can_vec_perm_const_p separately, or use
7117    vect_gen_perm_mask_checked.  */
7118 
7119 tree
vect_gen_perm_mask_any(tree vectype,const vec_perm_indices & sel)7120 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
7121 {
7122   tree mask_type;
7123 
7124   poly_uint64 nunits = sel.length ();
7125   gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
7126 
7127   mask_type = build_vector_type (ssizetype, nunits);
7128   return vec_perm_indices_to_tree (mask_type, sel);
7129 }
7130 
7131 /* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_const_p,
7132    i.e. that the target supports the pattern _for arbitrary input vectors_.  */
7133 
7134 tree
vect_gen_perm_mask_checked(tree vectype,const vec_perm_indices & sel)7135 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
7136 {
7137   gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
7138   return vect_gen_perm_mask_any (vectype, sel);
7139 }
7140 
7141 /* Given a vector variable X and Y, that was generated for the scalar
7142    STMT, generate instructions to permute the vector elements of X and Y
7143    using permutation mask MASK_VEC, insert them at *GSI and return the
7144    permuted vector variable.  */
7145 
7146 static tree
permute_vec_elements(tree x,tree y,tree mask_vec,gimple * stmt,gimple_stmt_iterator * gsi)7147 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
7148                           gimple_stmt_iterator *gsi)
7149 {
7150   tree vectype = TREE_TYPE (x);
7151   tree perm_dest, data_ref;
7152   gimple *perm_stmt;
7153 
7154   tree scalar_dest = gimple_get_lhs (stmt);
7155   if (TREE_CODE (scalar_dest) == SSA_NAME)
7156     perm_dest = vect_create_destination_var (scalar_dest, vectype);
7157   else
7158     perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
7159   data_ref = make_ssa_name (perm_dest);
7160 
7161   /* Generate the permute statement.  */
7162   perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
7163   vect_finish_stmt_generation (stmt, perm_stmt, gsi);
7164 
7165   return data_ref;
7166 }
7167 
7168 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
7169    inserting them on the loops preheader edge.  Returns true if we
7170    were successful in doing so (and thus STMT can be moved then),
7171    otherwise returns false.  */
7172 
7173 static bool
hoist_defs_of_uses(gimple * stmt,struct loop * loop)7174 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
7175 {
7176   ssa_op_iter i;
7177   tree op;
7178   bool any = false;
7179 
7180   FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
7181     {
7182       gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7183       if (!gimple_nop_p (def_stmt)
7184             && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7185           {
7186             /* Make sure we don't need to recurse.  While we could do
7187                so in simple cases when there are more complex use webs
7188                we don't have an easy way to preserve stmt order to fulfil
7189                dependencies within them.  */
7190             tree op2;
7191             ssa_op_iter i2;
7192             if (gimple_code (def_stmt) == GIMPLE_PHI)
7193               return false;
7194             FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
7195               {
7196                 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
7197                 if (!gimple_nop_p (def_stmt2)
7198                       && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
7199                     return false;
7200               }
7201             any = true;
7202           }
7203     }
7204 
7205   if (!any)
7206     return true;
7207 
7208   FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
7209     {
7210       gimple *def_stmt = SSA_NAME_DEF_STMT (op);
7211       if (!gimple_nop_p (def_stmt)
7212             && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
7213           {
7214             gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7215             gsi_remove (&gsi, false);
7216             gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
7217           }
7218     }
7219 
7220   return true;
7221 }
7222 
7223 /* vectorizable_load.
7224 
7225    Check if STMT reads a non scalar data-ref (array/pointer/structure) that
7226    can be vectorized.
7227    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7228    stmt to replace it, put it in VEC_STMT, and insert it at BSI.
7229    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
7230 
7231 static bool
vectorizable_load(gimple * stmt,gimple_stmt_iterator * gsi,gimple ** vec_stmt,slp_tree slp_node,slp_instance slp_node_instance)7232 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
7233                    slp_tree slp_node, slp_instance slp_node_instance)
7234 {
7235   tree scalar_dest;
7236   tree vec_dest = NULL;
7237   tree data_ref = NULL;
7238   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7239   stmt_vec_info prev_stmt_info;
7240   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7241   struct loop *loop = NULL;
7242   struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
7243   bool nested_in_vect_loop = false;
7244   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
7245   tree elem_type;
7246   tree new_temp;
7247   machine_mode mode;
7248   gimple *new_stmt = NULL;
7249   tree dummy;
7250   enum dr_alignment_support alignment_support_scheme;
7251   tree dataref_ptr = NULL_TREE;
7252   tree dataref_offset = NULL_TREE;
7253   gimple *ptr_incr = NULL;
7254   int ncopies;
7255   int i, j;
7256   unsigned int group_size;
7257   poly_uint64 group_gap_adj;
7258   tree msq = NULL_TREE, lsq;
7259   tree offset = NULL_TREE;
7260   tree byte_offset = NULL_TREE;
7261   tree realignment_token = NULL_TREE;
7262   gphi *phi = NULL;
7263   vec<tree> dr_chain = vNULL;
7264   bool grouped_load = false;
7265   gimple *first_stmt;
7266   gimple *first_stmt_for_drptr = NULL;
7267   bool inv_p;
7268   bool compute_in_loop = false;
7269   struct loop *at_loop;
7270   int vec_num;
7271   bool slp = (slp_node != NULL);
7272   bool slp_perm = false;
7273   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7274   poly_uint64 vf;
7275   tree aggr_type;
7276   gather_scatter_info gs_info;
7277   vec_info *vinfo = stmt_info->vinfo;
7278   tree ref_type;
7279   enum vect_def_type mask_dt = vect_unknown_def_type;
7280 
7281   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7282     return false;
7283 
7284   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7285       && ! vec_stmt)
7286     return false;
7287 
7288   tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7289   if (is_gimple_assign (stmt))
7290     {
7291       scalar_dest = gimple_assign_lhs (stmt);
7292       if (TREE_CODE (scalar_dest) != SSA_NAME)
7293           return false;
7294 
7295       tree_code code = gimple_assign_rhs_code (stmt);
7296       if (code != ARRAY_REF
7297             && code != BIT_FIELD_REF
7298             && code != INDIRECT_REF
7299             && code != COMPONENT_REF
7300             && code != IMAGPART_EXPR
7301             && code != REALPART_EXPR
7302             && code != MEM_REF
7303             && TREE_CODE_CLASS (code) != tcc_declaration)
7304           return false;
7305     }
7306   else
7307     {
7308       gcall *call = dyn_cast <gcall *> (stmt);
7309       if (!call || !gimple_call_internal_p (call))
7310           return false;
7311 
7312       internal_fn ifn = gimple_call_internal_fn (call);
7313       if (!internal_load_fn_p (ifn))
7314           return false;
7315 
7316       scalar_dest = gimple_call_lhs (call);
7317       if (!scalar_dest)
7318           return false;
7319 
7320       if (slp_node != NULL)
7321           {
7322             if (dump_enabled_p ())
7323               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7324                                    "SLP of masked loads not supported.\n");
7325             return false;
7326           }
7327 
7328       int mask_index = internal_fn_mask_index (ifn);
7329       if (mask_index >= 0)
7330           {
7331             mask = gimple_call_arg (call, mask_index);
7332             if (!vect_check_load_store_mask (stmt, mask, &mask_dt,
7333                                                      &mask_vectype))
7334               return false;
7335           }
7336     }
7337 
7338   if (!STMT_VINFO_DATA_REF (stmt_info))
7339     return false;
7340 
7341   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7342   poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7343 
7344   if (loop_vinfo)
7345     {
7346       loop = LOOP_VINFO_LOOP (loop_vinfo);
7347       nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
7348       vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7349     }
7350   else
7351     vf = 1;
7352 
7353   /* Multiple types in SLP are handled by creating the appropriate number of
7354      vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
7355      case of SLP.  */
7356   if (slp)
7357     ncopies = 1;
7358   else
7359     ncopies = vect_get_num_copies (loop_vinfo, vectype);
7360 
7361   gcc_assert (ncopies >= 1);
7362 
7363   /* FORNOW. This restriction should be relaxed.  */
7364   if (nested_in_vect_loop && ncopies > 1)
7365     {
7366       if (dump_enabled_p ())
7367         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7368                          "multiple types in nested loop.\n");
7369       return false;
7370     }
7371 
7372   /* Invalidate assumptions made by dependence analysis when vectorization
7373      on the unrolled body effectively re-orders stmts.  */
7374   if (ncopies > 1
7375       && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7376       && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7377                        STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7378     {
7379       if (dump_enabled_p ())
7380           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7381                                "cannot perform implicit CSE when unrolling "
7382                                "with negative dependence distance\n");
7383       return false;
7384     }
7385 
7386   elem_type = TREE_TYPE (vectype);
7387   mode = TYPE_MODE (vectype);
7388 
7389   /* FORNOW. In some cases can vectorize even if data-type not supported
7390     (e.g. - data copies).  */
7391   if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
7392     {
7393       if (dump_enabled_p ())
7394         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7395                          "Aligned load, but unsupported type.\n");
7396       return false;
7397     }
7398 
7399   /* Check if the load is a part of an interleaving chain.  */
7400   if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7401     {
7402       grouped_load = true;
7403       /* FORNOW */
7404       gcc_assert (!nested_in_vect_loop);
7405       gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
7406 
7407       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7408       group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7409 
7410       if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7411           slp_perm = true;
7412 
7413       /* Invalidate assumptions made by dependence analysis when vectorization
7414            on the unrolled body effectively re-orders stmts.  */
7415       if (!PURE_SLP_STMT (stmt_info)
7416             && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7417             && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7418                            STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7419           {
7420             if (dump_enabled_p ())
7421               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7422                                    "cannot perform implicit CSE when performing "
7423                                    "group loads with negative dependence distance\n");
7424             return false;
7425           }
7426 
7427       /* Similarly when the stmt is a load that is both part of a SLP
7428          instance and a loop vectorized stmt via the same-dr mechanism
7429            we have to give up.  */
7430       if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
7431             && (STMT_SLP_TYPE (stmt_info)
7432                 != STMT_SLP_TYPE (vinfo_for_stmt
7433                                          (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
7434           {
7435             if (dump_enabled_p ())
7436               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7437                                    "conflicting SLP types for CSEd load\n");
7438             return false;
7439           }
7440     }
7441   else
7442     group_size = 1;
7443 
7444   vect_memory_access_type memory_access_type;
7445   if (!get_load_store_type (stmt, vectype, slp, mask, VLS_LOAD, ncopies,
7446                                   &memory_access_type, &gs_info))
7447     return false;
7448 
7449   if (mask)
7450     {
7451       if (memory_access_type == VMAT_CONTIGUOUS)
7452           {
7453             machine_mode vec_mode = TYPE_MODE (vectype);
7454             if (!VECTOR_MODE_P (vec_mode)
7455                 || !can_vec_mask_load_store_p (vec_mode,
7456                                                        TYPE_MODE (mask_vectype), true))
7457               return false;
7458           }
7459       else if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7460           {
7461             tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7462             tree masktype
7463               = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
7464             if (TREE_CODE (masktype) == INTEGER_TYPE)
7465               {
7466                 if (dump_enabled_p ())
7467                     dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7468                                          "masked gather with integer mask not"
7469                                          " supported.");
7470                 return false;
7471               }
7472           }
7473       else if (memory_access_type != VMAT_LOAD_STORE_LANES
7474                  && memory_access_type != VMAT_GATHER_SCATTER)
7475           {
7476             if (dump_enabled_p ())
7477               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7478                                    "unsupported access type for masked load.\n");
7479             return false;
7480           }
7481     }
7482 
7483   if (!vec_stmt) /* transformation not required.  */
7484     {
7485       if (!slp)
7486           STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7487 
7488       if (loop_vinfo
7489             && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7490           check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
7491                                           memory_access_type, &gs_info);
7492 
7493       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
7494       /* The SLP costs are calculated during SLP analysis.  */
7495       if (! slp_node)
7496           vect_model_load_cost (stmt_info, ncopies, memory_access_type,
7497                                     NULL, NULL, NULL);
7498       return true;
7499     }
7500 
7501   if (!slp)
7502     gcc_assert (memory_access_type
7503                     == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7504 
7505   if (dump_enabled_p ())
7506     dump_printf_loc (MSG_NOTE, vect_location,
7507                      "transform load. ncopies = %d\n", ncopies);
7508 
7509   /* Transform.  */
7510 
7511   ensure_base_align (dr);
7512 
7513   if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7514     {
7515       vect_build_gather_load_calls (stmt, gsi, vec_stmt, &gs_info, mask,
7516                                             mask_dt);
7517       return true;
7518     }
7519 
7520   if (memory_access_type == VMAT_ELEMENTWISE
7521       || memory_access_type == VMAT_STRIDED_SLP)
7522     {
7523       gimple_stmt_iterator incr_gsi;
7524       bool insert_after;
7525       gimple *incr;
7526       tree offvar;
7527       tree ivstep;
7528       tree running_off;
7529       vec<constructor_elt, va_gc> *v = NULL;
7530       tree stride_base, stride_step, alias_off;
7531       /* Checked by get_load_store_type.  */
7532       unsigned int const_nunits = nunits.to_constant ();
7533       unsigned HOST_WIDE_INT cst_offset = 0;
7534 
7535       gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7536       gcc_assert (!nested_in_vect_loop);
7537 
7538       if (grouped_load)
7539           {
7540             first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7541             first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7542           }
7543       else
7544           {
7545             first_stmt = stmt;
7546             first_dr = dr;
7547           }
7548       if (slp && grouped_load)
7549           {
7550             group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7551             ref_type = get_group_alias_ptr_type (first_stmt);
7552           }
7553       else
7554           {
7555             if (grouped_load)
7556               cst_offset
7557                 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
7558                      * vect_get_place_in_interleaving_chain (stmt, first_stmt));
7559             group_size = 1;
7560             ref_type = reference_alias_ptr_type (DR_REF (dr));
7561           }
7562 
7563       stride_base
7564           = fold_build_pointer_plus
7565               (DR_BASE_ADDRESS (first_dr),
7566                size_binop (PLUS_EXPR,
7567                                convert_to_ptrofftype (DR_OFFSET (first_dr)),
7568                                convert_to_ptrofftype (DR_INIT (first_dr))));
7569       stride_step = fold_convert (sizetype, DR_STEP (first_dr));
7570 
7571       /* For a load with loop-invariant (but other than power-of-2)
7572          stride (i.e. not a grouped access) like so:
7573 
7574              for (i = 0; i < n; i += stride)
7575                ... = array[i];
7576 
7577            we generate a new induction variable and new accesses to
7578            form a new vector (or vectors, depending on ncopies):
7579 
7580              for (j = 0; ; j += VF*stride)
7581                tmp1 = array[j];
7582                tmp2 = array[j + stride];
7583                ...
7584                vectemp = {tmp1, tmp2, ...}
7585          */
7586 
7587       ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7588                                   build_int_cst (TREE_TYPE (stride_step), vf));
7589 
7590       standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7591 
7592       stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7593       ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7594       create_iv (stride_base, ivstep, NULL,
7595                      loop, &incr_gsi, insert_after,
7596                      &offvar, NULL);
7597       incr = gsi_stmt (incr_gsi);
7598       set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
7599 
7600       stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7601 
7602       prev_stmt_info = NULL;
7603       running_off = offvar;
7604       alias_off = build_int_cst (ref_type, 0);
7605       int nloads = const_nunits;
7606       int lnel = 1;
7607       tree ltype = TREE_TYPE (vectype);
7608       tree lvectype = vectype;
7609       auto_vec<tree> dr_chain;
7610       if (memory_access_type == VMAT_STRIDED_SLP)
7611           {
7612             if (group_size < const_nunits)
7613               {
7614                 /* First check if vec_init optab supports construction from
7615                      vector elts directly.  */
7616                 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
7617                 machine_mode vmode;
7618                 if (mode_for_vector (elmode, group_size).exists (&vmode)
7619                       && VECTOR_MODE_P (vmode)
7620                       && targetm.vector_mode_supported_p (vmode)
7621                       && (convert_optab_handler (vec_init_optab,
7622                                                        TYPE_MODE (vectype), vmode)
7623                           != CODE_FOR_nothing))
7624                     {
7625                       nloads = const_nunits / group_size;
7626                       lnel = group_size;
7627                       ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7628                     }
7629                 else
7630                     {
7631                       /* Otherwise avoid emitting a constructor of vector elements
7632                          by performing the loads using an integer type of the same
7633                          size, constructing a vector of those and then
7634                          re-interpreting it as the original vector type.
7635                          This avoids a huge runtime penalty due to the general
7636                          inability to perform store forwarding from smaller stores
7637                          to a larger load.  */
7638                       unsigned lsize
7639                         = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7640                       elmode = int_mode_for_size (lsize, 0).require ();
7641                       unsigned int lnunits = const_nunits / group_size;
7642                       /* If we can't construct such a vector fall back to
7643                          element loads of the original vector type.  */
7644                       if (mode_for_vector (elmode, lnunits).exists (&vmode)
7645                           && VECTOR_MODE_P (vmode)
7646                           && targetm.vector_mode_supported_p (vmode)
7647                           && (convert_optab_handler (vec_init_optab, vmode, elmode)
7648                                 != CODE_FOR_nothing))
7649                         {
7650                           nloads = lnunits;
7651                           lnel = group_size;
7652                           ltype = build_nonstandard_integer_type (lsize, 1);
7653                           lvectype = build_vector_type (ltype, nloads);
7654                         }
7655                     }
7656               }
7657             else
7658               {
7659                 nloads = 1;
7660                 lnel = const_nunits;
7661                 ltype = vectype;
7662               }
7663             ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7664           }
7665       /* Load vector(1) scalar_type if it's 1 element-wise vectype.  */
7666       else if (nloads == 1)
7667           ltype = vectype;
7668 
7669       if (slp)
7670           {
7671             /* For SLP permutation support we need to load the whole group,
7672                not only the number of vector stmts the permutation result
7673                fits in.  */
7674             if (slp_perm)
7675               {
7676                 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7677                      variable VF.  */
7678                 unsigned int const_vf = vf.to_constant ();
7679                 ncopies = CEIL (group_size * const_vf, const_nunits);
7680                 dr_chain.create (ncopies);
7681               }
7682             else
7683               ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7684           }
7685       unsigned int group_el = 0;
7686       unsigned HOST_WIDE_INT
7687           elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7688       for (j = 0; j < ncopies; j++)
7689           {
7690             if (nloads > 1)
7691               vec_alloc (v, nloads);
7692             for (i = 0; i < nloads; i++)
7693               {
7694                 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7695                                                        group_el * elsz + cst_offset);
7696                 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
7697                 vect_copy_ref_info (data_ref, DR_REF (first_dr));
7698                 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
7699                 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7700                 if (nloads > 1)
7701                     CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7702                                                   gimple_assign_lhs (new_stmt));
7703 
7704                 group_el += lnel;
7705                 if (! slp
7706                       || group_el == group_size)
7707                     {
7708                       tree newoff = copy_ssa_name (running_off);
7709                       gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7710                                                                   running_off, stride_step);
7711                       vect_finish_stmt_generation (stmt, incr, gsi);
7712 
7713                       running_off = newoff;
7714                       group_el = 0;
7715                     }
7716               }
7717             if (nloads > 1)
7718               {
7719                 tree vec_inv = build_constructor (lvectype, v);
7720                 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
7721                 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7722                 if (lvectype != vectype)
7723                     {
7724                       new_stmt = gimple_build_assign (make_ssa_name (vectype),
7725                                                               VIEW_CONVERT_EXPR,
7726                                                               build1 (VIEW_CONVERT_EXPR,
7727                                                                         vectype, new_temp));
7728                       vect_finish_stmt_generation (stmt, new_stmt, gsi);
7729                     }
7730               }
7731 
7732             if (slp)
7733               {
7734                 if (slp_perm)
7735                     dr_chain.quick_push (gimple_assign_lhs (new_stmt));
7736                 else
7737                     SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7738               }
7739             else
7740               {
7741                 if (j == 0)
7742                     STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7743                 else
7744                     STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7745                 prev_stmt_info = vinfo_for_stmt (new_stmt);
7746               }
7747           }
7748       if (slp_perm)
7749           {
7750             unsigned n_perms;
7751             vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7752                                                   slp_node_instance, false, &n_perms);
7753           }
7754       return true;
7755     }
7756 
7757   if (memory_access_type == VMAT_GATHER_SCATTER
7758       || (!slp && memory_access_type == VMAT_CONTIGUOUS))
7759     grouped_load = false;
7760 
7761   if (grouped_load)
7762     {
7763       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7764       group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7765       /* For SLP vectorization we directly vectorize a subchain
7766          without permutation.  */
7767       if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7768           first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7769       /* For BB vectorization always use the first stmt to base
7770            the data ref pointer on.  */
7771       if (bb_vinfo)
7772           first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7773 
7774       /* Check if the chain of loads is already vectorized.  */
7775       if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7776             /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7777                ???  But we can only do so if there is exactly one
7778                as we have no way to get at the rest.  Leave the CSE
7779                opportunity alone.
7780                ???  With the group load eventually participating
7781                in multiple different permutations (having multiple
7782                slp nodes which refer to the same group) the CSE
7783                is even wrong code.  See PR56270.  */
7784             && !slp)
7785           {
7786             *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7787             return true;
7788           }
7789       first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7790       group_gap_adj = 0;
7791 
7792       /* VEC_NUM is the number of vect stmts to be created for this group.  */
7793       if (slp)
7794           {
7795             grouped_load = false;
7796             /* For SLP permutation support we need to load the whole group,
7797                not only the number of vector stmts the permutation result
7798                fits in.  */
7799             if (slp_perm)
7800               {
7801                 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7802                      variable VF.  */
7803                 unsigned int const_vf = vf.to_constant ();
7804                 unsigned int const_nunits = nunits.to_constant ();
7805                 vec_num = CEIL (group_size * const_vf, const_nunits);
7806                 group_gap_adj = vf * group_size - nunits * vec_num;
7807               }
7808             else
7809               {
7810                 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7811                 group_gap_adj
7812                     = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
7813               }
7814           }
7815       else
7816           vec_num = group_size;
7817 
7818       ref_type = get_group_alias_ptr_type (first_stmt);
7819     }
7820   else
7821     {
7822       first_stmt = stmt;
7823       first_dr = dr;
7824       group_size = vec_num = 1;
7825       group_gap_adj = 0;
7826       ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7827     }
7828 
7829   alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
7830   gcc_assert (alignment_support_scheme);
7831   vec_loop_masks *loop_masks
7832     = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7833        ? &LOOP_VINFO_MASKS (loop_vinfo)
7834        : NULL);
7835   /* Targets with load-lane instructions must not require explicit
7836      realignment.  vect_supportable_dr_alignment always returns either
7837      dr_aligned or dr_unaligned_supported for masked operations.  */
7838   gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7839                  && !mask
7840                  && !loop_masks)
7841                 || alignment_support_scheme == dr_aligned
7842                 || alignment_support_scheme == dr_unaligned_supported);
7843 
7844   /* In case the vectorization factor (VF) is bigger than the number
7845      of elements that we can fit in a vectype (nunits), we have to generate
7846      more than one vector stmt - i.e - we need to "unroll" the
7847      vector stmt by a factor VF/nunits.  In doing so, we record a pointer
7848      from one copy of the vector stmt to the next, in the field
7849      STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
7850      stages to find the correct vector defs to be used when vectorizing
7851      stmts that use the defs of the current stmt.  The example below
7852      illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7853      need to create 4 vectorized stmts):
7854 
7855      before vectorization:
7856                                 RELATED_STMT    VEC_STMT
7857         S1:     x = memref      -               -
7858         S2:     z = x + 1       -               -
7859 
7860      step 1: vectorize stmt S1:
7861         We first create the vector stmt VS1_0, and, as usual, record a
7862         pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7863         Next, we create the vector stmt VS1_1, and record a pointer to
7864         it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7865         Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
7866         stmts and pointers:
7867                                 RELATED_STMT    VEC_STMT
7868         VS1_0:  vx0 = memref0   VS1_1           -
7869         VS1_1:  vx1 = memref1   VS1_2           -
7870         VS1_2:  vx2 = memref2   VS1_3           -
7871         VS1_3:  vx3 = memref3   -               -
7872         S1:     x = load        -               VS1_0
7873         S2:     z = x + 1       -               -
7874 
7875      See the documentation of vect_get_vec_def_for_stmt_copy for how the
7876      information we recorded in the RELATED_STMT field is used to vectorize
7877      stmt S2.  */
7878 
7879   /* In case of interleaving (non-unit grouped access):
7880 
7881      S1:  x2 = &base + 2
7882      S2:  x0 = &base
7883      S3:  x1 = &base + 1
7884      S4:  x3 = &base + 3
7885 
7886      Vectorized loads are created in the order of memory accesses
7887      starting from the access of the first stmt of the chain:
7888 
7889      VS1: vx0 = &base
7890      VS2: vx1 = &base + vec_size*1
7891      VS3: vx3 = &base + vec_size*2
7892      VS4: vx4 = &base + vec_size*3
7893 
7894      Then permutation statements are generated:
7895 
7896      VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7897      VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7898        ...
7899 
7900      And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7901      (the order of the data-refs in the output of vect_permute_load_chain
7902      corresponds to the order of scalar stmts in the interleaving chain - see
7903      the documentation of vect_permute_load_chain()).
7904      The generation of permutation stmts and recording them in
7905      STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7906 
7907      In case of both multiple types and interleaving, the vector loads and
7908      permutation stmts above are created for every copy.  The result vector
7909      stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7910      corresponding STMT_VINFO_RELATED_STMT for the next copies.  */
7911 
7912   /* If the data reference is aligned (dr_aligned) or potentially unaligned
7913      on a target that supports unaligned accesses (dr_unaligned_supported)
7914      we generate the following code:
7915          p = initial_addr;
7916          indx = 0;
7917          loop {
7918              p = p + indx * vectype_size;
7919            vec_dest = *(p);
7920            indx = indx + 1;
7921          }
7922 
7923      Otherwise, the data reference is potentially unaligned on a target that
7924      does not support unaligned accesses (dr_explicit_realign_optimized) -
7925      then generate the following code, in which the data in each iteration is
7926      obtained by two vector loads, one from the previous iteration, and one
7927      from the current iteration:
7928          p1 = initial_addr;
7929          msq_init = *(floor(p1))
7930          p2 = initial_addr + VS - 1;
7931          realignment_token = call target_builtin;
7932          indx = 0;
7933          loop {
7934            p2 = p2 + indx * vectype_size
7935            lsq = *(floor(p2))
7936            vec_dest = realign_load (msq, lsq, realignment_token)
7937            indx = indx + 1;
7938            msq = lsq;
7939          }   */
7940 
7941   /* If the misalignment remains the same throughout the execution of the
7942      loop, we can create the init_addr and permutation mask at the loop
7943      preheader.  Otherwise, it needs to be created inside the loop.
7944      This can only occur when vectorizing memory accesses in the inner-loop
7945      nested within an outer-loop that is being vectorized.  */
7946 
7947   if (nested_in_vect_loop
7948       && !multiple_p (DR_STEP_ALIGNMENT (dr),
7949                           GET_MODE_SIZE (TYPE_MODE (vectype))))
7950     {
7951       gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7952       compute_in_loop = true;
7953     }
7954 
7955   if ((alignment_support_scheme == dr_explicit_realign_optimized
7956        || alignment_support_scheme == dr_explicit_realign)
7957       && !compute_in_loop)
7958     {
7959       msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7960                                             alignment_support_scheme, NULL_TREE,
7961                                             &at_loop);
7962       if (alignment_support_scheme == dr_explicit_realign_optimized)
7963           {
7964             phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7965             byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7966                                             size_one_node);
7967           }
7968     }
7969   else
7970     at_loop = loop;
7971 
7972   if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7973     offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7974 
7975   tree bump;
7976   tree vec_offset = NULL_TREE;
7977   if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7978     {
7979       aggr_type = NULL_TREE;
7980       bump = NULL_TREE;
7981     }
7982   else if (memory_access_type == VMAT_GATHER_SCATTER)
7983     {
7984       aggr_type = elem_type;
7985       vect_get_strided_load_store_ops (stmt, loop_vinfo, &gs_info,
7986                                                &bump, &vec_offset);
7987     }
7988   else
7989     {
7990       if (memory_access_type == VMAT_LOAD_STORE_LANES)
7991           aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7992       else
7993           aggr_type = vectype;
7994       bump = vect_get_data_ptr_increment (dr, aggr_type, memory_access_type);
7995     }
7996 
7997   tree vec_mask = NULL_TREE;
7998   prev_stmt_info = NULL;
7999   poly_uint64 group_elt = 0;
8000   for (j = 0; j < ncopies; j++)
8001     {
8002       /* 1. Create the vector or array pointer update chain.  */
8003       if (j == 0)
8004           {
8005             bool simd_lane_access_p
8006               = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
8007             if (simd_lane_access_p
8008                 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
8009                 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
8010                 && integer_zerop (DR_OFFSET (first_dr))
8011                 && integer_zerop (DR_INIT (first_dr))
8012                 && alias_sets_conflict_p (get_alias_set (aggr_type),
8013                                                   get_alias_set (TREE_TYPE (ref_type)))
8014                 && (alignment_support_scheme == dr_aligned
8015                       || alignment_support_scheme == dr_unaligned_supported))
8016               {
8017                 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
8018                 dataref_offset = build_int_cst (ref_type, 0);
8019                 inv_p = false;
8020               }
8021             else if (first_stmt_for_drptr
8022                        && first_stmt != first_stmt_for_drptr)
8023               {
8024                 dataref_ptr
8025                     = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
8026                                                       at_loop, offset, &dummy, gsi,
8027                                                       &ptr_incr, simd_lane_access_p,
8028                                                       &inv_p, byte_offset, bump);
8029                 /* Adjust the pointer by the difference to first_stmt.  */
8030                 data_reference_p ptrdr
8031                     = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
8032                 tree diff = fold_convert (sizetype,
8033                                                   size_binop (MINUS_EXPR,
8034                                                                 DR_INIT (first_dr),
8035                                                                 DR_INIT (ptrdr)));
8036                 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8037                                                        stmt, diff);
8038               }
8039             else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8040               {
8041                 vect_get_gather_scatter_ops (loop, stmt, &gs_info,
8042                                                      &dataref_ptr, &vec_offset);
8043                 inv_p = false;
8044               }
8045             else
8046               dataref_ptr
8047                 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
8048                                                     offset, &dummy, gsi, &ptr_incr,
8049                                                     simd_lane_access_p, &inv_p,
8050                                                     byte_offset, bump);
8051             if (mask)
8052               vec_mask = vect_get_vec_def_for_operand (mask, stmt,
8053                                                                  mask_vectype);
8054           }
8055       else
8056           {
8057             if (dataref_offset)
8058               dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
8059                                                         bump);
8060             else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8061               vec_offset = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
8062                                                                        vec_offset);
8063             else
8064               dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8065                                                      stmt, bump);
8066             if (mask)
8067               vec_mask = vect_get_vec_def_for_stmt_copy (mask_dt, vec_mask);
8068           }
8069 
8070       if (grouped_load || slp_perm)
8071           dr_chain.create (vec_num);
8072 
8073       if (memory_access_type == VMAT_LOAD_STORE_LANES)
8074           {
8075             tree vec_array;
8076 
8077             vec_array = create_vector_array (vectype, vec_num);
8078 
8079             tree final_mask = NULL_TREE;
8080             if (loop_masks)
8081               final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8082                                                        vectype, j);
8083             if (vec_mask)
8084               final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8085                                                               vec_mask, gsi);
8086 
8087             gcall *call;
8088             if (final_mask)
8089               {
8090                 /* Emit:
8091                        VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
8092                                                     VEC_MASK).  */
8093                 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8094                 tree alias_ptr = build_int_cst (ref_type, align);
8095                 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
8096                                                              dataref_ptr, alias_ptr,
8097                                                              final_mask);
8098               }
8099             else
8100               {
8101                 /* Emit:
8102                        VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
8103                 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8104                 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
8105               }
8106             gimple_call_set_lhs (call, vec_array);
8107             gimple_call_set_nothrow (call, true);
8108             new_stmt = call;
8109             vect_finish_stmt_generation (stmt, new_stmt, gsi);
8110 
8111             /* Extract each vector into an SSA_NAME.  */
8112             for (i = 0; i < vec_num; i++)
8113               {
8114                 new_temp = read_vector_array (stmt, gsi, scalar_dest,
8115                                                       vec_array, i);
8116                 dr_chain.quick_push (new_temp);
8117               }
8118 
8119             /* Record the mapping between SSA_NAMEs and statements.  */
8120             vect_record_grouped_load_vectors (stmt, dr_chain);
8121           }
8122       else
8123           {
8124             for (i = 0; i < vec_num; i++)
8125               {
8126                 tree final_mask = NULL_TREE;
8127                 if (loop_masks
8128                       && memory_access_type != VMAT_INVARIANT)
8129                     final_mask = vect_get_loop_mask (gsi, loop_masks,
8130                                                              vec_num * ncopies,
8131                                                              vectype, vec_num * j + i);
8132                 if (vec_mask)
8133                     final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8134                                                                   vec_mask, gsi);
8135 
8136                 if (i > 0)
8137                     dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8138                                                          stmt, bump);
8139 
8140                 /* 2. Create the vector-load in the loop.  */
8141                 switch (alignment_support_scheme)
8142                     {
8143                     case dr_aligned:
8144                     case dr_unaligned_supported:
8145                       {
8146                         unsigned int align, misalign;
8147 
8148                         if (memory_access_type == VMAT_GATHER_SCATTER)
8149                           {
8150                               tree scale = size_int (gs_info.scale);
8151                               gcall *call;
8152                               if (loop_masks)
8153                                 call = gimple_build_call_internal
8154                                   (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
8155                                    vec_offset, scale, final_mask);
8156                               else
8157                                 call = gimple_build_call_internal
8158                                   (IFN_GATHER_LOAD, 3, dataref_ptr,
8159                                    vec_offset, scale);
8160                               gimple_call_set_nothrow (call, true);
8161                               new_stmt = call;
8162                               data_ref = NULL_TREE;
8163                               break;
8164                           }
8165 
8166                         align = DR_TARGET_ALIGNMENT (dr);
8167                         if (alignment_support_scheme == dr_aligned)
8168                           {
8169                               gcc_assert (aligned_access_p (first_dr));
8170                               misalign = 0;
8171                           }
8172                         else if (DR_MISALIGNMENT (first_dr) == -1)
8173                           {
8174                               align = dr_alignment (vect_dr_behavior (first_dr));
8175                               misalign = 0;
8176                           }
8177                         else
8178                           misalign = DR_MISALIGNMENT (first_dr);
8179                         if (dataref_offset == NULL_TREE
8180                               && TREE_CODE (dataref_ptr) == SSA_NAME)
8181                           set_ptr_info_alignment (get_ptr_info (dataref_ptr),
8182                                                         align, misalign);
8183 
8184                         if (final_mask)
8185                           {
8186                               align = least_bit_hwi (misalign | align);
8187                               tree ptr = build_int_cst (ref_type, align);
8188                               gcall *call
8189                                 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
8190                                                                       dataref_ptr, ptr,
8191                                                                       final_mask);
8192                               gimple_call_set_nothrow (call, true);
8193                               new_stmt = call;
8194                               data_ref = NULL_TREE;
8195                           }
8196                         else
8197                           {
8198                               data_ref
8199                                 = fold_build2 (MEM_REF, vectype, dataref_ptr,
8200                                                    dataref_offset
8201                                                    ? dataref_offset
8202                                                    : build_int_cst (ref_type, 0));
8203                               if (alignment_support_scheme == dr_aligned)
8204                                 ;
8205                               else if (DR_MISALIGNMENT (first_dr) == -1)
8206                                 TREE_TYPE (data_ref)
8207                                   = build_aligned_type (TREE_TYPE (data_ref),
8208                                                               align * BITS_PER_UNIT);
8209                               else
8210                                 TREE_TYPE (data_ref)
8211                                   = build_aligned_type (TREE_TYPE (data_ref),
8212                                                               TYPE_ALIGN (elem_type));
8213                           }
8214                         break;
8215                       }
8216                     case dr_explicit_realign:
8217                       {
8218                         tree ptr, bump;
8219 
8220                         tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
8221 
8222                         if (compute_in_loop)
8223                           msq = vect_setup_realignment (first_stmt, gsi,
8224                                                                 &realignment_token,
8225                                                                 dr_explicit_realign,
8226                                                                 dataref_ptr, NULL);
8227 
8228                         if (TREE_CODE (dataref_ptr) == SSA_NAME)
8229                           ptr = copy_ssa_name (dataref_ptr);
8230                         else
8231                           ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
8232                         unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
8233                         new_stmt = gimple_build_assign
8234                                          (ptr, BIT_AND_EXPR, dataref_ptr,
8235                                           build_int_cst
8236                                           (TREE_TYPE (dataref_ptr),
8237                                            -(HOST_WIDE_INT) align));
8238                         vect_finish_stmt_generation (stmt, new_stmt, gsi);
8239                         data_ref
8240                           = build2 (MEM_REF, vectype, ptr,
8241                                         build_int_cst (ref_type, 0));
8242                         vect_copy_ref_info (data_ref, DR_REF (first_dr));
8243                         vec_dest = vect_create_destination_var (scalar_dest,
8244                                                                           vectype);
8245                         new_stmt = gimple_build_assign (vec_dest, data_ref);
8246                         new_temp = make_ssa_name (vec_dest, new_stmt);
8247                         gimple_assign_set_lhs (new_stmt, new_temp);
8248                         gimple_set_vdef (new_stmt, gimple_vdef (stmt));
8249                         gimple_set_vuse (new_stmt, gimple_vuse (stmt));
8250                         vect_finish_stmt_generation (stmt, new_stmt, gsi);
8251                         msq = new_temp;
8252 
8253                         bump = size_binop (MULT_EXPR, vs,
8254                                                TYPE_SIZE_UNIT (elem_type));
8255                         bump = size_binop (MINUS_EXPR, bump, size_one_node);
8256                         ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
8257                         new_stmt = gimple_build_assign
8258                                          (NULL_TREE, BIT_AND_EXPR, ptr,
8259                                           build_int_cst
8260                                           (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
8261                         ptr = copy_ssa_name (ptr, new_stmt);
8262                         gimple_assign_set_lhs (new_stmt, ptr);
8263                         vect_finish_stmt_generation (stmt, new_stmt, gsi);
8264                         data_ref
8265                           = build2 (MEM_REF, vectype, ptr,
8266                                         build_int_cst (ref_type, 0));
8267                         break;
8268                       }
8269                     case dr_explicit_realign_optimized:
8270                       {
8271                         if (TREE_CODE (dataref_ptr) == SSA_NAME)
8272                           new_temp = copy_ssa_name (dataref_ptr);
8273                         else
8274                           new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
8275                         unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
8276                         new_stmt = gimple_build_assign
8277                           (new_temp, BIT_AND_EXPR, dataref_ptr,
8278                            build_int_cst (TREE_TYPE (dataref_ptr),
8279                                              -(HOST_WIDE_INT) align));
8280                         vect_finish_stmt_generation (stmt, new_stmt, gsi);
8281                         data_ref
8282                           = build2 (MEM_REF, vectype, new_temp,
8283                                         build_int_cst (ref_type, 0));
8284                         break;
8285                       }
8286                     default:
8287                       gcc_unreachable ();
8288                     }
8289                 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8290                 /* DATA_REF is null if we've already built the statement.  */
8291                 if (data_ref)
8292                     {
8293                       vect_copy_ref_info (data_ref, DR_REF (first_dr));
8294                       new_stmt = gimple_build_assign (vec_dest, data_ref);
8295                     }
8296                 new_temp = make_ssa_name (vec_dest, new_stmt);
8297                 gimple_set_lhs (new_stmt, new_temp);
8298                 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8299 
8300                 /* 3. Handle explicit realignment if necessary/supported.
8301                      Create in loop:
8302                        vec_dest = realign_load (msq, lsq, realignment_token)  */
8303                 if (alignment_support_scheme == dr_explicit_realign_optimized
8304                       || alignment_support_scheme == dr_explicit_realign)
8305                     {
8306                       lsq = gimple_assign_lhs (new_stmt);
8307                       if (!realignment_token)
8308                         realignment_token = dataref_ptr;
8309                       vec_dest = vect_create_destination_var (scalar_dest, vectype);
8310                       new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
8311                                                               msq, lsq, realignment_token);
8312                       new_temp = make_ssa_name (vec_dest, new_stmt);
8313                       gimple_assign_set_lhs (new_stmt, new_temp);
8314                       vect_finish_stmt_generation (stmt, new_stmt, gsi);
8315 
8316                       if (alignment_support_scheme == dr_explicit_realign_optimized)
8317                         {
8318                           gcc_assert (phi);
8319                           if (i == vec_num - 1 && j == ncopies - 1)
8320                               add_phi_arg (phi, lsq,
8321                                              loop_latch_edge (containing_loop),
8322                                              UNKNOWN_LOCATION);
8323                           msq = lsq;
8324                         }
8325                     }
8326 
8327                 /* 4. Handle invariant-load.  */
8328                 if (inv_p && !bb_vinfo)
8329                     {
8330                       gcc_assert (!grouped_load);
8331                       /* If we have versioned for aliasing or the loop doesn't
8332                          have any data dependencies that would preclude this,
8333                          then we are sure this is a loop invariant load and
8334                          thus we can insert it on the preheader edge.  */
8335                       if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8336                           && !nested_in_vect_loop
8337                           && hoist_defs_of_uses (stmt, loop))
8338                         {
8339                           if (dump_enabled_p ())
8340                               {
8341                                 dump_printf_loc (MSG_NOTE, vect_location,
8342                                                      "hoisting out of the vectorized "
8343                                                      "loop: ");
8344                                 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8345                               }
8346                           tree tem = copy_ssa_name (scalar_dest);
8347                           gsi_insert_on_edge_immediate
8348                               (loop_preheader_edge (loop),
8349                                gimple_build_assign (tem,
8350                                                         unshare_expr
8351                                                           (gimple_assign_rhs1 (stmt))));
8352                           new_temp = vect_init_vector (stmt, tem, vectype, NULL);
8353                           new_stmt = SSA_NAME_DEF_STMT (new_temp);
8354                           set_vinfo_for_stmt (new_stmt,
8355                                                     new_stmt_vec_info (new_stmt, vinfo));
8356                         }
8357                       else
8358                         {
8359                           gimple_stmt_iterator gsi2 = *gsi;
8360                           gsi_next (&gsi2);
8361                           new_temp = vect_init_vector (stmt, scalar_dest,
8362                                                                vectype, &gsi2);
8363                           new_stmt = SSA_NAME_DEF_STMT (new_temp);
8364                         }
8365                     }
8366 
8367                 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8368                     {
8369                       tree perm_mask = perm_mask_for_reverse (vectype);
8370                       new_temp = permute_vec_elements (new_temp, new_temp,
8371                                                                perm_mask, stmt, gsi);
8372                       new_stmt = SSA_NAME_DEF_STMT (new_temp);
8373                     }
8374 
8375                 /* Collect vector loads and later create their permutation in
8376                      vect_transform_grouped_load ().  */
8377                 if (grouped_load || slp_perm)
8378                     dr_chain.quick_push (new_temp);
8379 
8380                 /* Store vector loads in the corresponding SLP_NODE.  */
8381                 if (slp && !slp_perm)
8382                     SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8383 
8384                 /* With SLP permutation we load the gaps as well, without
8385                    we need to skip the gaps after we manage to fully load
8386                      all elements.  group_gap_adj is GROUP_SIZE here.  */
8387                 group_elt += nunits;
8388                 if (maybe_ne (group_gap_adj, 0U)
8389                       && !slp_perm
8390                       && known_eq (group_elt, group_size - group_gap_adj))
8391                     {
8392                       poly_wide_int bump_val
8393                         = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8394                            * group_gap_adj);
8395                       tree bump = wide_int_to_tree (sizetype, bump_val);
8396                       dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8397                                                              stmt, bump);
8398                       group_elt = 0;
8399                     }
8400               }
8401             /* Bump the vector pointer to account for a gap or for excess
8402                elements loaded for a permuted SLP load.  */
8403             if (maybe_ne (group_gap_adj, 0U) && slp_perm)
8404               {
8405                 poly_wide_int bump_val
8406                     = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
8407                        * group_gap_adj);
8408                 tree bump = wide_int_to_tree (sizetype, bump_val);
8409                 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8410                                                        stmt, bump);
8411               }
8412           }
8413 
8414       if (slp && !slp_perm)
8415           continue;
8416 
8417       if (slp_perm)
8418         {
8419             unsigned n_perms;
8420           if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
8421                                              slp_node_instance, false,
8422                                                        &n_perms))
8423             {
8424               dr_chain.release ();
8425               return false;
8426             }
8427         }
8428       else
8429         {
8430           if (grouped_load)
8431               {
8432                 if (memory_access_type != VMAT_LOAD_STORE_LANES)
8433                     vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
8434                 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8435               }
8436           else
8437               {
8438                 if (j == 0)
8439                   STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8440                 else
8441                   STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8442                 prev_stmt_info = vinfo_for_stmt (new_stmt);
8443               }
8444         }
8445       dr_chain.release ();
8446     }
8447 
8448   return true;
8449 }
8450 
8451 /* Function vect_is_simple_cond.
8452 
8453    Input:
8454    LOOP - the loop that is being vectorized.
8455    COND - Condition that is checked for simple use.
8456 
8457    Output:
8458    *COMP_VECTYPE - the vector type for the comparison.
8459    *DTS - The def types for the arguments of the comparison
8460 
8461    Returns whether a COND can be vectorized.  Checks whether
8462    condition operands are supportable using vec_is_simple_use.  */
8463 
8464 static bool
vect_is_simple_cond(tree cond,vec_info * vinfo,tree * comp_vectype,enum vect_def_type * dts,tree vectype)8465 vect_is_simple_cond (tree cond, vec_info *vinfo,
8466                          tree *comp_vectype, enum vect_def_type *dts,
8467                          tree vectype)
8468 {
8469   tree lhs, rhs;
8470   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8471 
8472   /* Mask case.  */
8473   if (TREE_CODE (cond) == SSA_NAME
8474       && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
8475     {
8476       gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
8477       if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
8478                                      &dts[0], comp_vectype)
8479             || !*comp_vectype
8480             || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
8481           return false;
8482       return true;
8483     }
8484 
8485   if (!COMPARISON_CLASS_P (cond))
8486     return false;
8487 
8488   lhs = TREE_OPERAND (cond, 0);
8489   rhs = TREE_OPERAND (cond, 1);
8490 
8491   if (TREE_CODE (lhs) == SSA_NAME)
8492     {
8493       gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
8494       if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1))
8495           return false;
8496     }
8497   else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
8498              || TREE_CODE (lhs) == FIXED_CST)
8499     dts[0] = vect_constant_def;
8500   else
8501     return false;
8502 
8503   if (TREE_CODE (rhs) == SSA_NAME)
8504     {
8505       gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
8506       if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2))
8507           return false;
8508     }
8509   else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
8510              || TREE_CODE (rhs) == FIXED_CST)
8511     dts[1] = vect_constant_def;
8512   else
8513     return false;
8514 
8515   if (vectype1 && vectype2
8516       && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8517                        TYPE_VECTOR_SUBPARTS (vectype2)))
8518     return false;
8519 
8520   *comp_vectype = vectype1 ? vectype1 : vectype2;
8521   /* Invariant comparison.  */
8522   if (! *comp_vectype && vectype)
8523     {
8524       tree scalar_type = TREE_TYPE (lhs);
8525       /* If we can widen the comparison to match vectype do so.  */
8526       if (INTEGRAL_TYPE_P (scalar_type)
8527             && tree_int_cst_lt (TYPE_SIZE (scalar_type),
8528                                     TYPE_SIZE (TREE_TYPE (vectype))))
8529           scalar_type = build_nonstandard_integer_type
8530             (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
8531              TYPE_UNSIGNED (scalar_type));
8532       *comp_vectype = get_vectype_for_scalar_type (scalar_type);
8533     }
8534 
8535   return true;
8536 }
8537 
8538 /* vectorizable_condition.
8539 
8540    Check if STMT is conditional modify expression that can be vectorized.
8541    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8542    stmt using VEC_COND_EXPR  to replace it, put it in VEC_STMT, and insert it
8543    at GSI.
8544 
8545    When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
8546    to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
8547    else clause if it is 2).
8548 
8549    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
8550 
8551 bool
vectorizable_condition(gimple * stmt,gimple_stmt_iterator * gsi,gimple ** vec_stmt,tree reduc_def,int reduc_index,slp_tree slp_node)8552 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
8553                               gimple **vec_stmt, tree reduc_def, int reduc_index,
8554                               slp_tree slp_node)
8555 {
8556   tree scalar_dest = NULL_TREE;
8557   tree vec_dest = NULL_TREE;
8558   tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
8559   tree then_clause, else_clause;
8560   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8561   tree comp_vectype = NULL_TREE;
8562   tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
8563   tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
8564   tree vec_compare;
8565   tree new_temp;
8566   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8567   enum vect_def_type dts[4]
8568     = {vect_unknown_def_type, vect_unknown_def_type,
8569        vect_unknown_def_type, vect_unknown_def_type};
8570   int ndts = 4;
8571   int ncopies;
8572   enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8573   stmt_vec_info prev_stmt_info = NULL;
8574   int i, j;
8575   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8576   vec<tree> vec_oprnds0 = vNULL;
8577   vec<tree> vec_oprnds1 = vNULL;
8578   vec<tree> vec_oprnds2 = vNULL;
8579   vec<tree> vec_oprnds3 = vNULL;
8580   tree vec_cmp_type;
8581   bool masked = false;
8582 
8583   if (reduc_index && STMT_SLP_TYPE (stmt_info))
8584     return false;
8585 
8586   vect_reduction_type reduction_type
8587     = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
8588   if (reduction_type == TREE_CODE_REDUCTION)
8589     {
8590       if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8591           return false;
8592 
8593       if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8594             && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8595                  && reduc_def))
8596           return false;
8597 
8598       /* FORNOW: not yet supported.  */
8599       if (STMT_VINFO_LIVE_P (stmt_info))
8600           {
8601             if (dump_enabled_p ())
8602               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8603                                    "value used after loop.\n");
8604             return false;
8605           }
8606     }
8607 
8608   /* Is vectorizable conditional operation?  */
8609   if (!is_gimple_assign (stmt))
8610     return false;
8611 
8612   code = gimple_assign_rhs_code (stmt);
8613 
8614   if (code != COND_EXPR)
8615     return false;
8616 
8617   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8618   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8619 
8620   if (slp_node)
8621     ncopies = 1;
8622   else
8623     ncopies = vect_get_num_copies (loop_vinfo, vectype);
8624 
8625   gcc_assert (ncopies >= 1);
8626   if (reduc_index && ncopies > 1)
8627     return false; /* FORNOW */
8628 
8629   cond_expr = gimple_assign_rhs1 (stmt);
8630   then_clause = gimple_assign_rhs2 (stmt);
8631   else_clause = gimple_assign_rhs3 (stmt);
8632 
8633   if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
8634                                   &comp_vectype, &dts[0], slp_node ? NULL : vectype)
8635       || !comp_vectype)
8636     return false;
8637 
8638   gimple *def_stmt;
8639   if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
8640                                  &vectype1))
8641     return false;
8642   if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
8643                                  &vectype2))
8644     return false;
8645 
8646   if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8647     return false;
8648 
8649   if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
8650     return false;
8651 
8652   masked = !COMPARISON_CLASS_P (cond_expr);
8653   vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8654 
8655   if (vec_cmp_type == NULL_TREE)
8656     return false;
8657 
8658   cond_code = TREE_CODE (cond_expr);
8659   if (!masked)
8660     {
8661       cond_expr0 = TREE_OPERAND (cond_expr, 0);
8662       cond_expr1 = TREE_OPERAND (cond_expr, 1);
8663     }
8664 
8665   if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8666     {
8667       /* Boolean values may have another representation in vectors
8668            and therefore we prefer bit operations over comparison for
8669            them (which also works for scalar masks).  We store opcodes
8670            to use in bitop1 and bitop2.  Statement is vectorized as
8671            BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8672            depending on bitop1 and bitop2 arity.  */
8673       switch (cond_code)
8674           {
8675           case GT_EXPR:
8676             bitop1 = BIT_NOT_EXPR;
8677             bitop2 = BIT_AND_EXPR;
8678             break;
8679           case GE_EXPR:
8680             bitop1 = BIT_NOT_EXPR;
8681             bitop2 = BIT_IOR_EXPR;
8682             break;
8683           case LT_EXPR:
8684             bitop1 = BIT_NOT_EXPR;
8685             bitop2 = BIT_AND_EXPR;
8686             std::swap (cond_expr0, cond_expr1);
8687             break;
8688           case LE_EXPR:
8689             bitop1 = BIT_NOT_EXPR;
8690             bitop2 = BIT_IOR_EXPR;
8691             std::swap (cond_expr0, cond_expr1);
8692             break;
8693           case NE_EXPR:
8694             bitop1 = BIT_XOR_EXPR;
8695             break;
8696           case EQ_EXPR:
8697             bitop1 = BIT_XOR_EXPR;
8698             bitop2 = BIT_NOT_EXPR;
8699             break;
8700           default:
8701             return false;
8702           }
8703       cond_code = SSA_NAME;
8704     }
8705 
8706   if (!vec_stmt)
8707     {
8708       STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
8709       if (bitop1 != NOP_EXPR)
8710           {
8711             machine_mode mode = TYPE_MODE (comp_vectype);
8712             optab optab;
8713 
8714             optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8715             if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8716               return false;
8717 
8718             if (bitop2 != NOP_EXPR)
8719               {
8720                 optab = optab_for_tree_code (bitop2, comp_vectype,
8721                                                      optab_default);
8722                 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8723                     return false;
8724               }
8725           }
8726       if (expand_vec_cond_expr_p (vectype, comp_vectype,
8727                                              cond_code))
8728           {
8729             if (!slp_node)
8730               vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
8731             return true;
8732           }
8733       return false;
8734     }
8735 
8736   /* Transform.  */
8737 
8738   if (!slp_node)
8739     {
8740       vec_oprnds0.create (1);
8741       vec_oprnds1.create (1);
8742       vec_oprnds2.create (1);
8743       vec_oprnds3.create (1);
8744     }
8745 
8746   /* Handle def.  */
8747   scalar_dest = gimple_assign_lhs (stmt);
8748   if (reduction_type != EXTRACT_LAST_REDUCTION)
8749     vec_dest = vect_create_destination_var (scalar_dest, vectype);
8750 
8751   /* Handle cond expr.  */
8752   for (j = 0; j < ncopies; j++)
8753     {
8754       gimple *new_stmt = NULL;
8755       if (j == 0)
8756           {
8757           if (slp_node)
8758             {
8759               auto_vec<tree, 4> ops;
8760                 auto_vec<vec<tree>, 4> vec_defs;
8761 
8762                 if (masked)
8763                     ops.safe_push (cond_expr);
8764                 else
8765                     {
8766                       ops.safe_push (cond_expr0);
8767                       ops.safe_push (cond_expr1);
8768                     }
8769               ops.safe_push (then_clause);
8770               ops.safe_push (else_clause);
8771               vect_get_slp_defs (ops, slp_node, &vec_defs);
8772                 vec_oprnds3 = vec_defs.pop ();
8773                 vec_oprnds2 = vec_defs.pop ();
8774                 if (!masked)
8775                     vec_oprnds1 = vec_defs.pop ();
8776                 vec_oprnds0 = vec_defs.pop ();
8777             }
8778           else
8779             {
8780                 gimple *gtemp;
8781                 if (masked)
8782                     {
8783                       vec_cond_lhs
8784                         = vect_get_vec_def_for_operand (cond_expr, stmt,
8785                                                                 comp_vectype);
8786                       vect_is_simple_use (cond_expr, stmt_info->vinfo,
8787                                               &gtemp, &dts[0]);
8788                     }
8789                 else
8790                     {
8791                       vec_cond_lhs
8792                         = vect_get_vec_def_for_operand (cond_expr0,
8793                                                                 stmt, comp_vectype);
8794                       vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
8795 
8796                       vec_cond_rhs
8797                         = vect_get_vec_def_for_operand (cond_expr1,
8798                                                                 stmt, comp_vectype);
8799                       vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
8800                     }
8801                 if (reduc_index == 1)
8802                     vec_then_clause = reduc_def;
8803                 else
8804                     {
8805                       vec_then_clause = vect_get_vec_def_for_operand (then_clause,
8806                                                                                   stmt);
8807                     vect_is_simple_use (then_clause, loop_vinfo,
8808                                               &gtemp, &dts[2]);
8809                     }
8810                 if (reduc_index == 2)
8811                     vec_else_clause = reduc_def;
8812                 else
8813                     {
8814                       vec_else_clause = vect_get_vec_def_for_operand (else_clause,
8815                                                                                   stmt);
8816                       vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
8817                     }
8818               }
8819           }
8820       else
8821           {
8822             vec_cond_lhs
8823               = vect_get_vec_def_for_stmt_copy (dts[0],
8824                                                         vec_oprnds0.pop ());
8825             if (!masked)
8826               vec_cond_rhs
8827                 = vect_get_vec_def_for_stmt_copy (dts[1],
8828                                                             vec_oprnds1.pop ());
8829 
8830             vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
8831                                                                           vec_oprnds2.pop ());
8832             vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
8833                                                                           vec_oprnds3.pop ());
8834           }
8835 
8836       if (!slp_node)
8837         {
8838             vec_oprnds0.quick_push (vec_cond_lhs);
8839             if (!masked)
8840               vec_oprnds1.quick_push (vec_cond_rhs);
8841             vec_oprnds2.quick_push (vec_then_clause);
8842             vec_oprnds3.quick_push (vec_else_clause);
8843           }
8844 
8845       /* Arguments are ready.  Create the new vector stmt.  */
8846       FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
8847         {
8848           vec_then_clause = vec_oprnds2[i];
8849           vec_else_clause = vec_oprnds3[i];
8850 
8851             if (masked)
8852               vec_compare = vec_cond_lhs;
8853             else
8854               {
8855                 vec_cond_rhs = vec_oprnds1[i];
8856                 if (bitop1 == NOP_EXPR)
8857                     vec_compare = build2 (cond_code, vec_cmp_type,
8858                                               vec_cond_lhs, vec_cond_rhs);
8859                 else
8860                     {
8861                       new_temp = make_ssa_name (vec_cmp_type);
8862                       if (bitop1 == BIT_NOT_EXPR)
8863                         new_stmt = gimple_build_assign (new_temp, bitop1,
8864                                                                 vec_cond_rhs);
8865                       else
8866                         new_stmt
8867                           = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8868                                                        vec_cond_rhs);
8869                       vect_finish_stmt_generation (stmt, new_stmt, gsi);
8870                       if (bitop2 == NOP_EXPR)
8871                         vec_compare = new_temp;
8872                       else if (bitop2 == BIT_NOT_EXPR)
8873                         {
8874                           /* Instead of doing ~x ? y : z do x ? z : y.  */
8875                           vec_compare = new_temp;
8876                           std::swap (vec_then_clause, vec_else_clause);
8877                         }
8878                       else
8879                         {
8880                           vec_compare = make_ssa_name (vec_cmp_type);
8881                           new_stmt
8882                               = gimple_build_assign (vec_compare, bitop2,
8883                                                          vec_cond_lhs, new_temp);
8884                           vect_finish_stmt_generation (stmt, new_stmt, gsi);
8885                         }
8886                     }
8887               }
8888             if (reduction_type == EXTRACT_LAST_REDUCTION)
8889               {
8890                 if (!is_gimple_val (vec_compare))
8891                     {
8892                       tree vec_compare_name = make_ssa_name (vec_cmp_type);
8893                       new_stmt = gimple_build_assign (vec_compare_name,
8894                                                               vec_compare);
8895                       vect_finish_stmt_generation (stmt, new_stmt, gsi);
8896                       vec_compare = vec_compare_name;
8897                     }
8898                 gcc_assert (reduc_index == 2);
8899                 new_stmt = gimple_build_call_internal
8900                     (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
8901                      vec_then_clause);
8902                 gimple_call_set_lhs (new_stmt, scalar_dest);
8903                 SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
8904                 if (stmt == gsi_stmt (*gsi))
8905                     vect_finish_replace_stmt (stmt, new_stmt);
8906                 else
8907                     {
8908                       /* In this case we're moving the definition to later in the
8909                          block.  That doesn't matter because the only uses of the
8910                          lhs are in phi statements.  */
8911                       gimple_stmt_iterator old_gsi = gsi_for_stmt (stmt);
8912                       gsi_remove (&old_gsi, true);
8913                       vect_finish_stmt_generation (stmt, new_stmt, gsi);
8914                     }
8915               }
8916             else
8917               {
8918                 new_temp = make_ssa_name (vec_dest);
8919                 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
8920                                                         vec_compare, vec_then_clause,
8921                                                         vec_else_clause);
8922                 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8923               }
8924           if (slp_node)
8925             SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8926         }
8927 
8928         if (slp_node)
8929           continue;
8930 
8931         if (j == 0)
8932           STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8933         else
8934           STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8935 
8936         prev_stmt_info = vinfo_for_stmt (new_stmt);
8937     }
8938 
8939   vec_oprnds0.release ();
8940   vec_oprnds1.release ();
8941   vec_oprnds2.release ();
8942   vec_oprnds3.release ();
8943 
8944   return true;
8945 }
8946 
8947 /* vectorizable_comparison.
8948 
8949    Check if STMT is comparison expression that can be vectorized.
8950    If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8951    comparison, put it in VEC_STMT, and insert it at GSI.
8952 
8953    Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
8954 
8955 static bool
vectorizable_comparison(gimple * stmt,gimple_stmt_iterator * gsi,gimple ** vec_stmt,tree reduc_def,slp_tree slp_node)8956 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
8957                                gimple **vec_stmt, tree reduc_def,
8958                                slp_tree slp_node)
8959 {
8960   tree lhs, rhs1, rhs2;
8961   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8962   tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8963   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8964   tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
8965   tree new_temp;
8966   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8967   enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
8968   int ndts = 2;
8969   poly_uint64 nunits;
8970   int ncopies;
8971   enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8972   stmt_vec_info prev_stmt_info = NULL;
8973   int i, j;
8974   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8975   vec<tree> vec_oprnds0 = vNULL;
8976   vec<tree> vec_oprnds1 = vNULL;
8977   gimple *def_stmt;
8978   tree mask_type;
8979   tree mask;
8980 
8981   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8982     return false;
8983 
8984   if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
8985     return false;
8986 
8987   mask_type = vectype;
8988   nunits = TYPE_VECTOR_SUBPARTS (vectype);
8989 
8990   if (slp_node)
8991     ncopies = 1;
8992   else
8993     ncopies = vect_get_num_copies (loop_vinfo, vectype);
8994 
8995   gcc_assert (ncopies >= 1);
8996   if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8997       && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8998              && reduc_def))
8999     return false;
9000 
9001   if (STMT_VINFO_LIVE_P (stmt_info))
9002     {
9003       if (dump_enabled_p ())
9004           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9005                                "value used after loop.\n");
9006       return false;
9007     }
9008 
9009   if (!is_gimple_assign (stmt))
9010     return false;
9011 
9012   code = gimple_assign_rhs_code (stmt);
9013 
9014   if (TREE_CODE_CLASS (code) != tcc_comparison)
9015     return false;
9016 
9017   rhs1 = gimple_assign_rhs1 (stmt);
9018   rhs2 = gimple_assign_rhs2 (stmt);
9019 
9020   if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
9021                                  &dts[0], &vectype1))
9022     return false;
9023 
9024   if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
9025                                  &dts[1], &vectype2))
9026     return false;
9027 
9028   if (vectype1 && vectype2
9029       && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9030                        TYPE_VECTOR_SUBPARTS (vectype2)))
9031     return false;
9032 
9033   vectype = vectype1 ? vectype1 : vectype2;
9034 
9035   /* Invariant comparison.  */
9036   if (!vectype)
9037     {
9038       vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
9039       if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
9040           return false;
9041     }
9042   else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
9043     return false;
9044 
9045   /* Can't compare mask and non-mask types.  */
9046   if (vectype1 && vectype2
9047       && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
9048     return false;
9049 
9050   /* Boolean values may have another representation in vectors
9051      and therefore we prefer bit operations over comparison for
9052      them (which also works for scalar masks).  We store opcodes
9053      to use in bitop1 and bitop2.  Statement is vectorized as
9054        BITOP2 (rhs1 BITOP1 rhs2) or
9055        rhs1 BITOP2 (BITOP1 rhs2)
9056      depending on bitop1 and bitop2 arity.  */
9057   if (VECTOR_BOOLEAN_TYPE_P (vectype))
9058     {
9059       if (code == GT_EXPR)
9060           {
9061             bitop1 = BIT_NOT_EXPR;
9062             bitop2 = BIT_AND_EXPR;
9063           }
9064       else if (code == GE_EXPR)
9065           {
9066             bitop1 = BIT_NOT_EXPR;
9067             bitop2 = BIT_IOR_EXPR;
9068           }
9069       else if (code == LT_EXPR)
9070           {
9071             bitop1 = BIT_NOT_EXPR;
9072             bitop2 = BIT_AND_EXPR;
9073             std::swap (rhs1, rhs2);
9074             std::swap (dts[0], dts[1]);
9075           }
9076       else if (code == LE_EXPR)
9077           {
9078             bitop1 = BIT_NOT_EXPR;
9079             bitop2 = BIT_IOR_EXPR;
9080             std::swap (rhs1, rhs2);
9081             std::swap (dts[0], dts[1]);
9082           }
9083       else
9084           {
9085             bitop1 = BIT_XOR_EXPR;
9086             if (code == EQ_EXPR)
9087               bitop2 = BIT_NOT_EXPR;
9088           }
9089     }
9090 
9091   if (!vec_stmt)
9092     {
9093       STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
9094       if (!slp_node)
9095           vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
9096                                         dts, ndts, NULL, NULL);
9097       if (bitop1 == NOP_EXPR)
9098           return expand_vec_cmp_expr_p (vectype, mask_type, code);
9099       else
9100           {
9101             machine_mode mode = TYPE_MODE (vectype);
9102             optab optab;
9103 
9104             optab = optab_for_tree_code (bitop1, vectype, optab_default);
9105             if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9106               return false;
9107 
9108             if (bitop2 != NOP_EXPR)
9109               {
9110                 optab = optab_for_tree_code (bitop2, vectype, optab_default);
9111                 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
9112                     return false;
9113               }
9114             return true;
9115           }
9116     }
9117 
9118   /* Transform.  */
9119   if (!slp_node)
9120     {
9121       vec_oprnds0.create (1);
9122       vec_oprnds1.create (1);
9123     }
9124 
9125   /* Handle def.  */
9126   lhs = gimple_assign_lhs (stmt);
9127   mask = vect_create_destination_var (lhs, mask_type);
9128 
9129   /* Handle cmp expr.  */
9130   for (j = 0; j < ncopies; j++)
9131     {
9132       gassign *new_stmt = NULL;
9133       if (j == 0)
9134           {
9135             if (slp_node)
9136               {
9137                 auto_vec<tree, 2> ops;
9138                 auto_vec<vec<tree>, 2> vec_defs;
9139 
9140                 ops.safe_push (rhs1);
9141                 ops.safe_push (rhs2);
9142                 vect_get_slp_defs (ops, slp_node, &vec_defs);
9143                 vec_oprnds1 = vec_defs.pop ();
9144                 vec_oprnds0 = vec_defs.pop ();
9145               }
9146             else
9147               {
9148                 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
9149                 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
9150               }
9151           }
9152       else
9153           {
9154             vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
9155                                                                  vec_oprnds0.pop ());
9156             vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
9157                                                                  vec_oprnds1.pop ());
9158           }
9159 
9160       if (!slp_node)
9161           {
9162             vec_oprnds0.quick_push (vec_rhs1);
9163             vec_oprnds1.quick_push (vec_rhs2);
9164           }
9165 
9166       /* Arguments are ready.  Create the new vector stmt.  */
9167       FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
9168           {
9169             vec_rhs2 = vec_oprnds1[i];
9170 
9171             new_temp = make_ssa_name (mask);
9172             if (bitop1 == NOP_EXPR)
9173               {
9174                 new_stmt = gimple_build_assign (new_temp, code,
9175                                                         vec_rhs1, vec_rhs2);
9176                 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9177               }
9178             else
9179               {
9180                 if (bitop1 == BIT_NOT_EXPR)
9181                     new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
9182                 else
9183                     new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
9184                                                             vec_rhs2);
9185                 vect_finish_stmt_generation (stmt, new_stmt, gsi);
9186                 if (bitop2 != NOP_EXPR)
9187                     {
9188                       tree res = make_ssa_name (mask);
9189                       if (bitop2 == BIT_NOT_EXPR)
9190                         new_stmt = gimple_build_assign (res, bitop2, new_temp);
9191                       else
9192                         new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
9193                                                                 new_temp);
9194                       vect_finish_stmt_generation (stmt, new_stmt, gsi);
9195                     }
9196               }
9197             if (slp_node)
9198               SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9199           }
9200 
9201       if (slp_node)
9202           continue;
9203 
9204       if (j == 0)
9205           STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
9206       else
9207           STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
9208 
9209       prev_stmt_info = vinfo_for_stmt (new_stmt);
9210     }
9211 
9212   vec_oprnds0.release ();
9213   vec_oprnds1.release ();
9214 
9215   return true;
9216 }
9217 
9218 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
9219    can handle all live statements in the node.  Otherwise return true
9220    if STMT is not live or if vectorizable_live_operation can handle it.
9221    GSI and VEC_STMT are as for vectorizable_live_operation.  */
9222 
9223 static bool
can_vectorize_live_stmts(gimple * stmt,gimple_stmt_iterator * gsi,slp_tree slp_node,gimple ** vec_stmt)9224 can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
9225                                 slp_tree slp_node, gimple **vec_stmt)
9226 {
9227   if (slp_node)
9228     {
9229       gimple *slp_stmt;
9230       unsigned int i;
9231       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
9232           {
9233             stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
9234             if (STMT_VINFO_LIVE_P (slp_stmt_info)
9235                 && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
9236                                                          vec_stmt))
9237               return false;
9238           }
9239     }
9240   else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
9241              && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt))
9242     return false;
9243 
9244   return true;
9245 }
9246 
9247 /* Make sure the statement is vectorizable.  */
9248 
9249 bool
vect_analyze_stmt(gimple * stmt,bool * need_to_vectorize,slp_tree node,slp_instance node_instance)9250 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
9251                        slp_instance node_instance)
9252 {
9253   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9254   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9255   enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
9256   bool ok;
9257   gimple *pattern_stmt;
9258   gimple_seq pattern_def_seq;
9259 
9260   if (dump_enabled_p ())
9261     {
9262       dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
9263       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9264     }
9265 
9266   if (gimple_has_volatile_ops (stmt))
9267     {
9268       if (dump_enabled_p ())
9269         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9270                          "not vectorized: stmt has volatile operands\n");
9271 
9272       return false;
9273     }
9274 
9275   /* Skip stmts that do not need to be vectorized. In loops this is expected
9276      to include:
9277      - the COND_EXPR which is the loop exit condition
9278      - any LABEL_EXPRs in the loop
9279      - computations that are used only for array indexing or loop control.
9280      In basic blocks we only analyze statements that are a part of some SLP
9281      instance, therefore, all the statements are relevant.
9282 
9283      Pattern statement needs to be analyzed instead of the original statement
9284      if the original statement is not relevant.  Otherwise, we analyze both
9285      statements.  In basic blocks we are called from some SLP instance
9286      traversal, don't analyze pattern stmts instead, the pattern stmts
9287      already will be part of SLP instance.  */
9288 
9289   pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
9290   if (!STMT_VINFO_RELEVANT_P (stmt_info)
9291       && !STMT_VINFO_LIVE_P (stmt_info))
9292     {
9293       if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9294           && pattern_stmt
9295           && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
9296               || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
9297         {
9298           /* Analyze PATTERN_STMT instead of the original stmt.  */
9299           stmt = pattern_stmt;
9300           stmt_info = vinfo_for_stmt (pattern_stmt);
9301           if (dump_enabled_p ())
9302             {
9303               dump_printf_loc (MSG_NOTE, vect_location,
9304                                "==> examining pattern statement: ");
9305               dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9306             }
9307         }
9308       else
9309         {
9310           if (dump_enabled_p ())
9311             dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
9312 
9313           return true;
9314         }
9315     }
9316   else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9317              && node == NULL
9318            && pattern_stmt
9319            && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
9320                || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
9321     {
9322       /* Analyze PATTERN_STMT too.  */
9323       if (dump_enabled_p ())
9324         {
9325           dump_printf_loc (MSG_NOTE, vect_location,
9326                            "==> examining pattern statement: ");
9327           dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
9328         }
9329 
9330       if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
9331                                     node_instance))
9332         return false;
9333    }
9334 
9335   if (is_pattern_stmt_p (stmt_info)
9336       && node == NULL
9337       && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
9338     {
9339       gimple_stmt_iterator si;
9340 
9341       for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
9342           {
9343             gimple *pattern_def_stmt = gsi_stmt (si);
9344             if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
9345                 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
9346               {
9347                 /* Analyze def stmt of STMT if it's a pattern stmt.  */
9348                 if (dump_enabled_p ())
9349                     {
9350                       dump_printf_loc (MSG_NOTE, vect_location,
9351                                    "==> examining pattern def statement: ");
9352                       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
9353                     }
9354 
9355                 if (!vect_analyze_stmt (pattern_def_stmt,
9356                                               need_to_vectorize, node, node_instance))
9357                     return false;
9358               }
9359           }
9360     }
9361 
9362   switch (STMT_VINFO_DEF_TYPE (stmt_info))
9363     {
9364       case vect_internal_def:
9365         break;
9366 
9367       case vect_reduction_def:
9368       case vect_nested_cycle:
9369          gcc_assert (!bb_vinfo
9370                          && (relevance == vect_used_in_outer
9371                                || relevance == vect_used_in_outer_by_reduction
9372                                || relevance == vect_used_by_reduction
9373                                || relevance == vect_unused_in_scope
9374                                || relevance == vect_used_only_live));
9375          break;
9376 
9377       case vect_induction_def:
9378           gcc_assert (!bb_vinfo);
9379           break;
9380 
9381       case vect_constant_def:
9382       case vect_external_def:
9383       case vect_unknown_def_type:
9384       default:
9385         gcc_unreachable ();
9386     }
9387 
9388   if (STMT_VINFO_RELEVANT_P (stmt_info))
9389     {
9390       gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
9391       gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
9392                       || (is_gimple_call (stmt)
9393                           && gimple_call_lhs (stmt) == NULL_TREE));
9394       *need_to_vectorize = true;
9395     }
9396 
9397   if (PURE_SLP_STMT (stmt_info) && !node)
9398     {
9399       dump_printf_loc (MSG_NOTE, vect_location,
9400                            "handled only by SLP analysis\n");
9401       return true;
9402     }
9403 
9404   ok = true;
9405   if (!bb_vinfo
9406       && (STMT_VINFO_RELEVANT_P (stmt_info)
9407             || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
9408     ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
9409             || vectorizable_conversion (stmt, NULL, NULL, node)
9410             || vectorizable_shift (stmt, NULL, NULL, node)
9411             || vectorizable_operation (stmt, NULL, NULL, node)
9412             || vectorizable_assignment (stmt, NULL, NULL, node)
9413             || vectorizable_load (stmt, NULL, NULL, node, NULL)
9414             || vectorizable_call (stmt, NULL, NULL, node)
9415             || vectorizable_store (stmt, NULL, NULL, node)
9416             || vectorizable_reduction (stmt, NULL, NULL, node, node_instance)
9417             || vectorizable_induction (stmt, NULL, NULL, node)
9418             || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
9419             || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
9420   else
9421     {
9422       if (bb_vinfo)
9423           ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
9424                 || vectorizable_conversion (stmt, NULL, NULL, node)
9425                 || vectorizable_shift (stmt, NULL, NULL, node)
9426                 || vectorizable_operation (stmt, NULL, NULL, node)
9427                 || vectorizable_assignment (stmt, NULL, NULL, node)
9428                 || vectorizable_load (stmt, NULL, NULL, node, NULL)
9429                 || vectorizable_call (stmt, NULL, NULL, node)
9430                 || vectorizable_store (stmt, NULL, NULL, node)
9431                 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
9432                 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
9433     }
9434 
9435   if (!ok)
9436     {
9437       if (dump_enabled_p ())
9438         {
9439           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9440                            "not vectorized: relevant stmt not ");
9441           dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
9442           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
9443         }
9444 
9445       return false;
9446     }
9447 
9448   if (bb_vinfo)
9449     return true;
9450 
9451   /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
9452       need extra handling, except for vectorizable reductions.  */
9453   if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9454       && !can_vectorize_live_stmts (stmt, NULL, node, NULL))
9455     {
9456       if (dump_enabled_p ())
9457         {
9458           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9459                            "not vectorized: live stmt not supported: ");
9460           dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
9461         }
9462 
9463        return false;
9464     }
9465 
9466   return true;
9467 }
9468 
9469 
9470 /* Function vect_transform_stmt.
9471 
9472    Create a vectorized stmt to replace STMT, and insert it at BSI.  */
9473 
9474 bool
vect_transform_stmt(gimple * stmt,gimple_stmt_iterator * gsi,bool * grouped_store,slp_tree slp_node,slp_instance slp_node_instance)9475 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
9476                          bool *grouped_store, slp_tree slp_node,
9477                      slp_instance slp_node_instance)
9478 {
9479   bool is_store = false;
9480   gimple *vec_stmt = NULL;
9481   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9482   bool done;
9483 
9484   gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
9485   gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9486 
9487   bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
9488                        && nested_in_vect_loop_p
9489                             (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
9490                                stmt));
9491 
9492   switch (STMT_VINFO_TYPE (stmt_info))
9493     {
9494     case type_demotion_vec_info_type:
9495     case type_promotion_vec_info_type:
9496     case type_conversion_vec_info_type:
9497       done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
9498       gcc_assert (done);
9499       break;
9500 
9501     case induc_vec_info_type:
9502       done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
9503       gcc_assert (done);
9504       break;
9505 
9506     case shift_vec_info_type:
9507       done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
9508       gcc_assert (done);
9509       break;
9510 
9511     case op_vec_info_type:
9512       done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
9513       gcc_assert (done);
9514       break;
9515 
9516     case assignment_vec_info_type:
9517       done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
9518       gcc_assert (done);
9519       break;
9520 
9521     case load_vec_info_type:
9522       done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
9523                                 slp_node_instance);
9524       gcc_assert (done);
9525       break;
9526 
9527     case store_vec_info_type:
9528       done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
9529       gcc_assert (done);
9530       if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
9531           {
9532             /* In case of interleaving, the whole chain is vectorized when the
9533                last store in the chain is reached.  Store stmts before the last
9534                one are skipped, and there vec_stmt_info shouldn't be freed
9535                meanwhile.  */
9536             *grouped_store = true;
9537             stmt_vec_info group_info
9538               = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
9539             if (GROUP_STORE_COUNT (group_info) == GROUP_SIZE (group_info))
9540               is_store = true;
9541           }
9542       else
9543           is_store = true;
9544       break;
9545 
9546     case condition_vec_info_type:
9547       done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
9548       gcc_assert (done);
9549       break;
9550 
9551     case comparison_vec_info_type:
9552       done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
9553       gcc_assert (done);
9554       break;
9555 
9556     case call_vec_info_type:
9557       done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
9558       stmt = gsi_stmt (*gsi);
9559       break;
9560 
9561     case call_simd_clone_vec_info_type:
9562       done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
9563       stmt = gsi_stmt (*gsi);
9564       break;
9565 
9566     case reduc_vec_info_type:
9567       done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
9568                                              slp_node_instance);
9569       gcc_assert (done);
9570       break;
9571 
9572     default:
9573       if (!STMT_VINFO_LIVE_P (stmt_info))
9574           {
9575             if (dump_enabled_p ())
9576               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9577                              "stmt not supported.\n");
9578             gcc_unreachable ();
9579           }
9580     }
9581 
9582   /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9583      This would break hybrid SLP vectorization.  */
9584   if (slp_node)
9585     gcc_assert (!vec_stmt
9586                     && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
9587 
9588   /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9589      is being vectorized, but outside the immediately enclosing loop.  */
9590   if (vec_stmt
9591       && nested_p
9592       && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9593       && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
9594           || STMT_VINFO_RELEVANT (stmt_info) ==
9595                                            vect_used_in_outer_by_reduction))
9596     {
9597       struct loop *innerloop = LOOP_VINFO_LOOP (
9598                                 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
9599       imm_use_iterator imm_iter;
9600       use_operand_p use_p;
9601       tree scalar_dest;
9602       gimple *exit_phi;
9603 
9604       if (dump_enabled_p ())
9605         dump_printf_loc (MSG_NOTE, vect_location,
9606                          "Record the vdef for outer-loop vectorization.\n");
9607 
9608       /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
9609         (to be used when vectorizing outer-loop stmts that use the DEF of
9610         STMT).  */
9611       if (gimple_code (stmt) == GIMPLE_PHI)
9612         scalar_dest = PHI_RESULT (stmt);
9613       else
9614         scalar_dest = gimple_get_lhs (stmt);
9615 
9616       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
9617        {
9618          if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
9619            {
9620              exit_phi = USE_STMT (use_p);
9621              STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
9622            }
9623        }
9624     }
9625 
9626   /* Handle stmts whose DEF is used outside the loop-nest that is
9627      being vectorized.  */
9628   if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
9629     {
9630       done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt);
9631       gcc_assert (done);
9632     }
9633 
9634   if (vec_stmt)
9635     STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
9636 
9637   return is_store;
9638 }
9639 
9640 
9641 /* Remove a group of stores (for SLP or interleaving), free their
9642    stmt_vec_info.  */
9643 
9644 void
vect_remove_stores(gimple * first_stmt)9645 vect_remove_stores (gimple *first_stmt)
9646 {
9647   gimple *next = first_stmt;
9648   gimple *tmp;
9649   gimple_stmt_iterator next_si;
9650 
9651   while (next)
9652     {
9653       stmt_vec_info stmt_info = vinfo_for_stmt (next);
9654 
9655       tmp = GROUP_NEXT_ELEMENT (stmt_info);
9656       if (is_pattern_stmt_p (stmt_info))
9657           next = STMT_VINFO_RELATED_STMT (stmt_info);
9658       /* Free the attached stmt_vec_info and remove the stmt.  */
9659       next_si = gsi_for_stmt (next);
9660       unlink_stmt_vdef (next);
9661       gsi_remove (&next_si, true);
9662       release_defs (next);
9663       free_stmt_vec_info (next);
9664       next = tmp;
9665     }
9666 }
9667 
9668 
9669 /* Function new_stmt_vec_info.
9670 
9671    Create and initialize a new stmt_vec_info struct for STMT.  */
9672 
9673 stmt_vec_info
new_stmt_vec_info(gimple * stmt,vec_info * vinfo)9674 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
9675 {
9676   stmt_vec_info res;
9677   res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
9678 
9679   STMT_VINFO_TYPE (res) = undef_vec_info_type;
9680   STMT_VINFO_STMT (res) = stmt;
9681   res->vinfo = vinfo;
9682   STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
9683   STMT_VINFO_LIVE_P (res) = false;
9684   STMT_VINFO_VECTYPE (res) = NULL;
9685   STMT_VINFO_VEC_STMT (res) = NULL;
9686   STMT_VINFO_VECTORIZABLE (res) = true;
9687   STMT_VINFO_IN_PATTERN_P (res) = false;
9688   STMT_VINFO_RELATED_STMT (res) = NULL;
9689   STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
9690   STMT_VINFO_DATA_REF (res) = NULL;
9691   STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
9692   STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
9693 
9694   if (gimple_code (stmt) == GIMPLE_PHI
9695       && is_loop_header_bb_p (gimple_bb (stmt)))
9696     STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
9697   else
9698     STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
9699 
9700   STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
9701   STMT_SLP_TYPE (res) = loop_vect;
9702   STMT_VINFO_NUM_SLP_USES (res) = 0;
9703 
9704   GROUP_FIRST_ELEMENT (res) = NULL;
9705   GROUP_NEXT_ELEMENT (res) = NULL;
9706   GROUP_SIZE (res) = 0;
9707   GROUP_STORE_COUNT (res) = 0;
9708   GROUP_GAP (res) = 0;
9709   GROUP_SAME_DR_STMT (res) = NULL;
9710 
9711   return res;
9712 }
9713 
9714 
9715 /* Create a hash table for stmt_vec_info. */
9716 
9717 void
init_stmt_vec_info_vec(void)9718 init_stmt_vec_info_vec (void)
9719 {
9720   gcc_assert (!stmt_vec_info_vec.exists ());
9721   stmt_vec_info_vec.create (50);
9722 }
9723 
9724 
9725 /* Free hash table for stmt_vec_info. */
9726 
9727 void
free_stmt_vec_info_vec(void)9728 free_stmt_vec_info_vec (void)
9729 {
9730   unsigned int i;
9731   stmt_vec_info info;
9732   FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
9733     if (info != NULL)
9734       free_stmt_vec_info (STMT_VINFO_STMT (info));
9735   gcc_assert (stmt_vec_info_vec.exists ());
9736   stmt_vec_info_vec.release ();
9737 }
9738 
9739 
9740 /* Free stmt vectorization related info.  */
9741 
9742 void
free_stmt_vec_info(gimple * stmt)9743 free_stmt_vec_info (gimple *stmt)
9744 {
9745   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9746 
9747   if (!stmt_info)
9748     return;
9749 
9750   /* Check if this statement has a related "pattern stmt"
9751      (introduced by the vectorizer during the pattern recognition
9752      pass).  Free pattern's stmt_vec_info and def stmt's stmt_vec_info
9753      too.  */
9754   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
9755     {
9756       stmt_vec_info patt_info
9757           = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9758       if (patt_info)
9759           {
9760             gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
9761             gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
9762             gimple_set_bb (patt_stmt, NULL);
9763             tree lhs = gimple_get_lhs (patt_stmt);
9764             if (lhs && TREE_CODE (lhs) == SSA_NAME)
9765               release_ssa_name (lhs);
9766             if (seq)
9767               {
9768                 gimple_stmt_iterator si;
9769                 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
9770                     {
9771                       gimple *seq_stmt = gsi_stmt (si);
9772                       gimple_set_bb (seq_stmt, NULL);
9773                       lhs = gimple_get_lhs (seq_stmt);
9774                       if (lhs && TREE_CODE (lhs) == SSA_NAME)
9775                         release_ssa_name (lhs);
9776                       free_stmt_vec_info (seq_stmt);
9777                     }
9778               }
9779             free_stmt_vec_info (patt_stmt);
9780           }
9781     }
9782 
9783   STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
9784   STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
9785   set_vinfo_for_stmt (stmt, NULL);
9786   free (stmt_info);
9787 }
9788 
9789 
9790 /* Function get_vectype_for_scalar_type_and_size.
9791 
9792    Returns the vector type corresponding to SCALAR_TYPE  and SIZE as supported
9793    by the target.  */
9794 
9795 tree
get_vectype_for_scalar_type_and_size(tree scalar_type,poly_uint64 size)9796 get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
9797 {
9798   tree orig_scalar_type = scalar_type;
9799   scalar_mode inner_mode;
9800   machine_mode simd_mode;
9801   poly_uint64 nunits;
9802   tree vectype;
9803 
9804   if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9805       && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
9806     return NULL_TREE;
9807 
9808   unsigned int nbytes = GET_MODE_SIZE (inner_mode);
9809 
9810   /* For vector types of elements whose mode precision doesn't
9811      match their types precision we use a element type of mode
9812      precision.  The vectorization routines will have to make sure
9813      they support the proper result truncation/extension.
9814      We also make sure to build vector types with INTEGER_TYPE
9815      component type only.  */
9816   if (INTEGRAL_TYPE_P (scalar_type)
9817       && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9818             || TREE_CODE (scalar_type) != INTEGER_TYPE))
9819     scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9820                                                               TYPE_UNSIGNED (scalar_type));
9821 
9822   /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9823      When the component mode passes the above test simply use a type
9824      corresponding to that mode.  The theory is that any use that
9825      would cause problems with this will disable vectorization anyway.  */
9826   else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
9827              && !INTEGRAL_TYPE_P (scalar_type))
9828     scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9829 
9830   /* We can't build a vector type of elements with alignment bigger than
9831      their size.  */
9832   else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9833     scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9834                                                               TYPE_UNSIGNED (scalar_type));
9835 
9836   /* If we felt back to using the mode fail if there was
9837      no scalar type for it.  */
9838   if (scalar_type == NULL_TREE)
9839     return NULL_TREE;
9840 
9841   /* If no size was supplied use the mode the target prefers.   Otherwise
9842      lookup a vector mode of the specified size.  */
9843   if (known_eq (size, 0U))
9844     simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9845   else if (!multiple_p (size, nbytes, &nunits)
9846              || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
9847     return NULL_TREE;
9848   /* NOTE: nunits == 1 is allowed to support single element vector types.  */
9849   if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
9850     return NULL_TREE;
9851 
9852   vectype = build_vector_type (scalar_type, nunits);
9853 
9854   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9855       && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
9856     return NULL_TREE;
9857 
9858   /* Re-attach the address-space qualifier if we canonicalized the scalar
9859      type.  */
9860   if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9861     return build_qualified_type
9862                (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9863 
9864   return vectype;
9865 }
9866 
9867 poly_uint64 current_vector_size;
9868 
9869 /* Function get_vectype_for_scalar_type.
9870 
9871    Returns the vector type corresponding to SCALAR_TYPE as supported
9872    by the target.  */
9873 
9874 tree
get_vectype_for_scalar_type(tree scalar_type)9875 get_vectype_for_scalar_type (tree scalar_type)
9876 {
9877   tree vectype;
9878   vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9879                                                               current_vector_size);
9880   if (vectype
9881       && known_eq (current_vector_size, 0U))
9882     current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9883   return vectype;
9884 }
9885 
9886 /* Function get_mask_type_for_scalar_type.
9887 
9888    Returns the mask type corresponding to a result of comparison
9889    of vectors of specified SCALAR_TYPE as supported by target.  */
9890 
9891 tree
get_mask_type_for_scalar_type(tree scalar_type)9892 get_mask_type_for_scalar_type (tree scalar_type)
9893 {
9894   tree vectype = get_vectype_for_scalar_type (scalar_type);
9895 
9896   if (!vectype)
9897     return NULL;
9898 
9899   return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9900                                           current_vector_size);
9901 }
9902 
9903 /* Function get_same_sized_vectype
9904 
9905    Returns a vector type corresponding to SCALAR_TYPE of size
9906    VECTOR_TYPE if supported by the target.  */
9907 
9908 tree
get_same_sized_vectype(tree scalar_type,tree vector_type)9909 get_same_sized_vectype (tree scalar_type, tree vector_type)
9910 {
9911   if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9912     return build_same_sized_truth_vector_type (vector_type);
9913 
9914   return get_vectype_for_scalar_type_and_size
9915              (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
9916 }
9917 
9918 /* Function vect_is_simple_use.
9919 
9920    Input:
9921    VINFO - the vect info of the loop or basic block that is being vectorized.
9922    OPERAND - operand in the loop or bb.
9923    Output:
9924    DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9925    DT - the type of definition
9926 
9927    Returns whether a stmt with OPERAND can be vectorized.
9928    For loops, supportable operands are constants, loop invariants, and operands
9929    that are defined by the current iteration of the loop.  Unsupportable
9930    operands are those that are defined by a previous iteration of the loop (as
9931    is the case in reduction/induction computations).
9932    For basic blocks, supportable operands are constants and bb invariants.
9933    For now, operands defined outside the basic block are not supported.  */
9934 
9935 bool
vect_is_simple_use(tree operand,vec_info * vinfo,gimple ** def_stmt,enum vect_def_type * dt)9936 vect_is_simple_use (tree operand, vec_info *vinfo,
9937                     gimple **def_stmt, enum vect_def_type *dt)
9938 {
9939   *def_stmt = NULL;
9940   *dt = vect_unknown_def_type;
9941 
9942   if (dump_enabled_p ())
9943     {
9944       dump_printf_loc (MSG_NOTE, vect_location,
9945                        "vect_is_simple_use: operand ");
9946       dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
9947       dump_printf (MSG_NOTE, "\n");
9948     }
9949 
9950   if (CONSTANT_CLASS_P (operand))
9951     {
9952       *dt = vect_constant_def;
9953       return true;
9954     }
9955 
9956   if (is_gimple_min_invariant (operand))
9957     {
9958       *dt = vect_external_def;
9959       return true;
9960     }
9961 
9962   if (TREE_CODE (operand) != SSA_NAME)
9963     {
9964       if (dump_enabled_p ())
9965           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9966                                "not ssa-name.\n");
9967       return false;
9968     }
9969 
9970   if (SSA_NAME_IS_DEFAULT_DEF (operand))
9971     {
9972       *dt = vect_external_def;
9973       return true;
9974     }
9975 
9976   *def_stmt = SSA_NAME_DEF_STMT (operand);
9977   if (dump_enabled_p ())
9978     {
9979       dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
9980       dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
9981     }
9982 
9983   if (! vect_stmt_in_region_p (vinfo, *def_stmt))
9984     *dt = vect_external_def;
9985   else
9986     {
9987       stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
9988       *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
9989     }
9990 
9991   if (dump_enabled_p ())
9992     {
9993       dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
9994       switch (*dt)
9995           {
9996           case vect_uninitialized_def:
9997             dump_printf (MSG_NOTE, "uninitialized\n");
9998             break;
9999           case vect_constant_def:
10000             dump_printf (MSG_NOTE, "constant\n");
10001             break;
10002           case vect_external_def:
10003             dump_printf (MSG_NOTE, "external\n");
10004             break;
10005           case vect_internal_def:
10006             dump_printf (MSG_NOTE, "internal\n");
10007             break;
10008           case vect_induction_def:
10009             dump_printf (MSG_NOTE, "induction\n");
10010             break;
10011           case vect_reduction_def:
10012             dump_printf (MSG_NOTE, "reduction\n");
10013             break;
10014           case vect_double_reduction_def:
10015             dump_printf (MSG_NOTE, "double reduction\n");
10016             break;
10017           case vect_nested_cycle:
10018             dump_printf (MSG_NOTE, "nested cycle\n");
10019             break;
10020           case vect_unknown_def_type:
10021             dump_printf (MSG_NOTE, "unknown\n");
10022             break;
10023           }
10024     }
10025 
10026   if (*dt == vect_unknown_def_type)
10027     {
10028       if (dump_enabled_p ())
10029         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10030                          "Unsupported pattern.\n");
10031       return false;
10032     }
10033 
10034   switch (gimple_code (*def_stmt))
10035     {
10036     case GIMPLE_PHI:
10037     case GIMPLE_ASSIGN:
10038     case GIMPLE_CALL:
10039       break;
10040     default:
10041       if (dump_enabled_p ())
10042         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10043                          "unsupported defining stmt:\n");
10044       return false;
10045     }
10046 
10047   return true;
10048 }
10049 
10050 /* Function vect_is_simple_use.
10051 
10052    Same as vect_is_simple_use but also determines the vector operand
10053    type of OPERAND and stores it to *VECTYPE.  If the definition of
10054    OPERAND is vect_uninitialized_def, vect_constant_def or
10055    vect_external_def *VECTYPE will be set to NULL_TREE and the caller
10056    is responsible to compute the best suited vector type for the
10057    scalar operand.  */
10058 
10059 bool
vect_is_simple_use(tree operand,vec_info * vinfo,gimple ** def_stmt,enum vect_def_type * dt,tree * vectype)10060 vect_is_simple_use (tree operand, vec_info *vinfo,
10061                         gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
10062 {
10063   if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
10064     return false;
10065 
10066   /* Now get a vector type if the def is internal, otherwise supply
10067      NULL_TREE and leave it up to the caller to figure out a proper
10068      type for the use stmt.  */
10069   if (*dt == vect_internal_def
10070       || *dt == vect_induction_def
10071       || *dt == vect_reduction_def
10072       || *dt == vect_double_reduction_def
10073       || *dt == vect_nested_cycle)
10074     {
10075       stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
10076 
10077       if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10078           && !STMT_VINFO_RELEVANT (stmt_info)
10079           && !STMT_VINFO_LIVE_P (stmt_info))
10080           stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
10081 
10082       *vectype = STMT_VINFO_VECTYPE (stmt_info);
10083       gcc_assert (*vectype != NULL_TREE);
10084     }
10085   else if (*dt == vect_uninitialized_def
10086              || *dt == vect_constant_def
10087              || *dt == vect_external_def)
10088     *vectype = NULL_TREE;
10089   else
10090     gcc_unreachable ();
10091 
10092   return true;
10093 }
10094 
10095 
10096 /* Function supportable_widening_operation
10097 
10098    Check whether an operation represented by the code CODE is a
10099    widening operation that is supported by the target platform in
10100    vector form (i.e., when operating on arguments of type VECTYPE_IN
10101    producing a result of type VECTYPE_OUT).
10102 
10103    Widening operations we currently support are NOP (CONVERT), FLOAT
10104    and WIDEN_MULT.  This function checks if these operations are supported
10105    by the target platform either directly (via vector tree-codes), or via
10106    target builtins.
10107 
10108    Output:
10109    - CODE1 and CODE2 are codes of vector operations to be used when
10110    vectorizing the operation, if available.
10111    - MULTI_STEP_CVT determines the number of required intermediate steps in
10112    case of multi-step conversion (like char->short->int - in that case
10113    MULTI_STEP_CVT will be 1).
10114    - INTERM_TYPES contains the intermediate type required to perform the
10115    widening operation (short in the above example).  */
10116 
10117 bool
supportable_widening_operation(enum tree_code code,gimple * stmt,tree vectype_out,tree vectype_in,enum tree_code * code1,enum tree_code * code2,int * multi_step_cvt,vec<tree> * interm_types)10118 supportable_widening_operation (enum tree_code code, gimple *stmt,
10119                                         tree vectype_out, tree vectype_in,
10120                                 enum tree_code *code1, enum tree_code *code2,
10121                                 int *multi_step_cvt,
10122                                 vec<tree> *interm_types)
10123 {
10124   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
10125   loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
10126   struct loop *vect_loop = NULL;
10127   machine_mode vec_mode;
10128   enum insn_code icode1, icode2;
10129   optab optab1, optab2;
10130   tree vectype = vectype_in;
10131   tree wide_vectype = vectype_out;
10132   enum tree_code c1, c2;
10133   int i;
10134   tree prev_type, intermediate_type;
10135   machine_mode intermediate_mode, prev_mode;
10136   optab optab3, optab4;
10137 
10138   *multi_step_cvt = 0;
10139   if (loop_info)
10140     vect_loop = LOOP_VINFO_LOOP (loop_info);
10141 
10142   switch (code)
10143     {
10144     case WIDEN_MULT_EXPR:
10145       /* The result of a vectorized widening operation usually requires
10146            two vectors (because the widened results do not fit into one vector).
10147            The generated vector results would normally be expected to be
10148            generated in the same order as in the original scalar computation,
10149            i.e. if 8 results are generated in each vector iteration, they are
10150            to be organized as follows:
10151                     vect1: [res1,res2,res3,res4],
10152                     vect2: [res5,res6,res7,res8].
10153 
10154            However, in the special case that the result of the widening
10155            operation is used in a reduction computation only, the order doesn't
10156            matter (because when vectorizing a reduction we change the order of
10157            the computation).  Some targets can take advantage of this and
10158            generate more efficient code.  For example, targets like Altivec,
10159            that support widen_mult using a sequence of {mult_even,mult_odd}
10160            generate the following vectors:
10161                     vect1: [res1,res3,res5,res7],
10162                     vect2: [res2,res4,res6,res8].
10163 
10164            When vectorizing outer-loops, we execute the inner-loop sequentially
10165            (each vectorized inner-loop iteration contributes to VF outer-loop
10166            iterations in parallel).  We therefore don't allow to change the
10167            order of the computation in the inner-loop during outer-loop
10168            vectorization.  */
10169       /* TODO: Another case in which order doesn't *really* matter is when we
10170            widen and then contract again, e.g. (short)((int)x * y >> 8).
10171            Normally, pack_trunc performs an even/odd permute, whereas the
10172            repack from an even/odd expansion would be an interleave, which
10173            would be significantly simpler for e.g. AVX2.  */
10174       /* In any case, in order to avoid duplicating the code below, recurse
10175            on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
10176            are properly set up for the caller.  If we fail, we'll continue with
10177            a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
10178       if (vect_loop
10179             && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
10180             && !nested_in_vect_loop_p (vect_loop, stmt)
10181             && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
10182                                                        stmt, vectype_out, vectype_in,
10183                                                        code1, code2, multi_step_cvt,
10184                                                        interm_types))
10185         {
10186           /* Elements in a vector with vect_used_by_reduction property cannot
10187              be reordered if the use chain with this property does not have the
10188              same operation.  One such an example is s += a * b, where elements
10189              in a and b cannot be reordered.  Here we check if the vector defined
10190              by STMT is only directly used in the reduction statement.  */
10191           tree lhs = gimple_assign_lhs (stmt);
10192           use_operand_p dummy;
10193           gimple *use_stmt;
10194           stmt_vec_info use_stmt_info = NULL;
10195           if (single_imm_use (lhs, &dummy, &use_stmt)
10196               && (use_stmt_info = vinfo_for_stmt (use_stmt))
10197               && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
10198             return true;
10199         }
10200       c1 = VEC_WIDEN_MULT_LO_EXPR;
10201       c2 = VEC_WIDEN_MULT_HI_EXPR;
10202       break;
10203 
10204     case DOT_PROD_EXPR:
10205       c1 = DOT_PROD_EXPR;
10206       c2 = DOT_PROD_EXPR;
10207       break;
10208 
10209     case SAD_EXPR:
10210       c1 = SAD_EXPR;
10211       c2 = SAD_EXPR;
10212       break;
10213 
10214     case VEC_WIDEN_MULT_EVEN_EXPR:
10215       /* Support the recursion induced just above.  */
10216       c1 = VEC_WIDEN_MULT_EVEN_EXPR;
10217       c2 = VEC_WIDEN_MULT_ODD_EXPR;
10218       break;
10219 
10220     case WIDEN_LSHIFT_EXPR:
10221       c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
10222       c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
10223       break;
10224 
10225     CASE_CONVERT:
10226       c1 = VEC_UNPACK_LO_EXPR;
10227       c2 = VEC_UNPACK_HI_EXPR;
10228       break;
10229 
10230     case FLOAT_EXPR:
10231       c1 = VEC_UNPACK_FLOAT_LO_EXPR;
10232       c2 = VEC_UNPACK_FLOAT_HI_EXPR;
10233       break;
10234 
10235     case FIX_TRUNC_EXPR:
10236       /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
10237            VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
10238            computing the operation.  */
10239       return false;
10240 
10241     default:
10242       gcc_unreachable ();
10243     }
10244 
10245   if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
10246     std::swap (c1, c2);
10247 
10248   if (code == FIX_TRUNC_EXPR)
10249     {
10250       /* The signedness is determined from output operand.  */
10251       optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10252       optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
10253     }
10254   else
10255     {
10256       optab1 = optab_for_tree_code (c1, vectype, optab_default);
10257       optab2 = optab_for_tree_code (c2, vectype, optab_default);
10258     }
10259 
10260   if (!optab1 || !optab2)
10261     return false;
10262 
10263   vec_mode = TYPE_MODE (vectype);
10264   if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
10265        || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
10266     return false;
10267 
10268   *code1 = c1;
10269   *code2 = c2;
10270 
10271   if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10272       && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10273       /* For scalar masks we may have different boolean
10274            vector types having the same QImode.  Thus we
10275            add additional check for elements number.  */
10276     return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10277               || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
10278                                TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
10279 
10280   /* Check if it's a multi-step conversion that can be done using intermediate
10281      types.  */
10282 
10283   prev_type = vectype;
10284   prev_mode = vec_mode;
10285 
10286   if (!CONVERT_EXPR_CODE_P (code))
10287     return false;
10288 
10289   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10290      intermediate steps in promotion sequence.  We try
10291      MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
10292      not.  */
10293   interm_types->create (MAX_INTERM_CVT_STEPS);
10294   for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10295     {
10296       intermediate_mode = insn_data[icode1].operand[0].mode;
10297       if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10298           {
10299             intermediate_type = vect_halve_mask_nunits (prev_type);
10300             if (intermediate_mode != TYPE_MODE (intermediate_type))
10301               return false;
10302           }
10303       else
10304           intermediate_type
10305             = lang_hooks.types.type_for_mode (intermediate_mode,
10306                                                       TYPE_UNSIGNED (prev_type));
10307 
10308       optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
10309       optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
10310 
10311       if (!optab3 || !optab4
10312           || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
10313             || insn_data[icode1].operand[0].mode != intermediate_mode
10314             || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
10315             || insn_data[icode2].operand[0].mode != intermediate_mode
10316             || ((icode1 = optab_handler (optab3, intermediate_mode))
10317                 == CODE_FOR_nothing)
10318             || ((icode2 = optab_handler (optab4, intermediate_mode))
10319                 == CODE_FOR_nothing))
10320           break;
10321 
10322       interm_types->quick_push (intermediate_type);
10323       (*multi_step_cvt)++;
10324 
10325       if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
10326             && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
10327           return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10328                     || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
10329                                    TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
10330 
10331       prev_type = intermediate_type;
10332       prev_mode = intermediate_mode;
10333     }
10334 
10335   interm_types->release ();
10336   return false;
10337 }
10338 
10339 
10340 /* Function supportable_narrowing_operation
10341 
10342    Check whether an operation represented by the code CODE is a
10343    narrowing operation that is supported by the target platform in
10344    vector form (i.e., when operating on arguments of type VECTYPE_IN
10345    and producing a result of type VECTYPE_OUT).
10346 
10347    Narrowing operations we currently support are NOP (CONVERT) and
10348    FIX_TRUNC.  This function checks if these operations are supported by
10349    the target platform directly via vector tree-codes.
10350 
10351    Output:
10352    - CODE1 is the code of a vector operation to be used when
10353    vectorizing the operation, if available.
10354    - MULTI_STEP_CVT determines the number of required intermediate steps in
10355    case of multi-step conversion (like int->short->char - in that case
10356    MULTI_STEP_CVT will be 1).
10357    - INTERM_TYPES contains the intermediate type required to perform the
10358    narrowing operation (short in the above example).   */
10359 
10360 bool
supportable_narrowing_operation(enum tree_code code,tree vectype_out,tree vectype_in,enum tree_code * code1,int * multi_step_cvt,vec<tree> * interm_types)10361 supportable_narrowing_operation (enum tree_code code,
10362                                          tree vectype_out, tree vectype_in,
10363                                          enum tree_code *code1, int *multi_step_cvt,
10364                                  vec<tree> *interm_types)
10365 {
10366   machine_mode vec_mode;
10367   enum insn_code icode1;
10368   optab optab1, interm_optab;
10369   tree vectype = vectype_in;
10370   tree narrow_vectype = vectype_out;
10371   enum tree_code c1;
10372   tree intermediate_type, prev_type;
10373   machine_mode intermediate_mode, prev_mode;
10374   int i;
10375   bool uns;
10376 
10377   *multi_step_cvt = 0;
10378   switch (code)
10379     {
10380     CASE_CONVERT:
10381       c1 = VEC_PACK_TRUNC_EXPR;
10382       break;
10383 
10384     case FIX_TRUNC_EXPR:
10385       c1 = VEC_PACK_FIX_TRUNC_EXPR;
10386       break;
10387 
10388     case FLOAT_EXPR:
10389       /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
10390            tree code and optabs used for computing the operation.  */
10391       return false;
10392 
10393     default:
10394       gcc_unreachable ();
10395     }
10396 
10397   if (code == FIX_TRUNC_EXPR)
10398     /* The signedness is determined from output operand.  */
10399     optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
10400   else
10401     optab1 = optab_for_tree_code (c1, vectype, optab_default);
10402 
10403   if (!optab1)
10404     return false;
10405 
10406   vec_mode = TYPE_MODE (vectype);
10407   if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
10408     return false;
10409 
10410   *code1 = c1;
10411 
10412   if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10413     /* For scalar masks we may have different boolean
10414        vector types having the same QImode.  Thus we
10415        add additional check for elements number.  */
10416     return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10417               || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
10418                                TYPE_VECTOR_SUBPARTS (narrow_vectype)));
10419 
10420   /* Check if it's a multi-step conversion that can be done using intermediate
10421      types.  */
10422   prev_mode = vec_mode;
10423   prev_type = vectype;
10424   if (code == FIX_TRUNC_EXPR)
10425     uns = TYPE_UNSIGNED (vectype_out);
10426   else
10427     uns = TYPE_UNSIGNED (vectype);
10428 
10429   /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
10430      conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
10431      costly than signed.  */
10432   if (code == FIX_TRUNC_EXPR && uns)
10433     {
10434       enum insn_code icode2;
10435 
10436       intermediate_type
10437           = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
10438       interm_optab
10439           = optab_for_tree_code (c1, intermediate_type, optab_default);
10440       if (interm_optab != unknown_optab
10441             && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
10442             && insn_data[icode1].operand[0].mode
10443                == insn_data[icode2].operand[0].mode)
10444           {
10445             uns = false;
10446             optab1 = interm_optab;
10447             icode1 = icode2;
10448           }
10449     }
10450 
10451   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10452      intermediate steps in promotion sequence.  We try
10453      MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
10454   interm_types->create (MAX_INTERM_CVT_STEPS);
10455   for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
10456     {
10457       intermediate_mode = insn_data[icode1].operand[0].mode;
10458       if (VECTOR_BOOLEAN_TYPE_P (prev_type))
10459           {
10460             intermediate_type = vect_double_mask_nunits (prev_type);
10461             if (intermediate_mode != TYPE_MODE (intermediate_type))
10462               return false;
10463           }
10464       else
10465           intermediate_type
10466             = lang_hooks.types.type_for_mode (intermediate_mode, uns);
10467       interm_optab
10468           = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
10469                                      optab_default);
10470       if (!interm_optab
10471             || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
10472             || insn_data[icode1].operand[0].mode != intermediate_mode
10473             || ((icode1 = optab_handler (interm_optab, intermediate_mode))
10474                 == CODE_FOR_nothing))
10475           break;
10476 
10477       interm_types->quick_push (intermediate_type);
10478       (*multi_step_cvt)++;
10479 
10480       if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10481           return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10482                     || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
10483                                    TYPE_VECTOR_SUBPARTS (narrow_vectype)));
10484 
10485       prev_mode = intermediate_mode;
10486       prev_type = intermediate_type;
10487       optab1 = interm_optab;
10488     }
10489 
10490   interm_types->release ();
10491   return false;
10492 }
10493 
10494 /* Generate and return a statement that sets vector mask MASK such that
10495    MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I.  */
10496 
10497 gcall *
vect_gen_while(tree mask,tree start_index,tree end_index)10498 vect_gen_while (tree mask, tree start_index, tree end_index)
10499 {
10500   tree cmp_type = TREE_TYPE (start_index);
10501   tree mask_type = TREE_TYPE (mask);
10502   gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
10503                                                                    cmp_type, mask_type,
10504                                                                    OPTIMIZE_FOR_SPEED));
10505   gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
10506                                                       start_index, end_index,
10507                                                       build_zero_cst (mask_type));
10508   gimple_call_set_lhs (call, mask);
10509   return call;
10510 }
10511 
10512 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
10513    J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */
10514 
10515 tree
vect_gen_while_not(gimple_seq * seq,tree mask_type,tree start_index,tree end_index)10516 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
10517                         tree end_index)
10518 {
10519   tree tmp = make_ssa_name (mask_type);
10520   gcall *call = vect_gen_while (tmp, start_index, end_index);
10521   gimple_seq_add_stmt (seq, call);
10522   return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
10523 }
10524