xref: /dragonfly/contrib/gcc-8.0/gcc/tree-vectorizer.c (revision 38fd149817dfbff97799f62fcb70be98c4e32523)
1 /* Vectorizer
2    Copyright (C) 2003-2018 Free Software Foundation, Inc.
3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3.  If not see
19 <http://www.gnu.org/licenses/>.  */
20 
21 /* Loop and basic block vectorizer.
22 
23   This file contains drivers for the three vectorizers:
24   (1) loop vectorizer (inter-iteration parallelism),
25   (2) loop-aware SLP (intra-iteration parallelism) (invoked by the loop
26       vectorizer)
27   (3) BB vectorizer (out-of-loops), aka SLP
28 
29   The rest of the vectorizer's code is organized as follows:
30   - tree-vect-loop.c - loop specific parts such as reductions, etc. These are
31     used by drivers (1) and (2).
32   - tree-vect-loop-manip.c - vectorizer's loop control-flow utilities, used by
33     drivers (1) and (2).
34   - tree-vect-slp.c - BB vectorization specific analysis and transformation,
35     used by drivers (2) and (3).
36   - tree-vect-stmts.c - statements analysis and transformation (used by all).
37   - tree-vect-data-refs.c - vectorizer specific data-refs analysis and
38     manipulations (used by all).
39   - tree-vect-patterns.c - vectorizable code patterns detector (used by all)
40 
41   Here's a poor attempt at illustrating that:
42 
43      tree-vectorizer.c:
44      loop_vect()  loop_aware_slp()  slp_vect()
45           |        /           \          /
46           |       /             \        /
47           tree-vect-loop.c  tree-vect-slp.c
48                 | \      \  /      /   |
49                 |  \      \/      /    |
50                 |   \     /\     /     |
51                 |    \   /  \   /      |
52          tree-vect-stmts.c  tree-vect-data-refs.c
53                        \      /
54                     tree-vect-patterns.c
55 */
56 
57 #include "config.h"
58 #include "system.h"
59 #include "coretypes.h"
60 #include "backend.h"
61 #include "tree.h"
62 #include "gimple.h"
63 #include "predict.h"
64 #include "tree-pass.h"
65 #include "ssa.h"
66 #include "cgraph.h"
67 #include "fold-const.h"
68 #include "stor-layout.h"
69 #include "gimple-iterator.h"
70 #include "gimple-walk.h"
71 #include "tree-ssa-loop-manip.h"
72 #include "tree-ssa-loop-niter.h"
73 #include "tree-cfg.h"
74 #include "cfgloop.h"
75 #include "tree-vectorizer.h"
76 #include "tree-ssa-propagate.h"
77 #include "dbgcnt.h"
78 #include "tree-scalar-evolution.h"
79 #include "stringpool.h"
80 #include "attribs.h"
81 
82 
83 /* Loop or bb location.  */
84 source_location vect_location;
85 
86 /* Vector mapping GIMPLE stmt to stmt_vec_info. */
87 vec<stmt_vec_info> stmt_vec_info_vec;
88 
89 /* For mapping simduid to vectorization factor.  */
90 
91 struct simduid_to_vf : free_ptr_hash<simduid_to_vf>
92 {
93   unsigned int simduid;
94   poly_uint64 vf;
95 
96   /* hash_table support.  */
97   static inline hashval_t hash (const simduid_to_vf *);
98   static inline int equal (const simduid_to_vf *, const simduid_to_vf *);
99 };
100 
101 inline hashval_t
hash(const simduid_to_vf * p)102 simduid_to_vf::hash (const simduid_to_vf *p)
103 {
104   return p->simduid;
105 }
106 
107 inline int
equal(const simduid_to_vf * p1,const simduid_to_vf * p2)108 simduid_to_vf::equal (const simduid_to_vf *p1, const simduid_to_vf *p2)
109 {
110   return p1->simduid == p2->simduid;
111 }
112 
113 /* This hash maps the OMP simd array to the corresponding simduid used
114    to index into it.  Like thus,
115 
116         _7 = GOMP_SIMD_LANE (simduid.0)
117         ...
118         ...
119         D.1737[_7] = stuff;
120 
121 
122    This hash maps from the OMP simd array (D.1737[]) to DECL_UID of
123    simduid.0.  */
124 
125 struct simd_array_to_simduid : free_ptr_hash<simd_array_to_simduid>
126 {
127   tree decl;
128   unsigned int simduid;
129 
130   /* hash_table support.  */
131   static inline hashval_t hash (const simd_array_to_simduid *);
132   static inline int equal (const simd_array_to_simduid *,
133                                  const simd_array_to_simduid *);
134 };
135 
136 inline hashval_t
hash(const simd_array_to_simduid * p)137 simd_array_to_simduid::hash (const simd_array_to_simduid *p)
138 {
139   return DECL_UID (p->decl);
140 }
141 
142 inline int
equal(const simd_array_to_simduid * p1,const simd_array_to_simduid * p2)143 simd_array_to_simduid::equal (const simd_array_to_simduid *p1,
144                                     const simd_array_to_simduid *p2)
145 {
146   return p1->decl == p2->decl;
147 }
148 
149 /* Fold IFN_GOMP_SIMD_LANE, IFN_GOMP_SIMD_VF, IFN_GOMP_SIMD_LAST_LANE,
150    into their corresponding constants and remove
151    IFN_GOMP_SIMD_ORDERED_{START,END}.  */
152 
153 static void
adjust_simduid_builtins(hash_table<simduid_to_vf> * htab)154 adjust_simduid_builtins (hash_table<simduid_to_vf> *htab)
155 {
156   basic_block bb;
157 
158   FOR_EACH_BB_FN (bb, cfun)
159     {
160       gimple_stmt_iterator i;
161 
162       for (i = gsi_start_bb (bb); !gsi_end_p (i); )
163           {
164             poly_uint64 vf = 1;
165             enum internal_fn ifn;
166             gimple *stmt = gsi_stmt (i);
167             tree t;
168             if (!is_gimple_call (stmt)
169                 || !gimple_call_internal_p (stmt))
170               {
171                 gsi_next (&i);
172                 continue;
173               }
174             ifn = gimple_call_internal_fn (stmt);
175             switch (ifn)
176               {
177               case IFN_GOMP_SIMD_LANE:
178               case IFN_GOMP_SIMD_VF:
179               case IFN_GOMP_SIMD_LAST_LANE:
180                 break;
181               case IFN_GOMP_SIMD_ORDERED_START:
182               case IFN_GOMP_SIMD_ORDERED_END:
183                 if (integer_onep (gimple_call_arg (stmt, 0)))
184                     {
185                       enum built_in_function bcode
186                         = (ifn == IFN_GOMP_SIMD_ORDERED_START
187                            ? BUILT_IN_GOMP_ORDERED_START
188                            : BUILT_IN_GOMP_ORDERED_END);
189                       gimple *g
190                         = gimple_build_call (builtin_decl_explicit (bcode), 0);
191                       tree vdef = gimple_vdef (stmt);
192                       gimple_set_vdef (g, vdef);
193                       SSA_NAME_DEF_STMT (vdef) = g;
194                       gimple_set_vuse (g, gimple_vuse (stmt));
195                       gsi_replace (&i, g, true);
196                       continue;
197                     }
198                 gsi_remove (&i, true);
199                 unlink_stmt_vdef (stmt);
200                 continue;
201               default:
202                 gsi_next (&i);
203                 continue;
204               }
205             tree arg = gimple_call_arg (stmt, 0);
206             gcc_assert (arg != NULL_TREE);
207             gcc_assert (TREE_CODE (arg) == SSA_NAME);
208             simduid_to_vf *p = NULL, data;
209             data.simduid = DECL_UID (SSA_NAME_VAR (arg));
210             /* Need to nullify loop safelen field since it's value is not
211                valid after transformation.  */
212             if (bb->loop_father && bb->loop_father->safelen > 0)
213               bb->loop_father->safelen = 0;
214             if (htab)
215               {
216                 p = htab->find (&data);
217                 if (p)
218                     vf = p->vf;
219               }
220             switch (ifn)
221               {
222               case IFN_GOMP_SIMD_VF:
223                 t = build_int_cst (unsigned_type_node, vf);
224                 break;
225               case IFN_GOMP_SIMD_LANE:
226                 t = build_int_cst (unsigned_type_node, 0);
227                 break;
228               case IFN_GOMP_SIMD_LAST_LANE:
229                 t = gimple_call_arg (stmt, 1);
230                 break;
231               default:
232                 gcc_unreachable ();
233               }
234             tree lhs = gimple_call_lhs (stmt);
235             if (lhs)
236               replace_uses_by (lhs, t);
237             release_defs (stmt);
238             gsi_remove (&i, true);
239           }
240     }
241 }
242 
243 /* Helper structure for note_simd_array_uses.  */
244 
245 struct note_simd_array_uses_struct
246 {
247   hash_table<simd_array_to_simduid> **htab;
248   unsigned int simduid;
249 };
250 
251 /* Callback for note_simd_array_uses, called through walk_gimple_op.  */
252 
253 static tree
note_simd_array_uses_cb(tree * tp,int * walk_subtrees,void * data)254 note_simd_array_uses_cb (tree *tp, int *walk_subtrees, void *data)
255 {
256   struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
257   struct note_simd_array_uses_struct *ns
258     = (struct note_simd_array_uses_struct *) wi->info;
259 
260   if (TYPE_P (*tp))
261     *walk_subtrees = 0;
262   else if (VAR_P (*tp)
263              && lookup_attribute ("omp simd array", DECL_ATTRIBUTES (*tp))
264              && DECL_CONTEXT (*tp) == current_function_decl)
265     {
266       simd_array_to_simduid data;
267       if (!*ns->htab)
268           *ns->htab = new hash_table<simd_array_to_simduid> (15);
269       data.decl = *tp;
270       data.simduid = ns->simduid;
271       simd_array_to_simduid **slot = (*ns->htab)->find_slot (&data, INSERT);
272       if (*slot == NULL)
273           {
274             simd_array_to_simduid *p = XNEW (simd_array_to_simduid);
275             *p = data;
276             *slot = p;
277           }
278       else if ((*slot)->simduid != ns->simduid)
279           (*slot)->simduid = -1U;
280       *walk_subtrees = 0;
281     }
282   return NULL_TREE;
283 }
284 
285 /* Find "omp simd array" temporaries and map them to corresponding
286    simduid.  */
287 
288 static void
note_simd_array_uses(hash_table<simd_array_to_simduid> ** htab)289 note_simd_array_uses (hash_table<simd_array_to_simduid> **htab)
290 {
291   basic_block bb;
292   gimple_stmt_iterator gsi;
293   struct walk_stmt_info wi;
294   struct note_simd_array_uses_struct ns;
295 
296   memset (&wi, 0, sizeof (wi));
297   wi.info = &ns;
298   ns.htab = htab;
299 
300   FOR_EACH_BB_FN (bb, cfun)
301     for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
302       {
303           gimple *stmt = gsi_stmt (gsi);
304           if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt))
305             continue;
306           switch (gimple_call_internal_fn (stmt))
307             {
308             case IFN_GOMP_SIMD_LANE:
309             case IFN_GOMP_SIMD_VF:
310             case IFN_GOMP_SIMD_LAST_LANE:
311               break;
312             default:
313               continue;
314             }
315           tree lhs = gimple_call_lhs (stmt);
316           if (lhs == NULL_TREE)
317             continue;
318           imm_use_iterator use_iter;
319           gimple *use_stmt;
320           ns.simduid = DECL_UID (SSA_NAME_VAR (gimple_call_arg (stmt, 0)));
321           FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, lhs)
322             if (!is_gimple_debug (use_stmt))
323               walk_gimple_op (use_stmt, note_simd_array_uses_cb, &wi);
324       }
325 }
326 
327 /* Shrink arrays with "omp simd array" attribute to the corresponding
328    vectorization factor.  */
329 
330 static void
shrink_simd_arrays(hash_table<simd_array_to_simduid> * simd_array_to_simduid_htab,hash_table<simduid_to_vf> * simduid_to_vf_htab)331 shrink_simd_arrays
332   (hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab,
333    hash_table<simduid_to_vf> *simduid_to_vf_htab)
334 {
335   for (hash_table<simd_array_to_simduid>::iterator iter
336            = simd_array_to_simduid_htab->begin ();
337        iter != simd_array_to_simduid_htab->end (); ++iter)
338     if ((*iter)->simduid != -1U)
339       {
340           tree decl = (*iter)->decl;
341           poly_uint64 vf = 1;
342           if (simduid_to_vf_htab)
343             {
344               simduid_to_vf *p = NULL, data;
345               data.simduid = (*iter)->simduid;
346               p = simduid_to_vf_htab->find (&data);
347               if (p)
348                 vf = p->vf;
349             }
350           tree atype
351             = build_array_type_nelts (TREE_TYPE (TREE_TYPE (decl)), vf);
352           TREE_TYPE (decl) = atype;
353           relayout_decl (decl);
354       }
355 
356   delete simd_array_to_simduid_htab;
357 }
358 
359 /* Initialize the vec_info with kind KIND_IN and target cost data
360    TARGET_COST_DATA_IN.  */
361 
vec_info(vec_info::vec_kind kind_in,void * target_cost_data_in)362 vec_info::vec_info (vec_info::vec_kind kind_in, void *target_cost_data_in)
363   : kind (kind_in),
364     datarefs (vNULL),
365     ddrs (vNULL),
366     target_cost_data (target_cost_data_in)
367 {
368 }
369 
~vec_info()370 vec_info::~vec_info ()
371 {
372   slp_instance instance;
373   struct data_reference *dr;
374   unsigned int i;
375 
376   FOR_EACH_VEC_ELT (datarefs, i, dr)
377     if (dr->aux)
378       {
379         free (dr->aux);
380         dr->aux = NULL;
381       }
382 
383   FOR_EACH_VEC_ELT (slp_instances, i, instance)
384     vect_free_slp_instance (instance);
385 
386   free_data_refs (datarefs);
387   free_dependence_relations (ddrs);
388   destroy_cost_data (target_cost_data);
389 }
390 
391 /* A helper function to free scev and LOOP niter information, as well as
392    clear loop constraint LOOP_C_FINITE.  */
393 
394 void
vect_free_loop_info_assumptions(struct loop * loop)395 vect_free_loop_info_assumptions (struct loop *loop)
396 {
397   scev_reset_htab ();
398   /* We need to explicitly reset upper bound information since they are
399      used even after free_numbers_of_iterations_estimates.  */
400   loop->any_upper_bound = false;
401   loop->any_likely_upper_bound = false;
402   free_numbers_of_iterations_estimates (loop);
403   loop_constraint_clear (loop, LOOP_C_FINITE);
404 }
405 
406 /* Return whether STMT is inside the region we try to vectorize.  */
407 
408 bool
vect_stmt_in_region_p(vec_info * vinfo,gimple * stmt)409 vect_stmt_in_region_p (vec_info *vinfo, gimple *stmt)
410 {
411   if (!gimple_bb (stmt))
412     return false;
413 
414   if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
415     {
416       struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
417       if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
418           return false;
419     }
420   else
421     {
422       bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
423       if (gimple_bb (stmt) != BB_VINFO_BB (bb_vinfo)
424             || gimple_uid (stmt) == -1U
425             || gimple_code (stmt) == GIMPLE_PHI)
426           return false;
427     }
428 
429   return true;
430 }
431 
432 
433 /* If LOOP has been versioned during ifcvt, return the internal call
434    guarding it.  */
435 
436 static gimple *
vect_loop_vectorized_call(struct loop * loop)437 vect_loop_vectorized_call (struct loop *loop)
438 {
439   basic_block bb = loop_preheader_edge (loop)->src;
440   gimple *g;
441   do
442     {
443       g = last_stmt (bb);
444       if (g)
445           break;
446       if (!single_pred_p (bb))
447           break;
448       bb = single_pred (bb);
449     }
450   while (1);
451   if (g && gimple_code (g) == GIMPLE_COND)
452     {
453       gimple_stmt_iterator gsi = gsi_for_stmt (g);
454       gsi_prev (&gsi);
455       if (!gsi_end_p (gsi))
456           {
457             g = gsi_stmt (gsi);
458             if (gimple_call_internal_p (g, IFN_LOOP_VECTORIZED)
459                 && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->num
460                       || tree_to_shwi (gimple_call_arg (g, 1)) == loop->num))
461               return g;
462           }
463     }
464   return NULL;
465 }
466 
467 /* If LOOP has been versioned during loop distribution, return the gurading
468    internal call.  */
469 
470 static gimple *
vect_loop_dist_alias_call(struct loop * loop)471 vect_loop_dist_alias_call (struct loop *loop)
472 {
473   basic_block bb;
474   basic_block entry;
475   struct loop *outer, *orig;
476   gimple_stmt_iterator gsi;
477   gimple *g;
478 
479   if (loop->orig_loop_num == 0)
480     return NULL;
481 
482   orig = get_loop (cfun, loop->orig_loop_num);
483   if (orig == NULL)
484     {
485       /* The original loop is somehow destroyed.  Clear the information.  */
486       loop->orig_loop_num = 0;
487       return NULL;
488     }
489 
490   if (loop != orig)
491     bb = nearest_common_dominator (CDI_DOMINATORS, loop->header, orig->header);
492   else
493     bb = loop_preheader_edge (loop)->src;
494 
495   outer = bb->loop_father;
496   entry = ENTRY_BLOCK_PTR_FOR_FN (cfun);
497 
498   /* Look upward in dominance tree.  */
499   for (; bb != entry && flow_bb_inside_loop_p (outer, bb);
500        bb = get_immediate_dominator (CDI_DOMINATORS, bb))
501     {
502       g = last_stmt (bb);
503       if (g == NULL || gimple_code (g) != GIMPLE_COND)
504           continue;
505 
506       gsi = gsi_for_stmt (g);
507       gsi_prev (&gsi);
508       if (gsi_end_p (gsi))
509           continue;
510 
511       g = gsi_stmt (gsi);
512       /* The guarding internal function call must have the same distribution
513            alias id.  */
514       if (gimple_call_internal_p (g, IFN_LOOP_DIST_ALIAS)
515             && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->orig_loop_num))
516           return g;
517     }
518   return NULL;
519 }
520 
521 /* Set the uids of all the statements in basic blocks inside loop
522    represented by LOOP_VINFO. LOOP_VECTORIZED_CALL is the internal
523    call guarding the loop which has been if converted.  */
524 static void
set_uid_loop_bbs(loop_vec_info loop_vinfo,gimple * loop_vectorized_call)525 set_uid_loop_bbs (loop_vec_info loop_vinfo, gimple *loop_vectorized_call)
526 {
527   tree arg = gimple_call_arg (loop_vectorized_call, 1);
528   basic_block *bbs;
529   unsigned int i;
530   struct loop *scalar_loop = get_loop (cfun, tree_to_shwi (arg));
531 
532   LOOP_VINFO_SCALAR_LOOP (loop_vinfo) = scalar_loop;
533   gcc_checking_assert (vect_loop_vectorized_call (scalar_loop)
534                            == loop_vectorized_call);
535   /* If we are going to vectorize outer loop, prevent vectorization
536      of the inner loop in the scalar loop - either the scalar loop is
537      thrown away, so it is a wasted work, or is used only for
538      a few iterations.  */
539   if (scalar_loop->inner)
540     {
541       gimple *g = vect_loop_vectorized_call (scalar_loop->inner);
542       if (g)
543           {
544             arg = gimple_call_arg (g, 0);
545             get_loop (cfun, tree_to_shwi (arg))->dont_vectorize = true;
546             fold_loop_internal_call (g, boolean_false_node);
547           }
548     }
549   bbs = get_loop_body (scalar_loop);
550   for (i = 0; i < scalar_loop->num_nodes; i++)
551     {
552       basic_block bb = bbs[i];
553       gimple_stmt_iterator gsi;
554       for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
555           {
556             gimple *phi = gsi_stmt (gsi);
557             gimple_set_uid (phi, 0);
558           }
559       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
560           {
561             gimple *stmt = gsi_stmt (gsi);
562             gimple_set_uid (stmt, 0);
563           }
564     }
565   free (bbs);
566 }
567 
568 /* Function vectorize_loops.
569 
570    Entry point to loop vectorization phase.  */
571 
572 unsigned
vectorize_loops(void)573 vectorize_loops (void)
574 {
575   unsigned int i;
576   unsigned int num_vectorized_loops = 0;
577   unsigned int vect_loops_num;
578   struct loop *loop;
579   hash_table<simduid_to_vf> *simduid_to_vf_htab = NULL;
580   hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
581   bool any_ifcvt_loops = false;
582   unsigned ret = 0;
583   struct loop *new_loop;
584 
585   vect_loops_num = number_of_loops (cfun);
586 
587   /* Bail out if there are no loops.  */
588   if (vect_loops_num <= 1)
589     return 0;
590 
591   if (cfun->has_simduid_loops)
592     note_simd_array_uses (&simd_array_to_simduid_htab);
593 
594   init_stmt_vec_info_vec ();
595 
596   /*  ----------- Analyze loops. -----------  */
597 
598   /* If some loop was duplicated, it gets bigger number
599      than all previously defined loops.  This fact allows us to run
600      only over initial loops skipping newly generated ones.  */
601   FOR_EACH_LOOP (loop, 0)
602     if (loop->dont_vectorize)
603       {
604           any_ifcvt_loops = true;
605           /* If-conversion sometimes versions both the outer loop
606              (for the case when outer loop vectorization might be
607              desirable) as well as the inner loop in the scalar version
608              of the loop.  So we have:
609               if (LOOP_VECTORIZED (1, 3))
610                 {
611                     loop1
612                       loop2
613                 }
614               else
615                 loop3 (copy of loop1)
616                     if (LOOP_VECTORIZED (4, 5))
617                       loop4 (copy of loop2)
618                     else
619                       loop5 (copy of loop4)
620              If FOR_EACH_LOOP gives us loop3 first (which has
621              dont_vectorize set), make sure to process loop1 before loop4;
622              so that we can prevent vectorization of loop4 if loop1
623              is successfully vectorized.  */
624           if (loop->inner)
625             {
626               gimple *loop_vectorized_call
627                 = vect_loop_vectorized_call (loop);
628               if (loop_vectorized_call
629                     && vect_loop_vectorized_call (loop->inner))
630                 {
631                     tree arg = gimple_call_arg (loop_vectorized_call, 0);
632                     struct loop *vector_loop
633                       = get_loop (cfun, tree_to_shwi (arg));
634                     if (vector_loop && vector_loop != loop)
635                       {
636                         loop = vector_loop;
637                         /* Make sure we don't vectorize it twice.  */
638                         loop->dont_vectorize = true;
639                         goto try_vectorize;
640                       }
641                 }
642             }
643       }
644     else
645       {
646           loop_vec_info loop_vinfo, orig_loop_vinfo;
647           gimple *loop_vectorized_call, *loop_dist_alias_call;
648        try_vectorize:
649           if (!((flag_tree_loop_vectorize
650                  && optimize_loop_nest_for_speed_p (loop))
651                 || loop->force_vectorize))
652             continue;
653           orig_loop_vinfo = NULL;
654           loop_vectorized_call = vect_loop_vectorized_call (loop);
655           loop_dist_alias_call = vect_loop_dist_alias_call (loop);
656        vectorize_epilogue:
657           vect_location = find_loop_location (loop);
658         if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
659               && dump_enabled_p ())
660             dump_printf (MSG_NOTE, "\nAnalyzing loop at %s:%d\n",
661                        LOCATION_FILE (vect_location),
662                            LOCATION_LINE (vect_location));
663 
664           loop_vinfo = vect_analyze_loop (loop, orig_loop_vinfo);
665           loop->aux = loop_vinfo;
666 
667           if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
668             {
669               /* Free existing information if loop is analyzed with some
670                  assumptions.  */
671               if (loop_constraint_set_p (loop, LOOP_C_FINITE))
672                 vect_free_loop_info_assumptions (loop);
673 
674               /* If we applied if-conversion then try to vectorize the
675                  BB of innermost loops.
676                  ???  Ideally BB vectorization would learn to vectorize
677                  control flow by applying if-conversion on-the-fly, the
678                  following retains the if-converted loop body even when
679                  only non-if-converted parts took part in BB vectorization.  */
680               if (flag_tree_slp_vectorize != 0
681                     && loop_vectorized_call
682                     && ! loop->inner)
683                 {
684                     basic_block bb = loop->header;
685                     bool has_mask_load_store = false;
686                     for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
687                          !gsi_end_p (gsi); gsi_next (&gsi))
688                       {
689                         gimple *stmt = gsi_stmt (gsi);
690                         if (is_gimple_call (stmt)
691                               && gimple_call_internal_p (stmt)
692                               && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
693                                   || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
694                           {
695                               has_mask_load_store = true;
696                               break;
697                           }
698                         gimple_set_uid (stmt, -1);
699                         gimple_set_visited (stmt, false);
700                       }
701                     if (! has_mask_load_store && vect_slp_bb (bb))
702                       {
703                         dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
704                                              "basic block vectorized\n");
705                         fold_loop_internal_call (loop_vectorized_call,
706                                                        boolean_true_node);
707                         loop_vectorized_call = NULL;
708                         ret |= TODO_cleanup_cfg;
709                       }
710                 }
711               /* If outer loop vectorization fails for LOOP_VECTORIZED guarded
712                  loop, don't vectorize its inner loop; we'll attempt to
713                  vectorize LOOP_VECTORIZED guarded inner loop of the scalar
714                  loop version.  */
715               if (loop_vectorized_call && loop->inner)
716                 loop->inner->dont_vectorize = true;
717               continue;
718             }
719 
720         if (!dbg_cnt (vect_loop))
721             {
722               /* We may miss some if-converted loops due to
723                  debug counter.  Set any_ifcvt_loops to visit
724                  them at finalization.  */
725               any_ifcvt_loops = true;
726               /* Free existing information if loop is analyzed with some
727                  assumptions.  */
728               if (loop_constraint_set_p (loop, LOOP_C_FINITE))
729                 vect_free_loop_info_assumptions (loop);
730 
731               break;
732             }
733 
734           if (loop_vectorized_call)
735             set_uid_loop_bbs (loop_vinfo, loop_vectorized_call);
736         if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
737               && dump_enabled_p ())
738           dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
739                            "loop vectorized\n");
740           new_loop = vect_transform_loop (loop_vinfo);
741           num_vectorized_loops++;
742           /* Now that the loop has been vectorized, allow it to be unrolled
743              etc.  */
744           loop->force_vectorize = false;
745 
746           if (loop->simduid)
747             {
748               simduid_to_vf *simduid_to_vf_data = XNEW (simduid_to_vf);
749               if (!simduid_to_vf_htab)
750                 simduid_to_vf_htab = new hash_table<simduid_to_vf> (15);
751               simduid_to_vf_data->simduid = DECL_UID (loop->simduid);
752               simduid_to_vf_data->vf = loop_vinfo->vectorization_factor;
753               *simduid_to_vf_htab->find_slot (simduid_to_vf_data, INSERT)
754                 = simduid_to_vf_data;
755             }
756 
757           if (loop_vectorized_call)
758             {
759               fold_loop_internal_call (loop_vectorized_call, boolean_true_node);
760               loop_vectorized_call = NULL;
761               ret |= TODO_cleanup_cfg;
762             }
763           if (loop_dist_alias_call)
764             {
765               tree value = gimple_call_arg (loop_dist_alias_call, 1);
766               fold_loop_internal_call (loop_dist_alias_call, value);
767               loop_dist_alias_call = NULL;
768               ret |= TODO_cleanup_cfg;
769             }
770 
771           if (new_loop)
772             {
773               /* Epilogue of vectorized loop must be vectorized too.  */
774               vect_loops_num = number_of_loops (cfun);
775               loop = new_loop;
776               orig_loop_vinfo = loop_vinfo;  /* To pass vect_analyze_loop.  */
777               goto vectorize_epilogue;
778             }
779       }
780 
781   vect_location = UNKNOWN_LOCATION;
782 
783   statistics_counter_event (cfun, "Vectorized loops", num_vectorized_loops);
784   if (dump_enabled_p ()
785       || (num_vectorized_loops > 0 && dump_enabled_p ()))
786     dump_printf_loc (MSG_NOTE, vect_location,
787                      "vectorized %u loops in function.\n",
788                      num_vectorized_loops);
789 
790   /*  ----------- Finalize. -----------  */
791 
792   if (any_ifcvt_loops)
793     for (i = 1; i < vect_loops_num; i++)
794       {
795           loop = get_loop (cfun, i);
796           if (loop && loop->dont_vectorize)
797             {
798               gimple *g = vect_loop_vectorized_call (loop);
799               if (g)
800                 {
801                     fold_loop_internal_call (g, boolean_false_node);
802                     ret |= TODO_cleanup_cfg;
803                     g = NULL;
804                 }
805               else
806                 g = vect_loop_dist_alias_call (loop);
807 
808               if (g)
809                 {
810                     fold_loop_internal_call (g, boolean_false_node);
811                     ret |= TODO_cleanup_cfg;
812                 }
813             }
814       }
815 
816   for (i = 1; i < vect_loops_num; i++)
817     {
818       loop_vec_info loop_vinfo;
819       bool has_mask_store;
820 
821       loop = get_loop (cfun, i);
822       if (!loop)
823           continue;
824       loop_vinfo = (loop_vec_info) loop->aux;
825       has_mask_store = false;
826       if (loop_vinfo)
827           has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo);
828       delete loop_vinfo;
829       if (has_mask_store
830             && targetm.vectorize.empty_mask_is_expensive (IFN_MASK_STORE))
831           optimize_mask_stores (loop);
832       loop->aux = NULL;
833     }
834 
835   free_stmt_vec_info_vec ();
836 
837   /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins.  */
838   if (cfun->has_simduid_loops)
839     adjust_simduid_builtins (simduid_to_vf_htab);
840 
841   /* Shrink any "omp array simd" temporary arrays to the
842      actual vectorization factors.  */
843   if (simd_array_to_simduid_htab)
844     shrink_simd_arrays (simd_array_to_simduid_htab, simduid_to_vf_htab);
845   delete simduid_to_vf_htab;
846   cfun->has_simduid_loops = false;
847 
848   if (num_vectorized_loops > 0)
849     {
850       /* If we vectorized any loop only virtual SSA form needs to be updated.
851            ???  Also while we try hard to update loop-closed SSA form we fail
852            to properly do this in some corner-cases (see PR56286).  */
853       rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals);
854       return TODO_cleanup_cfg;
855     }
856 
857   return ret;
858 }
859 
860 
861 /* Entry point to the simduid cleanup pass.  */
862 
863 namespace {
864 
865 const pass_data pass_data_simduid_cleanup =
866 {
867   GIMPLE_PASS, /* type */
868   "simduid", /* name */
869   OPTGROUP_NONE, /* optinfo_flags */
870   TV_NONE, /* tv_id */
871   ( PROP_ssa | PROP_cfg ), /* properties_required */
872   0, /* properties_provided */
873   0, /* properties_destroyed */
874   0, /* todo_flags_start */
875   0, /* todo_flags_finish */
876 };
877 
878 class pass_simduid_cleanup : public gimple_opt_pass
879 {
880 public:
pass_simduid_cleanup(gcc::context * ctxt)881   pass_simduid_cleanup (gcc::context *ctxt)
882     : gimple_opt_pass (pass_data_simduid_cleanup, ctxt)
883   {}
884 
885   /* opt_pass methods: */
clone()886   opt_pass * clone () { return new pass_simduid_cleanup (m_ctxt); }
gate(function * fun)887   virtual bool gate (function *fun) { return fun->has_simduid_loops; }
888   virtual unsigned int execute (function *);
889 
890 }; // class pass_simduid_cleanup
891 
892 unsigned int
execute(function * fun)893 pass_simduid_cleanup::execute (function *fun)
894 {
895   hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
896 
897   note_simd_array_uses (&simd_array_to_simduid_htab);
898 
899   /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins.  */
900   adjust_simduid_builtins (NULL);
901 
902   /* Shrink any "omp array simd" temporary arrays to the
903      actual vectorization factors.  */
904   if (simd_array_to_simduid_htab)
905     shrink_simd_arrays (simd_array_to_simduid_htab, NULL);
906   fun->has_simduid_loops = false;
907   return 0;
908 }
909 
910 }  // anon namespace
911 
912 gimple_opt_pass *
make_pass_simduid_cleanup(gcc::context * ctxt)913 make_pass_simduid_cleanup (gcc::context *ctxt)
914 {
915   return new pass_simduid_cleanup (ctxt);
916 }
917 
918 
919 /*  Entry point to basic block SLP phase.  */
920 
921 namespace {
922 
923 const pass_data pass_data_slp_vectorize =
924 {
925   GIMPLE_PASS, /* type */
926   "slp", /* name */
927   OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
928   TV_TREE_SLP_VECTORIZATION, /* tv_id */
929   ( PROP_ssa | PROP_cfg ), /* properties_required */
930   0, /* properties_provided */
931   0, /* properties_destroyed */
932   0, /* todo_flags_start */
933   TODO_update_ssa, /* todo_flags_finish */
934 };
935 
936 class pass_slp_vectorize : public gimple_opt_pass
937 {
938 public:
pass_slp_vectorize(gcc::context * ctxt)939   pass_slp_vectorize (gcc::context *ctxt)
940     : gimple_opt_pass (pass_data_slp_vectorize, ctxt)
941   {}
942 
943   /* opt_pass methods: */
clone()944   opt_pass * clone () { return new pass_slp_vectorize (m_ctxt); }
gate(function *)945   virtual bool gate (function *) { return flag_tree_slp_vectorize != 0; }
946   virtual unsigned int execute (function *);
947 
948 }; // class pass_slp_vectorize
949 
950 unsigned int
execute(function * fun)951 pass_slp_vectorize::execute (function *fun)
952 {
953   basic_block bb;
954 
955   bool in_loop_pipeline = scev_initialized_p ();
956   if (!in_loop_pipeline)
957     {
958       loop_optimizer_init (LOOPS_NORMAL);
959       scev_initialize ();
960     }
961 
962   /* Mark all stmts as not belonging to the current region and unvisited.  */
963   FOR_EACH_BB_FN (bb, fun)
964     {
965       for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
966              gsi_next (&gsi))
967           {
968             gimple *stmt = gsi_stmt (gsi);
969             gimple_set_uid (stmt, -1);
970             gimple_set_visited (stmt, false);
971           }
972     }
973 
974   init_stmt_vec_info_vec ();
975 
976   FOR_EACH_BB_FN (bb, fun)
977     {
978       if (vect_slp_bb (bb))
979           dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
980                                "basic block vectorized\n");
981     }
982 
983   free_stmt_vec_info_vec ();
984 
985   if (!in_loop_pipeline)
986     {
987       scev_finalize ();
988       loop_optimizer_finalize ();
989     }
990 
991   return 0;
992 }
993 
994 } // anon namespace
995 
996 gimple_opt_pass *
make_pass_slp_vectorize(gcc::context * ctxt)997 make_pass_slp_vectorize (gcc::context *ctxt)
998 {
999   return new pass_slp_vectorize (ctxt);
1000 }
1001 
1002 
1003 /* Increase alignment of global arrays to improve vectorization potential.
1004    TODO:
1005    - Consider also structs that have an array field.
1006    - Use ipa analysis to prune arrays that can't be vectorized?
1007      This should involve global alignment analysis and in the future also
1008      array padding.  */
1009 
1010 static unsigned get_vec_alignment_for_type (tree);
1011 static hash_map<tree, unsigned> *type_align_map;
1012 
1013 /* Return alignment of array's vector type corresponding to scalar type.
1014    0 if no vector type exists.  */
1015 static unsigned
get_vec_alignment_for_array_type(tree type)1016 get_vec_alignment_for_array_type (tree type)
1017 {
1018   gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
1019   poly_uint64 array_size, vector_size;
1020 
1021   tree vectype = get_vectype_for_scalar_type (strip_array_types (type));
1022   if (!vectype
1023       || !poly_int_tree_p (TYPE_SIZE (type), &array_size)
1024       || !poly_int_tree_p (TYPE_SIZE (vectype), &vector_size)
1025       || maybe_lt (array_size, vector_size))
1026     return 0;
1027 
1028   return TYPE_ALIGN (vectype);
1029 }
1030 
1031 /* Return alignment of field having maximum alignment of vector type
1032    corresponding to it's scalar type. For now, we only consider fields whose
1033    offset is a multiple of it's vector alignment.
1034    0 if no suitable field is found.  */
1035 static unsigned
get_vec_alignment_for_record_type(tree type)1036 get_vec_alignment_for_record_type (tree type)
1037 {
1038   gcc_assert (TREE_CODE (type) == RECORD_TYPE);
1039 
1040   unsigned max_align = 0, alignment;
1041   HOST_WIDE_INT offset;
1042   tree offset_tree;
1043 
1044   if (TYPE_PACKED (type))
1045     return 0;
1046 
1047   unsigned *slot = type_align_map->get (type);
1048   if (slot)
1049     return *slot;
1050 
1051   for (tree field = first_field (type);
1052        field != NULL_TREE;
1053        field = DECL_CHAIN (field))
1054     {
1055       /* Skip if not FIELD_DECL or if alignment is set by user.  */
1056       if (TREE_CODE (field) != FIELD_DECL
1057             || DECL_USER_ALIGN (field)
1058             || DECL_ARTIFICIAL (field))
1059           continue;
1060 
1061       /* We don't need to process the type further if offset is variable,
1062            since the offsets of remaining members will also be variable.  */
1063       if (TREE_CODE (DECL_FIELD_OFFSET (field)) != INTEGER_CST
1064             || TREE_CODE (DECL_FIELD_BIT_OFFSET (field)) != INTEGER_CST)
1065           break;
1066 
1067       /* Similarly stop processing the type if offset_tree
1068            does not fit in unsigned HOST_WIDE_INT.  */
1069       offset_tree = bit_position (field);
1070       if (!tree_fits_uhwi_p (offset_tree))
1071           break;
1072 
1073       offset = tree_to_uhwi (offset_tree);
1074       alignment = get_vec_alignment_for_type (TREE_TYPE (field));
1075 
1076       /* Get maximum alignment of vectorized field/array among those members
1077            whose offset is multiple of the vector alignment.  */
1078       if (alignment
1079             && (offset % alignment == 0)
1080             && (alignment > max_align))
1081           max_align = alignment;
1082     }
1083 
1084   type_align_map->put (type, max_align);
1085   return max_align;
1086 }
1087 
1088 /* Return alignment of vector type corresponding to decl's scalar type
1089    or 0 if it doesn't exist or the vector alignment is lesser than
1090    decl's alignment.  */
1091 static unsigned
get_vec_alignment_for_type(tree type)1092 get_vec_alignment_for_type (tree type)
1093 {
1094   if (type == NULL_TREE)
1095     return 0;
1096 
1097   gcc_assert (TYPE_P (type));
1098 
1099   static unsigned alignment = 0;
1100   switch (TREE_CODE (type))
1101     {
1102       case ARRAY_TYPE:
1103           alignment = get_vec_alignment_for_array_type (type);
1104           break;
1105       case RECORD_TYPE:
1106           alignment = get_vec_alignment_for_record_type (type);
1107           break;
1108       default:
1109           alignment = 0;
1110           break;
1111     }
1112 
1113   return (alignment > TYPE_ALIGN (type)) ? alignment : 0;
1114 }
1115 
1116 /* Entry point to increase_alignment pass.  */
1117 static unsigned int
increase_alignment(void)1118 increase_alignment (void)
1119 {
1120   varpool_node *vnode;
1121 
1122   vect_location = UNKNOWN_LOCATION;
1123   type_align_map = new hash_map<tree, unsigned>;
1124 
1125   /* Increase the alignment of all global arrays for vectorization.  */
1126   FOR_EACH_DEFINED_VARIABLE (vnode)
1127     {
1128       tree decl = vnode->decl;
1129       unsigned int alignment;
1130 
1131       if ((decl_in_symtab_p (decl)
1132             && !symtab_node::get (decl)->can_increase_alignment_p ())
1133             || DECL_USER_ALIGN (decl) || DECL_ARTIFICIAL (decl))
1134           continue;
1135 
1136       alignment = get_vec_alignment_for_type (TREE_TYPE (decl));
1137       if (alignment && vect_can_force_dr_alignment_p (decl, alignment))
1138         {
1139             vnode->increase_alignment (alignment);
1140           dump_printf (MSG_NOTE, "Increasing alignment of decl: ");
1141           dump_generic_expr (MSG_NOTE, TDF_SLIM, decl);
1142           dump_printf (MSG_NOTE, "\n");
1143         }
1144     }
1145 
1146   delete type_align_map;
1147   return 0;
1148 }
1149 
1150 
1151 namespace {
1152 
1153 const pass_data pass_data_ipa_increase_alignment =
1154 {
1155   SIMPLE_IPA_PASS, /* type */
1156   "increase_alignment", /* name */
1157   OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */
1158   TV_IPA_OPT, /* tv_id */
1159   0, /* properties_required */
1160   0, /* properties_provided */
1161   0, /* properties_destroyed */
1162   0, /* todo_flags_start */
1163   0, /* todo_flags_finish */
1164 };
1165 
1166 class pass_ipa_increase_alignment : public simple_ipa_opt_pass
1167 {
1168 public:
pass_ipa_increase_alignment(gcc::context * ctxt)1169   pass_ipa_increase_alignment (gcc::context *ctxt)
1170     : simple_ipa_opt_pass (pass_data_ipa_increase_alignment, ctxt)
1171   {}
1172 
1173   /* opt_pass methods: */
gate(function *)1174   virtual bool gate (function *)
1175     {
1176       return flag_section_anchors && flag_tree_loop_vectorize;
1177     }
1178 
execute(function *)1179   virtual unsigned int execute (function *) { return increase_alignment (); }
1180 
1181 }; // class pass_ipa_increase_alignment
1182 
1183 } // anon namespace
1184 
1185 simple_ipa_opt_pass *
make_pass_ipa_increase_alignment(gcc::context * ctxt)1186 make_pass_ipa_increase_alignment (gcc::context *ctxt)
1187 {
1188   return new pass_ipa_increase_alignment (ctxt);
1189 }
1190