1 /* Analysis Utilities for Loop Vectorization.
2    Copyright (C) 2003,2004,2005,2006 Free Software Foundation, Inc.
3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 
5 This file is part of GCC.
6 
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11 
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING.  If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301, USA.  */
21 
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "ggc.h"
27 #include "tree.h"
28 #include "target.h"
29 #include "basic-block.h"
30 #include "diagnostic.h"
31 #include "tree-flow.h"
32 #include "tree-dump.h"
33 #include "timevar.h"
34 #include "cfgloop.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "params.h"
38 #include "tree-chrec.h"
39 #include "tree-data-ref.h"
40 #include "tree-scalar-evolution.h"
41 #include "tree-vectorizer.h"
42 
43 /* Main analysis functions.  */
44 static loop_vec_info vect_analyze_loop_form (struct loop *);
45 static bool vect_analyze_data_refs (loop_vec_info);
46 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
47 static void vect_analyze_scalar_cycles (loop_vec_info);
48 static bool vect_analyze_data_ref_accesses (loop_vec_info);
49 static bool vect_analyze_data_ref_dependences (loop_vec_info);
50 static bool vect_analyze_data_refs_alignment (loop_vec_info);
51 static bool vect_compute_data_refs_alignment (loop_vec_info);
52 static bool vect_enhance_data_refs_alignment (loop_vec_info);
53 static bool vect_analyze_operations (loop_vec_info);
54 static bool vect_determine_vectorization_factor (loop_vec_info);
55 
56 /* Utility functions for the analyses.  */
57 static bool exist_non_indexing_operands_for_use_p (tree, tree);
58 static void vect_mark_relevant (VEC(tree,heap) **, tree, bool, bool);
59 static bool vect_stmt_relevant_p (tree, loop_vec_info, bool *, bool *);
60 static tree vect_get_loop_niters (struct loop *, tree *);
61 static bool vect_analyze_data_ref_dependence
62   (struct data_dependence_relation *, loop_vec_info);
63 static bool vect_compute_data_ref_alignment (struct data_reference *);
64 static bool vect_analyze_data_ref_access (struct data_reference *);
65 static bool vect_can_advance_ivs_p (loop_vec_info);
66 static void vect_update_misalignment_for_peel
67   (struct data_reference *, struct data_reference *, int npeel);
68 
69 
70 /* Function vect_determine_vectorization_factor
71 
72    Determine the vectorization factor (VF). VF is the number of data elements
73    that are operated upon in parallel in a single iteration of the vectorized
74    loop. For example, when vectorizing a loop that operates on 4byte elements,
75    on a target with vector size (VS) 16byte, the VF is set to 4, since 4
76    elements can fit in a single vector register.
77 
78    We currently support vectorization of loops in which all types operated upon
79    are of the same size. Therefore this function currently sets VF according to
80    the size of the types operated upon, and fails if there are multiple sizes
81    in the loop.
82 
83    VF is also the factor by which the loop iterations are strip-mined, e.g.:
84    original loop:
85         for (i=0; i<N; i++){
86           a[i] = b[i] + c[i];
87         }
88 
89    vectorized loop:
90         for (i=0; i<N; i+=VF){
91           a[i:VF] = b[i:VF] + c[i:VF];
92         }
93 */
94 
95 static bool
vect_determine_vectorization_factor(loop_vec_info loop_vinfo)96 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
97 {
98   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
99   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
100   int nbbs = loop->num_nodes;
101   block_stmt_iterator si;
102   unsigned int vectorization_factor = 0;
103   int i;
104   tree scalar_type;
105 
106   if (vect_print_dump_info (REPORT_DETAILS))
107     fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
108 
109   for (i = 0; i < nbbs; i++)
110     {
111       basic_block bb = bbs[i];
112 
113       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
114         {
115           tree stmt = bsi_stmt (si);
116           unsigned int nunits;
117           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
118           tree vectype;
119 
120           if (vect_print_dump_info (REPORT_DETAILS))
121             {
122               fprintf (vect_dump, "==> examining statement: ");
123               print_generic_expr (vect_dump, stmt, TDF_SLIM);
124             }
125 
126           gcc_assert (stmt_info);
127           /* skip stmts which do not need to be vectorized.  */
128           if (!STMT_VINFO_RELEVANT_P (stmt_info)
129 	      && !STMT_VINFO_LIVE_P (stmt_info))
130             {
131               if (vect_print_dump_info (REPORT_DETAILS))
132                 fprintf (vect_dump, "skip.");
133               continue;
134             }
135 
136           if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
137             {
138               if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
139                 {
140                   fprintf (vect_dump, "not vectorized: vector stmt in loop:");
141                   print_generic_expr (vect_dump, stmt, TDF_SLIM);
142                 }
143               return false;
144             }
145 
146 	  if (STMT_VINFO_VECTYPE (stmt_info))
147 	    {
148 	      vectype = STMT_VINFO_VECTYPE (stmt_info);
149 	      scalar_type = TREE_TYPE (vectype);
150 	    }
151 	  else
152 	    {
153 	      if (STMT_VINFO_DATA_REF (stmt_info))
154 		scalar_type =
155 			TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
156 	      else if (TREE_CODE (stmt) == MODIFY_EXPR)
157 		scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
158 	      else
159 		scalar_type = TREE_TYPE (stmt);
160 
161 	      if (vect_print_dump_info (REPORT_DETAILS))
162 		{
163 		  fprintf (vect_dump, "get vectype for scalar type:  ");
164 		  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
165 		}
166 
167 	      vectype = get_vectype_for_scalar_type (scalar_type);
168 	      if (!vectype)
169 		{
170 		  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
171 		    {
172 		      fprintf (vect_dump,
173 			       "not vectorized: unsupported data-type ");
174 		      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
175 		    }
176 		  return false;
177 		}
178 	      STMT_VINFO_VECTYPE (stmt_info) = vectype;
179             }
180 
181           if (vect_print_dump_info (REPORT_DETAILS))
182             {
183               fprintf (vect_dump, "vectype: ");
184               print_generic_expr (vect_dump, vectype, TDF_SLIM);
185             }
186 
187           nunits = TYPE_VECTOR_SUBPARTS (vectype);
188           if (vect_print_dump_info (REPORT_DETAILS))
189             fprintf (vect_dump, "nunits = %d", nunits);
190 
191           if (vectorization_factor)
192             {
193               /* FORNOW: don't allow mixed units.
194                  This restriction will be relaxed in the future.  */
195               if (nunits != vectorization_factor)
196                 {
197                   if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
198                     fprintf (vect_dump, "not vectorized: mixed data-types");
199                   return false;
200                 }
201             }
202           else
203             vectorization_factor = nunits;
204 
205           gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
206                         * vectorization_factor == UNITS_PER_SIMD_WORD);
207         }
208     }
209 
210   /* TODO: Analyze cost. Decide if worth while to vectorize.  */
211 
212   if (vectorization_factor <= 1)
213     {
214       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
215         fprintf (vect_dump, "not vectorized: unsupported data-type");
216       return false;
217     }
218   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
219 
220   return true;
221 }
222 
223 
224 /* Function vect_analyze_operations.
225 
226    Scan the loop stmts and make sure they are all vectorizable.  */
227 
228 static bool
vect_analyze_operations(loop_vec_info loop_vinfo)229 vect_analyze_operations (loop_vec_info loop_vinfo)
230 {
231   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
232   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
233   int nbbs = loop->num_nodes;
234   block_stmt_iterator si;
235   unsigned int vectorization_factor = 0;
236   int i;
237   bool ok;
238   tree phi;
239   stmt_vec_info stmt_info;
240   bool need_to_vectorize = false;
241 
242   if (vect_print_dump_info (REPORT_DETAILS))
243     fprintf (vect_dump, "=== vect_analyze_operations ===");
244 
245   gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
246   vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
247 
248   for (i = 0; i < nbbs; i++)
249     {
250       basic_block bb = bbs[i];
251 
252       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
253         {
254 	  stmt_info = vinfo_for_stmt (phi);
255 	  if (vect_print_dump_info (REPORT_DETAILS))
256 	    {
257 	      fprintf (vect_dump, "examining phi: ");
258 	      print_generic_expr (vect_dump, phi, TDF_SLIM);
259 	    }
260 
261 	  gcc_assert (stmt_info);
262 
263 	  if (STMT_VINFO_LIVE_P (stmt_info))
264 	    {
265 	      /* FORNOW: not yet supported.  */
266 	      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
267 		fprintf (vect_dump, "not vectorized: value used after loop.");
268 	    return false;
269 	  }
270 
271 	  if (STMT_VINFO_RELEVANT_P (stmt_info))
272 	    {
273 	      /* Most likely a reduction-like computation that is used
274 	         in the loop.  */
275 	      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
276 	        fprintf (vect_dump, "not vectorized: unsupported pattern.");
277  	     return false;
278 	    }
279 	}
280 
281       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
282 	{
283 	  tree stmt = bsi_stmt (si);
284 	  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
285 
286 	  if (vect_print_dump_info (REPORT_DETAILS))
287 	    {
288 	      fprintf (vect_dump, "==> examining statement: ");
289 	      print_generic_expr (vect_dump, stmt, TDF_SLIM);
290 	    }
291 
292 	  gcc_assert (stmt_info);
293 
294 	  /* skip stmts which do not need to be vectorized.
295 	     this is expected to include:
296 	     - the COND_EXPR which is the loop exit condition
297 	     - any LABEL_EXPRs in the loop
298 	     - computations that are used only for array indexing or loop
299 	     control  */
300 
301 	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
302 	      && !STMT_VINFO_LIVE_P (stmt_info))
303 	    {
304 	      if (vect_print_dump_info (REPORT_DETAILS))
305 	        fprintf (vect_dump, "irrelevant.");
306 	      continue;
307 	    }
308 
309           if (STMT_VINFO_RELEVANT_P (stmt_info))
310             {
311               gcc_assert (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
312               gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
313 
314 	      ok = (vectorizable_operation (stmt, NULL, NULL)
315 		    || vectorizable_assignment (stmt, NULL, NULL)
316 		    || vectorizable_load (stmt, NULL, NULL)
317 		    || vectorizable_store (stmt, NULL, NULL)
318 		    || vectorizable_condition (stmt, NULL, NULL));
319 
320 	      if (!ok)
321 		{
322 		  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
323 		    {
324 		      fprintf (vect_dump,
325 			       "not vectorized: relevant stmt not supported: ");
326 		      print_generic_expr (vect_dump, stmt, TDF_SLIM);
327 		    }
328 		  return false;
329 		}
330 	      need_to_vectorize = true;
331             }
332 
333 	  if (STMT_VINFO_LIVE_P (stmt_info))
334 	    {
335 	      ok = vectorizable_reduction (stmt, NULL, NULL);
336 
337 	      if (ok)
338                 need_to_vectorize = true;
339               else
340 	        ok = vectorizable_live_operation (stmt, NULL, NULL);
341 
342 	      if (!ok)
343 		{
344 		  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
345 		    {
346 		      fprintf (vect_dump,
347 			       "not vectorized: live stmt not supported: ");
348 		      print_generic_expr (vect_dump, stmt, TDF_SLIM);
349 		    }
350 		  return false;
351 		}
352 	    }
353 	} /* stmts in bb */
354     } /* bbs */
355 
356   /* TODO: Analyze cost. Decide if worth while to vectorize.  */
357 
358   /* All operations in the loop are either irrelevant (deal with loop
359      control, or dead), or only used outside the loop and can be moved
360      out of the loop (e.g. invariants, inductions).  The loop can be
361      optimized away by scalar optimizations.  We're better off not
362      touching this loop.  */
363   if (!need_to_vectorize)
364     {
365       if (vect_print_dump_info (REPORT_DETAILS))
366 	fprintf (vect_dump,
367 		 "All the computation can be taken out of the loop.");
368       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
369         fprintf (vect_dump,
370 		 "not vectorized: redundant loop. no profit to vectorize.");
371       return false;
372     }
373 
374   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
375       && vect_print_dump_info (REPORT_DETAILS))
376     fprintf (vect_dump,
377         "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
378         vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
379 
380   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
381       && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
382     {
383       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
384 	fprintf (vect_dump, "not vectorized: iteration count too small.");
385       return false;
386     }
387 
388   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
389       || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
390       || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
391     {
392       if (vect_print_dump_info (REPORT_DETAILS))
393         fprintf (vect_dump, "epilog loop required.");
394       if (!vect_can_advance_ivs_p (loop_vinfo))
395         {
396           if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
397             fprintf (vect_dump,
398                      "not vectorized: can't create epilog loop 1.");
399           return false;
400         }
401       if (!slpeel_can_duplicate_loop_p (loop, loop->single_exit))
402         {
403           if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
404             fprintf (vect_dump,
405                      "not vectorized: can't create epilog loop 2.");
406           return false;
407         }
408     }
409 
410   return true;
411 }
412 
413 
414 /* Function exist_non_indexing_operands_for_use_p
415 
416    USE is one of the uses attached to STMT. Check if USE is
417    used in STMT for anything other than indexing an array.  */
418 
419 static bool
exist_non_indexing_operands_for_use_p(tree use,tree stmt)420 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
421 {
422   tree operand;
423   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
424 
425   /* USE corresponds to some operand in STMT. If there is no data
426      reference in STMT, then any operand that corresponds to USE
427      is not indexing an array.  */
428   if (!STMT_VINFO_DATA_REF (stmt_info))
429     return true;
430 
431   /* STMT has a data_ref. FORNOW this means that its of one of
432      the following forms:
433      -1- ARRAY_REF = var
434      -2- var = ARRAY_REF
435      (This should have been verified in analyze_data_refs).
436 
437      'var' in the second case corresponds to a def, not a use,
438      so USE cannot correspond to any operands that are not used
439      for array indexing.
440 
441      Therefore, all we need to check is if STMT falls into the
442      first case, and whether var corresponds to USE.  */
443 
444   if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
445     return false;
446 
447   operand = TREE_OPERAND (stmt, 1);
448 
449   if (TREE_CODE (operand) != SSA_NAME)
450     return false;
451 
452   if (operand == use)
453     return true;
454 
455   return false;
456 }
457 
458 
459 /* Function vect_analyze_scalar_cycles.
460 
461    Examine the cross iteration def-use cycles of scalar variables, by
462    analyzing the loop (scalar) PHIs; Classify each cycle as one of the
463    following: invariant, induction, reduction, unknown.
464 
465    Some forms of scalar cycles are not yet supported.
466 
467    Example1: reduction: (unsupported yet)
468 
469               loop1:
470               for (i=0; i<N; i++)
471                  sum += a[i];
472 
473    Example2: induction: (unsupported yet)
474 
475               loop2:
476               for (i=0; i<N; i++)
477                  a[i] = i;
478 
479    Note: the following loop *is* vectorizable:
480 
481               loop3:
482               for (i=0; i<N; i++)
483                  a[i] = b[i];
484 
485          even though it has a def-use cycle caused by the induction variable i:
486 
487               loop: i_2 = PHI (i_0, i_1)
488                     a[i_2] = ...;
489                     i_1 = i_2 + 1;
490                     GOTO loop;
491 
492          because the def-use cycle in loop3 is considered "not relevant" - i.e.,
493          it does not need to be vectorized because it is only used for array
494          indexing (see 'mark_stmts_to_be_vectorized'). The def-use cycle in
495          loop2 on the other hand is relevant (it is being written to memory).
496 */
497 
498 static void
vect_analyze_scalar_cycles(loop_vec_info loop_vinfo)499 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
500 {
501   tree phi;
502   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
503   basic_block bb = loop->header;
504   tree dummy;
505 
506   if (vect_print_dump_info (REPORT_DETAILS))
507     fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
508 
509   for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
510     {
511       tree access_fn = NULL;
512       tree def = PHI_RESULT (phi);
513       stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
514       tree reduc_stmt;
515 
516       if (vect_print_dump_info (REPORT_DETAILS))
517 	{
518           fprintf (vect_dump, "Analyze phi: ");
519           print_generic_expr (vect_dump, phi, TDF_SLIM);
520 	}
521 
522       /* Skip virtual phi's. The data dependences that are associated with
523          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
524 
525       if (!is_gimple_reg (SSA_NAME_VAR (def)))
526 	{
527 	  if (vect_print_dump_info (REPORT_DETAILS))
528 	    fprintf (vect_dump, "virtual phi. skip.");
529 	  continue;
530 	}
531 
532       STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
533 
534       /* Analyze the evolution function.  */
535 
536       access_fn = analyze_scalar_evolution (loop, def);
537 
538       if (!access_fn)
539 	continue;
540 
541       if (vect_print_dump_info (REPORT_DETAILS))
542         {
543            fprintf (vect_dump, "Access function of PHI: ");
544            print_generic_expr (vect_dump, access_fn, TDF_SLIM);
545         }
546 
547       if (vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy))
548 	{
549 	  if (vect_print_dump_info (REPORT_DETAILS))
550 	    fprintf (vect_dump, "Detected induction.");
551 	  STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
552           continue;
553 	}
554 
555       /* TODO: handle invariant phis  */
556 
557       reduc_stmt = vect_is_simple_reduction (loop, phi);
558       if (reduc_stmt)
559         {
560           if (vect_print_dump_info (REPORT_DETAILS))
561             fprintf (vect_dump, "Detected reduction.");
562           STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
563           STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
564                                                         vect_reduction_def;
565         }
566       else
567         if (vect_print_dump_info (REPORT_DETAILS))
568           fprintf (vect_dump, "Unknown def-use cycle pattern.");
569 
570     }
571 
572   return;
573 }
574 
575 
576 /* Function vect_analyze_data_ref_dependence.
577 
578    Return TRUE if there (might) exist a dependence between a memory-reference
579    DRA and a memory-reference DRB.  */
580 
581 static bool
vect_analyze_data_ref_dependence(struct data_dependence_relation * ddr,loop_vec_info loop_vinfo)582 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
583                                   loop_vec_info loop_vinfo)
584 {
585   unsigned int i;
586   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
587   int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
588   struct data_reference *dra = DDR_A (ddr);
589   struct data_reference *drb = DDR_B (ddr);
590   stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
591   stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
592   lambda_vector dist_v;
593   unsigned int loop_depth;
594 
595   if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
596     return false;
597 
598   if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
599     {
600       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
601         {
602           fprintf (vect_dump,
603                    "not vectorized: can't determine dependence between ");
604           print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
605           fprintf (vect_dump, " and ");
606           print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
607         }
608       return true;
609     }
610 
611   if (DDR_NUM_DIST_VECTS (ddr) == 0)
612     {
613       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
614         {
615           fprintf (vect_dump, "not vectorized: bad dist vector for ");
616           print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
617           fprintf (vect_dump, " and ");
618           print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
619         }
620       return true;
621     }
622 
623   loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
624   for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
625     {
626       int dist = dist_v[loop_depth];
627 
628       if (vect_print_dump_info (REPORT_DR_DETAILS))
629 	fprintf (vect_dump, "dependence distance  = %d.", dist);
630 
631       /* Same loop iteration.  */
632       if (dist % vectorization_factor == 0)
633 	{
634 	  /* Two references with distance zero have the same alignment.  */
635 	  VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
636 	  VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
637 	  if (vect_print_dump_info (REPORT_ALIGNMENT))
638 	    fprintf (vect_dump, "accesses have the same alignment.");
639 	  if (vect_print_dump_info (REPORT_DR_DETAILS))
640 	    {
641 	      fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
642 	      print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
643 	      fprintf (vect_dump, " and ");
644 	      print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
645 	    }
646 	  continue;
647 	}
648 
649       if (abs (dist) >= vectorization_factor)
650 	{
651 	  /* Dependence distance does not create dependence, as far as vectorization
652 	     is concerned, in this case.  */
653 	  if (vect_print_dump_info (REPORT_DR_DETAILS))
654 	    fprintf (vect_dump, "dependence distance >= VF.");
655 	  continue;
656 	}
657 
658       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
659 	{
660 	  fprintf (vect_dump,
661 		   "not vectorized: possible dependence between data-refs ");
662 	  print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
663 	  fprintf (vect_dump, " and ");
664 	  print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
665 	}
666 
667       return true;
668     }
669 
670   return false;
671 }
672 
673 
674 /* Function vect_analyze_data_ref_dependences.
675 
676    Examine all the data references in the loop, and make sure there do not
677    exist any data dependences between them.  */
678 
679 static bool
vect_analyze_data_ref_dependences(loop_vec_info loop_vinfo)680 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
681 {
682   unsigned int i;
683   VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
684   struct data_dependence_relation *ddr;
685 
686   if (vect_print_dump_info (REPORT_DETAILS))
687     fprintf (vect_dump, "=== vect_analyze_dependences ===");
688 
689   for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
690     if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
691       return false;
692 
693   return true;
694 }
695 
696 
697 /* Function vect_compute_data_ref_alignment
698 
699    Compute the misalignment of the data reference DR.
700 
701    Output:
702    1. If during the misalignment computation it is found that the data reference
703       cannot be vectorized then false is returned.
704    2. DR_MISALIGNMENT (DR) is defined.
705 
706    FOR NOW: No analysis is actually performed. Misalignment is calculated
707    only for trivial cases. TODO.  */
708 
709 static bool
vect_compute_data_ref_alignment(struct data_reference * dr)710 vect_compute_data_ref_alignment (struct data_reference *dr)
711 {
712   tree stmt = DR_STMT (dr);
713   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
714   tree ref = DR_REF (dr);
715   tree vectype;
716   tree base, base_addr;
717   bool base_aligned;
718   tree misalign;
719   tree aligned_to, alignment;
720 
721   if (vect_print_dump_info (REPORT_DETAILS))
722     fprintf (vect_dump, "vect_compute_data_ref_alignment:");
723 
724   /* Initialize misalignment to unknown.  */
725   DR_MISALIGNMENT (dr) = -1;
726 
727   misalign = DR_OFFSET_MISALIGNMENT (dr);
728   aligned_to = DR_ALIGNED_TO (dr);
729   base_addr = DR_BASE_ADDRESS (dr);
730   base = build_fold_indirect_ref (base_addr);
731   vectype = STMT_VINFO_VECTYPE (stmt_info);
732   alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
733 
734   if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
735       || !misalign)
736     {
737       if (vect_print_dump_info (REPORT_DETAILS))
738 	{
739 	  fprintf (vect_dump, "Unknown alignment for access: ");
740 	  print_generic_expr (vect_dump, base, TDF_SLIM);
741 	}
742       return true;
743     }
744 
745   if ((DECL_P (base)
746        && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
747 				alignment) >= 0)
748       || (TREE_CODE (base_addr) == SSA_NAME
749 	  && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
750 						      TREE_TYPE (base_addr)))),
751 				   alignment) >= 0))
752     base_aligned = true;
753   else
754     base_aligned = false;
755 
756   if (!base_aligned)
757     {
758       /* Do not change the alignment of global variables if
759 	 flag_section_anchors is enabled.  */
760       if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
761 	  || (TREE_STATIC (base) && flag_section_anchors))
762 	{
763 	  if (vect_print_dump_info (REPORT_DETAILS))
764 	    {
765 	      fprintf (vect_dump, "can't force alignment of ref: ");
766 	      print_generic_expr (vect_dump, ref, TDF_SLIM);
767 	    }
768 	  return true;
769 	}
770 
771       /* Force the alignment of the decl.
772 	 NOTE: This is the only change to the code we make during
773 	 the analysis phase, before deciding to vectorize the loop.  */
774       if (vect_print_dump_info (REPORT_DETAILS))
775 	fprintf (vect_dump, "force alignment");
776       DECL_ALIGN (base) = TYPE_ALIGN (vectype);
777       DECL_USER_ALIGN (base) = 1;
778     }
779 
780   /* At this point we assume that the base is aligned.  */
781   gcc_assert (base_aligned
782 	      || (TREE_CODE (base) == VAR_DECL
783 		  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
784 
785   /* Modulo alignment.  */
786   misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
787 
788   if (!host_integerp (misalign, 1))
789     {
790       /* Negative or overflowed misalignment value.  */
791       if (vect_print_dump_info (REPORT_DETAILS))
792 	fprintf (vect_dump, "unexpected misalign value");
793       return false;
794     }
795 
796   DR_MISALIGNMENT (dr) = TREE_INT_CST_LOW (misalign);
797 
798   if (vect_print_dump_info (REPORT_DETAILS))
799     {
800       fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
801       print_generic_expr (vect_dump, ref, TDF_SLIM);
802     }
803 
804   return true;
805 }
806 
807 
808 /* Function vect_compute_data_refs_alignment
809 
810    Compute the misalignment of data references in the loop.
811    Return FALSE if a data reference is found that cannot be vectorized.  */
812 
813 static bool
vect_compute_data_refs_alignment(loop_vec_info loop_vinfo)814 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
815 {
816   VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
817   struct data_reference *dr;
818   unsigned int i;
819 
820   for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
821     if (!vect_compute_data_ref_alignment (dr))
822       return false;
823 
824   return true;
825 }
826 
827 
828 /* Function vect_update_misalignment_for_peel
829 
830    DR - the data reference whose misalignment is to be adjusted.
831    DR_PEEL - the data reference whose misalignment is being made
832              zero in the vector loop by the peel.
833    NPEEL - the number of iterations in the peel loop if the misalignment
834            of DR_PEEL is known at compile time.  */
835 
836 static void
vect_update_misalignment_for_peel(struct data_reference * dr,struct data_reference * dr_peel,int npeel)837 vect_update_misalignment_for_peel (struct data_reference *dr,
838                                    struct data_reference *dr_peel, int npeel)
839 {
840   unsigned int i;
841   int drsize;
842   VEC(dr_p,heap) *same_align_drs;
843   struct data_reference *current_dr;
844 
845   if (known_alignment_for_access_p (dr)
846       && DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr_peel))
847     {
848       DR_MISALIGNMENT (dr) = 0;
849       return;
850     }
851 
852   /* It can be assumed that the data refs with the same alignment as dr_peel
853      are aligned in the vector loop.  */
854   same_align_drs
855     = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
856   for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
857     {
858       if (current_dr != dr)
859         continue;
860       gcc_assert (DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr_peel));
861       DR_MISALIGNMENT (dr) = 0;
862       return;
863     }
864 
865   if (known_alignment_for_access_p (dr)
866       && known_alignment_for_access_p (dr_peel))
867     {
868       drsize = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
869       DR_MISALIGNMENT (dr) += npeel * drsize;
870       DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
871       return;
872     }
873 
874   DR_MISALIGNMENT (dr) = -1;
875 }
876 
877 
878 /* Function vect_verify_datarefs_alignment
879 
880    Return TRUE if all data references in the loop can be
881    handled with respect to alignment.  */
882 
883 static bool
vect_verify_datarefs_alignment(loop_vec_info loop_vinfo)884 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
885 {
886   VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
887   struct data_reference *dr;
888   enum dr_alignment_support supportable_dr_alignment;
889   unsigned int i;
890 
891   for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
892     {
893       supportable_dr_alignment = vect_supportable_dr_alignment (dr);
894       if (!supportable_dr_alignment)
895         {
896           if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
897             {
898               if (DR_IS_READ (dr))
899                 fprintf (vect_dump,
900                          "not vectorized: unsupported unaligned load.");
901               else
902                 fprintf (vect_dump,
903                          "not vectorized: unsupported unaligned store.");
904             }
905           return false;
906         }
907       if (supportable_dr_alignment != dr_aligned
908           && vect_print_dump_info (REPORT_ALIGNMENT))
909         fprintf (vect_dump, "Vectorizing an unaligned access.");
910     }
911   return true;
912 }
913 
914 
915 /* Function vector_alignment_reachable_p
916 
917    Return true if vector alignment for DR is reachable by peeling
918    a few loop iterations.  Return false otherwise.  */
919 
920 static bool
vector_alignment_reachable_p(struct data_reference * dr)921 vector_alignment_reachable_p (struct data_reference *dr)
922 {
923   tree stmt = DR_STMT (dr);
924   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
925   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
926 
927   /* If misalignment is known at the compile time then allow peeling
928      only if natural alignment is reachable through peeling.  */
929   if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
930     {
931       HOST_WIDE_INT elmsize =
932 		int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
933       if (vect_print_dump_info (REPORT_DETAILS))
934 	{
935 	  fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
936 	  fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
937 	}
938       if (DR_MISALIGNMENT (dr) % elmsize)
939 	{
940 	  if (vect_print_dump_info (REPORT_DETAILS))
941 	    fprintf (vect_dump, "data size does not divide the misalignment.\n");
942 	  return false;
943 	}
944     }
945 
946   if (!known_alignment_for_access_p (dr))
947     {
948       tree type = (TREE_TYPE (DR_REF (dr)));
949       tree ba = DR_BASE_OBJECT (dr);
950       bool is_packed = false;
951 
952       if (ba)
953 	is_packed = contains_packed_reference (ba);
954 
955       if (vect_print_dump_info (REPORT_DETAILS))
956 	fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
957       if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
958 	return true;
959       else
960 	return false;
961     }
962 
963   return true;
964 }
965 
966 /* Function vect_enhance_data_refs_alignment
967 
968    This pass will use loop versioning and loop peeling in order to enhance
969    the alignment of data references in the loop.
970 
971    FOR NOW: we assume that whatever versioning/peeling takes place, only the
972    original loop is to be vectorized; Any other loops that are created by
973    the transformations performed in this pass - are not supposed to be
974    vectorized. This restriction will be relaxed.
975 
976    This pass will require a cost model to guide it whether to apply peeling
977    or versioning or a combination of the two. For example, the scheme that
978    intel uses when given a loop with several memory accesses, is as follows:
979    choose one memory access ('p') which alignment you want to force by doing
980    peeling. Then, either (1) generate a loop in which 'p' is aligned and all
981    other accesses are not necessarily aligned, or (2) use loop versioning to
982    generate one loop in which all accesses are aligned, and another loop in
983    which only 'p' is necessarily aligned.
984 
985    ("Automatic Intra-Register Vectorization for the Intel Architecture",
986    Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
987    Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
988 
989    Devising a cost model is the most critical aspect of this work. It will
990    guide us on which access to peel for, whether to use loop versioning, how
991    many versions to create, etc. The cost model will probably consist of
992    generic considerations as well as target specific considerations (on
993    powerpc for example, misaligned stores are more painful than misaligned
994    loads).
995 
996    Here are the general steps involved in alignment enhancements:
997 
998      -- original loop, before alignment analysis:
999 	for (i=0; i<N; i++){
1000 	  x = q[i];			# DR_MISALIGNMENT(q) = unknown
1001 	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
1002 	}
1003 
1004      -- After vect_compute_data_refs_alignment:
1005 	for (i=0; i<N; i++){
1006 	  x = q[i];			# DR_MISALIGNMENT(q) = 3
1007 	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
1008 	}
1009 
1010      -- Possibility 1: we do loop versioning:
1011      if (p is aligned) {
1012 	for (i=0; i<N; i++){	# loop 1A
1013 	  x = q[i];			# DR_MISALIGNMENT(q) = 3
1014 	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
1015 	}
1016      }
1017      else {
1018 	for (i=0; i<N; i++){	# loop 1B
1019 	  x = q[i];			# DR_MISALIGNMENT(q) = 3
1020 	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
1021 	}
1022      }
1023 
1024      -- Possibility 2: we do loop peeling:
1025      for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
1026 	x = q[i];
1027 	p[i] = y;
1028      }
1029      for (i = 3; i < N; i++){	# loop 2A
1030 	x = q[i];			# DR_MISALIGNMENT(q) = 0
1031 	p[i] = y;			# DR_MISALIGNMENT(p) = unknown
1032      }
1033 
1034      -- Possibility 3: combination of loop peeling and versioning:
1035      for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
1036 	x = q[i];
1037 	p[i] = y;
1038      }
1039      if (p is aligned) {
1040 	for (i = 3; i<N; i++){	# loop 3A
1041 	  x = q[i];			# DR_MISALIGNMENT(q) = 0
1042 	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
1043 	}
1044      }
1045      else {
1046 	for (i = 3; i<N; i++){	# loop 3B
1047 	  x = q[i];			# DR_MISALIGNMENT(q) = 0
1048 	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
1049 	}
1050      }
1051 
1052      These loops are later passed to loop_transform to be vectorized. The
1053      vectorizer will use the alignment information to guide the transformation
1054      (whether to generate regular loads/stores, or with special handling for
1055      misalignment).  */
1056 
1057 static bool
vect_enhance_data_refs_alignment(loop_vec_info loop_vinfo)1058 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1059 {
1060   VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1061   enum dr_alignment_support supportable_dr_alignment;
1062   struct data_reference *dr0 = NULL;
1063   struct data_reference *dr;
1064   unsigned int i;
1065   bool do_peeling = false;
1066   bool do_versioning = false;
1067   bool stat;
1068 
1069   /* While cost model enhancements are expected in the future, the high level
1070      view of the code at this time is as follows:
1071 
1072      A) If there is a misaligned write then see if peeling to align this write
1073         can make all data references satisfy vect_supportable_dr_alignment.
1074         If so, update data structures as needed and return true.  Note that
1075         at this time vect_supportable_dr_alignment is known to return false
1076         for a a misaligned write.
1077 
1078      B) If peeling wasn't possible and there is a data reference with an
1079         unknown misalignment that does not satisfy vect_supportable_dr_alignment
1080         then see if loop versioning checks can be used to make all data
1081         references satisfy vect_supportable_dr_alignment.  If so, update
1082         data structures as needed and return true.
1083 
1084      C) If neither peeling nor versioning were successful then return false if
1085         any data reference does not satisfy vect_supportable_dr_alignment.
1086 
1087      D) Return true (all data references satisfy vect_supportable_dr_alignment).
1088 
1089      Note, Possibility 3 above (which is peeling and versioning together) is not
1090      being done at this time.  */
1091 
1092   /* (1) Peeling to force alignment.  */
1093 
1094   /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1095      Considerations:
1096      + How many accesses will become aligned due to the peeling
1097      - How many accesses will become unaligned due to the peeling,
1098        and the cost of misaligned accesses.
1099      - The cost of peeling (the extra runtime checks, the increase
1100        in code size).
1101 
1102      The scheme we use FORNOW: peel to force the alignment of the first
1103      misaligned store in the loop.
1104      Rationale: misaligned stores are not yet supported.
1105 
1106      TODO: Use a cost model.  */
1107 
1108   for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1109     if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1110       {
1111         do_peeling = vector_alignment_reachable_p (dr);
1112         if (do_peeling)
1113           dr0 = dr;
1114         if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
1115           fprintf (vect_dump, "vector alignment may not be reachable");
1116 	break;
1117       }
1118 
1119   /* Often peeling for alignment will require peeling for loop-bound, which in
1120      turn requires that we know how to adjust the loop ivs after the loop.  */
1121   if (!vect_can_advance_ivs_p (loop_vinfo))
1122     do_peeling = false;
1123 
1124   if (do_peeling)
1125     {
1126       int mis;
1127       int npeel = 0;
1128 
1129       if (known_alignment_for_access_p (dr0))
1130         {
1131           /* Since it's known at compile time, compute the number of iterations
1132              in the peeled loop (the peeling factor) for use in updating
1133              DR_MISALIGNMENT values.  The peeling factor is the vectorization
1134              factor minus the misalignment as an element count.  */
1135           mis = DR_MISALIGNMENT (dr0);
1136           mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1137           npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
1138         }
1139 
1140       /* Ensure that all data refs can be vectorized after the peel.  */
1141       for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1142         {
1143           int save_misalignment;
1144 
1145 	  if (dr == dr0)
1146 	    continue;
1147 
1148 	  save_misalignment = DR_MISALIGNMENT (dr);
1149 	  vect_update_misalignment_for_peel (dr, dr0, npeel);
1150 	  supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1151 	  DR_MISALIGNMENT (dr) = save_misalignment;
1152 
1153 	  if (!supportable_dr_alignment)
1154 	    {
1155 	      do_peeling = false;
1156 	      break;
1157 	    }
1158 	}
1159 
1160       if (do_peeling)
1161         {
1162           /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1163              If the misalignment of DR_i is identical to that of dr0 then set
1164              DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of DR_i and
1165              dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1166              by the peeling factor times the element size of DR_i (MOD the
1167              vectorization factor times the size).  Otherwise, the
1168              misalignment of DR_i must be set to unknown.  */
1169 	  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1170 	    if (dr != dr0)
1171 	      vect_update_misalignment_for_peel (dr, dr0, npeel);
1172 
1173           LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1174           LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1175           DR_MISALIGNMENT (dr0) = 0;
1176 	  if (vect_print_dump_info (REPORT_ALIGNMENT))
1177             fprintf (vect_dump, "Alignment of access forced using peeling.");
1178 
1179           if (vect_print_dump_info (REPORT_DETAILS))
1180             fprintf (vect_dump, "Peeling for alignment will be applied.");
1181 
1182 	  stat = vect_verify_datarefs_alignment (loop_vinfo);
1183 	  gcc_assert (stat);
1184           return stat;
1185         }
1186     }
1187 
1188 
1189   /* (2) Versioning to force alignment.  */
1190 
1191   /* Try versioning if:
1192      1) flag_tree_vect_loop_version is TRUE
1193      2) optimize_size is FALSE
1194      3) there is at least one unsupported misaligned data ref with an unknown
1195         misalignment, and
1196      4) all misaligned data refs with a known misalignment are supported, and
1197      5) the number of runtime alignment checks is within reason.  */
1198 
1199   do_versioning = flag_tree_vect_loop_version && (!optimize_size);
1200 
1201   if (do_versioning)
1202     {
1203       for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1204         {
1205           if (aligned_access_p (dr))
1206             continue;
1207 
1208           supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1209 
1210           if (!supportable_dr_alignment)
1211             {
1212               tree stmt;
1213               int mask;
1214               tree vectype;
1215 
1216               if (known_alignment_for_access_p (dr)
1217                   || VEC_length (tree,
1218                                  LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1219                      >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS))
1220                 {
1221                   do_versioning = false;
1222                   break;
1223                 }
1224 
1225               stmt = DR_STMT (dr);
1226               vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1227               gcc_assert (vectype);
1228 
1229               /* The rightmost bits of an aligned address must be zeros.
1230                  Construct the mask needed for this test.  For example,
1231                  GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1232                  mask must be 15 = 0xf. */
1233               mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1234 
1235               /* FORNOW: use the same mask to test all potentially unaligned
1236                  references in the loop.  The vectorizer currently supports
1237                  a single vector size, see the reference to
1238                  GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1239                  vectorization factor is computed.  */
1240               gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1241                           || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1242               LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1243               VEC_safe_push (tree, heap,
1244                              LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1245                              DR_STMT (dr));
1246             }
1247         }
1248 
1249       /* Versioning requires at least one misaligned data reference.  */
1250       if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
1251         do_versioning = false;
1252       else if (!do_versioning)
1253         VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1254     }
1255 
1256   if (do_versioning)
1257     {
1258       VEC(tree,heap) *may_misalign_stmts
1259         = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1260       tree stmt;
1261 
1262       /* It can now be assumed that the data references in the statements
1263          in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1264          of the loop being vectorized.  */
1265       for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
1266         {
1267           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1268           dr = STMT_VINFO_DATA_REF (stmt_info);
1269           DR_MISALIGNMENT (dr) = 0;
1270 	  if (vect_print_dump_info (REPORT_ALIGNMENT))
1271             fprintf (vect_dump, "Alignment of access forced using versioning.");
1272         }
1273 
1274       if (vect_print_dump_info (REPORT_DETAILS))
1275         fprintf (vect_dump, "Versioning for alignment will be applied.");
1276 
1277       /* Peeling and versioning can't be done together at this time.  */
1278       gcc_assert (! (do_peeling && do_versioning));
1279 
1280       stat = vect_verify_datarefs_alignment (loop_vinfo);
1281       gcc_assert (stat);
1282       return stat;
1283     }
1284 
1285   /* This point is reached if neither peeling nor versioning is being done.  */
1286   gcc_assert (! (do_peeling || do_versioning));
1287 
1288   stat = vect_verify_datarefs_alignment (loop_vinfo);
1289   return stat;
1290 }
1291 
1292 
1293 /* Function vect_analyze_data_refs_alignment
1294 
1295    Analyze the alignment of the data-references in the loop.
1296    Return FALSE if a data reference is found that cannot be vectorized.  */
1297 
1298 static bool
vect_analyze_data_refs_alignment(loop_vec_info loop_vinfo)1299 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
1300 {
1301   if (vect_print_dump_info (REPORT_DETAILS))
1302     fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1303 
1304   if (!vect_compute_data_refs_alignment (loop_vinfo))
1305     {
1306       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1307 	fprintf (vect_dump,
1308 		 "not vectorized: can't calculate alignment for data ref.");
1309       return false;
1310     }
1311 
1312   return true;
1313 }
1314 
1315 
1316 /* Function vect_analyze_data_ref_access.
1317 
1318    Analyze the access pattern of the data-reference DR. For now, a data access
1319    has to be consecutive to be considered vectorizable.  */
1320 
1321 static bool
vect_analyze_data_ref_access(struct data_reference * dr)1322 vect_analyze_data_ref_access (struct data_reference *dr)
1323 {
1324   tree step = DR_STEP (dr);
1325   tree scalar_type = TREE_TYPE (DR_REF (dr));
1326 
1327   if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1328     {
1329       if (vect_print_dump_info (REPORT_DETAILS))
1330 	fprintf (vect_dump, "not consecutive access");
1331       return false;
1332     }
1333   return true;
1334 }
1335 
1336 
1337 /* Function vect_analyze_data_ref_accesses.
1338 
1339    Analyze the access pattern of all the data references in the loop.
1340 
1341    FORNOW: the only access pattern that is considered vectorizable is a
1342 	   simple step 1 (consecutive) access.
1343 
1344    FORNOW: handle only arrays and pointer accesses.  */
1345 
1346 static bool
vect_analyze_data_ref_accesses(loop_vec_info loop_vinfo)1347 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
1348 {
1349   unsigned int i;
1350   VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1351   struct data_reference *dr;
1352 
1353   if (vect_print_dump_info (REPORT_DETAILS))
1354     fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
1355 
1356   for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1357     if (!vect_analyze_data_ref_access (dr))
1358       {
1359 	if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1360 	  fprintf (vect_dump, "not vectorized: complicated access pattern.");
1361 	return false;
1362       }
1363 
1364   return true;
1365 }
1366 
1367 
1368 /* Function vect_analyze_data_refs.
1369 
1370   Find all the data references in the loop.
1371 
1372    The general structure of the analysis of data refs in the vectorizer is as
1373    follows:
1374    1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
1375       find and analyze all data-refs in the loop and their dependences.
1376    2- vect_analyze_dependences(): apply dependence testing using ddrs.
1377    3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1378    4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1379 
1380 */
1381 
1382 static bool
vect_analyze_data_refs(loop_vec_info loop_vinfo)1383 vect_analyze_data_refs (loop_vec_info loop_vinfo)
1384 {
1385   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1386   unsigned int i;
1387   VEC (data_reference_p, heap) *datarefs;
1388   struct data_reference *dr;
1389   tree scalar_type;
1390 
1391   if (vect_print_dump_info (REPORT_DETAILS))
1392     fprintf (vect_dump, "=== vect_analyze_data_refs ===");
1393 
1394   compute_data_dependences_for_loop (loop, false,
1395                                      &LOOP_VINFO_DATAREFS (loop_vinfo),
1396                                      &LOOP_VINFO_DDRS (loop_vinfo));
1397 
1398   /* Go through the data-refs, check that the analysis succeeded. Update pointer
1399      from stmt_vec_info struct to DR and vectype.  */
1400   datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1401 
1402   for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1403     {
1404       tree stmt;
1405       stmt_vec_info stmt_info;
1406 
1407       if (!dr || !DR_REF (dr))
1408         {
1409           if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1410 	    fprintf (vect_dump, "not vectorized: unhandled data-ref ");
1411           return false;
1412         }
1413 
1414       /* Update DR field in stmt_vec_info struct.  */
1415       stmt = DR_STMT (dr);
1416       stmt_info = vinfo_for_stmt (stmt);
1417 
1418       if (STMT_VINFO_DATA_REF (stmt_info))
1419         {
1420           if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1421             {
1422               fprintf (vect_dump,
1423                        "not vectorized: more than one data ref in stmt: ");
1424               print_generic_expr (vect_dump, stmt, TDF_SLIM);
1425             }
1426           return false;
1427         }
1428       STMT_VINFO_DATA_REF (stmt_info) = dr;
1429 
1430       /* Check that analysis of the data-ref succeeded.  */
1431       if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
1432           || !DR_STEP (dr))
1433         {
1434           if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1435             {
1436               fprintf (vect_dump, "not vectorized: data ref analysis failed ");
1437               print_generic_expr (vect_dump, stmt, TDF_SLIM);
1438             }
1439           return false;
1440         }
1441       if (!DR_MEMTAG (dr))
1442         {
1443           if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1444             {
1445               fprintf (vect_dump, "not vectorized: no memory tag for ");
1446               print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1447             }
1448           return false;
1449         }
1450 
1451       /* Set vectype for STMT.  */
1452       scalar_type = TREE_TYPE (DR_REF (dr));
1453       STMT_VINFO_VECTYPE (stmt_info) =
1454                 get_vectype_for_scalar_type (scalar_type);
1455       if (!STMT_VINFO_VECTYPE (stmt_info))
1456         {
1457           if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1458             {
1459               fprintf (vect_dump,
1460                        "not vectorized: no vectype for stmt: ");
1461               print_generic_expr (vect_dump, stmt, TDF_SLIM);
1462               fprintf (vect_dump, " scalar_type: ");
1463               print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
1464             }
1465           return false;
1466         }
1467     }
1468 
1469   return true;
1470 }
1471 
1472 
1473 /* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
1474 
1475 /* Function vect_mark_relevant.
1476 
1477    Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */
1478 
1479 static void
vect_mark_relevant(VEC (tree,heap)** worklist,tree stmt,bool relevant_p,bool live_p)1480 vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
1481 		    bool relevant_p, bool live_p)
1482 {
1483   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1484   bool save_relevant_p = STMT_VINFO_RELEVANT_P (stmt_info);
1485   bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
1486 
1487   if (vect_print_dump_info (REPORT_DETAILS))
1488     fprintf (vect_dump, "mark relevant %d, live %d.",relevant_p, live_p);
1489 
1490   if (STMT_VINFO_IN_PATTERN_P (stmt_info))
1491     {
1492       tree pattern_stmt;
1493 
1494       /* This is the last stmt in a sequence that was detected as a
1495          pattern that can potentially be vectorized.  Don't mark the stmt
1496          as relevant/live because it's not going to vectorized.
1497          Instead mark the pattern-stmt that replaces it.  */
1498       if (vect_print_dump_info (REPORT_DETAILS))
1499         fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
1500       pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1501       stmt_info = vinfo_for_stmt (pattern_stmt);
1502       gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
1503       save_relevant_p = STMT_VINFO_RELEVANT_P (stmt_info);
1504       save_live_p = STMT_VINFO_LIVE_P (stmt_info);
1505       stmt = pattern_stmt;
1506     }
1507 
1508   STMT_VINFO_LIVE_P (stmt_info) |= live_p;
1509   STMT_VINFO_RELEVANT_P (stmt_info) |= relevant_p;
1510 
1511   if (TREE_CODE (stmt) == PHI_NODE)
1512     /* Don't put phi-nodes in the worklist. Phis that are marked relevant
1513        or live will fail vectorization later on.  */
1514     return;
1515 
1516   if (STMT_VINFO_RELEVANT_P (stmt_info) == save_relevant_p
1517       && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
1518     {
1519       if (vect_print_dump_info (REPORT_DETAILS))
1520         fprintf (vect_dump, "already marked relevant/live.");
1521       return;
1522     }
1523 
1524   VEC_safe_push (tree, heap, *worklist, stmt);
1525 }
1526 
1527 
1528 /* Function vect_stmt_relevant_p.
1529 
1530    Return true if STMT in loop that is represented by LOOP_VINFO is
1531    "relevant for vectorization".
1532 
1533    A stmt is considered "relevant for vectorization" if:
1534    - it has uses outside the loop.
1535    - it has vdefs (it alters memory).
1536    - control stmts in the loop (except for the exit condition).
1537 
1538    CHECKME: what other side effects would the vectorizer allow?  */
1539 
1540 static bool
vect_stmt_relevant_p(tree stmt,loop_vec_info loop_vinfo,bool * relevant_p,bool * live_p)1541 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
1542 		      bool *relevant_p, bool *live_p)
1543 {
1544   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1545   ssa_op_iter op_iter;
1546   imm_use_iterator imm_iter;
1547   use_operand_p use_p;
1548   def_operand_p def_p;
1549 
1550   *relevant_p = false;
1551   *live_p = false;
1552 
1553   /* cond stmt other than loop exit cond.  */
1554   if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
1555     *relevant_p = true;
1556 
1557   /* changing memory.  */
1558   if (TREE_CODE (stmt) != PHI_NODE)
1559     if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
1560       {
1561 	if (vect_print_dump_info (REPORT_DETAILS))
1562 	  fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
1563 	*relevant_p = true;
1564       }
1565 
1566   /* uses outside the loop.  */
1567   FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
1568     {
1569       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
1570 	{
1571 	  basic_block bb = bb_for_stmt (USE_STMT (use_p));
1572 	  if (!flow_bb_inside_loop_p (loop, bb))
1573 	    {
1574 	      if (vect_print_dump_info (REPORT_DETAILS))
1575 		fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
1576 
1577 	      /* We expect all such uses to be in the loop exit phis
1578 		 (because of loop closed form)   */
1579 	      gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
1580 	      gcc_assert (bb == loop->single_exit->dest);
1581 
1582               *live_p = true;
1583 	    }
1584 	}
1585     }
1586 
1587   return (*live_p || *relevant_p);
1588 }
1589 
1590 
1591 /* Function vect_mark_stmts_to_be_vectorized.
1592 
1593    Not all stmts in the loop need to be vectorized. For example:
1594 
1595      for i...
1596        for j...
1597    1.    T0 = i + j
1598    2.	 T1 = a[T0]
1599 
1600    3.    j = j + 1
1601 
1602    Stmt 1 and 3 do not need to be vectorized, because loop control and
1603    addressing of vectorized data-refs are handled differently.
1604 
1605    This pass detects such stmts.  */
1606 
1607 static bool
vect_mark_stmts_to_be_vectorized(loop_vec_info loop_vinfo)1608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
1609 {
1610   VEC(tree,heap) *worklist;
1611   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1612   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1613   unsigned int nbbs = loop->num_nodes;
1614   block_stmt_iterator si;
1615   tree stmt, use;
1616   stmt_ann_t ann;
1617   ssa_op_iter iter;
1618   unsigned int i;
1619   stmt_vec_info stmt_vinfo;
1620   basic_block bb;
1621   tree phi;
1622   bool relevant_p, live_p;
1623   tree def, def_stmt;
1624   enum vect_def_type dt;
1625 
1626   if (vect_print_dump_info (REPORT_DETAILS))
1627     fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
1628 
1629   worklist = VEC_alloc (tree, heap, 64);
1630 
1631   /* 1. Init worklist.  */
1632 
1633   bb = loop->header;
1634   for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1635     {
1636       if (vect_print_dump_info (REPORT_DETAILS))
1637         {
1638           fprintf (vect_dump, "init: phi relevant? ");
1639           print_generic_expr (vect_dump, phi, TDF_SLIM);
1640         }
1641 
1642       if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant_p, &live_p))
1643 	vect_mark_relevant (&worklist, phi, relevant_p, live_p);
1644     }
1645 
1646   for (i = 0; i < nbbs; i++)
1647     {
1648       bb = bbs[i];
1649       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1650 	{
1651 	  stmt = bsi_stmt (si);
1652 
1653 	  if (vect_print_dump_info (REPORT_DETAILS))
1654 	    {
1655 	      fprintf (vect_dump, "init: stmt relevant? ");
1656 	      print_generic_expr (vect_dump, stmt, TDF_SLIM);
1657 	    }
1658 
1659 	  if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant_p, &live_p))
1660             vect_mark_relevant (&worklist, stmt, relevant_p, live_p);
1661 	}
1662     }
1663 
1664 
1665   /* 2. Process_worklist */
1666 
1667   while (VEC_length (tree, worklist) > 0)
1668     {
1669       stmt = VEC_pop (tree, worklist);
1670 
1671       if (vect_print_dump_info (REPORT_DETAILS))
1672 	{
1673           fprintf (vect_dump, "worklist: examine stmt: ");
1674           print_generic_expr (vect_dump, stmt, TDF_SLIM);
1675 	}
1676 
1677       /* Examine the USEs of STMT. For each ssa-name USE thta is defined
1678          in the loop, mark the stmt that defines it (DEF_STMT) as
1679          relevant/irrelevant and live/dead according to the liveness and
1680          relevance properties of STMT.
1681        */
1682 
1683       gcc_assert (TREE_CODE (stmt) != PHI_NODE);
1684 
1685       ann = stmt_ann (stmt);
1686       stmt_vinfo = vinfo_for_stmt (stmt);
1687 
1688       relevant_p = STMT_VINFO_RELEVANT_P (stmt_vinfo);
1689       live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
1690 
1691       /* Generally, the liveness and relevance properties of STMT are
1692          propagated to the DEF_STMTs of its USEs:
1693              STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
1694              STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- relevant_p
1695 
1696          Exceptions:
1697 
1698 	 (case 1)
1699            If USE is used only for address computations (e.g. array indexing),
1700            which does not need to be directly vectorized, then the
1701            liveness/relevance of the respective DEF_STMT is left unchanged.
1702 
1703 	 (case 2)
1704            If STMT has been identified as defining a reduction variable, then
1705 	   we have two cases:
1706 	   (case 2.1)
1707 	     The last use of STMT is the reduction-variable, which is defined
1708 	     by a loop-header-phi. We don't want to mark the phi as live or
1709 	     relevant (because it does not need to be vectorized, it is handled
1710              as part of the vectorization of the reduction), so in this case we
1711 	     skip the call to vect_mark_relevant.
1712 	   (case 2.2)
1713 	     The rest of the uses of STMT are defined in the loop body. For
1714              the def_stmt of these uses we want to set liveness/relevance
1715              as follows:
1716                STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false
1717                STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- true
1718              because even though STMT is classified as live (since it defines a
1719              value that is used across loop iterations) and irrelevant (since it
1720              is not used inside the loop), it will be vectorized, and therefore
1721              the corresponding DEF_STMTs need to marked as relevant.
1722        */
1723 
1724       /* case 2.2:  */
1725       if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
1726         {
1727           gcc_assert (!relevant_p && live_p);
1728           relevant_p = true;
1729           live_p = false;
1730         }
1731 
1732       FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
1733 	{
1734 	  /* case 1: we are only interested in uses that need to be vectorized.
1735 	     Uses that are used for address computation are not considered
1736 	     relevant.
1737 	   */
1738 	  if (!exist_non_indexing_operands_for_use_p (use, stmt))
1739 	    continue;
1740 
1741 	  if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
1742             {
1743               if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1744                 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
1745 	      VEC_free (tree, heap, worklist);
1746               return false;
1747             }
1748 
1749 	  if (!def_stmt || IS_EMPTY_STMT (def_stmt))
1750 	    continue;
1751 
1752           if (vect_print_dump_info (REPORT_DETAILS))
1753             {
1754               fprintf (vect_dump, "worklist: examine use %d: ", i);
1755               print_generic_expr (vect_dump, use, TDF_SLIM);
1756             }
1757 
1758 	  bb = bb_for_stmt (def_stmt);
1759           if (!flow_bb_inside_loop_p (loop, bb))
1760             continue;
1761 
1762 	  /* case 2.1: the reduction-use does not mark the defining-phi
1763 	     as relevant.  */
1764 	  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
1765 	      && TREE_CODE (def_stmt) == PHI_NODE)
1766 	    continue;
1767 
1768 	  vect_mark_relevant (&worklist, def_stmt, relevant_p, live_p);
1769 	}
1770     }				/* while worklist */
1771 
1772   VEC_free (tree, heap, worklist);
1773   return true;
1774 }
1775 
1776 
1777 /* Function vect_can_advance_ivs_p
1778 
1779    In case the number of iterations that LOOP iterates is unknown at compile
1780    time, an epilog loop will be generated, and the loop induction variables
1781    (IVs) will be "advanced" to the value they are supposed to take just before
1782    the epilog loop.  Here we check that the access function of the loop IVs
1783    and the expression that represents the loop bound are simple enough.
1784    These restrictions will be relaxed in the future.  */
1785 
1786 static bool
vect_can_advance_ivs_p(loop_vec_info loop_vinfo)1787 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1788 {
1789   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1790   basic_block bb = loop->header;
1791   tree phi;
1792 
1793   /* Analyze phi functions of the loop header.  */
1794 
1795   if (vect_print_dump_info (REPORT_DETAILS))
1796     fprintf (vect_dump, "=== vect_can_advance_ivs_p ===");
1797 
1798   for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1799     {
1800       tree access_fn = NULL;
1801       tree evolution_part;
1802 
1803       if (vect_print_dump_info (REPORT_DETAILS))
1804 	{
1805           fprintf (vect_dump, "Analyze phi: ");
1806           print_generic_expr (vect_dump, phi, TDF_SLIM);
1807 	}
1808 
1809       /* Skip virtual phi's. The data dependences that are associated with
1810          virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
1811 
1812       if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1813 	{
1814 	  if (vect_print_dump_info (REPORT_DETAILS))
1815 	    fprintf (vect_dump, "virtual phi. skip.");
1816 	  continue;
1817 	}
1818 
1819       /* Skip reduction phis.  */
1820 
1821       if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1822         {
1823           if (vect_print_dump_info (REPORT_DETAILS))
1824             fprintf (vect_dump, "reduc phi. skip.");
1825           continue;
1826         }
1827 
1828       /* Analyze the evolution function.  */
1829 
1830       access_fn = instantiate_parameters
1831 	(loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1832 
1833       if (!access_fn)
1834 	{
1835 	  if (vect_print_dump_info (REPORT_DETAILS))
1836 	    fprintf (vect_dump, "No Access function.");
1837 	  return false;
1838 	}
1839 
1840       if (vect_print_dump_info (REPORT_DETAILS))
1841         {
1842 	  fprintf (vect_dump, "Access function of PHI: ");
1843 	  print_generic_expr (vect_dump, access_fn, TDF_SLIM);
1844         }
1845 
1846       evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
1847 
1848       if (evolution_part == NULL_TREE)
1849         {
1850 	  if (vect_print_dump_info (REPORT_DETAILS))
1851 	    fprintf (vect_dump, "No evolution.");
1852 	  return false;
1853         }
1854 
1855       /* FORNOW: We do not transform initial conditions of IVs
1856 	 which evolution functions are a polynomial of degree >= 2.  */
1857 
1858       if (tree_is_chrec (evolution_part))
1859 	return false;
1860     }
1861 
1862   return true;
1863 }
1864 
1865 
1866 /* Function vect_get_loop_niters.
1867 
1868    Determine how many iterations the loop is executed.
1869    If an expression that represents the number of iterations
1870    can be constructed, place it in NUMBER_OF_ITERATIONS.
1871    Return the loop exit condition.  */
1872 
1873 static tree
vect_get_loop_niters(struct loop * loop,tree * number_of_iterations)1874 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
1875 {
1876   tree niters;
1877 
1878   if (vect_print_dump_info (REPORT_DETAILS))
1879     fprintf (vect_dump, "=== get_loop_niters ===");
1880 
1881   niters = number_of_iterations_in_loop (loop);
1882 
1883   if (niters != NULL_TREE
1884       && niters != chrec_dont_know)
1885     {
1886       *number_of_iterations = niters;
1887 
1888       if (vect_print_dump_info (REPORT_DETAILS))
1889 	{
1890 	  fprintf (vect_dump, "==> get_loop_niters:" );
1891 	  print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
1892 	}
1893     }
1894 
1895   return get_loop_exit_condition (loop);
1896 }
1897 
1898 
1899 /* Function vect_analyze_loop_form.
1900 
1901    Verify the following restrictions (some may be relaxed in the future):
1902    - it's an inner-most loop
1903    - number of BBs = 2 (which are the loop header and the latch)
1904    - the loop has a pre-header
1905    - the loop has a single entry and exit
1906    - the loop exit condition is simple enough, and the number of iterations
1907      can be analyzed (a countable loop).  */
1908 
1909 static loop_vec_info
vect_analyze_loop_form(struct loop * loop)1910 vect_analyze_loop_form (struct loop *loop)
1911 {
1912   loop_vec_info loop_vinfo;
1913   tree loop_cond;
1914   tree number_of_iterations = NULL;
1915 
1916   if (vect_print_dump_info (REPORT_DETAILS))
1917     fprintf (vect_dump, "=== vect_analyze_loop_form ===");
1918 
1919   if (loop->inner)
1920     {
1921       if (vect_print_dump_info (REPORT_OUTER_LOOPS))
1922         fprintf (vect_dump, "not vectorized: nested loop.");
1923       return NULL;
1924     }
1925 
1926   if (!loop->single_exit
1927       || loop->num_nodes != 2
1928       || EDGE_COUNT (loop->header->preds) != 2)
1929     {
1930       if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1931         {
1932           if (!loop->single_exit)
1933             fprintf (vect_dump, "not vectorized: multiple exits.");
1934           else if (loop->num_nodes != 2)
1935             fprintf (vect_dump, "not vectorized: too many BBs in loop.");
1936           else if (EDGE_COUNT (loop->header->preds) != 2)
1937             fprintf (vect_dump, "not vectorized: too many incoming edges.");
1938         }
1939 
1940       return NULL;
1941     }
1942 
1943   /* We assume that the loop exit condition is at the end of the loop. i.e,
1944      that the loop is represented as a do-while (with a proper if-guard
1945      before the loop if needed), where the loop header contains all the
1946      executable statements, and the latch is empty.  */
1947   if (!empty_block_p (loop->latch)
1948         || phi_nodes (loop->latch))
1949     {
1950       if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1951         fprintf (vect_dump, "not vectorized: unexpected loop form.");
1952       return NULL;
1953     }
1954 
1955   /* Make sure there exists a single-predecessor exit bb:  */
1956   if (!single_pred_p (loop->single_exit->dest))
1957     {
1958       edge e = loop->single_exit;
1959       if (!(e->flags & EDGE_ABNORMAL))
1960 	{
1961 	  split_loop_exit_edge (e);
1962 	  if (vect_print_dump_info (REPORT_DETAILS))
1963 	    fprintf (vect_dump, "split exit edge.");
1964 	}
1965       else
1966 	{
1967 	  if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1968 	    fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
1969 	  return NULL;
1970 	}
1971     }
1972 
1973   if (empty_block_p (loop->header))
1974     {
1975       if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1976         fprintf (vect_dump, "not vectorized: empty loop.");
1977       return NULL;
1978     }
1979 
1980   loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
1981   if (!loop_cond)
1982     {
1983       if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1984 	fprintf (vect_dump, "not vectorized: complicated exit condition.");
1985       return NULL;
1986     }
1987 
1988   if (!number_of_iterations)
1989     {
1990       if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1991 	fprintf (vect_dump,
1992 		 "not vectorized: number of iterations cannot be computed.");
1993       return NULL;
1994     }
1995 
1996   if (chrec_contains_undetermined (number_of_iterations))
1997     {
1998       if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1999         fprintf (vect_dump, "Infinite number of iterations.");
2000       return false;
2001     }
2002 
2003   loop_vinfo = new_loop_vec_info (loop);
2004   LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
2005 
2006   if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2007     {
2008       if (vect_print_dump_info (REPORT_DETAILS))
2009         {
2010           fprintf (vect_dump, "Symbolic number of iterations is ");
2011           print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
2012         }
2013     }
2014   else
2015   if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
2016     {
2017       if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2018         fprintf (vect_dump, "not vectorized: number of iterations = 0.");
2019       return NULL;
2020     }
2021 
2022   LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
2023 
2024   return loop_vinfo;
2025 }
2026 
2027 
2028 /* Function vect_analyze_loop.
2029 
2030    Apply a set of analyses on LOOP, and create a loop_vec_info struct
2031    for it. The different analyses will record information in the
2032    loop_vec_info struct.  */
2033 loop_vec_info
vect_analyze_loop(struct loop * loop)2034 vect_analyze_loop (struct loop *loop)
2035 {
2036   bool ok;
2037   loop_vec_info loop_vinfo;
2038 
2039   if (vect_print_dump_info (REPORT_DETAILS))
2040     fprintf (vect_dump, "===== analyze_loop_nest =====");
2041 
2042   /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.  */
2043 
2044   loop_vinfo = vect_analyze_loop_form (loop);
2045   if (!loop_vinfo)
2046     {
2047       if (vect_print_dump_info (REPORT_DETAILS))
2048 	fprintf (vect_dump, "bad loop form.");
2049       return NULL;
2050     }
2051 
2052   /* Find all data references in the loop (which correspond to vdefs/vuses)
2053      and analyze their evolution in the loop.
2054 
2055      FORNOW: Handle only simple, array references, which
2056      alignment can be forced, and aligned pointer-references.  */
2057 
2058   ok = vect_analyze_data_refs (loop_vinfo);
2059   if (!ok)
2060     {
2061       if (vect_print_dump_info (REPORT_DETAILS))
2062 	fprintf (vect_dump, "bad data references.");
2063       destroy_loop_vec_info (loop_vinfo);
2064       return NULL;
2065     }
2066 
2067   /* Classify all cross-iteration scalar data-flow cycles.
2068      Cross-iteration cycles caused by virtual phis are analyzed separately.  */
2069 
2070   vect_analyze_scalar_cycles (loop_vinfo);
2071 
2072   vect_pattern_recog (loop_vinfo);
2073 
2074   /* Data-flow analysis to detect stmts that do not need to be vectorized.  */
2075 
2076   ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
2077   if (!ok)
2078     {
2079       if (vect_print_dump_info (REPORT_DETAILS))
2080 	fprintf (vect_dump, "unexpected pattern.");
2081       destroy_loop_vec_info (loop_vinfo);
2082       return NULL;
2083     }
2084 
2085   /* Analyze the alignment of the data-refs in the loop.
2086      Fail if a data reference is found that cannot be vectorized.  */
2087 
2088   ok = vect_analyze_data_refs_alignment (loop_vinfo);
2089   if (!ok)
2090     {
2091       if (vect_print_dump_info (REPORT_DETAILS))
2092 	fprintf (vect_dump, "bad data alignment.");
2093       destroy_loop_vec_info (loop_vinfo);
2094       return NULL;
2095     }
2096 
2097   ok = vect_determine_vectorization_factor (loop_vinfo);
2098   if (!ok)
2099     {
2100       if (vect_print_dump_info (REPORT_DETAILS))
2101         fprintf (vect_dump, "can't determine vectorization factor.");
2102       destroy_loop_vec_info (loop_vinfo);
2103       return NULL;
2104     }
2105 
2106   /* Analyze data dependences between the data-refs in the loop.
2107      FORNOW: fail at the first data dependence that we encounter.  */
2108 
2109   ok = vect_analyze_data_ref_dependences (loop_vinfo);
2110   if (!ok)
2111     {
2112       if (vect_print_dump_info (REPORT_DETAILS))
2113 	fprintf (vect_dump, "bad data dependence.");
2114       destroy_loop_vec_info (loop_vinfo);
2115       return NULL;
2116     }
2117 
2118   /* Analyze the access patterns of the data-refs in the loop (consecutive,
2119      complex, etc.). FORNOW: Only handle consecutive access pattern.  */
2120 
2121   ok = vect_analyze_data_ref_accesses (loop_vinfo);
2122   if (!ok)
2123     {
2124       if (vect_print_dump_info (REPORT_DETAILS))
2125 	fprintf (vect_dump, "bad data access.");
2126       destroy_loop_vec_info (loop_vinfo);
2127       return NULL;
2128     }
2129 
2130   /* This pass will decide on using loop versioning and/or loop peeling in
2131      order to enhance the alignment of data references in the loop.  */
2132 
2133   ok = vect_enhance_data_refs_alignment (loop_vinfo);
2134   if (!ok)
2135     {
2136       if (vect_print_dump_info (REPORT_DETAILS))
2137 	fprintf (vect_dump, "bad data alignment.");
2138       destroy_loop_vec_info (loop_vinfo);
2139       return NULL;
2140     }
2141 
2142   /* Scan all the operations in the loop and make sure they are
2143      vectorizable.  */
2144 
2145   ok = vect_analyze_operations (loop_vinfo);
2146   if (!ok)
2147     {
2148       if (vect_print_dump_info (REPORT_DETAILS))
2149 	fprintf (vect_dump, "bad operation or unsupported loop bound.");
2150       destroy_loop_vec_info (loop_vinfo);
2151       return NULL;
2152     }
2153 
2154   LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
2155 
2156   return loop_vinfo;
2157 }
2158