1 /* Natural loop analysis code for GNU compiler.
2    Copyright (C) 2002-2022 Free Software Foundation, Inc.
3 
4 This file is part of GCC.
5 
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
10 
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 for more details.
15 
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3.  If not see
18 <http://www.gnu.org/licenses/>.  */
19 
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "predict.h"
27 #include "memmodel.h"
28 #include "emit-rtl.h"
29 #include "cfgloop.h"
30 #include "explow.h"
31 #include "expr.h"
32 #include "graphds.h"
33 #include "sreal.h"
34 #include "regs.h"
35 #include "function-abi.h"
36 
37 struct target_cfgloop default_target_cfgloop;
38 #if SWITCHABLE_TARGET
39 struct target_cfgloop *this_target_cfgloop = &default_target_cfgloop;
40 #endif
41 
42 /* Checks whether BB is executed exactly once in each LOOP iteration.  */
43 
44 bool
just_once_each_iteration_p(const class loop * loop,const_basic_block bb)45 just_once_each_iteration_p (const class loop *loop, const_basic_block bb)
46 {
47   /* It must be executed at least once each iteration.  */
48   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
49     return false;
50 
51   /* And just once.  */
52   if (bb->loop_father != loop)
53     return false;
54 
55   /* But this was not enough.  We might have some irreducible loop here.  */
56   if (bb->flags & BB_IRREDUCIBLE_LOOP)
57     return false;
58 
59   return true;
60 }
61 
62 /* Marks blocks and edges that are part of non-recognized loops; i.e. we
63    throw away all latch edges and mark blocks inside any remaining cycle.
64    Everything is a bit complicated due to fact we do not want to do this
65    for parts of cycles that only "pass" through some loop -- i.e. for
66    each cycle, we want to mark blocks that belong directly to innermost
67    loop containing the whole cycle.
68 
69    LOOPS is the loop tree.  */
70 
71 #define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block_for_fn (cfun))
72 #define BB_REPR(BB) ((BB)->index + 1)
73 
74 bool
mark_irreducible_loops(void)75 mark_irreducible_loops (void)
76 {
77   basic_block act;
78   struct graph_edge *ge;
79   edge e;
80   edge_iterator ei;
81   int src, dest;
82   unsigned depth;
83   struct graph *g;
84   int num = number_of_loops (cfun);
85   class loop *cloop;
86   bool irred_loop_found = false;
87   int i;
88 
89   gcc_assert (current_loops != NULL);
90 
91   /* Reset the flags.  */
92   FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
93                       EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
94     {
95       act->flags &= ~BB_IRREDUCIBLE_LOOP;
96       FOR_EACH_EDGE (e, ei, act->succs)
97           e->flags &= ~EDGE_IRREDUCIBLE_LOOP;
98     }
99 
100   /* Create the edge lists.  */
101   g = new_graph (last_basic_block_for_fn (cfun) + num);
102 
103   FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun),
104                       EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb)
105     FOR_EACH_EDGE (e, ei, act->succs)
106       {
107           /* Ignore edges to exit.  */
108           if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
109             continue;
110 
111           src = BB_REPR (act);
112           dest = BB_REPR (e->dest);
113 
114           /* Ignore latch edges.  */
115           if (e->dest->loop_father->header == e->dest
116               && dominated_by_p (CDI_DOMINATORS, act, e->dest))
117             continue;
118 
119           /* Edges inside a single loop should be left where they are.  Edges
120              to subloop headers should lead to representative of the subloop,
121              but from the same place.
122 
123              Edges exiting loops should lead from representative
124              of the son of nearest common ancestor of the loops in that
125              act lays.  */
126 
127           if (e->dest->loop_father->header == e->dest)
128             dest = LOOP_REPR (e->dest->loop_father);
129 
130           if (!flow_bb_inside_loop_p (act->loop_father, e->dest))
131             {
132               depth = 1 + loop_depth (find_common_loop (act->loop_father,
133                                                                   e->dest->loop_father));
134               if (depth == loop_depth (act->loop_father))
135                 cloop = act->loop_father;
136               else
137                 cloop = (*act->loop_father->superloops)[depth];
138 
139               src = LOOP_REPR (cloop);
140             }
141 
142           add_edge (g, src, dest)->data = e;
143       }
144 
145   /* Find the strongly connected components.  */
146   graphds_scc (g, NULL);
147 
148   /* Mark the irreducible loops.  */
149   for (i = 0; i < g->n_vertices; i++)
150     for (ge = g->vertices[i].succ; ge; ge = ge->succ_next)
151       {
152           edge real = (edge) ge->data;
153           /* edge E in graph G is irreducible if it connects two vertices in the
154              same scc.  */
155 
156           /* All edges should lead from a component with higher number to the
157              one with lower one.  */
158           gcc_assert (g->vertices[ge->src].component >= g->vertices[ge->dest].component);
159 
160           if (g->vertices[ge->src].component != g->vertices[ge->dest].component)
161             continue;
162 
163           real->flags |= EDGE_IRREDUCIBLE_LOOP;
164           irred_loop_found = true;
165           if (flow_bb_inside_loop_p (real->src->loop_father, real->dest))
166             real->src->flags |= BB_IRREDUCIBLE_LOOP;
167       }
168 
169   free_graph (g);
170 
171   loops_state_set (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS);
172   return irred_loop_found;
173 }
174 
175 /* Counts number of insns inside LOOP.  */
176 int
num_loop_insns(const class loop * loop)177 num_loop_insns (const class loop *loop)
178 {
179   basic_block *bbs, bb;
180   unsigned i, ninsns = 0;
181   rtx_insn *insn;
182 
183   bbs = get_loop_body (loop);
184   for (i = 0; i < loop->num_nodes; i++)
185     {
186       bb = bbs[i];
187       FOR_BB_INSNS (bb, insn)
188           if (NONDEBUG_INSN_P (insn))
189             ninsns++;
190     }
191   free (bbs);
192 
193   if (!ninsns)
194     ninsns = 1;     /* To avoid division by zero.  */
195 
196   return ninsns;
197 }
198 
199 /* Counts number of insns executed on average per iteration LOOP.  */
200 int
average_num_loop_insns(const class loop * loop)201 average_num_loop_insns (const class loop *loop)
202 {
203   basic_block *bbs, bb;
204   unsigned i, binsns;
205   sreal ninsns;
206   rtx_insn *insn;
207 
208   ninsns = 0;
209   bbs = get_loop_body (loop);
210   for (i = 0; i < loop->num_nodes; i++)
211     {
212       bb = bbs[i];
213 
214       binsns = 0;
215       FOR_BB_INSNS (bb, insn)
216           if (NONDEBUG_INSN_P (insn))
217             binsns++;
218 
219       ninsns += (sreal)binsns * bb->count.to_sreal_scale (loop->header->count);
220       /* Avoid overflows.   */
221       if (ninsns > 1000000)
222           {
223             free (bbs);
224             return 1000000;
225           }
226     }
227   free (bbs);
228 
229   int64_t ret = ninsns.to_int ();
230   if (!ret)
231     ret = 1; /* To avoid division by zero.  */
232 
233   return ret;
234 }
235 
236 /* Returns expected number of iterations of LOOP, according to
237    measured or guessed profile.
238 
239    This functions attempts to return "sane" value even if profile
240    information is not good enough to derive osmething.
241    If BY_PROFILE_ONLY is set, this logic is bypassed and function
242    return -1 in those scenarios.  */
243 
244 gcov_type
expected_loop_iterations_unbounded(const class loop * loop,bool * read_profile_p,bool by_profile_only)245 expected_loop_iterations_unbounded (const class loop *loop,
246                                             bool *read_profile_p,
247                                             bool by_profile_only)
248 {
249   edge e;
250   edge_iterator ei;
251   gcov_type expected = -1;
252 
253   if (read_profile_p)
254     *read_profile_p = false;
255 
256   /* If we have no profile at all, use AVG_LOOP_NITER.  */
257   if (profile_status_for_fn (cfun) == PROFILE_ABSENT)
258     {
259       if (by_profile_only)
260           return -1;
261       expected = param_avg_loop_niter;
262     }
263   else if (loop->latch && (loop->latch->count.initialized_p ()
264                                  || loop->header->count.initialized_p ()))
265     {
266       profile_count count_in = profile_count::zero (),
267                         count_latch = profile_count::zero ();
268 
269       FOR_EACH_EDGE (e, ei, loop->header->preds)
270           if (e->src == loop->latch)
271             count_latch = e->count ();
272           else
273             count_in += e->count ();
274 
275       if (!count_latch.initialized_p ())
276           {
277           if (by_profile_only)
278               return -1;
279             expected = param_avg_loop_niter;
280           }
281       else if (!count_in.nonzero_p ())
282           {
283           if (by_profile_only)
284               return -1;
285             expected = count_latch.to_gcov_type () * 2;
286           }
287       else
288           {
289             expected = (count_latch.to_gcov_type () + count_in.to_gcov_type ()
290                           - 1) / count_in.to_gcov_type ();
291             if (read_profile_p
292                 && count_latch.reliable_p () && count_in.reliable_p ())
293               *read_profile_p = true;
294           }
295     }
296   else
297     {
298       if (by_profile_only)
299           return -1;
300       expected = param_avg_loop_niter;
301     }
302 
303   if (!by_profile_only)
304     {
305       HOST_WIDE_INT max = get_max_loop_iterations_int (loop);
306       if (max != -1 && max < expected)
307         return max;
308     }
309 
310   return expected;
311 }
312 
313 /* Returns expected number of LOOP iterations.  The returned value is bounded
314    by REG_BR_PROB_BASE.  */
315 
316 unsigned
expected_loop_iterations(class loop * loop)317 expected_loop_iterations (class loop *loop)
318 {
319   gcov_type expected = expected_loop_iterations_unbounded (loop);
320   return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected);
321 }
322 
323 /* Returns the maximum level of nesting of subloops of LOOP.  */
324 
325 unsigned
get_loop_level(const class loop * loop)326 get_loop_level (const class loop *loop)
327 {
328   const class loop *ploop;
329   unsigned mx = 0, l;
330 
331   for (ploop = loop->inner; ploop; ploop = ploop->next)
332     {
333       l = get_loop_level (ploop);
334       if (l >= mx)
335           mx = l + 1;
336     }
337   return mx;
338 }
339 
340 /* Initialize the constants for computing set costs.  */
341 
342 void
init_set_costs(void)343 init_set_costs (void)
344 {
345   int speed;
346   rtx_insn *seq;
347   rtx reg1 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 1);
348   rtx reg2 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 2);
349   rtx addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 3);
350   rtx mem = validize_mem (gen_rtx_MEM (SImode, addr));
351   unsigned i;
352 
353   target_avail_regs = 0;
354   target_clobbered_regs = 0;
355   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
356     if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i)
357           && !fixed_regs[i])
358       {
359           target_avail_regs++;
360           /* ??? This is only a rough heuristic.  It doesn't cope well
361              with alternative ABIs, but that's an optimization rather than
362              correctness issue.  */
363           if (default_function_abi.clobbers_full_reg_p (i))
364             target_clobbered_regs++;
365       }
366 
367   target_res_regs = 3;
368 
369   for (speed = 0; speed < 2; speed++)
370      {
371       crtl->maybe_hot_insn_p = speed;
372       /* Set up the costs for using extra registers:
373 
374            1) If not many free registers remain, we should prefer having an
375               additional move to decreasing the number of available registers.
376               (TARGET_REG_COST).
377            2) If no registers are available, we need to spill, which may require
378               storing the old value to memory and loading it back
379               (TARGET_SPILL_COST).  */
380 
381       start_sequence ();
382       emit_move_insn (reg1, reg2);
383       seq = get_insns ();
384       end_sequence ();
385       target_reg_cost [speed] = seq_cost (seq, speed);
386 
387       start_sequence ();
388       emit_move_insn (mem, reg1);
389       emit_move_insn (reg2, mem);
390       seq = get_insns ();
391       end_sequence ();
392       target_spill_cost [speed] = seq_cost (seq, speed);
393     }
394   default_rtl_profile ();
395 }
396 
397 /* Estimates cost of increased register pressure caused by making N_NEW new
398    registers live around the loop.  N_OLD is the number of registers live
399    around the loop.  If CALL_P is true, also take into account that
400    call-used registers may be clobbered in the loop body, reducing the
401    number of available registers before we spill.  */
402 
403 unsigned
estimate_reg_pressure_cost(unsigned n_new,unsigned n_old,bool speed,bool call_p)404 estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed,
405                                   bool call_p)
406 {
407   unsigned cost;
408   unsigned regs_needed = n_new + n_old;
409   unsigned available_regs = target_avail_regs;
410 
411   /* If there is a call in the loop body, the call-clobbered registers
412      are not available for loop invariants.  */
413   if (call_p)
414     available_regs = available_regs - target_clobbered_regs;
415 
416   /* If we have enough registers, we should use them and not restrict
417      the transformations unnecessarily.  */
418   if (regs_needed + target_res_regs <= available_regs)
419     return 0;
420 
421   if (regs_needed <= available_regs)
422     /* If we are close to running out of registers, try to preserve
423        them.  */
424     cost = target_reg_cost [speed] * n_new;
425   else
426     /* If we run out of registers, it is very expensive to add another
427        one.  */
428     cost = target_spill_cost [speed] * n_new;
429 
430   if (optimize && (flag_ira_region == IRA_REGION_ALL
431                        || flag_ira_region == IRA_REGION_MIXED)
432       && number_of_loops (cfun) <= (unsigned) param_ira_max_loops_num)
433     /* IRA regional allocation deals with high register pressure
434        better.  So decrease the cost (to do more accurate the cost
435        calculation for IRA, we need to know how many registers lives
436        through the loop transparently).  */
437     cost /= 2;
438 
439   return cost;
440 }
441 
442 /* Sets EDGE_LOOP_EXIT flag for all loop exits.  */
443 
444 void
mark_loop_exit_edges(void)445 mark_loop_exit_edges (void)
446 {
447   basic_block bb;
448   edge e;
449 
450   if (number_of_loops (cfun) <= 1)
451     return;
452 
453   FOR_EACH_BB_FN (bb, cfun)
454     {
455       edge_iterator ei;
456 
457       FOR_EACH_EDGE (e, ei, bb->succs)
458           {
459             if (loop_outer (bb->loop_father)
460                 && loop_exit_edge_p (bb->loop_father, e))
461               e->flags |= EDGE_LOOP_EXIT;
462             else
463               e->flags &= ~EDGE_LOOP_EXIT;
464           }
465     }
466 }
467 
468 /* Return exit edge if loop has only one exit that is likely
469    to be executed on runtime (i.e. it is not EH or leading
470    to noreturn call.  */
471 
472 edge
single_likely_exit(class loop * loop,const vec<edge> & exits)473 single_likely_exit (class loop *loop, const vec<edge> &exits)
474 {
475   edge found = single_exit (loop);
476   unsigned i;
477   edge ex;
478 
479   if (found)
480     return found;
481   FOR_EACH_VEC_ELT (exits, i, ex)
482     {
483       if (probably_never_executed_edge_p (cfun, ex)
484             /* We want to rule out paths to noreturns but not low probabilities
485                resulting from adjustments or combining.
486                FIXME: once we have better quality tracking, make this more
487                robust.  */
488             || ex->probability <= profile_probability::very_unlikely ())
489           continue;
490       if (!found)
491           found = ex;
492       else
493           return NULL;
494     }
495   return found;
496 }
497 
498 
499 /* Gets basic blocks of a LOOP.  Header is the 0-th block, rest is in dfs
500    order against direction of edges from latch.  Specially, if
501    header != latch, latch is the 1-st block.  */
502 
503 auto_vec<basic_block>
get_loop_hot_path(const class loop * loop)504 get_loop_hot_path (const class loop *loop)
505 {
506   basic_block bb = loop->header;
507   auto_vec<basic_block> path;
508   bitmap visited = BITMAP_ALLOC (NULL);
509 
510   while (true)
511     {
512       edge_iterator ei;
513       edge e;
514       edge best = NULL;
515 
516       path.safe_push (bb);
517       bitmap_set_bit (visited, bb->index);
518       FOR_EACH_EDGE (e, ei, bb->succs)
519         if ((!best || e->probability > best->probability)
520               && !loop_exit_edge_p (loop, e)
521               && !bitmap_bit_p (visited, e->dest->index))
522             best = e;
523       if (!best || best->dest == loop->header)
524           break;
525       bb = best->dest;
526     }
527   BITMAP_FREE (visited);
528   return path;
529 }
530