xref: /dragonfly/contrib/gcc-8.0/gcc/bb-reorder.c (revision 95059079af47f9a66a175f374f2da1a5020e3255)
1 /* Basic block reordering routines for the GNU compiler.
2    Copyright (C) 2000-2018 Free Software Foundation, Inc.
3 
4    This file is part of GCC.
5 
6    GCC is free software; you can redistribute it and/or modify it
7    under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3, or (at your option)
9    any later version.
10 
11    GCC is distributed in the hope that it will be useful, but WITHOUT
12    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
14    License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with GCC; see the file COPYING3.  If not see
18    <http://www.gnu.org/licenses/>.  */
19 
20 /* This file contains the "reorder blocks" pass, which changes the control
21    flow of a function to encounter fewer branches; the "partition blocks"
22    pass, which divides the basic blocks into "hot" and "cold" partitions,
23    which are kept separate; and the "duplicate computed gotos" pass, which
24    duplicates blocks ending in an indirect jump.
25 
26    There are two algorithms for "reorder blocks": the "simple" algorithm,
27    which just rearranges blocks, trying to minimize the number of executed
28    unconditional branches; and the "software trace cache" algorithm, which
29    also copies code, and in general tries a lot harder to have long linear
30    pieces of machine code executed.  This algorithm is described next.  */
31 
32 /* This (greedy) algorithm constructs traces in several rounds.
33    The construction starts from "seeds".  The seed for the first round
34    is the entry point of the function.  When there are more than one seed,
35    the one with the lowest key in the heap is selected first (see bb_to_key).
36    Then the algorithm repeatedly adds the most probable successor to the end
37    of a trace.  Finally it connects the traces.
38 
39    There are two parameters: Branch Threshold and Exec Threshold.
40    If the probability of an edge to a successor of the current basic block is
41    lower than Branch Threshold or its count is lower than Exec Threshold,
42    then the successor will be the seed in one of the next rounds.
43    Each round has these parameters lower than the previous one.
44    The last round has to have these parameters set to zero so that the
45    remaining blocks are picked up.
46 
47    The algorithm selects the most probable successor from all unvisited
48    successors and successors that have been added to this trace.
49    The other successors (that has not been "sent" to the next round) will be
50    other seeds for this round and the secondary traces will start from them.
51    If the successor has not been visited in this trace, it is added to the
52    trace (however, there is some heuristic for simple branches).
53    If the successor has been visited in this trace, a loop has been found.
54    If the loop has many iterations, the loop is rotated so that the source
55    block of the most probable edge going out of the loop is the last block
56    of the trace.
57    If the loop has few iterations and there is no edge from the last block of
58    the loop going out of the loop, the loop header is duplicated.
59 
60    When connecting traces, the algorithm first checks whether there is an edge
61    from the last block of a trace to the first block of another trace.
62    When there are still some unconnected traces it checks whether there exists
63    a basic block BB such that BB is a successor of the last block of a trace
64    and BB is a predecessor of the first block of another trace.  In this case,
65    BB is duplicated, added at the end of the first trace and the traces are
66    connected through it.
67    The rest of traces are simply connected so there will be a jump to the
68    beginning of the rest of traces.
69 
70    The above description is for the full algorithm, which is used when the
71    function is optimized for speed.  When the function is optimized for size,
72    in order to reduce long jumps and connect more fallthru edges, the
73    algorithm is modified as follows:
74    (1) Break long traces to short ones.  A trace is broken at a block that has
75    multiple predecessors/ successors during trace discovery.  When connecting
76    traces, only connect Trace n with Trace n + 1.  This change reduces most
77    long jumps compared with the above algorithm.
78    (2) Ignore the edge probability and count for fallthru edges.
79    (3) Keep the original order of blocks when there is no chance to fall
80    through.  We rely on the results of cfg_cleanup.
81 
82    To implement the change for code size optimization, block's index is
83    selected as the key and all traces are found in one round.
84 
85    References:
86 
87    "Software Trace Cache"
88    A. Ramirez, J. Larriba-Pey, C. Navarro, J. Torrellas and M. Valero; 1999
89    http://citeseer.nj.nec.com/15361.html
90 
91 */
92 
93 #include "config.h"
94 #define INCLUDE_ALGORITHM /* stable_sort */
95 #include "system.h"
96 #include "coretypes.h"
97 #include "backend.h"
98 #include "target.h"
99 #include "rtl.h"
100 #include "tree.h"
101 #include "cfghooks.h"
102 #include "df.h"
103 #include "memmodel.h"
104 #include "optabs.h"
105 #include "regs.h"
106 #include "emit-rtl.h"
107 #include "output.h"
108 #include "expr.h"
109 #include "params.h"
110 #include "tree-pass.h"
111 #include "cfgrtl.h"
112 #include "cfganal.h"
113 #include "cfgbuild.h"
114 #include "cfgcleanup.h"
115 #include "bb-reorder.h"
116 #include "except.h"
117 #include "fibonacci_heap.h"
118 #include "stringpool.h"
119 #include "attribs.h"
120 #include "common/common-target.h"
121 
122 /* The number of rounds.  In most cases there will only be 4 rounds, but
123    when partitioning hot and cold basic blocks into separate sections of
124    the object file there will be an extra round.  */
125 #define N_ROUNDS 5
126 
127 struct target_bb_reorder default_target_bb_reorder;
128 #if SWITCHABLE_TARGET
129 struct target_bb_reorder *this_target_bb_reorder = &default_target_bb_reorder;
130 #endif
131 
132 #define uncond_jump_length \
133   (this_target_bb_reorder->x_uncond_jump_length)
134 
135 /* Branch thresholds in thousandths (per mille) of the REG_BR_PROB_BASE.  */
136 static const int branch_threshold[N_ROUNDS] = {400, 200, 100, 0, 0};
137 
138 /* Exec thresholds in thousandths (per mille) of the count of bb 0.  */
139 static const int exec_threshold[N_ROUNDS] = {500, 200, 50, 0, 0};
140 
141 /* If edge count is lower than DUPLICATION_THRESHOLD per mille of entry
142    block the edge destination is not duplicated while connecting traces.  */
143 #define DUPLICATION_THRESHOLD 100
144 
145 typedef fibonacci_heap <long, basic_block_def> bb_heap_t;
146 typedef fibonacci_node <long, basic_block_def> bb_heap_node_t;
147 
148 /* Structure to hold needed information for each basic block.  */
149 struct bbro_basic_block_data
150 {
151   /* Which trace is the bb start of (-1 means it is not a start of any).  */
152   int start_of_trace;
153 
154   /* Which trace is the bb end of (-1 means it is not an end of any).  */
155   int end_of_trace;
156 
157   /* Which trace is the bb in?  */
158   int in_trace;
159 
160   /* Which trace was this bb visited in?  */
161   int visited;
162 
163   /* Cached maximum frequency of interesting incoming edges.
164      Minus one means not yet computed.  */
165   int priority;
166 
167   /* Which heap is BB in (if any)?  */
168   bb_heap_t *heap;
169 
170   /* Which heap node is BB in (if any)?  */
171   bb_heap_node_t *node;
172 };
173 
174 /* The current size of the following dynamic array.  */
175 static int array_size;
176 
177 /* The array which holds needed information for basic blocks.  */
178 static bbro_basic_block_data *bbd;
179 
180 /* To avoid frequent reallocation the size of arrays is greater than needed,
181    the number of elements is (not less than) 1.25 * size_wanted.  */
182 #define GET_ARRAY_SIZE(X) ((((X) / 4) + 1) * 5)
183 
184 /* Free the memory and set the pointer to NULL.  */
185 #define FREE(P) (gcc_assert (P), free (P), P = 0)
186 
187 /* Structure for holding information about a trace.  */
188 struct trace
189 {
190   /* First and last basic block of the trace.  */
191   basic_block first, last;
192 
193   /* The round of the STC creation which this trace was found in.  */
194   int round;
195 
196   /* The length (i.e. the number of basic blocks) of the trace.  */
197   int length;
198 };
199 
200 /* Maximum count of one of the entry blocks.  */
201 static profile_count max_entry_count;
202 
203 /* Local function prototypes.  */
204 static void find_traces_1_round (int, profile_count, struct trace *, int *,
205                                          int, bb_heap_t **, int);
206 static basic_block copy_bb (basic_block, edge, basic_block, int);
207 static long bb_to_key (basic_block);
208 static bool better_edge_p (const_basic_block, const_edge, profile_probability,
209                                  profile_count, profile_probability, profile_count,
210                                  const_edge);
211 static bool copy_bb_p (const_basic_block, int);
212 
213 /* Return the trace number in which BB was visited.  */
214 
215 static int
bb_visited_trace(const_basic_block bb)216 bb_visited_trace (const_basic_block bb)
217 {
218   gcc_assert (bb->index < array_size);
219   return bbd[bb->index].visited;
220 }
221 
222 /* This function marks BB that it was visited in trace number TRACE.  */
223 
224 static void
mark_bb_visited(basic_block bb,int trace)225 mark_bb_visited (basic_block bb, int trace)
226 {
227   bbd[bb->index].visited = trace;
228   if (bbd[bb->index].heap)
229     {
230       bbd[bb->index].heap->delete_node (bbd[bb->index].node);
231       bbd[bb->index].heap = NULL;
232       bbd[bb->index].node = NULL;
233     }
234 }
235 
236 /* Check to see if bb should be pushed into the next round of trace
237    collections or not.  Reasons for pushing the block forward are 1).
238    If the block is cold, we are doing partitioning, and there will be
239    another round (cold partition blocks are not supposed to be
240    collected into traces until the very last round); or 2). There will
241    be another round, and the basic block is not "hot enough" for the
242    current round of trace collection.  */
243 
244 static bool
push_to_next_round_p(const_basic_block bb,int round,int number_of_rounds,profile_count count_th)245 push_to_next_round_p (const_basic_block bb, int round, int number_of_rounds,
246                           profile_count count_th)
247 {
248   bool there_exists_another_round;
249   bool block_not_hot_enough;
250 
251   there_exists_another_round = round < number_of_rounds - 1;
252 
253   block_not_hot_enough = (bb->count < count_th
254                                 || probably_never_executed_bb_p (cfun, bb));
255 
256   if (there_exists_another_round
257       && block_not_hot_enough)
258     return true;
259   else
260     return false;
261 }
262 
263 /* Find the traces for Software Trace Cache.  Chain each trace through
264    RBI()->next.  Store the number of traces to N_TRACES and description of
265    traces to TRACES.  */
266 
267 static void
find_traces(int * n_traces,struct trace * traces)268 find_traces (int *n_traces, struct trace *traces)
269 {
270   int i;
271   int number_of_rounds;
272   edge e;
273   edge_iterator ei;
274   bb_heap_t *heap = new bb_heap_t (LONG_MIN);
275 
276   /* Add one extra round of trace collection when partitioning hot/cold
277      basic blocks into separate sections.  The last round is for all the
278      cold blocks (and ONLY the cold blocks).  */
279 
280   number_of_rounds = N_ROUNDS - 1;
281 
282   /* Insert entry points of function into heap.  */
283   max_entry_count = profile_count::zero ();
284   FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
285     {
286       bbd[e->dest->index].heap = heap;
287       bbd[e->dest->index].node = heap->insert (bb_to_key (e->dest), e->dest);
288       if (e->dest->count > max_entry_count)
289           max_entry_count = e->dest->count;
290     }
291 
292   /* Find the traces.  */
293   for (i = 0; i < number_of_rounds; i++)
294     {
295       profile_count count_threshold;
296 
297       if (dump_file)
298           fprintf (dump_file, "STC - round %d\n", i + 1);
299 
300       count_threshold = max_entry_count.apply_scale (exec_threshold[i], 1000);
301 
302       find_traces_1_round (REG_BR_PROB_BASE * branch_threshold[i] / 1000,
303                                  count_threshold, traces, n_traces, i, &heap,
304                                  number_of_rounds);
305     }
306   delete heap;
307 
308   if (dump_file)
309     {
310       for (i = 0; i < *n_traces; i++)
311           {
312             basic_block bb;
313             fprintf (dump_file, "Trace %d (round %d):  ", i + 1,
314                        traces[i].round + 1);
315             for (bb = traces[i].first;
316                  bb != traces[i].last;
317                  bb = (basic_block) bb->aux)
318               {
319                 fprintf (dump_file, "%d [", bb->index);
320                 bb->count.dump (dump_file);
321                 fprintf (dump_file, "] ");
322               }
323             fprintf (dump_file, "%d [", bb->index);
324             bb->count.dump (dump_file);
325             fprintf (dump_file, "]\n");
326           }
327       fflush (dump_file);
328     }
329 }
330 
331 /* Rotate loop whose back edge is BACK_EDGE in the tail of trace TRACE
332    (with sequential number TRACE_N).  */
333 
334 static basic_block
rotate_loop(edge back_edge,struct trace * trace,int trace_n)335 rotate_loop (edge back_edge, struct trace *trace, int trace_n)
336 {
337   basic_block bb;
338 
339   /* Information about the best end (end after rotation) of the loop.  */
340   basic_block best_bb = NULL;
341   edge best_edge = NULL;
342   profile_count best_count = profile_count::uninitialized ();
343   /* The best edge is preferred when its destination is not visited yet
344      or is a start block of some trace.  */
345   bool is_preferred = false;
346 
347   /* Find the most frequent edge that goes out from current trace.  */
348   bb = back_edge->dest;
349   do
350     {
351       edge e;
352       edge_iterator ei;
353 
354       FOR_EACH_EDGE (e, ei, bb->succs)
355           if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
356               && bb_visited_trace (e->dest) != trace_n
357               && (e->flags & EDGE_CAN_FALLTHRU)
358               && !(e->flags & EDGE_COMPLEX))
359           {
360             if (is_preferred)
361               {
362                 /* The best edge is preferred.  */
363                 if (!bb_visited_trace (e->dest)
364                       || bbd[e->dest->index].start_of_trace >= 0)
365                     {
366                       /* The current edge E is also preferred.  */
367                       if (e->count () > best_count)
368                         {
369                           best_count = e->count ();
370                           best_edge = e;
371                           best_bb = bb;
372                         }
373                     }
374               }
375             else
376               {
377                 if (!bb_visited_trace (e->dest)
378                       || bbd[e->dest->index].start_of_trace >= 0)
379                     {
380                       /* The current edge E is preferred.  */
381                       is_preferred = true;
382                       best_count = e->count ();
383                       best_edge = e;
384                       best_bb = bb;
385                     }
386                 else
387                     {
388                       if (!best_edge || e->count () > best_count)
389                         {
390                           best_count = e->count ();
391                           best_edge = e;
392                           best_bb = bb;
393                         }
394                     }
395               }
396           }
397       bb = (basic_block) bb->aux;
398     }
399   while (bb != back_edge->dest);
400 
401   if (best_bb)
402     {
403       /* Rotate the loop so that the BEST_EDGE goes out from the last block of
404            the trace.  */
405       if (back_edge->dest == trace->first)
406           {
407             trace->first = (basic_block) best_bb->aux;
408           }
409       else
410           {
411             basic_block prev_bb;
412 
413             for (prev_bb = trace->first;
414                  prev_bb->aux != back_edge->dest;
415                  prev_bb = (basic_block) prev_bb->aux)
416               ;
417             prev_bb->aux = best_bb->aux;
418 
419             /* Try to get rid of uncond jump to cond jump.  */
420             if (single_succ_p (prev_bb))
421               {
422                 basic_block header = single_succ (prev_bb);
423 
424                 /* Duplicate HEADER if it is a small block containing cond jump
425                      in the end.  */
426                 if (any_condjump_p (BB_END (header)) && copy_bb_p (header, 0)
427                       && !CROSSING_JUMP_P (BB_END (header)))
428                     copy_bb (header, single_succ_edge (prev_bb), prev_bb, trace_n);
429               }
430           }
431     }
432   else
433     {
434       /* We have not found suitable loop tail so do no rotation.  */
435       best_bb = back_edge->src;
436     }
437   best_bb->aux = NULL;
438   return best_bb;
439 }
440 
441 /* One round of finding traces.  Find traces for BRANCH_TH and EXEC_TH i.e. do
442    not include basic blocks whose probability is lower than BRANCH_TH or whose
443    count is lower than EXEC_TH into traces (or whose count is lower than
444    COUNT_TH).  Store the new traces into TRACES and modify the number of
445    traces *N_TRACES.  Set the round (which the trace belongs to) to ROUND.
446    The function expects starting basic blocks to be in *HEAP and will delete
447    *HEAP and store starting points for the next round into new *HEAP.  */
448 
449 static void
find_traces_1_round(int branch_th,profile_count count_th,struct trace * traces,int * n_traces,int round,bb_heap_t ** heap,int number_of_rounds)450 find_traces_1_round (int branch_th, profile_count count_th,
451                          struct trace *traces, int *n_traces, int round,
452                          bb_heap_t **heap, int number_of_rounds)
453 {
454   /* Heap for discarded basic blocks which are possible starting points for
455      the next round.  */
456   bb_heap_t *new_heap = new bb_heap_t (LONG_MIN);
457   bool for_size = optimize_function_for_size_p (cfun);
458 
459   while (!(*heap)->empty ())
460     {
461       basic_block bb;
462       struct trace *trace;
463       edge best_edge, e;
464       long key;
465       edge_iterator ei;
466 
467       bb = (*heap)->extract_min ();
468       bbd[bb->index].heap = NULL;
469       bbd[bb->index].node = NULL;
470 
471       if (dump_file)
472           fprintf (dump_file, "Getting bb %d\n", bb->index);
473 
474       /* If the BB's count is too low, send BB to the next round.  When
475            partitioning hot/cold blocks into separate sections, make sure all
476            the cold blocks (and ONLY the cold blocks) go into the (extra) final
477            round.  When optimizing for size, do not push to next round.  */
478 
479       if (!for_size
480             && push_to_next_round_p (bb, round, number_of_rounds,
481                                            count_th))
482           {
483             int key = bb_to_key (bb);
484             bbd[bb->index].heap = new_heap;
485             bbd[bb->index].node = new_heap->insert (key, bb);
486 
487             if (dump_file)
488               fprintf (dump_file,
489                          "  Possible start point of next round: %d (key: %d)\n",
490                          bb->index, key);
491             continue;
492           }
493 
494       trace = traces + *n_traces;
495       trace->first = bb;
496       trace->round = round;
497       trace->length = 0;
498       bbd[bb->index].in_trace = *n_traces;
499       (*n_traces)++;
500 
501       do
502           {
503             bool ends_in_call;
504 
505             /* The probability and count of the best edge.  */
506             profile_probability best_prob = profile_probability::uninitialized ();
507             profile_count best_count = profile_count::uninitialized ();
508 
509             best_edge = NULL;
510             mark_bb_visited (bb, *n_traces);
511             trace->length++;
512 
513             if (dump_file)
514               fprintf (dump_file, "Basic block %d was visited in trace %d\n",
515                          bb->index, *n_traces);
516 
517             ends_in_call = block_ends_with_call_p (bb);
518 
519             /* Select the successor that will be placed after BB.  */
520             FOR_EACH_EDGE (e, ei, bb->succs)
521               {
522                 gcc_assert (!(e->flags & EDGE_FAKE));
523 
524                 if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
525                     continue;
526 
527                 if (bb_visited_trace (e->dest)
528                       && bb_visited_trace (e->dest) != *n_traces)
529                     continue;
530 
531                 /* If partitioning hot/cold basic blocks, don't consider edges
532                      that cross section boundaries.  */
533                 if (BB_PARTITION (e->dest) != BB_PARTITION (bb))
534                     continue;
535 
536                 profile_probability prob = e->probability;
537                 profile_count count = e->dest->count;
538 
539                 /* The only sensible preference for a call instruction is the
540                      fallthru edge.  Don't bother selecting anything else.  */
541                 if (ends_in_call)
542                     {
543                       if (e->flags & EDGE_CAN_FALLTHRU)
544                         {
545                           best_edge = e;
546                           best_prob = prob;
547                           best_count = count;
548                         }
549                       continue;
550                     }
551 
552                 /* Edge that cannot be fallthru or improbable or infrequent
553                      successor (i.e. it is unsuitable successor).  When optimizing
554                      for size, ignore the probability and count.  */
555                 if (!(e->flags & EDGE_CAN_FALLTHRU) || (e->flags & EDGE_COMPLEX)
556                       || !prob.initialized_p ()
557                       || ((prob.to_reg_br_prob_base () < branch_th
558                           || e->count () < count_th) && (!for_size)))
559                     continue;
560 
561                 if (better_edge_p (bb, e, prob, count, best_prob, best_count,
562                                          best_edge))
563                     {
564                       best_edge = e;
565                       best_prob = prob;
566                       best_count = count;
567                     }
568               }
569 
570             /* If the best destination has multiple predecessors and can be
571                duplicated cheaper than a jump, don't allow it to be added to
572                a trace; we'll duplicate it when connecting the traces later.
573                However, we need to check that this duplication wouldn't leave
574                the best destination with only crossing predecessors, because
575                this would change its effective partition from hot to cold.  */
576             if (best_edge
577                 && EDGE_COUNT (best_edge->dest->preds) >= 2
578                 && copy_bb_p (best_edge->dest, 0))
579               {
580                 bool only_crossing_preds = true;
581                 edge e;
582                 edge_iterator ei;
583                 FOR_EACH_EDGE (e, ei, best_edge->dest->preds)
584                     if (e != best_edge && !(e->flags & EDGE_CROSSING))
585                       {
586                         only_crossing_preds = false;
587                         break;
588                       }
589                 if (!only_crossing_preds)
590                     best_edge = NULL;
591               }
592 
593             /* If the best destination has multiple successors or predecessors,
594                don't allow it to be added when optimizing for size.  This makes
595                sure predecessors with smaller index are handled before the best
596                destinarion.  It breaks long trace and reduces long jumps.
597 
598                Take if-then-else as an example.
599                     A
600                  / \
601                 B   C
602                  \ /
603                     D
604                If we do not remove the best edge B->D/C->D, the final order might
605                be A B D ... C.  C is at the end of the program.  If D's successors
606                and D are complicated, might need long jumps for A->C and C->D.
607                Similar issue for order: A C D ... B.
608 
609                After removing the best edge, the final result will be ABCD/ ACBD.
610                It does not add jump compared with the previous order.  But it
611                reduces the possibility of long jumps.  */
612             if (best_edge && for_size
613                 && (EDGE_COUNT (best_edge->dest->succs) > 1
614                      || EDGE_COUNT (best_edge->dest->preds) > 1))
615               best_edge = NULL;
616 
617             /* Add all non-selected successors to the heaps.  */
618             FOR_EACH_EDGE (e, ei, bb->succs)
619               {
620                 if (e == best_edge
621                       || e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
622                       || bb_visited_trace (e->dest))
623                     continue;
624 
625                 key = bb_to_key (e->dest);
626 
627                 if (bbd[e->dest->index].heap)
628                     {
629                       /* E->DEST is already in some heap.  */
630                       if (key != bbd[e->dest->index].node->get_key ())
631                         {
632                           if (dump_file)
633                               {
634                                 fprintf (dump_file,
635                                            "Changing key for bb %d from %ld to %ld.\n",
636                                            e->dest->index,
637                                            (long) bbd[e->dest->index].node->get_key (),
638                                            key);
639                               }
640                           bbd[e->dest->index].heap->replace_key
641                             (bbd[e->dest->index].node, key);
642                         }
643                     }
644                 else
645                     {
646                       bb_heap_t *which_heap = *heap;
647 
648                       profile_probability prob = e->probability;
649 
650                       if (!(e->flags & EDGE_CAN_FALLTHRU)
651                           || (e->flags & EDGE_COMPLEX)
652                           || !prob.initialized_p ()
653                           || prob.to_reg_br_prob_base () < branch_th
654                           || e->count () < count_th)
655                         {
656                           /* When partitioning hot/cold basic blocks, make sure
657                                the cold blocks (and only the cold blocks) all get
658                                pushed to the last round of trace collection.  When
659                                optimizing for size, do not push to next round.  */
660 
661                           if (!for_size && push_to_next_round_p (e->dest, round,
662                                                                            number_of_rounds,
663                                                                            count_th))
664                               which_heap = new_heap;
665                         }
666 
667                       bbd[e->dest->index].heap = which_heap;
668                       bbd[e->dest->index].node = which_heap->insert (key, e->dest);
669 
670                       if (dump_file)
671                         {
672                           fprintf (dump_file,
673                                      "  Possible start of %s round: %d (key: %ld)\n",
674                                      (which_heap == new_heap) ? "next" : "this",
675                                      e->dest->index, (long) key);
676                         }
677 
678                     }
679               }
680 
681             if (best_edge) /* Suitable successor was found.  */
682               {
683                 if (bb_visited_trace (best_edge->dest) == *n_traces)
684                     {
685                       /* We do nothing with one basic block loops.  */
686                       if (best_edge->dest != bb)
687                         {
688                           if (best_edge->count ()
689                                 > best_edge->dest->count.apply_scale (4, 5))
690                               {
691                                 /* The loop has at least 4 iterations.  If the loop
692                                    header is not the first block of the function
693                                    we can rotate the loop.  */
694 
695                                 if (best_edge->dest
696                                     != ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb)
697                                   {
698                                     if (dump_file)
699                                         {
700                                           fprintf (dump_file,
701                                                      "Rotating loop %d - %d\n",
702                                                      best_edge->dest->index, bb->index);
703                                         }
704                                     bb->aux = best_edge->dest;
705                                     bbd[best_edge->dest->index].in_trace =
706                                                                            (*n_traces) - 1;
707                                     bb = rotate_loop (best_edge, trace, *n_traces);
708                                   }
709                               }
710                           else
711                               {
712                                 /* The loop has less than 4 iterations.  */
713 
714                                 if (single_succ_p (bb)
715                                     && copy_bb_p (best_edge->dest,
716                                                       optimize_edge_for_speed_p
717                                                       (best_edge)))
718                                   {
719                                     bb = copy_bb (best_edge->dest, best_edge, bb,
720                                                       *n_traces);
721                                     trace->length++;
722                                   }
723                               }
724                         }
725 
726                       /* Terminate the trace.  */
727                       break;
728                     }
729                 else
730                     {
731                       /* Check for a situation
732 
733                         A
734                        /|
735                       B |
736                        \|
737                         C
738 
739                       where
740                       AB->count () + BC->count () >= AC->count ().
741                       (i.e. 2 * B->count >= AC->count )
742                       Best ordering is then A B C.
743 
744                       When optimizing for size, A B C is always the best order.
745 
746                       This situation is created for example by:
747 
748                       if (A) B;
749                       C;
750 
751                       */
752 
753                       FOR_EACH_EDGE (e, ei, bb->succs)
754                         if (e != best_edge
755                               && (e->flags & EDGE_CAN_FALLTHRU)
756                               && !(e->flags & EDGE_COMPLEX)
757                               && !bb_visited_trace (e->dest)
758                               && single_pred_p (e->dest)
759                               && !(e->flags & EDGE_CROSSING)
760                               && single_succ_p (e->dest)
761                               && (single_succ_edge (e->dest)->flags
762                                   & EDGE_CAN_FALLTHRU)
763                               && !(single_succ_edge (e->dest)->flags & EDGE_COMPLEX)
764                               && single_succ (e->dest) == best_edge->dest
765                               && (e->dest->count.apply_scale (2, 1)
766                                   >= best_edge->count () || for_size))
767                           {
768                               best_edge = e;
769                               if (dump_file)
770                                 fprintf (dump_file, "Selecting BB %d\n",
771                                            best_edge->dest->index);
772                               break;
773                           }
774 
775                       bb->aux = best_edge->dest;
776                       bbd[best_edge->dest->index].in_trace = (*n_traces) - 1;
777                       bb = best_edge->dest;
778                     }
779               }
780           }
781       while (best_edge);
782       trace->last = bb;
783       bbd[trace->first->index].start_of_trace = *n_traces - 1;
784       if (bbd[trace->last->index].end_of_trace != *n_traces - 1)
785           {
786             bbd[trace->last->index].end_of_trace = *n_traces - 1;
787             /* Update the cached maximum frequency for interesting predecessor
788                edges for successors of the new trace end.  */
789             FOR_EACH_EDGE (e, ei, trace->last->succs)
790               if (EDGE_FREQUENCY (e) > bbd[e->dest->index].priority)
791                 bbd[e->dest->index].priority = EDGE_FREQUENCY (e);
792           }
793 
794       /* The trace is terminated so we have to recount the keys in heap
795            (some block can have a lower key because now one of its predecessors
796            is an end of the trace).  */
797       FOR_EACH_EDGE (e, ei, bb->succs)
798           {
799             if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
800                 || bb_visited_trace (e->dest))
801               continue;
802 
803             if (bbd[e->dest->index].heap)
804               {
805                 key = bb_to_key (e->dest);
806                 if (key != bbd[e->dest->index].node->get_key ())
807                     {
808                       if (dump_file)
809                         {
810                           fprintf (dump_file,
811                                      "Changing key for bb %d from %ld to %ld.\n",
812                                      e->dest->index,
813                                      (long) bbd[e->dest->index].node->get_key (), key);
814                         }
815                       bbd[e->dest->index].heap->replace_key
816                         (bbd[e->dest->index].node, key);
817                     }
818               }
819           }
820     }
821 
822   delete (*heap);
823 
824   /* "Return" the new heap.  */
825   *heap = new_heap;
826 }
827 
828 /* Create a duplicate of the basic block OLD_BB and redirect edge E to it, add
829    it to trace after BB, mark OLD_BB visited and update pass' data structures
830    (TRACE is a number of trace which OLD_BB is duplicated to).  */
831 
832 static basic_block
copy_bb(basic_block old_bb,edge e,basic_block bb,int trace)833 copy_bb (basic_block old_bb, edge e, basic_block bb, int trace)
834 {
835   basic_block new_bb;
836 
837   new_bb = duplicate_block (old_bb, e, bb);
838   BB_COPY_PARTITION (new_bb, old_bb);
839 
840   gcc_assert (e->dest == new_bb);
841 
842   if (dump_file)
843     fprintf (dump_file,
844                "Duplicated bb %d (created bb %d)\n",
845                old_bb->index, new_bb->index);
846 
847   if (new_bb->index >= array_size
848       || last_basic_block_for_fn (cfun) > array_size)
849     {
850       int i;
851       int new_size;
852 
853       new_size = MAX (last_basic_block_for_fn (cfun), new_bb->index + 1);
854       new_size = GET_ARRAY_SIZE (new_size);
855       bbd = XRESIZEVEC (bbro_basic_block_data, bbd, new_size);
856       for (i = array_size; i < new_size; i++)
857           {
858             bbd[i].start_of_trace = -1;
859             bbd[i].end_of_trace = -1;
860             bbd[i].in_trace = -1;
861             bbd[i].visited = 0;
862             bbd[i].priority = -1;
863             bbd[i].heap = NULL;
864             bbd[i].node = NULL;
865           }
866       array_size = new_size;
867 
868       if (dump_file)
869           {
870             fprintf (dump_file,
871                        "Growing the dynamic array to %d elements.\n",
872                        array_size);
873           }
874     }
875 
876   gcc_assert (!bb_visited_trace (e->dest));
877   mark_bb_visited (new_bb, trace);
878   new_bb->aux = bb->aux;
879   bb->aux = new_bb;
880 
881   bbd[new_bb->index].in_trace = trace;
882 
883   return new_bb;
884 }
885 
886 /* Compute and return the key (for the heap) of the basic block BB.  */
887 
888 static long
bb_to_key(basic_block bb)889 bb_to_key (basic_block bb)
890 {
891   edge e;
892   edge_iterator ei;
893 
894   /* Use index as key to align with its original order.  */
895   if (optimize_function_for_size_p (cfun))
896     return bb->index;
897 
898   /* Do not start in probably never executed blocks.  */
899 
900   if (BB_PARTITION (bb) == BB_COLD_PARTITION
901       || probably_never_executed_bb_p (cfun, bb))
902     return BB_FREQ_MAX;
903 
904   /* Prefer blocks whose predecessor is an end of some trace
905      or whose predecessor edge is EDGE_DFS_BACK.  */
906   int priority = bbd[bb->index].priority;
907   if (priority == -1)
908     {
909       priority = 0;
910       FOR_EACH_EDGE (e, ei, bb->preds)
911           {
912             if ((e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
913                  && bbd[e->src->index].end_of_trace >= 0)
914                 || (e->flags & EDGE_DFS_BACK))
915               {
916                 int edge_freq = EDGE_FREQUENCY (e);
917 
918                 if (edge_freq > priority)
919                     priority = edge_freq;
920               }
921           }
922       bbd[bb->index].priority = priority;
923     }
924 
925   if (priority)
926     /* The block with priority should have significantly lower key.  */
927     return -(100 * BB_FREQ_MAX + 100 * priority + bb->count.to_frequency (cfun));
928 
929   return -bb->count.to_frequency (cfun);
930 }
931 
932 /* Return true when the edge E from basic block BB is better than the temporary
933    best edge (details are in function).  The probability of edge E is PROB. The
934    count of the successor is COUNT.  The current best probability is
935    BEST_PROB, the best count is BEST_COUNT.
936    The edge is considered to be equivalent when PROB does not differ much from
937    BEST_PROB; similarly for count.  */
938 
939 static bool
better_edge_p(const_basic_block bb,const_edge e,profile_probability prob,profile_count count,profile_probability best_prob,profile_count best_count,const_edge cur_best_edge)940 better_edge_p (const_basic_block bb, const_edge e, profile_probability prob,
941                  profile_count count, profile_probability best_prob,
942                  profile_count best_count, const_edge cur_best_edge)
943 {
944   bool is_better_edge;
945 
946   /* The BEST_* values do not have to be best, but can be a bit smaller than
947      maximum values.  */
948   profile_probability diff_prob = best_prob.apply_scale (1, 10);
949 
950   /* The smaller one is better to keep the original order.  */
951   if (optimize_function_for_size_p (cfun))
952     return !cur_best_edge
953              || cur_best_edge->dest->index > e->dest->index;
954 
955   /* Those edges are so expensive that continuing a trace is not useful
956      performance wise.  */
957   if (e->flags & (EDGE_ABNORMAL | EDGE_EH))
958     return false;
959 
960   if (prob > best_prob + diff_prob
961       || (!best_prob.initialized_p ()
962             && prob > profile_probability::guessed_never ()))
963     /* The edge has higher probability than the temporary best edge.  */
964     is_better_edge = true;
965   else if (prob < best_prob - diff_prob)
966     /* The edge has lower probability than the temporary best edge.  */
967     is_better_edge = false;
968   else
969     {
970       profile_count diff_count = best_count.apply_scale (1, 10);
971       if (count < best_count - diff_count
972             || (!best_count.initialized_p ()
973                 && count.nonzero_p ()))
974           /* The edge and the temporary best edge  have almost equivalent
975              probabilities.  The higher countuency of a successor now means
976              that there is another edge going into that successor.
977              This successor has lower countuency so it is better.  */
978           is_better_edge = true;
979       else if (count > best_count + diff_count)
980           /* This successor has higher countuency so it is worse.  */
981           is_better_edge = false;
982       else if (e->dest->prev_bb == bb)
983           /* The edges have equivalent probabilities and the successors
984              have equivalent frequencies.  Select the previous successor.  */
985           is_better_edge = true;
986       else
987           is_better_edge = false;
988     }
989 
990   return is_better_edge;
991 }
992 
993 /* Return true when the edge E is better than the temporary best edge
994    CUR_BEST_EDGE.  If SRC_INDEX_P is true, the function compares the src bb of
995    E and CUR_BEST_EDGE; otherwise it will compare the dest bb.
996    BEST_LEN is the trace length of src (or dest) bb in CUR_BEST_EDGE.
997    TRACES record the information about traces.
998    When optimizing for size, the edge with smaller index is better.
999    When optimizing for speed, the edge with bigger probability or longer trace
1000    is better.  */
1001 
1002 static bool
connect_better_edge_p(const_edge e,bool src_index_p,int best_len,const_edge cur_best_edge,struct trace * traces)1003 connect_better_edge_p (const_edge e, bool src_index_p, int best_len,
1004                            const_edge cur_best_edge, struct trace *traces)
1005 {
1006   int e_index;
1007   int b_index;
1008   bool is_better_edge;
1009 
1010   if (!cur_best_edge)
1011     return true;
1012 
1013   if (optimize_function_for_size_p (cfun))
1014     {
1015       e_index = src_index_p ? e->src->index : e->dest->index;
1016       b_index = src_index_p ? cur_best_edge->src->index
1017                                     : cur_best_edge->dest->index;
1018       /* The smaller one is better to keep the original order.  */
1019       return b_index > e_index;
1020     }
1021 
1022   if (src_index_p)
1023     {
1024       e_index = e->src->index;
1025 
1026       /* We are looking for predecessor, so probabilities are not that
1027            informative.  We do not want to connect A to B becuse A has
1028            only one sucessor (probablity is 100%) while there is edge
1029            A' to B where probability is 90% but which is much more frequent.  */
1030       if (e->count () > cur_best_edge->count ())
1031           /* The edge has higher probability than the temporary best edge.  */
1032           is_better_edge = true;
1033       else if (e->count () < cur_best_edge->count ())
1034           /* The edge has lower probability than the temporary best edge.  */
1035           is_better_edge = false;
1036       if (e->probability > cur_best_edge->probability)
1037           /* The edge has higher probability than the temporary best edge.  */
1038           is_better_edge = true;
1039       else if (e->probability < cur_best_edge->probability)
1040           /* The edge has lower probability than the temporary best edge.  */
1041           is_better_edge = false;
1042       else if (traces[bbd[e_index].end_of_trace].length > best_len)
1043           /* The edge and the temporary best edge have equivalent probabilities.
1044              The edge with longer trace is better.  */
1045           is_better_edge = true;
1046       else
1047           is_better_edge = false;
1048     }
1049   else
1050     {
1051       e_index = e->dest->index;
1052 
1053       if (e->probability > cur_best_edge->probability)
1054           /* The edge has higher probability than the temporary best edge.  */
1055           is_better_edge = true;
1056       else if (e->probability < cur_best_edge->probability)
1057           /* The edge has lower probability than the temporary best edge.  */
1058           is_better_edge = false;
1059       else if (traces[bbd[e_index].start_of_trace].length > best_len)
1060           /* The edge and the temporary best edge have equivalent probabilities.
1061              The edge with longer trace is better.  */
1062           is_better_edge = true;
1063       else
1064           is_better_edge = false;
1065     }
1066 
1067   return is_better_edge;
1068 }
1069 
1070 /* Connect traces in array TRACES, N_TRACES is the count of traces.  */
1071 
1072 static void
connect_traces(int n_traces,struct trace * traces)1073 connect_traces (int n_traces, struct trace *traces)
1074 {
1075   int i;
1076   bool *connected;
1077   bool two_passes;
1078   int last_trace;
1079   int current_pass;
1080   int current_partition;
1081   profile_count count_threshold;
1082   bool for_size = optimize_function_for_size_p (cfun);
1083 
1084   count_threshold = max_entry_count.apply_scale (DUPLICATION_THRESHOLD, 1000);
1085 
1086   connected = XCNEWVEC (bool, n_traces);
1087   last_trace = -1;
1088   current_pass = 1;
1089   current_partition = BB_PARTITION (traces[0].first);
1090   two_passes = false;
1091 
1092   if (crtl->has_bb_partition)
1093     for (i = 0; i < n_traces && !two_passes; i++)
1094       if (BB_PARTITION (traces[0].first)
1095             != BB_PARTITION (traces[i].first))
1096           two_passes = true;
1097 
1098   for (i = 0; i < n_traces || (two_passes && current_pass == 1) ; i++)
1099     {
1100       int t = i;
1101       int t2;
1102       edge e, best;
1103       int best_len;
1104 
1105       if (i >= n_traces)
1106           {
1107             gcc_assert (two_passes && current_pass == 1);
1108             i = 0;
1109             t = i;
1110             current_pass = 2;
1111             if (current_partition == BB_HOT_PARTITION)
1112               current_partition = BB_COLD_PARTITION;
1113             else
1114               current_partition = BB_HOT_PARTITION;
1115           }
1116 
1117       if (connected[t])
1118           continue;
1119 
1120       if (two_passes
1121             && BB_PARTITION (traces[t].first) != current_partition)
1122           continue;
1123 
1124       connected[t] = true;
1125 
1126       /* Find the predecessor traces.  */
1127       for (t2 = t; t2 > 0;)
1128           {
1129             edge_iterator ei;
1130             best = NULL;
1131             best_len = 0;
1132             FOR_EACH_EDGE (e, ei, traces[t2].first->preds)
1133               {
1134                 int si = e->src->index;
1135 
1136                 if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
1137                       && (e->flags & EDGE_CAN_FALLTHRU)
1138                       && !(e->flags & EDGE_COMPLEX)
1139                       && bbd[si].end_of_trace >= 0
1140                       && !connected[bbd[si].end_of_trace]
1141                       && (BB_PARTITION (e->src) == current_partition)
1142                       && connect_better_edge_p (e, true, best_len, best, traces))
1143                     {
1144                       best = e;
1145                       best_len = traces[bbd[si].end_of_trace].length;
1146                     }
1147               }
1148             if (best)
1149               {
1150                 best->src->aux = best->dest;
1151                 t2 = bbd[best->src->index].end_of_trace;
1152                 connected[t2] = true;
1153 
1154                 if (dump_file)
1155                     {
1156                       fprintf (dump_file, "Connection: %d %d\n",
1157                                  best->src->index, best->dest->index);
1158                     }
1159               }
1160             else
1161               break;
1162           }
1163 
1164       if (last_trace >= 0)
1165           traces[last_trace].last->aux = traces[t2].first;
1166       last_trace = t;
1167 
1168       /* Find the successor traces.  */
1169       while (1)
1170           {
1171             /* Find the continuation of the chain.  */
1172             edge_iterator ei;
1173             best = NULL;
1174             best_len = 0;
1175             FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1176               {
1177                 int di = e->dest->index;
1178 
1179                 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1180                       && (e->flags & EDGE_CAN_FALLTHRU)
1181                       && !(e->flags & EDGE_COMPLEX)
1182                       && bbd[di].start_of_trace >= 0
1183                       && !connected[bbd[di].start_of_trace]
1184                       && (BB_PARTITION (e->dest) == current_partition)
1185                       && connect_better_edge_p (e, false, best_len, best, traces))
1186                     {
1187                       best = e;
1188                       best_len = traces[bbd[di].start_of_trace].length;
1189                     }
1190               }
1191 
1192             if (for_size)
1193               {
1194                 if (!best)
1195                     /* Stop finding the successor traces.  */
1196                     break;
1197 
1198                 /* It is OK to connect block n with block n + 1 or a block
1199                      before n.  For others, only connect to the loop header.  */
1200                 if (best->dest->index > (traces[t].last->index + 1))
1201                     {
1202                       int count = EDGE_COUNT (best->dest->preds);
1203 
1204                       FOR_EACH_EDGE (e, ei, best->dest->preds)
1205                         if (e->flags & EDGE_DFS_BACK)
1206                           count--;
1207 
1208                       /* If dest has multiple predecessors, skip it.  We expect
1209                          that one predecessor with smaller index connects with it
1210                          later.  */
1211                       if (count != 1)
1212                         break;
1213                     }
1214 
1215                 /* Only connect Trace n with Trace n + 1.  It is conservative
1216                      to keep the order as close as possible to the original order.
1217                      It also helps to reduce long jumps.  */
1218                 if (last_trace != bbd[best->dest->index].start_of_trace - 1)
1219                     break;
1220 
1221                 if (dump_file)
1222                     fprintf (dump_file, "Connection: %d %d\n",
1223                                best->src->index, best->dest->index);
1224 
1225                 t = bbd[best->dest->index].start_of_trace;
1226                 traces[last_trace].last->aux = traces[t].first;
1227                 connected[t] = true;
1228                 last_trace = t;
1229               }
1230             else if (best)
1231               {
1232                 if (dump_file)
1233                     {
1234                       fprintf (dump_file, "Connection: %d %d\n",
1235                                  best->src->index, best->dest->index);
1236                     }
1237                 t = bbd[best->dest->index].start_of_trace;
1238                 traces[last_trace].last->aux = traces[t].first;
1239                 connected[t] = true;
1240                 last_trace = t;
1241               }
1242             else
1243               {
1244                 /* Try to connect the traces by duplication of 1 block.  */
1245                 edge e2;
1246                 basic_block next_bb = NULL;
1247                 bool try_copy = false;
1248 
1249                 FOR_EACH_EDGE (e, ei, traces[t].last->succs)
1250                     if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1251                         && (e->flags & EDGE_CAN_FALLTHRU)
1252                         && !(e->flags & EDGE_COMPLEX)
1253                         && (!best || e->probability > best->probability))
1254                       {
1255                         edge_iterator ei;
1256                         edge best2 = NULL;
1257                         int best2_len = 0;
1258 
1259                         /* If the destination is a start of a trace which is only
1260                            one block long, then no need to search the successor
1261                            blocks of the trace.  Accept it.  */
1262                         if (bbd[e->dest->index].start_of_trace >= 0
1263                               && traces[bbd[e->dest->index].start_of_trace].length
1264                                  == 1)
1265                           {
1266                               best = e;
1267                               try_copy = true;
1268                               continue;
1269                           }
1270 
1271                         FOR_EACH_EDGE (e2, ei, e->dest->succs)
1272                           {
1273                               int di = e2->dest->index;
1274 
1275                               if (e2->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)
1276                                   || ((e2->flags & EDGE_CAN_FALLTHRU)
1277                                         && !(e2->flags & EDGE_COMPLEX)
1278                                         && bbd[di].start_of_trace >= 0
1279                                         && !connected[bbd[di].start_of_trace]
1280                                         && BB_PARTITION (e2->dest) == current_partition
1281                                         && e2->count () >= count_threshold
1282                                         && (!best2
1283                                             || e2->probability > best2->probability
1284                                             || (e2->probability == best2->probability
1285                                                   && traces[bbd[di].start_of_trace].length
1286                                                      > best2_len))))
1287                                 {
1288                                   best = e;
1289                                   best2 = e2;
1290                                   if (e2->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
1291                                     best2_len = traces[bbd[di].start_of_trace].length;
1292                                   else
1293                                     best2_len = INT_MAX;
1294                                   next_bb = e2->dest;
1295                                   try_copy = true;
1296                                 }
1297                           }
1298                       }
1299 
1300                 /* Copy tiny blocks always; copy larger blocks only when the
1301                      edge is traversed frequently enough.  */
1302                 if (try_copy
1303                       && BB_PARTITION (best->src) == BB_PARTITION (best->dest)
1304                       && copy_bb_p (best->dest,
1305                                         optimize_edge_for_speed_p (best)
1306                                         && (!best->count ().initialized_p ()
1307                                             || best->count () >= count_threshold)))
1308                     {
1309                       basic_block new_bb;
1310 
1311                       if (dump_file)
1312                         {
1313                           fprintf (dump_file, "Connection: %d %d ",
1314                                      traces[t].last->index, best->dest->index);
1315                           if (!next_bb)
1316                               fputc ('\n', dump_file);
1317                           else if (next_bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
1318                               fprintf (dump_file, "exit\n");
1319                           else
1320                               fprintf (dump_file, "%d\n", next_bb->index);
1321                         }
1322 
1323                       new_bb = copy_bb (best->dest, best, traces[t].last, t);
1324                       traces[t].last = new_bb;
1325                       if (next_bb && next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
1326                         {
1327                           t = bbd[next_bb->index].start_of_trace;
1328                           traces[last_trace].last->aux = traces[t].first;
1329                           connected[t] = true;
1330                           last_trace = t;
1331                         }
1332                       else
1333                         break;          /* Stop finding the successor traces.  */
1334                     }
1335                 else
1336                     break;    /* Stop finding the successor traces.  */
1337               }
1338           }
1339     }
1340 
1341   if (dump_file)
1342     {
1343       basic_block bb;
1344 
1345       fprintf (dump_file, "Final order:\n");
1346       for (bb = traces[0].first; bb; bb = (basic_block) bb->aux)
1347           fprintf (dump_file, "%d ", bb->index);
1348       fprintf (dump_file, "\n");
1349       fflush (dump_file);
1350     }
1351 
1352   FREE (connected);
1353 }
1354 
1355 /* Return true when BB can and should be copied. CODE_MAY_GROW is true
1356    when code size is allowed to grow by duplication.  */
1357 
1358 static bool
copy_bb_p(const_basic_block bb,int code_may_grow)1359 copy_bb_p (const_basic_block bb, int code_may_grow)
1360 {
1361   int size = 0;
1362   int max_size = uncond_jump_length;
1363   rtx_insn *insn;
1364 
1365   if (EDGE_COUNT (bb->preds) < 2)
1366     return false;
1367   if (!can_duplicate_block_p (bb))
1368     return false;
1369 
1370   /* Avoid duplicating blocks which have many successors (PR/13430).  */
1371   if (EDGE_COUNT (bb->succs) > 8)
1372     return false;
1373 
1374   if (code_may_grow && optimize_bb_for_speed_p (bb))
1375     max_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS);
1376 
1377   FOR_BB_INSNS (bb, insn)
1378     {
1379       if (INSN_P (insn))
1380           size += get_attr_min_length (insn);
1381     }
1382 
1383   if (size <= max_size)
1384     return true;
1385 
1386   if (dump_file)
1387     {
1388       fprintf (dump_file,
1389                  "Block %d can't be copied because its size = %d.\n",
1390                  bb->index, size);
1391     }
1392 
1393   return false;
1394 }
1395 
1396 /* Return the length of unconditional jump instruction.  */
1397 
1398 int
get_uncond_jump_length(void)1399 get_uncond_jump_length (void)
1400 {
1401   int length;
1402 
1403   start_sequence ();
1404   rtx_code_label *label = emit_label (gen_label_rtx ());
1405   rtx_insn *jump = emit_jump_insn (targetm.gen_jump (label));
1406   length = get_attr_min_length (jump);
1407   end_sequence ();
1408 
1409   return length;
1410 }
1411 
1412 /* Create a forwarder block to OLD_BB starting with NEW_LABEL and in the
1413    other partition wrt OLD_BB.  */
1414 
1415 static basic_block
create_eh_forwarder_block(rtx_code_label * new_label,basic_block old_bb)1416 create_eh_forwarder_block (rtx_code_label *new_label, basic_block old_bb)
1417 {
1418   /* Split OLD_BB, so that EH pads have always only incoming EH edges,
1419      bb_has_eh_pred bbs are treated specially by DF infrastructure.  */
1420   old_bb = split_block_after_labels (old_bb)->dest;
1421 
1422   /* Put the new label and a jump in the new basic block.  */
1423   rtx_insn *label = emit_label (new_label);
1424   rtx_code_label *old_label = block_label (old_bb);
1425   rtx_insn *jump = emit_jump_insn (targetm.gen_jump (old_label));
1426   JUMP_LABEL (jump) = old_label;
1427 
1428   /* Create the new basic block and put it in last position.  */
1429   basic_block last_bb = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
1430   basic_block new_bb = create_basic_block (label, jump, last_bb);
1431   new_bb->aux = last_bb->aux;
1432   new_bb->count = old_bb->count;
1433   last_bb->aux = new_bb;
1434 
1435   emit_barrier_after_bb (new_bb);
1436 
1437   make_single_succ_edge (new_bb, old_bb, 0);
1438 
1439   /* Make sure the new basic block is in the other partition.  */
1440   unsigned new_partition = BB_PARTITION (old_bb);
1441   new_partition ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
1442   BB_SET_PARTITION (new_bb, new_partition);
1443 
1444   return new_bb;
1445 }
1446 
1447 /* The common landing pad in block OLD_BB has edges from both partitions.
1448    Add a new landing pad that will just jump to the old one and split the
1449    edges so that no EH edge crosses partitions.  */
1450 
1451 static void
sjlj_fix_up_crossing_landing_pad(basic_block old_bb)1452 sjlj_fix_up_crossing_landing_pad (basic_block old_bb)
1453 {
1454   const unsigned lp_len = cfun->eh->lp_array->length ();
1455   edge_iterator ei;
1456   edge e;
1457 
1458   /* Generate the new common landing-pad label.  */
1459   rtx_code_label *new_label = gen_label_rtx ();
1460   LABEL_PRESERVE_P (new_label) = 1;
1461 
1462   /* Create the forwarder block.  */
1463   basic_block new_bb = create_eh_forwarder_block (new_label, old_bb);
1464 
1465   /* Create the map from old to new lp index and initialize it.  */
1466   unsigned *index_map = (unsigned *) alloca (lp_len * sizeof (unsigned));
1467   memset (index_map, 0, lp_len * sizeof (unsigned));
1468 
1469   /* Fix up the edges.  */
1470   for (ei = ei_start (old_bb->preds); (e = ei_safe_edge (ei)) != NULL; )
1471     if (e->src != new_bb && BB_PARTITION (e->src) == BB_PARTITION (new_bb))
1472       {
1473           rtx_insn *insn = BB_END (e->src);
1474           rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
1475 
1476           gcc_assert (note != NULL);
1477           const unsigned old_index = INTVAL (XEXP (note, 0));
1478 
1479           /* Generate the new landing-pad structure.  */
1480           if (index_map[old_index] == 0)
1481             {
1482               eh_landing_pad old_lp = (*cfun->eh->lp_array)[old_index];
1483               eh_landing_pad new_lp = gen_eh_landing_pad (old_lp->region);
1484               new_lp->post_landing_pad = old_lp->post_landing_pad;
1485               new_lp->landing_pad = new_label;
1486               index_map[old_index] = new_lp->index;
1487             }
1488           XEXP (note, 0) = GEN_INT (index_map[old_index]);
1489 
1490           /* Adjust the edge to the new destination.  */
1491           redirect_edge_succ (e, new_bb);
1492       }
1493     else
1494       ei_next (&ei);
1495 }
1496 
1497 /* The landing pad OLD_LP, in block OLD_BB, has edges from both partitions.
1498    Add a new landing pad that will just jump to the old one and split the
1499    edges so that no EH edge crosses partitions.  */
1500 
1501 static void
dw2_fix_up_crossing_landing_pad(eh_landing_pad old_lp,basic_block old_bb)1502 dw2_fix_up_crossing_landing_pad (eh_landing_pad old_lp, basic_block old_bb)
1503 {
1504   eh_landing_pad new_lp;
1505   edge_iterator ei;
1506   edge e;
1507 
1508   /* Generate the new landing-pad structure.  */
1509   new_lp = gen_eh_landing_pad (old_lp->region);
1510   new_lp->post_landing_pad = old_lp->post_landing_pad;
1511   new_lp->landing_pad = gen_label_rtx ();
1512   LABEL_PRESERVE_P (new_lp->landing_pad) = 1;
1513 
1514   /* Create the forwarder block.  */
1515   basic_block new_bb = create_eh_forwarder_block (new_lp->landing_pad, old_bb);
1516 
1517   /* Fix up the edges.  */
1518   for (ei = ei_start (old_bb->preds); (e = ei_safe_edge (ei)) != NULL; )
1519     if (e->src != new_bb && BB_PARTITION (e->src) == BB_PARTITION (new_bb))
1520       {
1521           rtx_insn *insn = BB_END (e->src);
1522           rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
1523 
1524           gcc_assert (note != NULL);
1525           gcc_checking_assert (INTVAL (XEXP (note, 0)) == old_lp->index);
1526           XEXP (note, 0) = GEN_INT (new_lp->index);
1527 
1528           /* Adjust the edge to the new destination.  */
1529           redirect_edge_succ (e, new_bb);
1530       }
1531     else
1532       ei_next (&ei);
1533 }
1534 
1535 
1536 /* Ensure that all hot bbs are included in a hot path through the
1537    procedure. This is done by calling this function twice, once
1538    with WALK_UP true (to look for paths from the entry to hot bbs) and
1539    once with WALK_UP false (to look for paths from hot bbs to the exit).
1540    Returns the updated value of COLD_BB_COUNT and adds newly-hot bbs
1541    to BBS_IN_HOT_PARTITION.  */
1542 
1543 static unsigned int
sanitize_hot_paths(bool walk_up,unsigned int cold_bb_count,vec<basic_block> * bbs_in_hot_partition)1544 sanitize_hot_paths (bool walk_up, unsigned int cold_bb_count,
1545                     vec<basic_block> *bbs_in_hot_partition)
1546 {
1547   /* Callers check this.  */
1548   gcc_checking_assert (cold_bb_count);
1549 
1550   /* Keep examining hot bbs while we still have some left to check
1551      and there are remaining cold bbs.  */
1552   vec<basic_block> hot_bbs_to_check = bbs_in_hot_partition->copy ();
1553   while (! hot_bbs_to_check.is_empty ()
1554          && cold_bb_count)
1555     {
1556       basic_block bb = hot_bbs_to_check.pop ();
1557       vec<edge, va_gc> *edges = walk_up ? bb->preds : bb->succs;
1558       edge e;
1559       edge_iterator ei;
1560       profile_probability highest_probability
1561                                          = profile_probability::uninitialized ();
1562       profile_count highest_count = profile_count::uninitialized ();
1563       bool found = false;
1564 
1565       /* Walk the preds/succs and check if there is at least one already
1566          marked hot. Keep track of the most frequent pred/succ so that we
1567          can mark it hot if we don't find one.  */
1568       FOR_EACH_EDGE (e, ei, edges)
1569         {
1570           basic_block reach_bb = walk_up ? e->src : e->dest;
1571 
1572           if (e->flags & EDGE_DFS_BACK)
1573             continue;
1574 
1575             /* Do not expect profile insanities when profile was not adjusted.  */
1576             if (e->probability == profile_probability::never ()
1577                 || e->count () == profile_count::zero ())
1578               continue;
1579 
1580           if (BB_PARTITION (reach_bb) != BB_COLD_PARTITION)
1581           {
1582             found = true;
1583             break;
1584           }
1585           /* The following loop will look for the hottest edge via
1586              the edge count, if it is non-zero, then fallback to
1587              the edge probability.  */
1588           if (!(e->count () > highest_count))
1589             highest_count = e->count ();
1590           if (!highest_probability.initialized_p ()
1591                 || e->probability > highest_probability)
1592             highest_probability = e->probability;
1593         }
1594 
1595       /* If bb is reached by (or reaches, in the case of !WALK_UP) another hot
1596          block (or unpartitioned, e.g. the entry block) then it is ok. If not,
1597          then the most frequent pred (or succ) needs to be adjusted.  In the
1598          case where multiple preds/succs have the same frequency (e.g. a
1599          50-50 branch), then both will be adjusted.  */
1600       if (found)
1601         continue;
1602 
1603       FOR_EACH_EDGE (e, ei, edges)
1604         {
1605           if (e->flags & EDGE_DFS_BACK)
1606             continue;
1607             /* Do not expect profile insanities when profile was not adjusted.  */
1608             if (e->probability == profile_probability::never ()
1609                 || e->count () == profile_count::zero ())
1610               continue;
1611           /* Select the hottest edge using the edge count, if it is non-zero,
1612              then fallback to the edge probability.  */
1613           if (highest_count.initialized_p ())
1614             {
1615               if (!(e->count () >= highest_count))
1616                 continue;
1617             }
1618           else if (!(e->probability >= highest_probability))
1619             continue;
1620 
1621           basic_block reach_bb = walk_up ? e->src : e->dest;
1622 
1623           /* We have a hot bb with an immediate dominator that is cold.
1624              The dominator needs to be re-marked hot.  */
1625           BB_SET_PARTITION (reach_bb, BB_HOT_PARTITION);
1626             if (dump_file)
1627               fprintf (dump_file, "Promoting bb %i to hot partition to sanitize "
1628                          "profile of bb %i in %s walk\n", reach_bb->index,
1629                          bb->index, walk_up ? "backward" : "forward");
1630           cold_bb_count--;
1631 
1632           /* Now we need to examine newly-hot reach_bb to see if it is also
1633              dominated by a cold bb.  */
1634           bbs_in_hot_partition->safe_push (reach_bb);
1635           hot_bbs_to_check.safe_push (reach_bb);
1636         }
1637     }
1638   hot_bbs_to_check.release ();
1639 
1640   return cold_bb_count;
1641 }
1642 
1643 
1644 /* Find the basic blocks that are rarely executed and need to be moved to
1645    a separate section of the .o file (to cut down on paging and improve
1646    cache locality).  Return a vector of all edges that cross.  */
1647 
1648 static vec<edge>
find_rarely_executed_basic_blocks_and_crossing_edges(void)1649 find_rarely_executed_basic_blocks_and_crossing_edges (void)
1650 {
1651   vec<edge> crossing_edges = vNULL;
1652   basic_block bb;
1653   edge e;
1654   edge_iterator ei;
1655   unsigned int cold_bb_count = 0;
1656   auto_vec<basic_block> bbs_in_hot_partition;
1657 
1658   propagate_unlikely_bbs_forward ();
1659 
1660   /* Mark which partition (hot/cold) each basic block belongs in.  */
1661   FOR_EACH_BB_FN (bb, cfun)
1662     {
1663       bool cold_bb = false;
1664 
1665       if (probably_never_executed_bb_p (cfun, bb))
1666         {
1667           /* Handle profile insanities created by upstream optimizations
1668              by also checking the incoming edge weights. If there is a non-cold
1669              incoming edge, conservatively prevent this block from being split
1670              into the cold section.  */
1671           cold_bb = true;
1672           FOR_EACH_EDGE (e, ei, bb->preds)
1673             if (!probably_never_executed_edge_p (cfun, e))
1674               {
1675                 cold_bb = false;
1676                 break;
1677               }
1678         }
1679       if (cold_bb)
1680         {
1681           BB_SET_PARTITION (bb, BB_COLD_PARTITION);
1682           cold_bb_count++;
1683         }
1684       else
1685         {
1686           BB_SET_PARTITION (bb, BB_HOT_PARTITION);
1687           bbs_in_hot_partition.safe_push (bb);
1688         }
1689     }
1690 
1691   /* Ensure that hot bbs are included along a hot path from the entry to exit.
1692      Several different possibilities may include cold bbs along all paths
1693      to/from a hot bb. One is that there are edge weight insanities
1694      due to optimization phases that do not properly update basic block profile
1695      counts. The second is that the entry of the function may not be hot, because
1696      it is entered fewer times than the number of profile training runs, but there
1697      is a loop inside the function that causes blocks within the function to be
1698      above the threshold for hotness. This is fixed by walking up from hot bbs
1699      to the entry block, and then down from hot bbs to the exit, performing
1700      partitioning fixups as necessary.  */
1701   if (cold_bb_count)
1702     {
1703       mark_dfs_back_edges ();
1704       cold_bb_count = sanitize_hot_paths (true, cold_bb_count,
1705                                           &bbs_in_hot_partition);
1706       if (cold_bb_count)
1707         sanitize_hot_paths (false, cold_bb_count, &bbs_in_hot_partition);
1708 
1709       hash_set <basic_block> set;
1710       find_bbs_reachable_by_hot_paths (&set);
1711       FOR_EACH_BB_FN (bb, cfun)
1712           if (!set.contains (bb))
1713             BB_SET_PARTITION (bb, BB_COLD_PARTITION);
1714     }
1715 
1716   /* The format of .gcc_except_table does not allow landing pads to
1717      be in a different partition as the throw.  Fix this by either
1718      moving the landing pads or inserting forwarder landing pads.  */
1719   if (cfun->eh->lp_array)
1720     {
1721       const bool sjlj
1722           = (targetm_common.except_unwind_info (&global_options) == UI_SJLJ);
1723       unsigned i;
1724       eh_landing_pad lp;
1725 
1726       FOR_EACH_VEC_ELT (*cfun->eh->lp_array, i, lp)
1727           {
1728             bool all_same, all_diff;
1729 
1730             if (lp == NULL
1731                 || lp->landing_pad == NULL_RTX
1732                 || !LABEL_P (lp->landing_pad))
1733               continue;
1734 
1735             all_same = all_diff = true;
1736             bb = BLOCK_FOR_INSN (lp->landing_pad);
1737             FOR_EACH_EDGE (e, ei, bb->preds)
1738               {
1739                 gcc_assert (e->flags & EDGE_EH);
1740                 if (BB_PARTITION (bb) == BB_PARTITION (e->src))
1741                     all_diff = false;
1742                 else
1743                     all_same = false;
1744               }
1745 
1746             if (all_same)
1747               ;
1748             else if (all_diff)
1749               {
1750                 int which = BB_PARTITION (bb);
1751                 which ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
1752                 BB_SET_PARTITION (bb, which);
1753               }
1754             else if (sjlj)
1755               sjlj_fix_up_crossing_landing_pad (bb);
1756             else
1757               dw2_fix_up_crossing_landing_pad (lp, bb);
1758 
1759             /* There is a single, common landing pad in SJLJ mode.  */
1760             if (sjlj)
1761               break;
1762           }
1763     }
1764 
1765   /* Mark every edge that crosses between sections.  */
1766   FOR_EACH_BB_FN (bb, cfun)
1767     FOR_EACH_EDGE (e, ei, bb->succs)
1768       {
1769           unsigned int flags = e->flags;
1770 
1771         /* We should never have EDGE_CROSSING set yet.  */
1772           gcc_checking_assert ((flags & EDGE_CROSSING) == 0);
1773 
1774           if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
1775               && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1776               && BB_PARTITION (e->src) != BB_PARTITION (e->dest))
1777             {
1778               crossing_edges.safe_push (e);
1779               flags |= EDGE_CROSSING;
1780             }
1781 
1782           /* Now that we've split eh edges as appropriate, allow landing pads
1783              to be merged with the post-landing pads.  */
1784           flags &= ~EDGE_PRESERVE;
1785 
1786           e->flags = flags;
1787       }
1788 
1789   return crossing_edges;
1790 }
1791 
1792 /* Set the flag EDGE_CAN_FALLTHRU for edges that can be fallthru.  */
1793 
1794 static void
set_edge_can_fallthru_flag(void)1795 set_edge_can_fallthru_flag (void)
1796 {
1797   basic_block bb;
1798 
1799   FOR_EACH_BB_FN (bb, cfun)
1800     {
1801       edge e;
1802       edge_iterator ei;
1803 
1804       FOR_EACH_EDGE (e, ei, bb->succs)
1805           {
1806             e->flags &= ~EDGE_CAN_FALLTHRU;
1807 
1808             /* The FALLTHRU edge is also CAN_FALLTHRU edge.  */
1809             if (e->flags & EDGE_FALLTHRU)
1810               e->flags |= EDGE_CAN_FALLTHRU;
1811           }
1812 
1813       /* If the BB ends with an invertible condjump all (2) edges are
1814            CAN_FALLTHRU edges.  */
1815       if (EDGE_COUNT (bb->succs) != 2)
1816           continue;
1817       if (!any_condjump_p (BB_END (bb)))
1818           continue;
1819 
1820       rtx_jump_insn *bb_end_jump = as_a <rtx_jump_insn *> (BB_END (bb));
1821       if (!invert_jump (bb_end_jump, JUMP_LABEL (bb_end_jump), 0))
1822           continue;
1823       invert_jump (bb_end_jump, JUMP_LABEL (bb_end_jump), 0);
1824       EDGE_SUCC (bb, 0)->flags |= EDGE_CAN_FALLTHRU;
1825       EDGE_SUCC (bb, 1)->flags |= EDGE_CAN_FALLTHRU;
1826     }
1827 }
1828 
1829 /* If any destination of a crossing edge does not have a label, add label;
1830    Convert any easy fall-through crossing edges to unconditional jumps.  */
1831 
1832 static void
add_labels_and_missing_jumps(vec<edge> crossing_edges)1833 add_labels_and_missing_jumps (vec<edge> crossing_edges)
1834 {
1835   size_t i;
1836   edge e;
1837 
1838   FOR_EACH_VEC_ELT (crossing_edges, i, e)
1839     {
1840       basic_block src = e->src;
1841       basic_block dest = e->dest;
1842       rtx_jump_insn *new_jump;
1843 
1844       if (dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
1845           continue;
1846 
1847       /* Make sure dest has a label.  */
1848       rtx_code_label *label = block_label (dest);
1849 
1850       /* Nothing to do for non-fallthru edges.  */
1851       if (src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
1852           continue;
1853       if ((e->flags & EDGE_FALLTHRU) == 0)
1854           continue;
1855 
1856       /* If the block does not end with a control flow insn, then we
1857            can trivially add a jump to the end to fixup the crossing.
1858            Otherwise the jump will have to go in a new bb, which will
1859            be handled by fix_up_fall_thru_edges function.  */
1860       if (control_flow_insn_p (BB_END (src)))
1861           continue;
1862 
1863       /* Make sure there's only one successor.  */
1864       gcc_assert (single_succ_p (src));
1865 
1866       new_jump = emit_jump_insn_after (targetm.gen_jump (label), BB_END (src));
1867       BB_END (src) = new_jump;
1868       JUMP_LABEL (new_jump) = label;
1869       LABEL_NUSES (label) += 1;
1870 
1871       emit_barrier_after_bb (src);
1872 
1873       /* Mark edge as non-fallthru.  */
1874       e->flags &= ~EDGE_FALLTHRU;
1875     }
1876 }
1877 
1878 /* Find any bb's where the fall-through edge is a crossing edge (note that
1879    these bb's must also contain a conditional jump or end with a call
1880    instruction; we've already dealt with fall-through edges for blocks
1881    that didn't have a conditional jump or didn't end with call instruction
1882    in the call to add_labels_and_missing_jumps).  Convert the fall-through
1883    edge to non-crossing edge by inserting a new bb to fall-through into.
1884    The new bb will contain an unconditional jump (crossing edge) to the
1885    original fall through destination.  */
1886 
1887 static void
fix_up_fall_thru_edges(void)1888 fix_up_fall_thru_edges (void)
1889 {
1890   basic_block cur_bb;
1891 
1892   FOR_EACH_BB_FN (cur_bb, cfun)
1893     {
1894       edge succ1;
1895       edge succ2;
1896       edge fall_thru = NULL;
1897       edge cond_jump = NULL;
1898 
1899       fall_thru = NULL;
1900       if (EDGE_COUNT (cur_bb->succs) > 0)
1901           succ1 = EDGE_SUCC (cur_bb, 0);
1902       else
1903           succ1 = NULL;
1904 
1905       if (EDGE_COUNT (cur_bb->succs) > 1)
1906           succ2 = EDGE_SUCC (cur_bb, 1);
1907       else
1908           succ2 = NULL;
1909 
1910       /* Find the fall-through edge.  */
1911 
1912       if (succ1
1913             && (succ1->flags & EDGE_FALLTHRU))
1914           {
1915             fall_thru = succ1;
1916             cond_jump = succ2;
1917           }
1918       else if (succ2
1919                  && (succ2->flags & EDGE_FALLTHRU))
1920           {
1921             fall_thru = succ2;
1922             cond_jump = succ1;
1923           }
1924       else if (succ2 && EDGE_COUNT (cur_bb->succs) > 2)
1925           fall_thru = find_fallthru_edge (cur_bb->succs);
1926 
1927       if (fall_thru && (fall_thru->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)))
1928           {
1929             /* Check to see if the fall-thru edge is a crossing edge.  */
1930 
1931             if (fall_thru->flags & EDGE_CROSSING)
1932               {
1933                 /* The fall_thru edge crosses; now check the cond jump edge, if
1934                      it exists.  */
1935 
1936                 bool cond_jump_crosses = true;
1937                 int invert_worked = 0;
1938                 rtx_insn *old_jump = BB_END (cur_bb);
1939 
1940                 /* Find the jump instruction, if there is one.  */
1941 
1942                 if (cond_jump)
1943                     {
1944                       if (!(cond_jump->flags & EDGE_CROSSING))
1945                         cond_jump_crosses = false;
1946 
1947                       /* We know the fall-thru edge crosses; if the cond
1948                          jump edge does NOT cross, and its destination is the
1949                          next block in the bb order, invert the jump
1950                          (i.e. fix it so the fall through does not cross and
1951                          the cond jump does).  */
1952 
1953                       if (!cond_jump_crosses)
1954                         {
1955                           /* Find label in fall_thru block. We've already added
1956                                any missing labels, so there must be one.  */
1957 
1958                           rtx_code_label *fall_thru_label
1959                               = block_label (fall_thru->dest);
1960 
1961                           if (old_jump && fall_thru_label)
1962                               {
1963                                 rtx_jump_insn *old_jump_insn
1964                                   = dyn_cast <rtx_jump_insn *> (old_jump);
1965                                 if (old_jump_insn)
1966                                   invert_worked = invert_jump (old_jump_insn,
1967                                                                        fall_thru_label, 0);
1968                               }
1969 
1970                           if (invert_worked)
1971                               {
1972                                 fall_thru->flags &= ~EDGE_FALLTHRU;
1973                                 cond_jump->flags |= EDGE_FALLTHRU;
1974                                 update_br_prob_note (cur_bb);
1975                                 std::swap (fall_thru, cond_jump);
1976                                 cond_jump->flags |= EDGE_CROSSING;
1977                                 fall_thru->flags &= ~EDGE_CROSSING;
1978                               }
1979                         }
1980                     }
1981 
1982                 if (cond_jump_crosses || !invert_worked)
1983                     {
1984                       /* This is the case where both edges out of the basic
1985                          block are crossing edges. Here we will fix up the
1986                          fall through edge. The jump edge will be taken care
1987                          of later.  The EDGE_CROSSING flag of fall_thru edge
1988                          is unset before the call to force_nonfallthru
1989                          function because if a new basic-block is created
1990                          this edge remains in the current section boundary
1991                          while the edge between new_bb and the fall_thru->dest
1992                          becomes EDGE_CROSSING.  */
1993 
1994                       fall_thru->flags &= ~EDGE_CROSSING;
1995                       basic_block new_bb = force_nonfallthru (fall_thru);
1996 
1997                       if (new_bb)
1998                         {
1999                           new_bb->aux = cur_bb->aux;
2000                           cur_bb->aux = new_bb;
2001 
2002                       /* This is done by force_nonfallthru_and_redirect.  */
2003                           gcc_assert (BB_PARTITION (new_bb)
2004                                   == BB_PARTITION (cur_bb));
2005 
2006                           single_succ_edge (new_bb)->flags |= EDGE_CROSSING;
2007                         }
2008                       else
2009                         {
2010                           /* If a new basic-block was not created; restore
2011                                the EDGE_CROSSING flag.  */
2012                           fall_thru->flags |= EDGE_CROSSING;
2013                         }
2014 
2015                       /* Add barrier after new jump */
2016                       emit_barrier_after_bb (new_bb ? new_bb : cur_bb);
2017                     }
2018               }
2019           }
2020     }
2021 }
2022 
2023 /* This function checks the destination block of a "crossing jump" to
2024    see if it has any crossing predecessors that begin with a code label
2025    and end with an unconditional jump.  If so, it returns that predecessor
2026    block.  (This is to avoid creating lots of new basic blocks that all
2027    contain unconditional jumps to the same destination).  */
2028 
2029 static basic_block
find_jump_block(basic_block jump_dest)2030 find_jump_block (basic_block jump_dest)
2031 {
2032   basic_block source_bb = NULL;
2033   edge e;
2034   rtx_insn *insn;
2035   edge_iterator ei;
2036 
2037   FOR_EACH_EDGE (e, ei, jump_dest->preds)
2038     if (e->flags & EDGE_CROSSING)
2039       {
2040           basic_block src = e->src;
2041 
2042           /* Check each predecessor to see if it has a label, and contains
2043              only one executable instruction, which is an unconditional jump.
2044              If so, we can use it.  */
2045 
2046           if (LABEL_P (BB_HEAD (src)))
2047             for (insn = BB_HEAD (src);
2048                  !INSN_P (insn) && insn != NEXT_INSN (BB_END (src));
2049                  insn = NEXT_INSN (insn))
2050               {
2051                 if (INSN_P (insn)
2052                       && insn == BB_END (src)
2053                       && JUMP_P (insn)
2054                       && !any_condjump_p (insn))
2055                     {
2056                       source_bb = src;
2057                       break;
2058                     }
2059               }
2060 
2061           if (source_bb)
2062             break;
2063       }
2064 
2065   return source_bb;
2066 }
2067 
2068 /* Find all BB's with conditional jumps that are crossing edges;
2069    insert a new bb and make the conditional jump branch to the new
2070    bb instead (make the new bb same color so conditional branch won't
2071    be a 'crossing' edge).  Insert an unconditional jump from the
2072    new bb to the original destination of the conditional jump.  */
2073 
2074 static void
fix_crossing_conditional_branches(void)2075 fix_crossing_conditional_branches (void)
2076 {
2077   basic_block cur_bb;
2078   basic_block new_bb;
2079   basic_block dest;
2080   edge succ1;
2081   edge succ2;
2082   edge crossing_edge;
2083   edge new_edge;
2084   rtx set_src;
2085   rtx old_label = NULL_RTX;
2086   rtx_code_label *new_label;
2087 
2088   FOR_EACH_BB_FN (cur_bb, cfun)
2089     {
2090       crossing_edge = NULL;
2091       if (EDGE_COUNT (cur_bb->succs) > 0)
2092           succ1 = EDGE_SUCC (cur_bb, 0);
2093       else
2094           succ1 = NULL;
2095 
2096       if (EDGE_COUNT (cur_bb->succs) > 1)
2097           succ2 = EDGE_SUCC (cur_bb, 1);
2098       else
2099           succ2 = NULL;
2100 
2101       /* We already took care of fall-through edges, so only one successor
2102            can be a crossing edge.  */
2103 
2104       if (succ1 && (succ1->flags & EDGE_CROSSING))
2105           crossing_edge = succ1;
2106       else if (succ2 && (succ2->flags & EDGE_CROSSING))
2107           crossing_edge = succ2;
2108 
2109       if (crossing_edge)
2110           {
2111             rtx_insn *old_jump = BB_END (cur_bb);
2112 
2113             /* Check to make sure the jump instruction is a
2114                conditional jump.  */
2115 
2116             set_src = NULL_RTX;
2117 
2118             if (any_condjump_p (old_jump))
2119               {
2120                 if (GET_CODE (PATTERN (old_jump)) == SET)
2121                     set_src = SET_SRC (PATTERN (old_jump));
2122                 else if (GET_CODE (PATTERN (old_jump)) == PARALLEL)
2123                     {
2124                       set_src = XVECEXP (PATTERN (old_jump), 0,0);
2125                       if (GET_CODE (set_src) == SET)
2126                         set_src = SET_SRC (set_src);
2127                       else
2128                         set_src = NULL_RTX;
2129                     }
2130               }
2131 
2132             if (set_src && (GET_CODE (set_src) == IF_THEN_ELSE))
2133               {
2134                 rtx_jump_insn *old_jump_insn =
2135                               as_a <rtx_jump_insn *> (old_jump);
2136 
2137                 if (GET_CODE (XEXP (set_src, 1)) == PC)
2138                     old_label = XEXP (set_src, 2);
2139                 else if (GET_CODE (XEXP (set_src, 2)) == PC)
2140                     old_label = XEXP (set_src, 1);
2141 
2142                 /* Check to see if new bb for jumping to that dest has
2143                      already been created; if so, use it; if not, create
2144                      a new one.  */
2145 
2146                 new_bb = find_jump_block (crossing_edge->dest);
2147 
2148                 if (new_bb)
2149                     new_label = block_label (new_bb);
2150                 else
2151                     {
2152                       basic_block last_bb;
2153                       rtx_code_label *old_jump_target;
2154                       rtx_jump_insn *new_jump;
2155 
2156                       /* Create new basic block to be dest for
2157                          conditional jump.  */
2158 
2159                       /* Put appropriate instructions in new bb.  */
2160 
2161                       new_label = gen_label_rtx ();
2162                       emit_label (new_label);
2163 
2164                       gcc_assert (GET_CODE (old_label) == LABEL_REF);
2165                       old_jump_target = old_jump_insn->jump_target ();
2166                       new_jump = as_a <rtx_jump_insn *>
2167                         (emit_jump_insn (targetm.gen_jump (old_jump_target)));
2168                       new_jump->set_jump_target (old_jump_target);
2169 
2170                       last_bb = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb;
2171                       new_bb = create_basic_block (new_label, new_jump, last_bb);
2172                       new_bb->aux = last_bb->aux;
2173                       last_bb->aux = new_bb;
2174 
2175                       emit_barrier_after_bb (new_bb);
2176 
2177                       /* Make sure new bb is in same partition as source
2178                          of conditional branch.  */
2179                       BB_COPY_PARTITION (new_bb, cur_bb);
2180                     }
2181 
2182                 /* Make old jump branch to new bb.  */
2183 
2184                 redirect_jump (old_jump_insn, new_label, 0);
2185 
2186                 /* Remove crossing_edge as predecessor of 'dest'.  */
2187 
2188                 dest = crossing_edge->dest;
2189 
2190                 redirect_edge_succ (crossing_edge, new_bb);
2191 
2192                 /* Make a new edge from new_bb to old dest; new edge
2193                      will be a successor for new_bb and a predecessor
2194                      for 'dest'.  */
2195 
2196                 if (EDGE_COUNT (new_bb->succs) == 0)
2197                     new_edge = make_single_succ_edge (new_bb, dest, 0);
2198                 else
2199                     new_edge = EDGE_SUCC (new_bb, 0);
2200 
2201                 crossing_edge->flags &= ~EDGE_CROSSING;
2202                 new_edge->flags |= EDGE_CROSSING;
2203               }
2204           }
2205     }
2206 }
2207 
2208 /* Find any unconditional branches that cross between hot and cold
2209    sections.  Convert them into indirect jumps instead.  */
2210 
2211 static void
fix_crossing_unconditional_branches(void)2212 fix_crossing_unconditional_branches (void)
2213 {
2214   basic_block cur_bb;
2215   rtx_insn *last_insn;
2216   rtx label;
2217   rtx label_addr;
2218   rtx_insn *indirect_jump_sequence;
2219   rtx_insn *jump_insn = NULL;
2220   rtx new_reg;
2221   rtx_insn *cur_insn;
2222   edge succ;
2223 
2224   FOR_EACH_BB_FN (cur_bb, cfun)
2225     {
2226       last_insn = BB_END (cur_bb);
2227 
2228       if (EDGE_COUNT (cur_bb->succs) < 1)
2229           continue;
2230 
2231       succ = EDGE_SUCC (cur_bb, 0);
2232 
2233       /* Check to see if bb ends in a crossing (unconditional) jump.  At
2234            this point, no crossing jumps should be conditional.  */
2235 
2236       if (JUMP_P (last_insn)
2237             && (succ->flags & EDGE_CROSSING))
2238           {
2239             gcc_assert (!any_condjump_p (last_insn));
2240 
2241             /* Make sure the jump is not already an indirect or table jump.  */
2242 
2243             if (!computed_jump_p (last_insn)
2244                 && !tablejump_p (last_insn, NULL, NULL))
2245               {
2246                 /* We have found a "crossing" unconditional branch.  Now
2247                      we must convert it to an indirect jump.  First create
2248                      reference of label, as target for jump.  */
2249 
2250                 label = JUMP_LABEL (last_insn);
2251                 label_addr = gen_rtx_LABEL_REF (Pmode, label);
2252                 LABEL_NUSES (label) += 1;
2253 
2254                 /* Get a register to use for the indirect jump.  */
2255 
2256                 new_reg = gen_reg_rtx (Pmode);
2257 
2258                 /* Generate indirect the jump sequence.  */
2259 
2260                 start_sequence ();
2261                 emit_move_insn (new_reg, label_addr);
2262                 emit_indirect_jump (new_reg);
2263                 indirect_jump_sequence = get_insns ();
2264                 end_sequence ();
2265 
2266                 /* Make sure every instruction in the new jump sequence has
2267                      its basic block set to be cur_bb.  */
2268 
2269                 for (cur_insn = indirect_jump_sequence; cur_insn;
2270                        cur_insn = NEXT_INSN (cur_insn))
2271                     {
2272                       if (!BARRIER_P (cur_insn))
2273                         BLOCK_FOR_INSN (cur_insn) = cur_bb;
2274                       if (JUMP_P (cur_insn))
2275                         jump_insn = cur_insn;
2276                     }
2277 
2278                 /* Insert the new (indirect) jump sequence immediately before
2279                      the unconditional jump, then delete the unconditional jump.  */
2280 
2281                 emit_insn_before (indirect_jump_sequence, last_insn);
2282                 delete_insn (last_insn);
2283 
2284                 JUMP_LABEL (jump_insn) = label;
2285                 LABEL_NUSES (label)++;
2286 
2287                 /* Make BB_END for cur_bb be the jump instruction (NOT the
2288                      barrier instruction at the end of the sequence...).  */
2289 
2290                 BB_END (cur_bb) = jump_insn;
2291               }
2292           }
2293     }
2294 }
2295 
2296 /* Update CROSSING_JUMP_P flags on all jump insns.  */
2297 
2298 static void
update_crossing_jump_flags(void)2299 update_crossing_jump_flags (void)
2300 {
2301   basic_block bb;
2302   edge e;
2303   edge_iterator ei;
2304 
2305   FOR_EACH_BB_FN (bb, cfun)
2306     FOR_EACH_EDGE (e, ei, bb->succs)
2307       if (e->flags & EDGE_CROSSING)
2308           {
2309             if (JUMP_P (BB_END (bb)))
2310               CROSSING_JUMP_P (BB_END (bb)) = 1;
2311             break;
2312           }
2313 }
2314 
2315 /* Reorder basic blocks using the software trace cache (STC) algorithm.  */
2316 
2317 static void
reorder_basic_blocks_software_trace_cache(void)2318 reorder_basic_blocks_software_trace_cache (void)
2319 {
2320   if (dump_file)
2321     fprintf (dump_file, "\nReordering with the STC algorithm.\n\n");
2322 
2323   int n_traces;
2324   int i;
2325   struct trace *traces;
2326 
2327   /* We are estimating the length of uncond jump insn only once since the code
2328      for getting the insn length always returns the minimal length now.  */
2329   if (uncond_jump_length == 0)
2330     uncond_jump_length = get_uncond_jump_length ();
2331 
2332   /* We need to know some information for each basic block.  */
2333   array_size = GET_ARRAY_SIZE (last_basic_block_for_fn (cfun));
2334   bbd = XNEWVEC (bbro_basic_block_data, array_size);
2335   for (i = 0; i < array_size; i++)
2336     {
2337       bbd[i].start_of_trace = -1;
2338       bbd[i].end_of_trace = -1;
2339       bbd[i].in_trace = -1;
2340       bbd[i].visited = 0;
2341       bbd[i].priority = -1;
2342       bbd[i].heap = NULL;
2343       bbd[i].node = NULL;
2344     }
2345 
2346   traces = XNEWVEC (struct trace, n_basic_blocks_for_fn (cfun));
2347   n_traces = 0;
2348   find_traces (&n_traces, traces);
2349   connect_traces (n_traces, traces);
2350   FREE (traces);
2351   FREE (bbd);
2352 }
2353 
2354 /* Return true if edge E1 is more desirable as a fallthrough edge than
2355    edge E2 is.  */
2356 
2357 static bool
edge_order(edge e1,edge e2)2358 edge_order (edge e1, edge e2)
2359 {
2360   return e1->count () > e2->count ();
2361 }
2362 
2363 /* Reorder basic blocks using the "simple" algorithm.  This tries to
2364    maximize the dynamic number of branches that are fallthrough, without
2365    copying instructions.  The algorithm is greedy, looking at the most
2366    frequently executed branch first.  */
2367 
2368 static void
reorder_basic_blocks_simple(void)2369 reorder_basic_blocks_simple (void)
2370 {
2371   if (dump_file)
2372     fprintf (dump_file, "\nReordering with the \"simple\" algorithm.\n\n");
2373 
2374   edge *edges = new edge[2 * n_basic_blocks_for_fn (cfun)];
2375 
2376   /* First, collect all edges that can be optimized by reordering blocks:
2377      simple jumps and conditional jumps, as well as the function entry edge.  */
2378 
2379   int n = 0;
2380   edges[n++] = EDGE_SUCC (ENTRY_BLOCK_PTR_FOR_FN (cfun), 0);
2381 
2382   basic_block bb;
2383   FOR_EACH_BB_FN (bb, cfun)
2384     {
2385       rtx_insn *end = BB_END (bb);
2386 
2387       if (computed_jump_p (end) || tablejump_p (end, NULL, NULL))
2388           continue;
2389 
2390       /* We cannot optimize asm goto.  */
2391       if (JUMP_P (end) && extract_asm_operands (end))
2392           continue;
2393 
2394       if (single_succ_p (bb))
2395           edges[n++] = EDGE_SUCC (bb, 0);
2396       else if (any_condjump_p (end))
2397           {
2398             edge e0 = EDGE_SUCC (bb, 0);
2399             edge e1 = EDGE_SUCC (bb, 1);
2400             /* When optimizing for size it is best to keep the original
2401                fallthrough edges.  */
2402             if (e1->flags & EDGE_FALLTHRU)
2403               std::swap (e0, e1);
2404             edges[n++] = e0;
2405             edges[n++] = e1;
2406           }
2407     }
2408 
2409   /* Sort the edges, the most desirable first.  When optimizing for size
2410      all edges are equally desirable.  */
2411 
2412   if (optimize_function_for_speed_p (cfun))
2413     std::stable_sort (edges, edges + n, edge_order);
2414 
2415   /* Now decide which of those edges to make fallthrough edges.  We set
2416      BB_VISITED if a block already has a fallthrough successor assigned
2417      to it.  We make ->AUX of an endpoint point to the opposite endpoint
2418      of a sequence of blocks that fall through, and ->AUX will be NULL
2419      for a block that is in such a sequence but not an endpoint anymore.
2420 
2421      To start with, everything points to itself, nothing is assigned yet.  */
2422 
2423   FOR_ALL_BB_FN (bb, cfun)
2424     {
2425       bb->aux = bb;
2426       bb->flags &= ~BB_VISITED;
2427     }
2428 
2429   EXIT_BLOCK_PTR_FOR_FN (cfun)->aux = 0;
2430 
2431   /* Now for all edges, the most desirable first, see if that edge can
2432      connect two sequences.  If it can, update AUX and BB_VISITED; if it
2433      cannot, zero out the edge in the table.  */
2434 
2435   for (int j = 0; j < n; j++)
2436     {
2437       edge e = edges[j];
2438 
2439       basic_block tail_a = e->src;
2440       basic_block head_b = e->dest;
2441       basic_block head_a = (basic_block) tail_a->aux;
2442       basic_block tail_b = (basic_block) head_b->aux;
2443 
2444       /* An edge cannot connect two sequences if:
2445            - it crosses partitions;
2446            - its src is not a current endpoint;
2447            - its dest is not a current endpoint;
2448            - or, it would create a loop.  */
2449 
2450       if (e->flags & EDGE_CROSSING
2451             || tail_a->flags & BB_VISITED
2452             || !tail_b
2453             || (!(head_b->flags & BB_VISITED) && head_b != tail_b)
2454             || tail_a == tail_b)
2455           {
2456             edges[j] = 0;
2457             continue;
2458           }
2459 
2460       tail_a->aux = 0;
2461       head_b->aux = 0;
2462       head_a->aux = tail_b;
2463       tail_b->aux = head_a;
2464       tail_a->flags |= BB_VISITED;
2465     }
2466 
2467   /* Put the pieces together, in the same order that the start blocks of
2468      the sequences already had.  The hot/cold partitioning gives a little
2469      complication: as a first pass only do this for blocks in the same
2470      partition as the start block, and (if there is anything left to do)
2471      in a second pass handle the other partition.  */
2472 
2473   basic_block last_tail = (basic_block) ENTRY_BLOCK_PTR_FOR_FN (cfun)->aux;
2474 
2475   int current_partition
2476     = BB_PARTITION (last_tail == ENTRY_BLOCK_PTR_FOR_FN (cfun)
2477                         ? EDGE_SUCC (ENTRY_BLOCK_PTR_FOR_FN (cfun), 0)->dest
2478                         : last_tail);
2479   bool need_another_pass = true;
2480 
2481   for (int pass = 0; pass < 2 && need_another_pass; pass++)
2482     {
2483       need_another_pass = false;
2484 
2485       FOR_EACH_BB_FN (bb, cfun)
2486           if ((bb->flags & BB_VISITED && bb->aux) || bb->aux == bb)
2487             {
2488               if (BB_PARTITION (bb) != current_partition)
2489                 {
2490                     need_another_pass = true;
2491                     continue;
2492                 }
2493 
2494               last_tail->aux = bb;
2495               last_tail = (basic_block) bb->aux;
2496             }
2497 
2498       current_partition ^= BB_HOT_PARTITION | BB_COLD_PARTITION;
2499     }
2500 
2501   last_tail->aux = 0;
2502 
2503   /* Finally, link all the chosen fallthrough edges.  */
2504 
2505   for (int j = 0; j < n; j++)
2506     if (edges[j])
2507       edges[j]->src->aux = edges[j]->dest;
2508 
2509   delete[] edges;
2510 
2511   /* If the entry edge no longer falls through we have to make a new
2512      block so it can do so again.  */
2513 
2514   edge e = EDGE_SUCC (ENTRY_BLOCK_PTR_FOR_FN (cfun), 0);
2515   if (e->dest != ENTRY_BLOCK_PTR_FOR_FN (cfun)->aux)
2516     {
2517       force_nonfallthru (e);
2518       e->src->aux = ENTRY_BLOCK_PTR_FOR_FN (cfun)->aux;
2519     }
2520 }
2521 
2522 /* Reorder basic blocks.  The main entry point to this file.  */
2523 
2524 static void
reorder_basic_blocks(void)2525 reorder_basic_blocks (void)
2526 {
2527   gcc_assert (current_ir_type () == IR_RTL_CFGLAYOUT);
2528 
2529   if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1)
2530     return;
2531 
2532   set_edge_can_fallthru_flag ();
2533   mark_dfs_back_edges ();
2534 
2535   switch (flag_reorder_blocks_algorithm)
2536     {
2537     case REORDER_BLOCKS_ALGORITHM_SIMPLE:
2538       reorder_basic_blocks_simple ();
2539       break;
2540 
2541     case REORDER_BLOCKS_ALGORITHM_STC:
2542       reorder_basic_blocks_software_trace_cache ();
2543       break;
2544 
2545     default:
2546       gcc_unreachable ();
2547     }
2548 
2549   relink_block_chain (/*stay_in_cfglayout_mode=*/true);
2550 
2551   if (dump_file)
2552     {
2553       if (dump_flags & TDF_DETAILS)
2554           dump_reg_info (dump_file);
2555       dump_flow_info (dump_file, dump_flags);
2556     }
2557 
2558   /* Signal that rtl_verify_flow_info_1 can now verify that there
2559      is at most one switch between hot/cold sections.  */
2560   crtl->bb_reorder_complete = true;
2561 }
2562 
2563 /* Determine which partition the first basic block in the function
2564    belongs to, then find the first basic block in the current function
2565    that belongs to a different section, and insert a
2566    NOTE_INSN_SWITCH_TEXT_SECTIONS note immediately before it in the
2567    instruction stream.  When writing out the assembly code,
2568    encountering this note will make the compiler switch between the
2569    hot and cold text sections.  */
2570 
2571 void
insert_section_boundary_note(void)2572 insert_section_boundary_note (void)
2573 {
2574   basic_block bb;
2575   bool switched_sections = false;
2576   int current_partition = 0;
2577 
2578   if (!crtl->has_bb_partition)
2579     return;
2580 
2581   FOR_EACH_BB_FN (bb, cfun)
2582     {
2583       if (!current_partition)
2584           current_partition = BB_PARTITION (bb);
2585       if (BB_PARTITION (bb) != current_partition)
2586           {
2587             gcc_assert (!switched_sections);
2588           switched_sections = true;
2589           emit_note_before (NOTE_INSN_SWITCH_TEXT_SECTIONS, BB_HEAD (bb));
2590           current_partition = BB_PARTITION (bb);
2591           }
2592     }
2593 
2594   /* Make sure crtl->has_bb_partition matches reality even if bbpart finds
2595      some hot and some cold basic blocks, but later one of those kinds is
2596      optimized away.  */
2597   crtl->has_bb_partition = switched_sections;
2598 }
2599 
2600 namespace {
2601 
2602 const pass_data pass_data_reorder_blocks =
2603 {
2604   RTL_PASS, /* type */
2605   "bbro", /* name */
2606   OPTGROUP_NONE, /* optinfo_flags */
2607   TV_REORDER_BLOCKS, /* tv_id */
2608   0, /* properties_required */
2609   0, /* properties_provided */
2610   0, /* properties_destroyed */
2611   0, /* todo_flags_start */
2612   0, /* todo_flags_finish */
2613 };
2614 
2615 class pass_reorder_blocks : public rtl_opt_pass
2616 {
2617 public:
pass_reorder_blocks(gcc::context * ctxt)2618   pass_reorder_blocks (gcc::context *ctxt)
2619     : rtl_opt_pass (pass_data_reorder_blocks, ctxt)
2620   {}
2621 
2622   /* opt_pass methods: */
gate(function *)2623   virtual bool gate (function *)
2624     {
2625       if (targetm.cannot_modify_jumps_p ())
2626           return false;
2627       return (optimize > 0
2628                 && (flag_reorder_blocks || flag_reorder_blocks_and_partition));
2629     }
2630 
2631   virtual unsigned int execute (function *);
2632 
2633 }; // class pass_reorder_blocks
2634 
2635 unsigned int
execute(function * fun)2636 pass_reorder_blocks::execute (function *fun)
2637 {
2638   basic_block bb;
2639 
2640   /* Last attempt to optimize CFG, as scheduling, peepholing and insn
2641      splitting possibly introduced more crossjumping opportunities.  */
2642   cfg_layout_initialize (CLEANUP_EXPENSIVE);
2643 
2644   reorder_basic_blocks ();
2645   cleanup_cfg (CLEANUP_EXPENSIVE | CLEANUP_NO_PARTITIONING);
2646 
2647   FOR_EACH_BB_FN (bb, fun)
2648     if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (fun))
2649       bb->aux = bb->next_bb;
2650   cfg_layout_finalize ();
2651 
2652   return 0;
2653 }
2654 
2655 } // anon namespace
2656 
2657 rtl_opt_pass *
make_pass_reorder_blocks(gcc::context * ctxt)2658 make_pass_reorder_blocks (gcc::context *ctxt)
2659 {
2660   return new pass_reorder_blocks (ctxt);
2661 }
2662 
2663 /* Duplicate a block (that we already know ends in a computed jump) into its
2664    predecessors, where possible.  Return whether anything is changed.  */
2665 static bool
maybe_duplicate_computed_goto(basic_block bb,int max_size)2666 maybe_duplicate_computed_goto (basic_block bb, int max_size)
2667 {
2668   if (single_pred_p (bb))
2669     return false;
2670 
2671   /* Make sure that the block is small enough.  */
2672   rtx_insn *insn;
2673   FOR_BB_INSNS (bb, insn)
2674     if (INSN_P (insn))
2675       {
2676           max_size -= get_attr_min_length (insn);
2677           if (max_size < 0)
2678              return false;
2679       }
2680 
2681   bool changed = false;
2682   edge e;
2683   edge_iterator ei;
2684   for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei)); )
2685     {
2686       basic_block pred = e->src;
2687 
2688       /* Do not duplicate BB into PRED if that is the last predecessor, or if
2689            we cannot merge a copy of BB with PRED.  */
2690       if (single_pred_p (bb)
2691             || !single_succ_p (pred)
2692             || e->flags & EDGE_COMPLEX
2693             || pred->index < NUM_FIXED_BLOCKS
2694             || (JUMP_P (BB_END (pred)) && !simplejump_p (BB_END (pred)))
2695             || (JUMP_P (BB_END (pred)) && CROSSING_JUMP_P (BB_END (pred))))
2696           {
2697             ei_next (&ei);
2698             continue;
2699           }
2700 
2701       if (dump_file)
2702           fprintf (dump_file, "Duplicating computed goto bb %d into bb %d\n",
2703                      bb->index, e->src->index);
2704 
2705       /* Remember if PRED can be duplicated; if so, the copy of BB merged
2706            with PRED can be duplicated as well.  */
2707       bool can_dup_more = can_duplicate_block_p (pred);
2708 
2709       /* Make a copy of BB, merge it into PRED.  */
2710       basic_block copy = duplicate_block (bb, e, NULL);
2711       emit_barrier_after_bb (copy);
2712       reorder_insns_nobb (BB_HEAD (copy), BB_END (copy), BB_END (pred));
2713       merge_blocks (pred, copy);
2714 
2715       changed = true;
2716 
2717       /* Try to merge the resulting merged PRED into further predecessors.  */
2718       if (can_dup_more)
2719           maybe_duplicate_computed_goto (pred, max_size);
2720     }
2721 
2722   return changed;
2723 }
2724 
2725 /* Duplicate the blocks containing computed gotos.  This basically unfactors
2726    computed gotos that were factored early on in the compilation process to
2727    speed up edge based data flow.  We used to not unfactor them again, which
2728    can seriously pessimize code with many computed jumps in the source code,
2729    such as interpreters.  See e.g. PR15242.  */
2730 static void
duplicate_computed_gotos(function * fun)2731 duplicate_computed_gotos (function *fun)
2732 {
2733   /* We are estimating the length of uncond jump insn only once
2734      since the code for getting the insn length always returns
2735      the minimal length now.  */
2736   if (uncond_jump_length == 0)
2737     uncond_jump_length = get_uncond_jump_length ();
2738 
2739   /* Never copy a block larger than this.  */
2740   int max_size
2741     = uncond_jump_length * PARAM_VALUE (PARAM_MAX_GOTO_DUPLICATION_INSNS);
2742 
2743   bool changed = false;
2744 
2745   /* Try to duplicate all blocks that end in a computed jump and that
2746      can be duplicated at all.  */
2747   basic_block bb;
2748   FOR_EACH_BB_FN (bb, fun)
2749     if (computed_jump_p (BB_END (bb)) && can_duplicate_block_p (bb))
2750       changed |= maybe_duplicate_computed_goto (bb, max_size);
2751 
2752   /* Duplicating blocks will redirect edges and may cause hot blocks
2753     previously reached by both hot and cold blocks to become dominated
2754     only by cold blocks.  */
2755   if (changed)
2756     fixup_partitions ();
2757 }
2758 
2759 namespace {
2760 
2761 const pass_data pass_data_duplicate_computed_gotos =
2762 {
2763   RTL_PASS, /* type */
2764   "compgotos", /* name */
2765   OPTGROUP_NONE, /* optinfo_flags */
2766   TV_REORDER_BLOCKS, /* tv_id */
2767   0, /* properties_required */
2768   0, /* properties_provided */
2769   0, /* properties_destroyed */
2770   0, /* todo_flags_start */
2771   0, /* todo_flags_finish */
2772 };
2773 
2774 class pass_duplicate_computed_gotos : public rtl_opt_pass
2775 {
2776 public:
pass_duplicate_computed_gotos(gcc::context * ctxt)2777   pass_duplicate_computed_gotos (gcc::context *ctxt)
2778     : rtl_opt_pass (pass_data_duplicate_computed_gotos, ctxt)
2779   {}
2780 
2781   /* opt_pass methods: */
2782   virtual bool gate (function *);
2783   virtual unsigned int execute (function *);
2784 
2785 }; // class pass_duplicate_computed_gotos
2786 
2787 bool
gate(function * fun)2788 pass_duplicate_computed_gotos::gate (function *fun)
2789 {
2790   if (targetm.cannot_modify_jumps_p ())
2791     return false;
2792   return (optimize > 0
2793             && flag_expensive_optimizations
2794             && ! optimize_function_for_size_p (fun));
2795 }
2796 
2797 unsigned int
execute(function * fun)2798 pass_duplicate_computed_gotos::execute (function *fun)
2799 {
2800   duplicate_computed_gotos (fun);
2801 
2802   return 0;
2803 }
2804 
2805 } // anon namespace
2806 
2807 rtl_opt_pass *
make_pass_duplicate_computed_gotos(gcc::context * ctxt)2808 make_pass_duplicate_computed_gotos (gcc::context *ctxt)
2809 {
2810   return new pass_duplicate_computed_gotos (ctxt);
2811 }
2812 
2813 /* This function is the main 'entrance' for the optimization that
2814    partitions hot and cold basic blocks into separate sections of the
2815    .o file (to improve performance and cache locality).  Ideally it
2816    would be called after all optimizations that rearrange the CFG have
2817    been called.  However part of this optimization may introduce new
2818    register usage, so it must be called before register allocation has
2819    occurred.  This means that this optimization is actually called
2820    well before the optimization that reorders basic blocks (see
2821    function above).
2822 
2823    This optimization checks the feedback information to determine
2824    which basic blocks are hot/cold, updates flags on the basic blocks
2825    to indicate which section they belong in.  This information is
2826    later used for writing out sections in the .o file.  Because hot
2827    and cold sections can be arbitrarily large (within the bounds of
2828    memory), far beyond the size of a single function, it is necessary
2829    to fix up all edges that cross section boundaries, to make sure the
2830    instructions used can actually span the required distance.  The
2831    fixes are described below.
2832 
2833    Fall-through edges must be changed into jumps; it is not safe or
2834    legal to fall through across a section boundary.  Whenever a
2835    fall-through edge crossing a section boundary is encountered, a new
2836    basic block is inserted (in the same section as the fall-through
2837    source), and the fall through edge is redirected to the new basic
2838    block.  The new basic block contains an unconditional jump to the
2839    original fall-through target.  (If the unconditional jump is
2840    insufficient to cross section boundaries, that is dealt with a
2841    little later, see below).
2842 
2843    In order to deal with architectures that have short conditional
2844    branches (which cannot span all of memory) we take any conditional
2845    jump that attempts to cross a section boundary and add a level of
2846    indirection: it becomes a conditional jump to a new basic block, in
2847    the same section.  The new basic block contains an unconditional
2848    jump to the original target, in the other section.
2849 
2850    For those architectures whose unconditional branch is also
2851    incapable of reaching all of memory, those unconditional jumps are
2852    converted into indirect jumps, through a register.
2853 
2854    IMPORTANT NOTE: This optimization causes some messy interactions
2855    with the cfg cleanup optimizations; those optimizations want to
2856    merge blocks wherever possible, and to collapse indirect jump
2857    sequences (change "A jumps to B jumps to C" directly into "A jumps
2858    to C").  Those optimizations can undo the jump fixes that
2859    partitioning is required to make (see above), in order to ensure
2860    that jumps attempting to cross section boundaries are really able
2861    to cover whatever distance the jump requires (on many architectures
2862    conditional or unconditional jumps are not able to reach all of
2863    memory).  Therefore tests have to be inserted into each such
2864    optimization to make sure that it does not undo stuff necessary to
2865    cross partition boundaries.  This would be much less of a problem
2866    if we could perform this optimization later in the compilation, but
2867    unfortunately the fact that we may need to create indirect jumps
2868    (through registers) requires that this optimization be performed
2869    before register allocation.
2870 
2871    Hot and cold basic blocks are partitioned and put in separate
2872    sections of the .o file, to reduce paging and improve cache
2873    performance (hopefully).  This can result in bits of code from the
2874    same function being widely separated in the .o file.  However this
2875    is not obvious to the current bb structure.  Therefore we must take
2876    care to ensure that: 1). There are no fall_thru edges that cross
2877    between sections; 2). For those architectures which have "short"
2878    conditional branches, all conditional branches that attempt to
2879    cross between sections are converted to unconditional branches;
2880    and, 3). For those architectures which have "short" unconditional
2881    branches, all unconditional branches that attempt to cross between
2882    sections are converted to indirect jumps.
2883 
2884    The code for fixing up fall_thru edges that cross between hot and
2885    cold basic blocks does so by creating new basic blocks containing
2886    unconditional branches to the appropriate label in the "other"
2887    section.  The new basic block is then put in the same (hot or cold)
2888    section as the original conditional branch, and the fall_thru edge
2889    is modified to fall into the new basic block instead.  By adding
2890    this level of indirection we end up with only unconditional branches
2891    crossing between hot and cold sections.
2892 
2893    Conditional branches are dealt with by adding a level of indirection.
2894    A new basic block is added in the same (hot/cold) section as the
2895    conditional branch, and the conditional branch is retargeted to the
2896    new basic block.  The new basic block contains an unconditional branch
2897    to the original target of the conditional branch (in the other section).
2898 
2899    Unconditional branches are dealt with by converting them into
2900    indirect jumps.  */
2901 
2902 namespace {
2903 
2904 const pass_data pass_data_partition_blocks =
2905 {
2906   RTL_PASS, /* type */
2907   "bbpart", /* name */
2908   OPTGROUP_NONE, /* optinfo_flags */
2909   TV_REORDER_BLOCKS, /* tv_id */
2910   PROP_cfglayout, /* properties_required */
2911   0, /* properties_provided */
2912   0, /* properties_destroyed */
2913   0, /* todo_flags_start */
2914   0, /* todo_flags_finish */
2915 };
2916 
2917 class pass_partition_blocks : public rtl_opt_pass
2918 {
2919 public:
pass_partition_blocks(gcc::context * ctxt)2920   pass_partition_blocks (gcc::context *ctxt)
2921     : rtl_opt_pass (pass_data_partition_blocks, ctxt)
2922   {}
2923 
2924   /* opt_pass methods: */
2925   virtual bool gate (function *);
2926   virtual unsigned int execute (function *);
2927 
2928 }; // class pass_partition_blocks
2929 
2930 bool
gate(function * fun)2931 pass_partition_blocks::gate (function *fun)
2932 {
2933   /* The optimization to partition hot/cold basic blocks into separate
2934      sections of the .o file does not work well with linkonce or with
2935      user defined section attributes or with naked attribute.  Don't call
2936      it if either case arises.  */
2937   return (flag_reorder_blocks_and_partition
2938             && optimize
2939             /* See pass_reorder_blocks::gate.  We should not partition if
2940                we are going to omit the reordering.  */
2941             && optimize_function_for_speed_p (fun)
2942             && !DECL_COMDAT_GROUP (current_function_decl)
2943             && !lookup_attribute ("section", DECL_ATTRIBUTES (fun->decl))
2944             && !lookup_attribute ("naked", DECL_ATTRIBUTES (fun->decl))
2945             /* Workaround a bug in GDB where read_partial_die doesn't cope
2946                with DIEs with DW_AT_ranges, see PR81115.  */
2947             && !(in_lto_p && MAIN_NAME_P (DECL_NAME (fun->decl))));
2948 }
2949 
2950 unsigned
execute(function * fun)2951 pass_partition_blocks::execute (function *fun)
2952 {
2953   vec<edge> crossing_edges;
2954 
2955   if (n_basic_blocks_for_fn (fun) <= NUM_FIXED_BLOCKS + 1)
2956     return 0;
2957 
2958   df_set_flags (DF_DEFER_INSN_RESCAN);
2959 
2960   crossing_edges = find_rarely_executed_basic_blocks_and_crossing_edges ();
2961   if (!crossing_edges.exists ())
2962     /* Make sure to process deferred rescans and clear changeable df flags.  */
2963     return TODO_df_finish;
2964 
2965   crtl->has_bb_partition = true;
2966 
2967   /* Make sure the source of any crossing edge ends in a jump and the
2968      destination of any crossing edge has a label.  */
2969   add_labels_and_missing_jumps (crossing_edges);
2970 
2971   /* Convert all crossing fall_thru edges to non-crossing fall
2972      thrus to unconditional jumps (that jump to the original fall
2973      through dest).  */
2974   fix_up_fall_thru_edges ();
2975 
2976   /* If the architecture does not have conditional branches that can
2977      span all of memory, convert crossing conditional branches into
2978      crossing unconditional branches.  */
2979   if (!HAS_LONG_COND_BRANCH)
2980     fix_crossing_conditional_branches ();
2981 
2982   /* If the architecture does not have unconditional branches that
2983      can span all of memory, convert crossing unconditional branches
2984      into indirect jumps.  Since adding an indirect jump also adds
2985      a new register usage, update the register usage information as
2986      well.  */
2987   if (!HAS_LONG_UNCOND_BRANCH)
2988     fix_crossing_unconditional_branches ();
2989 
2990   update_crossing_jump_flags ();
2991 
2992   /* Clear bb->aux fields that the above routines were using.  */
2993   clear_aux_for_blocks ();
2994 
2995   crossing_edges.release ();
2996 
2997   /* ??? FIXME: DF generates the bb info for a block immediately.
2998      And by immediately, I mean *during* creation of the block.
2999 
3000           #0  df_bb_refs_collect
3001           #1  in df_bb_refs_record
3002           #2  in create_basic_block_structure
3003 
3004      Which means that the bb_has_eh_pred test in df_bb_refs_collect
3005      will *always* fail, because no edges can have been added to the
3006      block yet.  Which of course means we don't add the right
3007      artificial refs, which means we fail df_verify (much) later.
3008 
3009      Cleanest solution would seem to make DF_DEFER_INSN_RESCAN imply
3010      that we also shouldn't grab data from the new blocks those new
3011      insns are in either.  In this way one can create the block, link
3012      it up properly, and have everything Just Work later, when deferred
3013      insns are processed.
3014 
3015      In the meantime, we have no other option but to throw away all
3016      of the DF data and recompute it all.  */
3017   if (fun->eh->lp_array)
3018     {
3019       df_finish_pass (true);
3020       df_scan_alloc (NULL);
3021       df_scan_blocks ();
3022       /* Not all post-landing pads use all of the EH_RETURN_DATA_REGNO
3023            data.  We blindly generated all of them when creating the new
3024            landing pad.  Delete those assignments we don't use.  */
3025       df_set_flags (DF_LR_RUN_DCE);
3026       df_analyze ();
3027     }
3028 
3029   /* Make sure to process deferred rescans and clear changeable df flags.  */
3030   return TODO_df_finish;
3031 }
3032 
3033 } // anon namespace
3034 
3035 rtl_opt_pass *
make_pass_partition_blocks(gcc::context * ctxt)3036 make_pass_partition_blocks (gcc::context *ctxt)
3037 {
3038   return new pass_partition_blocks (ctxt);
3039 }
3040