1 /* Interprocedural constant propagation
2    Copyright (C) 2005-2022 Free Software Foundation, Inc.
3 
4    Contributed by Razya Ladelsky <RAZYA@il.ibm.com> and Martin Jambor
5    <mjambor@suse.cz>
6 
7 This file is part of GCC.
8 
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13 
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17 for more details.
18 
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3.  If not see
21 <http://www.gnu.org/licenses/>.  */
22 
23 /* Interprocedural constant propagation (IPA-CP).
24 
25    The goal of this transformation is to
26 
27    1) discover functions which are always invoked with some arguments with the
28       same known constant values and modify the functions so that the
29       subsequent optimizations can take advantage of the knowledge, and
30 
31    2) partial specialization - create specialized versions of functions
32       transformed in this way if some parameters are known constants only in
33       certain contexts but the estimated tradeoff between speedup and cost size
34       is deemed good.
35 
36    The algorithm also propagates types and attempts to perform type based
37    devirtualization.  Types are propagated much like constants.
38 
39    The algorithm basically consists of three stages.  In the first, functions
40    are analyzed one at a time and jump functions are constructed for all known
41    call-sites.  In the second phase, the pass propagates information from the
42    jump functions across the call to reveal what values are available at what
43    call sites, performs estimations of effects of known values on functions and
44    their callees, and finally decides what specialized extra versions should be
45    created.  In the third, the special versions materialize and appropriate
46    calls are redirected.
47 
48    The algorithm used is to a certain extent based on "Interprocedural Constant
49    Propagation", by David Callahan, Keith D Cooper, Ken Kennedy, Linda Torczon,
50    Comp86, pg 152-161 and "A Methodology for Procedure Cloning" by Keith D
51    Cooper, Mary W. Hall, and Ken Kennedy.
52 
53 
54    First stage - intraprocedural analysis
55    =======================================
56 
57    This phase computes jump_function and modification flags.
58 
59    A jump function for a call-site represents the values passed as an actual
60    arguments of a given call-site. In principle, there are three types of
61    values:
62 
63    Pass through - the caller's formal parameter is passed as an actual
64                       argument, plus an operation on it can be performed.
65    Constant - a constant is passed as an actual argument.
66    Unknown - neither of the above.
67 
68    All jump function types are described in detail in ipa-prop.h, together with
69    the data structures that represent them and methods of accessing them.
70 
71    ipcp_generate_summary() is the main function of the first stage.
72 
73    Second stage - interprocedural analysis
74    ========================================
75 
76    This stage is itself divided into two phases.  In the first, we propagate
77    known values over the call graph, in the second, we make cloning decisions.
78    It uses a different algorithm than the original Callahan's paper.
79 
80    First, we traverse the functions topologically from callers to callees and,
81    for each strongly connected component (SCC), we propagate constants
82    according to previously computed jump functions.  We also record what known
83    values depend on other known values and estimate local effects.  Finally, we
84    propagate cumulative information about these effects from dependent values
85    to those on which they depend.
86 
87    Second, we again traverse the call graph in the same topological order and
88    make clones for functions which we know are called with the same values in
89    all contexts and decide about extra specialized clones of functions just for
90    some contexts - these decisions are based on both local estimates and
91    cumulative estimates propagated from callees.
92 
93    ipcp_propagate_stage() and ipcp_decision_stage() together constitute the
94    third stage.
95 
96    Third phase - materialization of clones, call statement updates.
97    ============================================
98 
99    This stage is currently performed by call graph code (mainly in cgraphunit.cc
100    and tree-inline.cc) according to instructions inserted to the call graph by
101    the second stage.  */
102 
103 #include "config.h"
104 #include "system.h"
105 #include "coretypes.h"
106 #include "backend.h"
107 #include "tree.h"
108 #include "gimple-expr.h"
109 #include "gimple.h"
110 #include "predict.h"
111 #include "alloc-pool.h"
112 #include "tree-pass.h"
113 #include "cgraph.h"
114 #include "diagnostic.h"
115 #include "fold-const.h"
116 #include "gimple-fold.h"
117 #include "symbol-summary.h"
118 #include "tree-vrp.h"
119 #include "ipa-prop.h"
120 #include "tree-pretty-print.h"
121 #include "tree-inline.h"
122 #include "ipa-fnsummary.h"
123 #include "ipa-utils.h"
124 #include "tree-ssa-ccp.h"
125 #include "stringpool.h"
126 #include "attribs.h"
127 #include "dbgcnt.h"
128 #include "symtab-clones.h"
129 
130 template <typename valtype> class ipcp_value;
131 
132 /* Describes a particular source for an IPA-CP value.  */
133 
134 template <typename valtype>
135 struct ipcp_value_source
136 {
137 public:
138   /* Aggregate offset of the source, negative if the source is scalar value of
139      the argument itself.  */
140   HOST_WIDE_INT offset;
141   /* The incoming edge that brought the value.  */
142   cgraph_edge *cs;
143   /* If the jump function that resulted into his value was a pass-through or an
144      ancestor, this is the ipcp_value of the caller from which the described
145      value has been derived.  Otherwise it is NULL.  */
146   ipcp_value<valtype> *val;
147   /* Next pointer in a linked list of sources of a value.  */
148   ipcp_value_source *next;
149   /* If the jump function that resulted into his value was a pass-through or an
150      ancestor, this is the index of the parameter of the caller the jump
151      function references.  */
152   int index;
153 };
154 
155 /* Common ancestor for all ipcp_value instantiations.  */
156 
157 class ipcp_value_base
158 {
159 public:
160   /* Time benefit and that specializing the function for this value would bring
161      about in this function alone.  */
162   sreal local_time_benefit;
163   /* Time benefit that specializing the function for this value can bring about
164      in it's callees.  */
165   sreal prop_time_benefit;
166   /* Size cost that specializing the function for this value would bring about
167      in this function alone.  */
168   int local_size_cost;
169   /* Size cost that specializing the function for this value can bring about in
170      it's callees.  */
171   int prop_size_cost;
172 
ipcp_value_base()173   ipcp_value_base ()
174     : local_time_benefit (0), prop_time_benefit (0),
175       local_size_cost (0), prop_size_cost (0) {}
176 };
177 
178 /* Describes one particular value stored in struct ipcp_lattice.  */
179 
180 template <typename valtype>
181 class ipcp_value : public ipcp_value_base
182 {
183 public:
184   /* The actual value for the given parameter.  */
185   valtype value;
186   /* The list of sources from which this value originates.  */
187   ipcp_value_source <valtype> *sources = nullptr;
188   /* Next pointers in a linked list of all values in a lattice.  */
189   ipcp_value *next = nullptr;
190   /* Next pointers in a linked list of values in a strongly connected component
191      of values. */
192   ipcp_value *scc_next = nullptr;
193   /* Next pointers in a linked list of SCCs of values sorted topologically
194      according their sources.  */
195   ipcp_value  *topo_next = nullptr;
196   /* A specialized node created for this value, NULL if none has been (so far)
197      created.  */
198   cgraph_node *spec_node = nullptr;
199   /* Depth first search number and low link for topological sorting of
200      values.  */
201   int dfs = 0;
202   int low_link = 0;
203   /* SCC number to identify values which recursively feed into each other.
204      Values in the same SCC have the same SCC number.  */
205   int scc_no = 0;
206   /* Non zero if the value is generated from another value in the same lattice
207      for a self-recursive call, the actual number is how many times the
208      operation has been performed.  In the unlikely event of the value being
209      present in two chains fo self-recursive value generation chains, it is the
210      maximum.  */
211   unsigned self_recursion_generated_level = 0;
212   /* True if this value is currently on the topo-sort stack.  */
213   bool on_stack = false;
214 
215   void add_source (cgraph_edge *cs, ipcp_value *src_val, int src_idx,
216                        HOST_WIDE_INT offset);
217 
218   /* Return true if both THIS value and O feed into each other.  */
219 
same_scc(const ipcp_value<valtype> * o)220   bool same_scc (const ipcp_value<valtype> *o)
221   {
222     return o->scc_no == scc_no;
223   }
224 
225 /* Return true, if a this value has been generated for a self-recursive call as
226    a result of an arithmetic pass-through jump-function acting on a value in
227    the same lattice function.  */
228 
self_recursion_generated_p()229   bool self_recursion_generated_p ()
230   {
231     return self_recursion_generated_level > 0;
232   }
233 };
234 
235 /* Lattice describing potential values of a formal parameter of a function, or
236    a part of an aggregate.  TOP is represented by a lattice with zero values
237    and with contains_variable and bottom flags cleared.  BOTTOM is represented
238    by a lattice with the bottom flag set.  In that case, values and
239    contains_variable flag should be disregarded.  */
240 
241 template <typename valtype>
242 struct ipcp_lattice
243 {
244 public:
245   /* The list of known values and types in this lattice.  Note that values are
246      not deallocated if a lattice is set to bottom because there may be value
247      sources referencing them.  */
248   ipcp_value<valtype> *values;
249   /* Number of known values and types in this lattice.  */
250   int values_count;
251   /* The lattice contains a variable component (in addition to values).  */
252   bool contains_variable;
253   /* The value of the lattice is bottom (i.e. variable and unusable for any
254      propagation).  */
255   bool bottom;
256 
257   inline bool is_single_const ();
258   inline bool set_to_bottom ();
259   inline bool set_contains_variable ();
260   bool add_value (valtype newval, cgraph_edge *cs,
261                       ipcp_value<valtype> *src_val = NULL,
262                       int src_idx = 0, HOST_WIDE_INT offset = -1,
263                       ipcp_value<valtype> **val_p = NULL,
264                       unsigned same_lat_gen_level = 0);
265   void print (FILE * f, bool dump_sources, bool dump_benefits);
266 };
267 
268 /* Lattice of tree values with an offset to describe a part of an
269    aggregate.  */
270 
271 struct ipcp_agg_lattice : public ipcp_lattice<tree>
272 {
273 public:
274   /* Offset that is being described by this lattice. */
275   HOST_WIDE_INT offset;
276   /* Size so that we don't have to re-compute it every time we traverse the
277      list.  Must correspond to TYPE_SIZE of all lat values.  */
278   HOST_WIDE_INT size;
279   /* Next element of the linked list.  */
280   struct ipcp_agg_lattice *next;
281 };
282 
283 /* Lattice of known bits, only capable of holding one value.
284    Bitwise constant propagation propagates which bits of a
285    value are constant.
286    For eg:
287    int f(int x)
288    {
289      return some_op (x);
290    }
291 
292    int f1(int y)
293    {
294      if (cond)
295       return f (y & 0xff);
296      else
297       return f (y & 0xf);
298    }
299 
300    In the above case, the param 'x' will always have all
301    the bits (except the bits in lsb) set to 0.
302    Hence the mask of 'x' would be 0xff. The mask
303    reflects that the bits in lsb are unknown.
304    The actual propagated value is given by m_value & ~m_mask.  */
305 
306 class ipcp_bits_lattice
307 {
308 public:
bottom_p() const309   bool bottom_p () const { return m_lattice_val == IPA_BITS_VARYING; }
top_p() const310   bool top_p () const { return m_lattice_val == IPA_BITS_UNDEFINED; }
constant_p() const311   bool constant_p () const { return m_lattice_val == IPA_BITS_CONSTANT; }
312   bool set_to_bottom ();
313   bool set_to_constant (widest_int, widest_int);
314   bool known_nonzero_p () const;
315 
get_value() const316   widest_int get_value () const { return m_value; }
get_mask() const317   widest_int get_mask () const { return m_mask; }
318 
319   bool meet_with (ipcp_bits_lattice& other, unsigned, signop,
320                       enum tree_code, tree, bool);
321 
322   bool meet_with (widest_int, widest_int, unsigned);
323 
324   void print (FILE *);
325 
326 private:
327   enum { IPA_BITS_UNDEFINED, IPA_BITS_CONSTANT, IPA_BITS_VARYING } m_lattice_val;
328 
329   /* Similar to ccp_lattice_t, mask represents which bits of value are constant.
330      If a bit in mask is set to 0, then the corresponding bit in
331      value is known to be constant.  */
332   widest_int m_value, m_mask;
333 
334   bool meet_with_1 (widest_int, widest_int, unsigned, bool);
335   void get_value_and_mask (tree, widest_int *, widest_int *);
336 };
337 
338 /* Lattice of value ranges.  */
339 
340 class ipcp_vr_lattice
341 {
342 public:
343   value_range m_vr;
344 
345   inline bool bottom_p () const;
346   inline bool top_p () const;
347   inline bool set_to_bottom ();
348   bool meet_with (const value_range *p_vr);
349   bool meet_with (const ipcp_vr_lattice &other);
init()350   void init () { gcc_assert (m_vr.undefined_p ()); }
351   void print (FILE * f);
352 
353 private:
354   bool meet_with_1 (const value_range *other_vr);
355 };
356 
357 /* Structure containing lattices for a parameter itself and for pieces of
358    aggregates that are passed in the parameter or by a reference in a parameter
359    plus some other useful flags.  */
360 
361 class ipcp_param_lattices
362 {
363 public:
364   /* Lattice describing the value of the parameter itself.  */
365   ipcp_lattice<tree> itself;
366   /* Lattice describing the polymorphic contexts of a parameter.  */
367   ipcp_lattice<ipa_polymorphic_call_context> ctxlat;
368   /* Lattices describing aggregate parts.  */
369   ipcp_agg_lattice *aggs;
370   /* Lattice describing known bits.  */
371   ipcp_bits_lattice bits_lattice;
372   /* Lattice describing value range.  */
373   ipcp_vr_lattice m_value_range;
374   /* Number of aggregate lattices */
375   int aggs_count;
376   /* True if aggregate data were passed by reference (as opposed to by
377      value).  */
378   bool aggs_by_ref;
379   /* All aggregate lattices contain a variable component (in addition to
380      values).  */
381   bool aggs_contain_variable;
382   /* The value of all aggregate lattices is bottom (i.e. variable and unusable
383      for any propagation).  */
384   bool aggs_bottom;
385 
386   /* There is a virtual call based on this parameter.  */
387   bool virt_call;
388 };
389 
390 /* Allocation pools for values and their sources in ipa-cp.  */
391 
392 object_allocator<ipcp_value<tree> > ipcp_cst_values_pool
393   ("IPA-CP constant values");
394 
395 object_allocator<ipcp_value<ipa_polymorphic_call_context> >
396   ipcp_poly_ctx_values_pool ("IPA-CP polymorphic contexts");
397 
398 object_allocator<ipcp_value_source<tree> > ipcp_sources_pool
399   ("IPA-CP value sources");
400 
401 object_allocator<ipcp_agg_lattice> ipcp_agg_lattice_pool
402   ("IPA_CP aggregate lattices");
403 
404 /* Base count to use in heuristics when using profile feedback.  */
405 
406 static profile_count base_count;
407 
408 /* Original overall size of the program.  */
409 
410 static long overall_size, orig_overall_size;
411 
412 /* Node name to unique clone suffix number map.  */
413 static hash_map<const char *, unsigned> *clone_num_suffixes;
414 
415 /* Return the param lattices structure corresponding to the Ith formal
416    parameter of the function described by INFO.  */
417 static inline class ipcp_param_lattices *
ipa_get_parm_lattices(class ipa_node_params * info,int i)418 ipa_get_parm_lattices (class ipa_node_params *info, int i)
419 {
420   gcc_assert (i >= 0 && i < ipa_get_param_count (info));
421   gcc_checking_assert (!info->ipcp_orig_node);
422   gcc_checking_assert (info->lattices);
423   return &(info->lattices[i]);
424 }
425 
426 /* Return the lattice corresponding to the scalar value of the Ith formal
427    parameter of the function described by INFO.  */
428 static inline ipcp_lattice<tree> *
ipa_get_scalar_lat(class ipa_node_params * info,int i)429 ipa_get_scalar_lat (class ipa_node_params *info, int i)
430 {
431   class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
432   return &plats->itself;
433 }
434 
435 /* Return the lattice corresponding to the scalar value of the Ith formal
436    parameter of the function described by INFO.  */
437 static inline ipcp_lattice<ipa_polymorphic_call_context> *
ipa_get_poly_ctx_lat(class ipa_node_params * info,int i)438 ipa_get_poly_ctx_lat (class ipa_node_params *info, int i)
439 {
440   class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
441   return &plats->ctxlat;
442 }
443 
444 /* Return whether LAT is a lattice with a single constant and without an
445    undefined value.  */
446 
447 template <typename valtype>
448 inline bool
is_single_const()449 ipcp_lattice<valtype>::is_single_const ()
450 {
451   if (bottom || contains_variable || values_count != 1)
452     return false;
453   else
454     return true;
455 }
456 
457 /* Print V which is extracted from a value in a lattice to F.  */
458 
459 static void
print_ipcp_constant_value(FILE * f,tree v)460 print_ipcp_constant_value (FILE * f, tree v)
461 {
462   if (TREE_CODE (v) == ADDR_EXPR
463       && TREE_CODE (TREE_OPERAND (v, 0)) == CONST_DECL)
464     {
465       fprintf (f, "& ");
466       print_generic_expr (f, DECL_INITIAL (TREE_OPERAND (v, 0)));
467     }
468   else
469     print_generic_expr (f, v);
470 }
471 
472 /* Print V which is extracted from a value in a lattice to F.  */
473 
474 static void
print_ipcp_constant_value(FILE * f,ipa_polymorphic_call_context v)475 print_ipcp_constant_value (FILE * f, ipa_polymorphic_call_context v)
476 {
477   v.dump(f, false);
478 }
479 
480 /* Print a lattice LAT to F.  */
481 
482 template <typename valtype>
483 void
print(FILE * f,bool dump_sources,bool dump_benefits)484 ipcp_lattice<valtype>::print (FILE * f, bool dump_sources, bool dump_benefits)
485 {
486   ipcp_value<valtype> *val;
487   bool prev = false;
488 
489   if (bottom)
490     {
491       fprintf (f, "BOTTOM\n");
492       return;
493     }
494 
495   if (!values_count && !contains_variable)
496     {
497       fprintf (f, "TOP\n");
498       return;
499     }
500 
501   if (contains_variable)
502     {
503       fprintf (f, "VARIABLE");
504       prev = true;
505       if (dump_benefits)
506           fprintf (f, "\n");
507     }
508 
509   for (val = values; val; val = val->next)
510     {
511       if (dump_benefits && prev)
512           fprintf (f, "               ");
513       else if (!dump_benefits && prev)
514           fprintf (f, ", ");
515       else
516           prev = true;
517 
518       print_ipcp_constant_value (f, val->value);
519 
520       if (dump_sources)
521           {
522             ipcp_value_source<valtype> *s;
523 
524             if (val->self_recursion_generated_p ())
525               fprintf (f, " [self_gen(%i), from:",
526                          val->self_recursion_generated_level);
527             else
528               fprintf (f, " [scc: %i, from:", val->scc_no);
529             for (s = val->sources; s; s = s->next)
530               fprintf (f, " %i(%f)", s->cs->caller->order,
531                          s->cs->sreal_frequency ().to_double ());
532             fprintf (f, "]");
533           }
534 
535       if (dump_benefits)
536           fprintf (f, " [loc_time: %g, loc_size: %i, "
537                      "prop_time: %g, prop_size: %i]\n",
538                      val->local_time_benefit.to_double (), val->local_size_cost,
539                      val->prop_time_benefit.to_double (), val->prop_size_cost);
540     }
541   if (!dump_benefits)
542     fprintf (f, "\n");
543 }
544 
545 void
print(FILE * f)546 ipcp_bits_lattice::print (FILE *f)
547 {
548   if (top_p ())
549     fprintf (f, "         Bits unknown (TOP)\n");
550   else if (bottom_p ())
551     fprintf (f, "         Bits unusable (BOTTOM)\n");
552   else
553     {
554       fprintf (f, "         Bits: value = "); print_hex (get_value (), f);
555       fprintf (f, ", mask = "); print_hex (get_mask (), f);
556       fprintf (f, "\n");
557     }
558 }
559 
560 /* Print value range lattice to F.  */
561 
562 void
print(FILE * f)563 ipcp_vr_lattice::print (FILE * f)
564 {
565   dump_value_range (f, &m_vr);
566 }
567 
568 /* Print all ipcp_lattices of all functions to F.  */
569 
570 static void
print_all_lattices(FILE * f,bool dump_sources,bool dump_benefits)571 print_all_lattices (FILE * f, bool dump_sources, bool dump_benefits)
572 {
573   struct cgraph_node *node;
574   int i, count;
575 
576   fprintf (f, "\nLattices:\n");
577   FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
578     {
579       class ipa_node_params *info;
580 
581       info = ipa_node_params_sum->get (node);
582       /* Skip unoptimized functions and constprop clones since we don't make
583            lattices for them.  */
584       if (!info || info->ipcp_orig_node)
585           continue;
586       fprintf (f, "  Node: %s:\n", node->dump_name ());
587       count = ipa_get_param_count (info);
588       for (i = 0; i < count; i++)
589           {
590             struct ipcp_agg_lattice *aglat;
591             class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
592             fprintf (f, "    param [%d]: ", i);
593             plats->itself.print (f, dump_sources, dump_benefits);
594             fprintf (f, "         ctxs: ");
595             plats->ctxlat.print (f, dump_sources, dump_benefits);
596             plats->bits_lattice.print (f);
597             fprintf (f, "         ");
598             plats->m_value_range.print (f);
599             fprintf (f, "\n");
600             if (plats->virt_call)
601               fprintf (f, "        virt_call flag set\n");
602 
603             if (plats->aggs_bottom)
604               {
605                 fprintf (f, "        AGGS BOTTOM\n");
606                 continue;
607               }
608             if (plats->aggs_contain_variable)
609               fprintf (f, "        AGGS VARIABLE\n");
610             for (aglat = plats->aggs; aglat; aglat = aglat->next)
611               {
612                 fprintf (f, "        %soffset " HOST_WIDE_INT_PRINT_DEC ": ",
613                            plats->aggs_by_ref ? "ref " : "", aglat->offset);
614                 aglat->print (f, dump_sources, dump_benefits);
615               }
616           }
617     }
618 }
619 
620 /* Determine whether it is at all technically possible to create clones of NODE
621    and store this information in the ipa_node_params structure associated
622    with NODE.  */
623 
624 static void
determine_versionability(struct cgraph_node * node,class ipa_node_params * info)625 determine_versionability (struct cgraph_node *node,
626                                 class ipa_node_params *info)
627 {
628   const char *reason = NULL;
629 
630   /* There are a number of generic reasons functions cannot be versioned.  We
631      also cannot remove parameters if there are type attributes such as fnspec
632      present.  */
633   if (node->alias || node->thunk)
634     reason = "alias or thunk";
635   else if (!node->versionable)
636     reason = "not a tree_versionable_function";
637   else if (node->get_availability () <= AVAIL_INTERPOSABLE)
638     reason = "insufficient body availability";
639   else if (!opt_for_fn (node->decl, optimize)
640              || !opt_for_fn (node->decl, flag_ipa_cp))
641     reason = "non-optimized function";
642   else if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (node->decl)))
643     {
644       /* Ideally we should clone the SIMD clones themselves and create
645            vector copies of them, so IPA-cp and SIMD clones can happily
646            coexist, but that may not be worth the effort.  */
647       reason = "function has SIMD clones";
648     }
649   else if (lookup_attribute ("target_clones", DECL_ATTRIBUTES (node->decl)))
650     {
651       /* Ideally we should clone the target clones themselves and create
652            copies of them, so IPA-cp and target clones can happily
653            coexist, but that may not be worth the effort.  */
654       reason = "function target_clones attribute";
655     }
656   /* Don't clone decls local to a comdat group; it breaks and for C++
657      decloned constructors, inlining is always better anyway.  */
658   else if (node->comdat_local_p ())
659     reason = "comdat-local function";
660   else if (node->calls_comdat_local)
661     {
662       /* TODO: call is versionable if we make sure that all
663            callers are inside of a comdat group.  */
664       reason = "calls comdat-local function";
665     }
666 
667   /* Functions calling BUILT_IN_VA_ARG_PACK and BUILT_IN_VA_ARG_PACK_LEN
668      work only when inlined.  Cloning them may still lead to better code
669      because ipa-cp will not give up on cloning further.  If the function is
670      external this however leads to wrong code because we may end up producing
671      offline copy of the function.  */
672   if (DECL_EXTERNAL (node->decl))
673     for (cgraph_edge *edge = node->callees; !reason && edge;
674            edge = edge->next_callee)
675       if (fndecl_built_in_p (edge->callee->decl, BUILT_IN_NORMAL))
676         {
677             if (DECL_FUNCTION_CODE (edge->callee->decl) == BUILT_IN_VA_ARG_PACK)
678               reason = "external function which calls va_arg_pack";
679             if (DECL_FUNCTION_CODE (edge->callee->decl)
680                 == BUILT_IN_VA_ARG_PACK_LEN)
681               reason = "external function which calls va_arg_pack_len";
682         }
683 
684   if (reason && dump_file && !node->alias && !node->thunk)
685     fprintf (dump_file, "Function %s is not versionable, reason: %s.\n",
686                node->dump_name (), reason);
687 
688   info->versionable = (reason == NULL);
689 }
690 
691 /* Return true if it is at all technically possible to create clones of a
692    NODE.  */
693 
694 static bool
ipcp_versionable_function_p(struct cgraph_node * node)695 ipcp_versionable_function_p (struct cgraph_node *node)
696 {
697   ipa_node_params *info = ipa_node_params_sum->get (node);
698   return info && info->versionable;
699 }
700 
701 /* Structure holding accumulated information about callers of a node.  */
702 
703 struct caller_statistics
704 {
705   /* If requested (see below), self-recursive call counts are summed into this
706      field.  */
707   profile_count rec_count_sum;
708   /* The sum of all ipa counts of all the other (non-recursive) calls.  */
709   profile_count count_sum;
710   /* Sum of all frequencies for all calls.  */
711   sreal freq_sum;
712   /* Number of calls and hot calls respectively.  */
713   int n_calls, n_hot_calls;
714   /* If itself is set up, also count the number of non-self-recursive
715      calls.  */
716   int n_nonrec_calls;
717   /* If non-NULL, this is the node itself and calls from it should have their
718      counts included in rec_count_sum and not count_sum.  */
719   cgraph_node *itself;
720 };
721 
722 /* Initialize fields of STAT to zeroes and optionally set it up so that edges
723    from IGNORED_CALLER are not counted.  */
724 
725 static inline void
init_caller_stats(caller_statistics * stats,cgraph_node * itself=NULL)726 init_caller_stats (caller_statistics *stats, cgraph_node *itself = NULL)
727 {
728   stats->rec_count_sum = profile_count::zero ();
729   stats->count_sum = profile_count::zero ();
730   stats->n_calls = 0;
731   stats->n_hot_calls = 0;
732   stats->n_nonrec_calls = 0;
733   stats->freq_sum = 0;
734   stats->itself = itself;
735 }
736 
737 /* Worker callback of cgraph_for_node_and_aliases accumulating statistics of
738    non-thunk incoming edges to NODE.  */
739 
740 static bool
gather_caller_stats(struct cgraph_node * node,void * data)741 gather_caller_stats (struct cgraph_node *node, void *data)
742 {
743   struct caller_statistics *stats = (struct caller_statistics *) data;
744   struct cgraph_edge *cs;
745 
746   for (cs = node->callers; cs; cs = cs->next_caller)
747     if (!cs->caller->thunk)
748       {
749           ipa_node_params *info = ipa_node_params_sum->get (cs->caller);
750           if (info && info->node_dead)
751             continue;
752 
753           if (cs->count.ipa ().initialized_p ())
754             {
755               if (stats->itself && stats->itself == cs->caller)
756                 stats->rec_count_sum += cs->count.ipa ();
757               else
758                 stats->count_sum += cs->count.ipa ();
759             }
760           stats->freq_sum += cs->sreal_frequency ();
761           stats->n_calls++;
762           if (stats->itself && stats->itself != cs->caller)
763             stats->n_nonrec_calls++;
764 
765           if (cs->maybe_hot_p ())
766             stats->n_hot_calls ++;
767       }
768   return false;
769 
770 }
771 
772 /* Return true if this NODE is viable candidate for cloning.  */
773 
774 static bool
ipcp_cloning_candidate_p(struct cgraph_node * node)775 ipcp_cloning_candidate_p (struct cgraph_node *node)
776 {
777   struct caller_statistics stats;
778 
779   gcc_checking_assert (node->has_gimple_body_p ());
780 
781   if (!opt_for_fn (node->decl, flag_ipa_cp_clone))
782     {
783       if (dump_file)
784           fprintf (dump_file, "Not considering %s for cloning; "
785                      "-fipa-cp-clone disabled.\n",
786                      node->dump_name ());
787       return false;
788     }
789 
790   if (node->optimize_for_size_p ())
791     {
792       if (dump_file)
793           fprintf (dump_file, "Not considering %s for cloning; "
794                      "optimizing it for size.\n",
795                      node->dump_name ());
796       return false;
797     }
798 
799   init_caller_stats (&stats);
800   node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats, false);
801 
802   if (ipa_size_summaries->get (node)->self_size < stats.n_calls)
803     {
804       if (dump_file)
805           fprintf (dump_file, "Considering %s for cloning; code might shrink.\n",
806                      node->dump_name ());
807       return true;
808     }
809 
810   /* When profile is available and function is hot, propagate into it even if
811      calls seems cold; constant propagation can improve function's speed
812      significantly.  */
813   if (stats.count_sum > profile_count::zero ()
814       && node->count.ipa ().initialized_p ())
815     {
816       if (stats.count_sum > node->count.ipa ().apply_scale (90, 100))
817           {
818             if (dump_file)
819               fprintf (dump_file, "Considering %s for cloning; "
820                          "usually called directly.\n",
821                          node->dump_name ());
822             return true;
823           }
824     }
825   if (!stats.n_hot_calls)
826     {
827       if (dump_file)
828           fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n",
829                      node->dump_name ());
830       return false;
831     }
832   if (dump_file)
833     fprintf (dump_file, "Considering %s for cloning.\n",
834                node->dump_name ());
835   return true;
836 }
837 
838 template <typename valtype>
839 class value_topo_info
840 {
841 public:
842   /* Head of the linked list of topologically sorted values. */
843   ipcp_value<valtype> *values_topo;
844   /* Stack for creating SCCs, represented by a linked list too.  */
845   ipcp_value<valtype> *stack;
846   /* Counter driving the algorithm in add_val_to_toposort.  */
847   int dfs_counter;
848 
value_topo_info()849   value_topo_info () : values_topo (NULL), stack (NULL), dfs_counter (0)
850   {}
851   void add_val (ipcp_value<valtype> *cur_val);
852   void propagate_effects ();
853 };
854 
855 /* Arrays representing a topological ordering of call graph nodes and a stack
856    of nodes used during constant propagation and also data required to perform
857    topological sort of values and propagation of benefits in the determined
858    order.  */
859 
860 class ipa_topo_info
861 {
862 public:
863   /* Array with obtained topological order of cgraph nodes.  */
864   struct cgraph_node **order;
865   /* Stack of cgraph nodes used during propagation within SCC until all values
866      in the SCC stabilize.  */
867   struct cgraph_node **stack;
868   int nnodes, stack_top;
869 
870   value_topo_info<tree> constants;
871   value_topo_info<ipa_polymorphic_call_context> contexts;
872 
ipa_topo_info()873   ipa_topo_info () : order(NULL), stack(NULL), nnodes(0), stack_top(0),
874     constants ()
875   {}
876 };
877 
878 /* Skip edges from and to nodes without ipa_cp enabled.
879    Ignore not available symbols.  */
880 
881 static bool
ignore_edge_p(cgraph_edge * e)882 ignore_edge_p (cgraph_edge *e)
883 {
884   enum availability avail;
885   cgraph_node *ultimate_target
886     = e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);
887 
888   return (avail <= AVAIL_INTERPOSABLE
889             || !opt_for_fn (ultimate_target->decl, optimize)
890             || !opt_for_fn (ultimate_target->decl, flag_ipa_cp));
891 }
892 
893 /* Allocate the arrays in TOPO and topologically sort the nodes into order.  */
894 
895 static void
build_toporder_info(class ipa_topo_info * topo)896 build_toporder_info (class ipa_topo_info *topo)
897 {
898   topo->order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
899   topo->stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
900 
901   gcc_checking_assert (topo->stack_top == 0);
902   topo->nnodes = ipa_reduced_postorder (topo->order, true,
903                                                   ignore_edge_p);
904 }
905 
906 /* Free information about strongly connected components and the arrays in
907    TOPO.  */
908 
909 static void
free_toporder_info(class ipa_topo_info * topo)910 free_toporder_info (class ipa_topo_info *topo)
911 {
912   ipa_free_postorder_info ();
913   free (topo->order);
914   free (topo->stack);
915 }
916 
917 /* Add NODE to the stack in TOPO, unless it is already there.  */
918 
919 static inline void
push_node_to_stack(class ipa_topo_info * topo,struct cgraph_node * node)920 push_node_to_stack (class ipa_topo_info *topo, struct cgraph_node *node)
921 {
922   ipa_node_params *info = ipa_node_params_sum->get (node);
923   if (info->node_enqueued)
924     return;
925   info->node_enqueued = 1;
926   topo->stack[topo->stack_top++] = node;
927 }
928 
929 /* Pop a node from the stack in TOPO and return it or return NULL if the stack
930    is empty.  */
931 
932 static struct cgraph_node *
pop_node_from_stack(class ipa_topo_info * topo)933 pop_node_from_stack (class ipa_topo_info *topo)
934 {
935   if (topo->stack_top)
936     {
937       struct cgraph_node *node;
938       topo->stack_top--;
939       node = topo->stack[topo->stack_top];
940       ipa_node_params_sum->get (node)->node_enqueued = 0;
941       return node;
942     }
943   else
944     return NULL;
945 }
946 
947 /* Set lattice LAT to bottom and return true if it previously was not set as
948    such.  */
949 
950 template <typename valtype>
951 inline bool
set_to_bottom()952 ipcp_lattice<valtype>::set_to_bottom ()
953 {
954   bool ret = !bottom;
955   bottom = true;
956   return ret;
957 }
958 
959 /* Mark lattice as containing an unknown value and return true if it previously
960    was not marked as such.  */
961 
962 template <typename valtype>
963 inline bool
set_contains_variable()964 ipcp_lattice<valtype>::set_contains_variable ()
965 {
966   bool ret = !contains_variable;
967   contains_variable = true;
968   return ret;
969 }
970 
971 /* Set all aggregate lattices in PLATS to bottom and return true if they were
972    not previously set as such.  */
973 
974 static inline bool
set_agg_lats_to_bottom(class ipcp_param_lattices * plats)975 set_agg_lats_to_bottom (class ipcp_param_lattices *plats)
976 {
977   bool ret = !plats->aggs_bottom;
978   plats->aggs_bottom = true;
979   return ret;
980 }
981 
982 /* Mark all aggregate lattices in PLATS as containing an unknown value and
983    return true if they were not previously marked as such.  */
984 
985 static inline bool
set_agg_lats_contain_variable(class ipcp_param_lattices * plats)986 set_agg_lats_contain_variable (class ipcp_param_lattices *plats)
987 {
988   bool ret = !plats->aggs_contain_variable;
989   plats->aggs_contain_variable = true;
990   return ret;
991 }
992 
993 bool
meet_with(const ipcp_vr_lattice & other)994 ipcp_vr_lattice::meet_with (const ipcp_vr_lattice &other)
995 {
996   return meet_with_1 (&other.m_vr);
997 }
998 
999 /* Meet the current value of the lattice with value range described by VR
1000    lattice.  */
1001 
1002 bool
meet_with(const value_range * p_vr)1003 ipcp_vr_lattice::meet_with (const value_range *p_vr)
1004 {
1005   return meet_with_1 (p_vr);
1006 }
1007 
1008 /* Meet the current value of the lattice with value range described by
1009    OTHER_VR lattice.  Return TRUE if anything changed.  */
1010 
1011 bool
meet_with_1(const value_range * other_vr)1012 ipcp_vr_lattice::meet_with_1 (const value_range *other_vr)
1013 {
1014   if (bottom_p ())
1015     return false;
1016 
1017   if (other_vr->varying_p ())
1018     return set_to_bottom ();
1019 
1020   value_range save (m_vr);
1021   m_vr.union_ (other_vr);
1022   return !m_vr.equal_p (save);
1023 }
1024 
1025 /* Return true if value range information in the lattice is yet unknown.  */
1026 
1027 bool
top_p() const1028 ipcp_vr_lattice::top_p () const
1029 {
1030   return m_vr.undefined_p ();
1031 }
1032 
1033 /* Return true if value range information in the lattice is known to be
1034    unusable.  */
1035 
1036 bool
bottom_p() const1037 ipcp_vr_lattice::bottom_p () const
1038 {
1039   return m_vr.varying_p ();
1040 }
1041 
1042 /* Set value range information in the lattice to bottom.  Return true if it
1043    previously was in a different state.  */
1044 
1045 bool
set_to_bottom()1046 ipcp_vr_lattice::set_to_bottom ()
1047 {
1048   if (m_vr.varying_p ())
1049     return false;
1050   /* ?? We create all sorts of VARYING ranges for floats, structures,
1051      and other types which we cannot handle as ranges.  We should
1052      probably avoid handling them throughout the pass, but it's easier
1053      to create a sensible VARYING here and let the lattice
1054      propagate.  */
1055   m_vr.set_varying (integer_type_node);
1056   return true;
1057 }
1058 
1059 /* Set lattice value to bottom, if it already isn't the case.  */
1060 
1061 bool
set_to_bottom()1062 ipcp_bits_lattice::set_to_bottom ()
1063 {
1064   if (bottom_p ())
1065     return false;
1066   m_lattice_val = IPA_BITS_VARYING;
1067   m_value = 0;
1068   m_mask = -1;
1069   return true;
1070 }
1071 
1072 /* Set to constant if it isn't already. Only meant to be called
1073    when switching state from TOP.  */
1074 
1075 bool
set_to_constant(widest_int value,widest_int mask)1076 ipcp_bits_lattice::set_to_constant (widest_int value, widest_int mask)
1077 {
1078   gcc_assert (top_p ());
1079   m_lattice_val = IPA_BITS_CONSTANT;
1080   m_value = wi::bit_and (wi::bit_not (mask), value);
1081   m_mask = mask;
1082   return true;
1083 }
1084 
1085 /* Return true if any of the known bits are non-zero.  */
1086 
1087 bool
known_nonzero_p() const1088 ipcp_bits_lattice::known_nonzero_p () const
1089 {
1090   if (!constant_p ())
1091     return false;
1092   return wi::ne_p (wi::bit_and (wi::bit_not (m_mask), m_value), 0);
1093 }
1094 
1095 /* Convert operand to value, mask form.  */
1096 
1097 void
get_value_and_mask(tree operand,widest_int * valuep,widest_int * maskp)1098 ipcp_bits_lattice::get_value_and_mask (tree operand, widest_int *valuep, widest_int *maskp)
1099 {
1100   wide_int get_nonzero_bits (const_tree);
1101 
1102   if (TREE_CODE (operand) == INTEGER_CST)
1103     {
1104       *valuep = wi::to_widest (operand);
1105       *maskp = 0;
1106     }
1107   else
1108     {
1109       *valuep = 0;
1110       *maskp = -1;
1111     }
1112 }
1113 
1114 /* Meet operation, similar to ccp_lattice_meet, we xor values
1115    if this->value, value have different values at same bit positions, we want
1116    to drop that bit to varying. Return true if mask is changed.
1117    This function assumes that the lattice value is in CONSTANT state.  If
1118    DROP_ALL_ONES, mask out any known bits with value one afterwards.  */
1119 
1120 bool
meet_with_1(widest_int value,widest_int mask,unsigned precision,bool drop_all_ones)1121 ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask,
1122                                         unsigned precision, bool drop_all_ones)
1123 {
1124   gcc_assert (constant_p ());
1125 
1126   widest_int old_mask = m_mask;
1127   m_mask = (m_mask | mask) | (m_value ^ value);
1128   if (drop_all_ones)
1129     m_mask |= m_value;
1130   m_value &= ~m_mask;
1131 
1132   if (wi::sext (m_mask, precision) == -1)
1133     return set_to_bottom ();
1134 
1135   return m_mask != old_mask;
1136 }
1137 
1138 /* Meet the bits lattice with operand
1139    described by <value, mask, sgn, precision.  */
1140 
1141 bool
meet_with(widest_int value,widest_int mask,unsigned precision)1142 ipcp_bits_lattice::meet_with (widest_int value, widest_int mask,
1143                                     unsigned precision)
1144 {
1145   if (bottom_p ())
1146     return false;
1147 
1148   if (top_p ())
1149     {
1150       if (wi::sext (mask, precision) == -1)
1151           return set_to_bottom ();
1152       return set_to_constant (value, mask);
1153     }
1154 
1155   return meet_with_1 (value, mask, precision, false);
1156 }
1157 
1158 /* Meet bits lattice with the result of bit_value_binop (other, operand)
1159    if code is binary operation or bit_value_unop (other) if code is unary op.
1160    In the case when code is nop_expr, no adjustment is required.  If
1161    DROP_ALL_ONES, mask out any known bits with value one afterwards.  */
1162 
1163 bool
meet_with(ipcp_bits_lattice & other,unsigned precision,signop sgn,enum tree_code code,tree operand,bool drop_all_ones)1164 ipcp_bits_lattice::meet_with (ipcp_bits_lattice& other, unsigned precision,
1165                                     signop sgn, enum tree_code code, tree operand,
1166                                     bool drop_all_ones)
1167 {
1168   if (other.bottom_p ())
1169     return set_to_bottom ();
1170 
1171   if (bottom_p () || other.top_p ())
1172     return false;
1173 
1174   widest_int adjusted_value, adjusted_mask;
1175 
1176   if (TREE_CODE_CLASS (code) == tcc_binary)
1177     {
1178       tree type = TREE_TYPE (operand);
1179       widest_int o_value, o_mask;
1180       get_value_and_mask (operand, &o_value, &o_mask);
1181 
1182       bit_value_binop (code, sgn, precision, &adjusted_value, &adjusted_mask,
1183                            sgn, precision, other.get_value (), other.get_mask (),
1184                            TYPE_SIGN (type), TYPE_PRECISION (type), o_value, o_mask);
1185 
1186       if (wi::sext (adjusted_mask, precision) == -1)
1187           return set_to_bottom ();
1188     }
1189 
1190   else if (TREE_CODE_CLASS (code) == tcc_unary)
1191     {
1192       bit_value_unop (code, sgn, precision, &adjusted_value,
1193                           &adjusted_mask, sgn, precision, other.get_value (),
1194                           other.get_mask ());
1195 
1196       if (wi::sext (adjusted_mask, precision) == -1)
1197           return set_to_bottom ();
1198     }
1199 
1200   else
1201     return set_to_bottom ();
1202 
1203   if (top_p ())
1204     {
1205       if (drop_all_ones)
1206           {
1207             adjusted_mask |= adjusted_value;
1208             adjusted_value &= ~adjusted_mask;
1209           }
1210       if (wi::sext (adjusted_mask, precision) == -1)
1211           return set_to_bottom ();
1212       return set_to_constant (adjusted_value, adjusted_mask);
1213     }
1214   else
1215     return meet_with_1 (adjusted_value, adjusted_mask, precision,
1216                               drop_all_ones);
1217 }
1218 
1219 /* Mark bot aggregate and scalar lattices as containing an unknown variable,
1220    return true is any of them has not been marked as such so far.  */
1221 
1222 static inline bool
set_all_contains_variable(class ipcp_param_lattices * plats)1223 set_all_contains_variable (class ipcp_param_lattices *plats)
1224 {
1225   bool ret;
1226   ret = plats->itself.set_contains_variable ();
1227   ret |= plats->ctxlat.set_contains_variable ();
1228   ret |= set_agg_lats_contain_variable (plats);
1229   ret |= plats->bits_lattice.set_to_bottom ();
1230   ret |= plats->m_value_range.set_to_bottom ();
1231   return ret;
1232 }
1233 
1234 /* Worker of call_for_symbol_thunks_and_aliases, increment the integer DATA
1235    points to by the number of callers to NODE.  */
1236 
1237 static bool
count_callers(cgraph_node * node,void * data)1238 count_callers (cgraph_node *node, void *data)
1239 {
1240   int *caller_count = (int *) data;
1241 
1242   for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
1243     /* Local thunks can be handled transparently, but if the thunk cannot
1244        be optimized out, count it as a real use.  */
1245     if (!cs->caller->thunk || !cs->caller->local)
1246       ++*caller_count;
1247   return false;
1248 }
1249 
1250 /* Worker of call_for_symbol_thunks_and_aliases, it is supposed to be called on
1251    the one caller of some other node.  Set the caller's corresponding flag.  */
1252 
1253 static bool
set_single_call_flag(cgraph_node * node,void *)1254 set_single_call_flag (cgraph_node *node, void *)
1255 {
1256   cgraph_edge *cs = node->callers;
1257   /* Local thunks can be handled transparently, skip them.  */
1258   while (cs && cs->caller->thunk && cs->caller->local)
1259     cs = cs->next_caller;
1260   if (cs)
1261     if (ipa_node_params* info = ipa_node_params_sum->get (cs->caller))
1262       {
1263           info->node_calling_single_call = true;
1264           return true;
1265       }
1266   return false;
1267 }
1268 
1269 /* Initialize ipcp_lattices.  */
1270 
1271 static void
initialize_node_lattices(struct cgraph_node * node)1272 initialize_node_lattices (struct cgraph_node *node)
1273 {
1274   ipa_node_params *info = ipa_node_params_sum->get (node);
1275   struct cgraph_edge *ie;
1276   bool disable = false, variable = false;
1277   int i;
1278 
1279   gcc_checking_assert (node->has_gimple_body_p ());
1280 
1281   if (!ipa_get_param_count (info))
1282     disable = true;
1283   else if (node->local)
1284     {
1285       int caller_count = 0;
1286       node->call_for_symbol_thunks_and_aliases (count_callers, &caller_count,
1287                                                             true);
1288       gcc_checking_assert (caller_count > 0);
1289       if (caller_count == 1)
1290           node->call_for_symbol_thunks_and_aliases (set_single_call_flag,
1291                                                               NULL, true);
1292     }
1293   else
1294     {
1295       /* When cloning is allowed, we can assume that externally visible
1296            functions are not called.  We will compensate this by cloning
1297            later.  */
1298       if (ipcp_versionable_function_p (node)
1299             && ipcp_cloning_candidate_p (node))
1300           variable = true;
1301       else
1302           disable = true;
1303     }
1304 
1305   if (dump_file && (dump_flags & TDF_DETAILS)
1306       && !node->alias && !node->thunk)
1307     {
1308       fprintf (dump_file, "Initializing lattices of %s\n",
1309                  node->dump_name ());
1310       if (disable || variable)
1311           fprintf (dump_file, "  Marking all lattices as %s\n",
1312                      disable ? "BOTTOM" : "VARIABLE");
1313     }
1314 
1315   auto_vec<bool, 16> surviving_params;
1316   bool pre_modified = false;
1317 
1318   clone_info *cinfo = clone_info::get (node);
1319 
1320   if (!disable && cinfo && cinfo->param_adjustments)
1321     {
1322       /* At the moment all IPA optimizations should use the number of
1323            parameters of the prevailing decl as the m_always_copy_start.
1324            Handling any other value would complicate the code below, so for the
1325            time bing let's only assert it is so.  */
1326       gcc_assert ((cinfo->param_adjustments->m_always_copy_start
1327                        == ipa_get_param_count (info))
1328                       || cinfo->param_adjustments->m_always_copy_start < 0);
1329 
1330       pre_modified = true;
1331       cinfo->param_adjustments->get_surviving_params (&surviving_params);
1332 
1333       if (dump_file && (dump_flags & TDF_DETAILS)
1334             && !node->alias && !node->thunk)
1335           {
1336             bool first = true;
1337             for (int j = 0; j < ipa_get_param_count (info); j++)
1338               {
1339                 if (j < (int) surviving_params.length ()
1340                       && surviving_params[j])
1341                     continue;
1342                 if (first)
1343                     {
1344                       fprintf (dump_file,
1345                                  "  The following parameters are dead on arrival:");
1346                       first = false;
1347                     }
1348                 fprintf (dump_file, " %u", j);
1349               }
1350             if (!first)
1351                 fprintf (dump_file, "\n");
1352           }
1353     }
1354 
1355   for (i = 0; i < ipa_get_param_count (info); i++)
1356     {
1357       ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
1358       if (disable
1359             || !ipa_get_type (info, i)
1360             || (pre_modified && (surviving_params.length () <= (unsigned) i
1361                                      || !surviving_params[i])))
1362           {
1363             plats->itself.set_to_bottom ();
1364             plats->ctxlat.set_to_bottom ();
1365             set_agg_lats_to_bottom (plats);
1366             plats->bits_lattice.set_to_bottom ();
1367             plats->m_value_range.m_vr = value_range ();
1368             plats->m_value_range.set_to_bottom ();
1369           }
1370       else
1371           {
1372             plats->m_value_range.init ();
1373             if (variable)
1374               set_all_contains_variable (plats);
1375           }
1376     }
1377 
1378   for (ie = node->indirect_calls; ie; ie = ie->next_callee)
1379     if (ie->indirect_info->polymorphic
1380           && ie->indirect_info->param_index >= 0)
1381       {
1382           gcc_checking_assert (ie->indirect_info->param_index >= 0);
1383           ipa_get_parm_lattices (info,
1384                                      ie->indirect_info->param_index)->virt_call = 1;
1385       }
1386 }
1387 
1388 /* Return true if VALUE can be safely IPA-CP propagated to a parameter of type
1389    PARAM_TYPE.  */
1390 
1391 static bool
ipacp_value_safe_for_type(tree param_type,tree value)1392 ipacp_value_safe_for_type (tree param_type, tree value)
1393 {
1394   tree val_type = TREE_TYPE (value);
1395   if (param_type == val_type
1396       || useless_type_conversion_p (param_type, val_type)
1397       || fold_convertible_p (param_type, value))
1398     return true;
1399   else
1400     return false;
1401 }
1402 
1403 /* Return true iff X and Y should be considered equal values by IPA-CP.  */
1404 
1405 bool
values_equal_for_ipcp_p(tree x,tree y)1406 values_equal_for_ipcp_p (tree x, tree y)
1407 {
1408   gcc_checking_assert (x != NULL_TREE && y != NULL_TREE);
1409 
1410   if (x == y)
1411     return true;
1412 
1413   if (TREE_CODE (x) == ADDR_EXPR
1414       && TREE_CODE (y) == ADDR_EXPR
1415       && TREE_CODE (TREE_OPERAND (x, 0)) == CONST_DECL
1416       && TREE_CODE (TREE_OPERAND (y, 0)) == CONST_DECL)
1417     return operand_equal_p (DECL_INITIAL (TREE_OPERAND (x, 0)),
1418                                   DECL_INITIAL (TREE_OPERAND (y, 0)), 0);
1419   else
1420     return operand_equal_p (x, y, 0);
1421 }
1422 
1423 /* Return the result of a (possibly arithmetic) operation on the constant
1424    value INPUT.  OPERAND is 2nd operand for binary operation.  RES_TYPE is
1425    the type of the parameter to which the result is passed.  Return
1426    NULL_TREE if that cannot be determined or be considered an
1427    interprocedural invariant.  */
1428 
1429 static tree
ipa_get_jf_arith_result(enum tree_code opcode,tree input,tree operand,tree res_type)1430 ipa_get_jf_arith_result (enum tree_code opcode, tree input, tree operand,
1431                                tree res_type)
1432 {
1433   tree res;
1434 
1435   if (opcode == NOP_EXPR)
1436     return input;
1437   if (!is_gimple_ip_invariant (input))
1438     return NULL_TREE;
1439 
1440   if (opcode == ASSERT_EXPR)
1441     {
1442       if (values_equal_for_ipcp_p (input, operand))
1443           return input;
1444       else
1445           return NULL_TREE;
1446     }
1447 
1448   if (!res_type)
1449     {
1450       if (TREE_CODE_CLASS (opcode) == tcc_comparison)
1451           res_type = boolean_type_node;
1452       else if (expr_type_first_operand_type_p (opcode))
1453           res_type = TREE_TYPE (input);
1454       else
1455           return NULL_TREE;
1456     }
1457 
1458   if (TREE_CODE_CLASS (opcode) == tcc_unary)
1459     res = fold_unary (opcode, res_type, input);
1460   else
1461     res = fold_binary (opcode, res_type, input, operand);
1462 
1463   if (res && !is_gimple_ip_invariant (res))
1464     return NULL_TREE;
1465 
1466   return res;
1467 }
1468 
1469 /* Return the result of a (possibly arithmetic) pass through jump function
1470    JFUNC on the constant value INPUT.  RES_TYPE is the type of the parameter
1471    to which the result is passed.  Return NULL_TREE if that cannot be
1472    determined or be considered an interprocedural invariant.  */
1473 
1474 static tree
ipa_get_jf_pass_through_result(struct ipa_jump_func * jfunc,tree input,tree res_type)1475 ipa_get_jf_pass_through_result (struct ipa_jump_func *jfunc, tree input,
1476                                         tree res_type)
1477 {
1478   return ipa_get_jf_arith_result (ipa_get_jf_pass_through_operation (jfunc),
1479                                           input,
1480                                           ipa_get_jf_pass_through_operand (jfunc),
1481                                           res_type);
1482 }
1483 
1484 /* Return the result of an ancestor jump function JFUNC on the constant value
1485    INPUT.  Return NULL_TREE if that cannot be determined.  */
1486 
1487 static tree
ipa_get_jf_ancestor_result(struct ipa_jump_func * jfunc,tree input)1488 ipa_get_jf_ancestor_result (struct ipa_jump_func *jfunc, tree input)
1489 {
1490   gcc_checking_assert (TREE_CODE (input) != TREE_BINFO);
1491   if (TREE_CODE (input) == ADDR_EXPR)
1492     {
1493       gcc_checking_assert (is_gimple_ip_invariant_address (input));
1494       poly_int64 off = ipa_get_jf_ancestor_offset (jfunc);
1495       if (known_eq (off, 0))
1496           return input;
1497       poly_int64 byte_offset = exact_div (off, BITS_PER_UNIT);
1498       return build1 (ADDR_EXPR, TREE_TYPE (input),
1499                          fold_build2 (MEM_REF, TREE_TYPE (TREE_TYPE (input)), input,
1500                                           build_int_cst (ptr_type_node, byte_offset)));
1501     }
1502   else if (ipa_get_jf_ancestor_keep_null (jfunc)
1503              && zerop (input))
1504     return input;
1505   else
1506     return NULL_TREE;
1507 }
1508 
1509 /* Determine whether JFUNC evaluates to a single known constant value and if
1510    so, return it.  Otherwise return NULL.  INFO describes the caller node or
1511    the one it is inlined to, so that pass-through jump functions can be
1512    evaluated.  PARM_TYPE is the type of the parameter to which the result is
1513    passed.  */
1514 
1515 tree
ipa_value_from_jfunc(class ipa_node_params * info,struct ipa_jump_func * jfunc,tree parm_type)1516 ipa_value_from_jfunc (class ipa_node_params *info, struct ipa_jump_func *jfunc,
1517                           tree parm_type)
1518 {
1519   if (jfunc->type == IPA_JF_CONST)
1520     return ipa_get_jf_constant (jfunc);
1521   else if (jfunc->type == IPA_JF_PASS_THROUGH
1522              || jfunc->type == IPA_JF_ANCESTOR)
1523     {
1524       tree input;
1525       int idx;
1526 
1527       if (jfunc->type == IPA_JF_PASS_THROUGH)
1528           idx = ipa_get_jf_pass_through_formal_id (jfunc);
1529       else
1530           idx = ipa_get_jf_ancestor_formal_id (jfunc);
1531 
1532       if (info->ipcp_orig_node)
1533           input = info->known_csts[idx];
1534       else
1535           {
1536             ipcp_lattice<tree> *lat;
1537 
1538             if (!info->lattices
1539                 || idx >= ipa_get_param_count (info))
1540               return NULL_TREE;
1541             lat = ipa_get_scalar_lat (info, idx);
1542             if (!lat->is_single_const ())
1543               return NULL_TREE;
1544             input = lat->values->value;
1545           }
1546 
1547       if (!input)
1548           return NULL_TREE;
1549 
1550       if (jfunc->type == IPA_JF_PASS_THROUGH)
1551           return ipa_get_jf_pass_through_result (jfunc, input, parm_type);
1552       else
1553           return ipa_get_jf_ancestor_result (jfunc, input);
1554     }
1555   else
1556     return NULL_TREE;
1557 }
1558 
1559 /* Determine whether JFUNC evaluates to single known polymorphic context, given
1560    that INFO describes the caller node or the one it is inlined to, CS is the
1561    call graph edge corresponding to JFUNC and CSIDX index of the described
1562    parameter.  */
1563 
1564 ipa_polymorphic_call_context
ipa_context_from_jfunc(ipa_node_params * info,cgraph_edge * cs,int csidx,ipa_jump_func * jfunc)1565 ipa_context_from_jfunc (ipa_node_params *info, cgraph_edge *cs, int csidx,
1566                               ipa_jump_func *jfunc)
1567 {
1568   ipa_edge_args *args = ipa_edge_args_sum->get (cs);
1569   ipa_polymorphic_call_context ctx;
1570   ipa_polymorphic_call_context *edge_ctx
1571     = cs ? ipa_get_ith_polymorhic_call_context (args, csidx) : NULL;
1572 
1573   if (edge_ctx && !edge_ctx->useless_p ())
1574     ctx = *edge_ctx;
1575 
1576   if (jfunc->type == IPA_JF_PASS_THROUGH
1577       || jfunc->type == IPA_JF_ANCESTOR)
1578     {
1579       ipa_polymorphic_call_context srcctx;
1580       int srcidx;
1581       bool type_preserved = true;
1582       if (jfunc->type == IPA_JF_PASS_THROUGH)
1583           {
1584             if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
1585               return ctx;
1586             type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
1587             srcidx = ipa_get_jf_pass_through_formal_id (jfunc);
1588           }
1589       else
1590           {
1591             type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
1592             srcidx = ipa_get_jf_ancestor_formal_id (jfunc);
1593           }
1594       if (info->ipcp_orig_node)
1595           {
1596             if (info->known_contexts.exists ())
1597               srcctx = info->known_contexts[srcidx];
1598           }
1599       else
1600           {
1601             if (!info->lattices
1602                 || srcidx >= ipa_get_param_count (info))
1603               return ctx;
1604             ipcp_lattice<ipa_polymorphic_call_context> *lat;
1605             lat = ipa_get_poly_ctx_lat (info, srcidx);
1606             if (!lat->is_single_const ())
1607               return ctx;
1608             srcctx = lat->values->value;
1609           }
1610       if (srcctx.useless_p ())
1611           return ctx;
1612       if (jfunc->type == IPA_JF_ANCESTOR)
1613           srcctx.offset_by (ipa_get_jf_ancestor_offset (jfunc));
1614       if (!type_preserved)
1615           srcctx.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
1616       srcctx.combine_with (ctx);
1617       return srcctx;
1618     }
1619 
1620   return ctx;
1621 }
1622 
1623 /* Emulate effects of unary OPERATION and/or conversion from SRC_TYPE to
1624    DST_TYPE on value range in SRC_VR and store it to DST_VR.  Return true if
1625    the result is a range or an anti-range.  */
1626 
1627 static bool
ipa_vr_operation_and_type_effects(value_range * dst_vr,value_range * src_vr,enum tree_code operation,tree dst_type,tree src_type)1628 ipa_vr_operation_and_type_effects (value_range *dst_vr,
1629                                            value_range *src_vr,
1630                                            enum tree_code operation,
1631                                            tree dst_type, tree src_type)
1632 {
1633   range_fold_unary_expr (dst_vr, operation, dst_type, src_vr, src_type);
1634   if (dst_vr->varying_p () || dst_vr->undefined_p ())
1635     return false;
1636   return true;
1637 }
1638 
1639 /* Determine value_range of JFUNC given that INFO describes the caller node or
1640    the one it is inlined to, CS is the call graph edge corresponding to JFUNC
1641    and PARM_TYPE of the parameter.  */
1642 
1643 value_range
ipa_value_range_from_jfunc(ipa_node_params * info,cgraph_edge * cs,ipa_jump_func * jfunc,tree parm_type)1644 ipa_value_range_from_jfunc (ipa_node_params *info, cgraph_edge *cs,
1645                                   ipa_jump_func *jfunc, tree parm_type)
1646 {
1647   value_range vr;
1648   if (jfunc->m_vr)
1649     ipa_vr_operation_and_type_effects (&vr,
1650                                                jfunc->m_vr,
1651                                                NOP_EXPR, parm_type,
1652                                                jfunc->m_vr->type ());
1653   if (vr.singleton_p ())
1654     return vr;
1655   if (jfunc->type == IPA_JF_PASS_THROUGH)
1656     {
1657       int idx;
1658       ipcp_transformation *sum
1659           = ipcp_get_transformation_summary (cs->caller->inlined_to
1660                                                      ? cs->caller->inlined_to
1661                                                      : cs->caller);
1662       if (!sum || !sum->m_vr)
1663           return vr;
1664 
1665       idx = ipa_get_jf_pass_through_formal_id (jfunc);
1666 
1667       if (!(*sum->m_vr)[idx].known)
1668           return vr;
1669       tree vr_type = ipa_get_type (info, idx);
1670       value_range srcvr (wide_int_to_tree (vr_type, (*sum->m_vr)[idx].min),
1671                                wide_int_to_tree (vr_type, (*sum->m_vr)[idx].max),
1672                                (*sum->m_vr)[idx].type);
1673 
1674       enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
1675 
1676       if (TREE_CODE_CLASS (operation) == tcc_unary)
1677           {
1678             value_range res;
1679 
1680             if (ipa_vr_operation_and_type_effects (&res,
1681                                                              &srcvr,
1682                                                              operation, parm_type,
1683                                                              vr_type))
1684               vr.intersect (res);
1685           }
1686       else
1687           {
1688             value_range op_res, res;
1689             tree op = ipa_get_jf_pass_through_operand (jfunc);
1690             value_range op_vr (op, op);
1691 
1692             range_fold_binary_expr (&op_res, operation, vr_type, &srcvr, &op_vr);
1693             if (ipa_vr_operation_and_type_effects (&res,
1694                                                              &op_res,
1695                                                              NOP_EXPR, parm_type,
1696                                                              vr_type))
1697               vr.intersect (res);
1698           }
1699     }
1700   return vr;
1701 }
1702 
1703 /* See if NODE is a clone with a known aggregate value at a given OFFSET of a
1704    parameter with the given INDEX.  */
1705 
1706 static tree
get_clone_agg_value(struct cgraph_node * node,HOST_WIDE_INT offset,int index)1707 get_clone_agg_value (struct cgraph_node *node, HOST_WIDE_INT offset,
1708                          int index)
1709 {
1710   struct ipa_agg_replacement_value *aggval;
1711 
1712   aggval = ipa_get_agg_replacements_for_node (node);
1713   while (aggval)
1714     {
1715       if (aggval->offset == offset
1716             && aggval->index == index)
1717           return aggval->value;
1718       aggval = aggval->next;
1719     }
1720   return NULL_TREE;
1721 }
1722 
1723 /* Determine whether ITEM, jump function for an aggregate part, evaluates to a
1724    single known constant value and if so, return it.  Otherwise return NULL.
1725    NODE and INFO describes the caller node or the one it is inlined to, and
1726    its related info.  */
1727 
1728 static tree
ipa_agg_value_from_node(class ipa_node_params * info,struct cgraph_node * node,struct ipa_agg_jf_item * item)1729 ipa_agg_value_from_node (class ipa_node_params *info,
1730                                struct cgraph_node *node,
1731                                struct ipa_agg_jf_item *item)
1732 {
1733   tree value = NULL_TREE;
1734   int src_idx;
1735 
1736   if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
1737     return NULL_TREE;
1738 
1739   if (item->jftype == IPA_JF_CONST)
1740     return item->value.constant;
1741 
1742   gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
1743                            || item->jftype == IPA_JF_LOAD_AGG);
1744 
1745   src_idx = item->value.pass_through.formal_id;
1746 
1747   if (info->ipcp_orig_node)
1748     {
1749       if (item->jftype == IPA_JF_PASS_THROUGH)
1750           value = info->known_csts[src_idx];
1751       else
1752           value = get_clone_agg_value (node, item->value.load_agg.offset,
1753                                              src_idx);
1754     }
1755   else if (info->lattices)
1756     {
1757       class ipcp_param_lattices *src_plats
1758           = ipa_get_parm_lattices (info, src_idx);
1759 
1760       if (item->jftype == IPA_JF_PASS_THROUGH)
1761           {
1762             struct ipcp_lattice<tree> *lat = &src_plats->itself;
1763 
1764             if (!lat->is_single_const ())
1765               return NULL_TREE;
1766 
1767             value = lat->values->value;
1768           }
1769       else if (src_plats->aggs
1770                  && !src_plats->aggs_bottom
1771                  && !src_plats->aggs_contain_variable
1772                  && src_plats->aggs_by_ref == item->value.load_agg.by_ref)
1773           {
1774             struct ipcp_agg_lattice *aglat;
1775 
1776             for (aglat = src_plats->aggs; aglat; aglat = aglat->next)
1777               {
1778                 if (aglat->offset > item->value.load_agg.offset)
1779                     break;
1780 
1781                 if (aglat->offset == item->value.load_agg.offset)
1782                     {
1783                       if (aglat->is_single_const ())
1784                         value = aglat->values->value;
1785                       break;
1786                     }
1787               }
1788           }
1789     }
1790 
1791   if (!value)
1792     return NULL_TREE;
1793 
1794   if (item->jftype == IPA_JF_LOAD_AGG)
1795     {
1796       tree load_type = item->value.load_agg.type;
1797       tree value_type = TREE_TYPE (value);
1798 
1799       /* Ensure value type is compatible with load type.  */
1800       if (!useless_type_conversion_p (load_type, value_type))
1801           return NULL_TREE;
1802     }
1803 
1804   return ipa_get_jf_arith_result (item->value.pass_through.operation,
1805                                           value,
1806                                           item->value.pass_through.operand,
1807                                           item->type);
1808 }
1809 
1810 /* Determine whether AGG_JFUNC evaluates to a set of known constant value for
1811    an aggregate and if so, return it.  Otherwise return an empty set.  NODE
1812    and INFO describes the caller node or the one it is inlined to, and its
1813    related info.  */
1814 
1815 struct ipa_agg_value_set
ipa_agg_value_set_from_jfunc(class ipa_node_params * info,cgraph_node * node,struct ipa_agg_jump_function * agg_jfunc)1816 ipa_agg_value_set_from_jfunc (class ipa_node_params *info, cgraph_node *node,
1817                                     struct ipa_agg_jump_function *agg_jfunc)
1818 {
1819   struct ipa_agg_value_set agg;
1820   struct ipa_agg_jf_item *item;
1821   int i;
1822 
1823   agg.items = vNULL;
1824   agg.by_ref = agg_jfunc->by_ref;
1825 
1826   FOR_EACH_VEC_SAFE_ELT (agg_jfunc->items, i, item)
1827     {
1828       tree value = ipa_agg_value_from_node (info, node, item);
1829 
1830       if (value)
1831           {
1832             struct ipa_agg_value value_item;
1833 
1834             value_item.offset = item->offset;
1835             value_item.value = value;
1836 
1837             agg.items.safe_push (value_item);
1838           }
1839     }
1840   return agg;
1841 }
1842 
1843 /* If checking is enabled, verify that no lattice is in the TOP state, i.e. not
1844    bottom, not containing a variable component and without any known value at
1845    the same time.  */
1846 
1847 DEBUG_FUNCTION void
ipcp_verify_propagated_values(void)1848 ipcp_verify_propagated_values (void)
1849 {
1850   struct cgraph_node *node;
1851 
1852   FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
1853     {
1854       ipa_node_params *info = ipa_node_params_sum->get (node);
1855       if (!opt_for_fn (node->decl, flag_ipa_cp)
1856             || !opt_for_fn (node->decl, optimize))
1857           continue;
1858       int i, count = ipa_get_param_count (info);
1859 
1860       for (i = 0; i < count; i++)
1861           {
1862             ipcp_lattice<tree> *lat = ipa_get_scalar_lat (info, i);
1863 
1864             if (!lat->bottom
1865                 && !lat->contains_variable
1866                 && lat->values_count == 0)
1867               {
1868                 if (dump_file)
1869                     {
1870                       symtab->dump (dump_file);
1871                       fprintf (dump_file, "\nIPA lattices after constant "
1872                                  "propagation, before gcc_unreachable:\n");
1873                       print_all_lattices (dump_file, true, false);
1874                     }
1875 
1876                 gcc_unreachable ();
1877               }
1878           }
1879     }
1880 }
1881 
1882 /* Return true iff X and Y should be considered equal contexts by IPA-CP.  */
1883 
1884 static bool
values_equal_for_ipcp_p(ipa_polymorphic_call_context x,ipa_polymorphic_call_context y)1885 values_equal_for_ipcp_p (ipa_polymorphic_call_context x,
1886                                ipa_polymorphic_call_context y)
1887 {
1888   return x.equal_to (y);
1889 }
1890 
1891 
1892 /* Add a new value source to the value represented by THIS, marking that a
1893    value comes from edge CS and (if the underlying jump function is a
1894    pass-through or an ancestor one) from a caller value SRC_VAL of a caller
1895    parameter described by SRC_INDEX.  OFFSET is negative if the source was the
1896    scalar value of the parameter itself or the offset within an aggregate.  */
1897 
1898 template <typename valtype>
1899 void
add_source(cgraph_edge * cs,ipcp_value * src_val,int src_idx,HOST_WIDE_INT offset)1900 ipcp_value<valtype>::add_source (cgraph_edge *cs, ipcp_value *src_val,
1901                                          int src_idx, HOST_WIDE_INT offset)
1902 {
1903   ipcp_value_source<valtype> *src;
1904 
1905   src = new (ipcp_sources_pool.allocate ()) ipcp_value_source<valtype>;
1906   src->offset = offset;
1907   src->cs = cs;
1908   src->val = src_val;
1909   src->index = src_idx;
1910 
1911   src->next = sources;
1912   sources = src;
1913 }
1914 
1915 /* Allocate a new ipcp_value holding a tree constant, initialize its value to
1916    SOURCE and clear all other fields.  */
1917 
1918 static ipcp_value<tree> *
allocate_and_init_ipcp_value(tree cst,unsigned same_lat_gen_level)1919 allocate_and_init_ipcp_value (tree cst, unsigned same_lat_gen_level)
1920 {
1921   ipcp_value<tree> *val;
1922 
1923   val = new (ipcp_cst_values_pool.allocate ()) ipcp_value<tree>();
1924   val->value = cst;
1925   val->self_recursion_generated_level = same_lat_gen_level;
1926   return val;
1927 }
1928 
1929 /* Allocate a new ipcp_value holding a polymorphic context, initialize its
1930    value to SOURCE and clear all other fields.  */
1931 
1932 static ipcp_value<ipa_polymorphic_call_context> *
allocate_and_init_ipcp_value(ipa_polymorphic_call_context ctx,unsigned same_lat_gen_level)1933 allocate_and_init_ipcp_value (ipa_polymorphic_call_context ctx,
1934                                     unsigned same_lat_gen_level)
1935 {
1936   ipcp_value<ipa_polymorphic_call_context> *val;
1937 
1938   val = new (ipcp_poly_ctx_values_pool.allocate ())
1939     ipcp_value<ipa_polymorphic_call_context>();
1940   val->value = ctx;
1941   val->self_recursion_generated_level = same_lat_gen_level;
1942   return val;
1943 }
1944 
1945 /* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it.  CS,
1946    SRC_VAL SRC_INDEX and OFFSET are meant for add_source and have the same
1947    meaning.  OFFSET -1 means the source is scalar and not a part of an
1948    aggregate.  If non-NULL, VAL_P records address of existing or newly added
1949    ipcp_value.
1950 
1951    If the value is generated for a self-recursive call as a result of an
1952    arithmetic pass-through jump-function acting on a value in the same lattice,
1953    SAME_LAT_GEN_LEVEL must be the length of such chain, otherwise it must be
1954    zero.  If it is non-zero, PARAM_IPA_CP_VALUE_LIST_SIZE limit is ignored.  */
1955 
1956 template <typename valtype>
1957 bool
add_value(valtype newval,cgraph_edge * cs,ipcp_value<valtype> * src_val,int src_idx,HOST_WIDE_INT offset,ipcp_value<valtype> ** val_p,unsigned same_lat_gen_level)1958 ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
1959                                           ipcp_value<valtype> *src_val,
1960                                           int src_idx, HOST_WIDE_INT offset,
1961                                           ipcp_value<valtype> **val_p,
1962                                           unsigned same_lat_gen_level)
1963 {
1964   ipcp_value<valtype> *val, *last_val = NULL;
1965 
1966   if (val_p)
1967     *val_p = NULL;
1968 
1969   if (bottom)
1970     return false;
1971 
1972   for (val = values; val; last_val = val, val = val->next)
1973     if (values_equal_for_ipcp_p (val->value, newval))
1974       {
1975           if (val_p)
1976             *val_p = val;
1977 
1978           if (val->self_recursion_generated_level < same_lat_gen_level)
1979             val->self_recursion_generated_level = same_lat_gen_level;
1980 
1981           if (ipa_edge_within_scc (cs))
1982             {
1983               ipcp_value_source<valtype> *s;
1984               for (s = val->sources; s; s = s->next)
1985                 if (s->cs == cs && s->val == src_val)
1986                     break;
1987               if (s)
1988                 return false;
1989             }
1990 
1991           val->add_source (cs, src_val, src_idx, offset);
1992           return false;
1993       }
1994 
1995   if (!same_lat_gen_level && values_count == opt_for_fn (cs->caller->decl,
1996                                                             param_ipa_cp_value_list_size))
1997     {
1998       /* We can only free sources, not the values themselves, because sources
1999            of other values in this SCC might point to them.   */
2000       for (val = values; val; val = val->next)
2001           {
2002             while (val->sources)
2003               {
2004                 ipcp_value_source<valtype> *src = val->sources;
2005                 val->sources = src->next;
2006                 ipcp_sources_pool.remove ((ipcp_value_source<tree>*)src);
2007               }
2008           }
2009       values = NULL;
2010       return set_to_bottom ();
2011     }
2012 
2013   values_count++;
2014   val = allocate_and_init_ipcp_value (newval, same_lat_gen_level);
2015   val->add_source (cs, src_val, src_idx, offset);
2016   val->next = NULL;
2017 
2018   /* Add the new value to end of value list, which can reduce iterations
2019      of propagation stage for recursive function.  */
2020   if (last_val)
2021     last_val->next = val;
2022   else
2023     values = val;
2024 
2025   if (val_p)
2026     *val_p = val;
2027 
2028   return true;
2029 }
2030 
2031 /* A helper function that returns result of operation specified by OPCODE on
2032    the value of SRC_VAL.  If non-NULL, OPND1_TYPE is expected type for the
2033    value of SRC_VAL.  If the operation is binary, OPND2 is a constant value
2034    acting as its second operand.  If non-NULL, RES_TYPE is expected type of
2035    the result.  */
2036 
2037 static tree
get_val_across_arith_op(enum tree_code opcode,tree opnd1_type,tree opnd2,ipcp_value<tree> * src_val,tree res_type)2038 get_val_across_arith_op (enum tree_code opcode,
2039                                tree opnd1_type,
2040                                tree opnd2,
2041                                ipcp_value<tree> *src_val,
2042                                tree res_type)
2043 {
2044   tree opnd1 = src_val->value;
2045 
2046   /* Skip source values that is incompatible with specified type.  */
2047   if (opnd1_type
2048       && !useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1)))
2049     return NULL_TREE;
2050 
2051   return ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type);
2052 }
2053 
2054 /* Propagate values through an arithmetic transformation described by a jump
2055    function associated with edge CS, taking values from SRC_LAT and putting
2056    them into DEST_LAT.  OPND1_TYPE is expected type for the values in SRC_LAT.
2057    OPND2 is a constant value if transformation is a binary operation.
2058    SRC_OFFSET specifies offset in an aggregate if SRC_LAT describes lattice of
2059    a part of the aggregate.  SRC_IDX is the index of the source parameter.
2060    RES_TYPE is the value type of result being propagated into.  Return true if
2061    DEST_LAT changed.  */
2062 
2063 static bool
propagate_vals_across_arith_jfunc(cgraph_edge * cs,enum tree_code opcode,tree opnd1_type,tree opnd2,ipcp_lattice<tree> * src_lat,ipcp_lattice<tree> * dest_lat,HOST_WIDE_INT src_offset,int src_idx,tree res_type)2064 propagate_vals_across_arith_jfunc (cgraph_edge *cs,
2065                                            enum tree_code opcode,
2066                                            tree opnd1_type,
2067                                            tree opnd2,
2068                                            ipcp_lattice<tree> *src_lat,
2069                                            ipcp_lattice<tree> *dest_lat,
2070                                            HOST_WIDE_INT src_offset,
2071                                            int src_idx,
2072                                            tree res_type)
2073 {
2074   ipcp_value<tree> *src_val;
2075   bool ret = false;
2076 
2077   /* Due to circular dependencies, propagating within an SCC through arithmetic
2078      transformation would create infinite number of values.  But for
2079      self-feeding recursive function, we could allow propagation in a limited
2080      count, and this can enable a simple kind of recursive function versioning.
2081      For other scenario, we would just make lattices bottom.  */
2082   if (opcode != NOP_EXPR && ipa_edge_within_scc (cs))
2083     {
2084       int i;
2085 
2086       int max_recursive_depth = opt_for_fn(cs->caller->decl,
2087                                                      param_ipa_cp_max_recursive_depth);
2088       if (src_lat != dest_lat || max_recursive_depth < 1)
2089           return dest_lat->set_contains_variable ();
2090 
2091       /* No benefit if recursive execution is in low probability.  */
2092       if (cs->sreal_frequency () * 100
2093             <= ((sreal) 1) * opt_for_fn (cs->caller->decl,
2094                                                param_ipa_cp_min_recursive_probability))
2095           return dest_lat->set_contains_variable ();
2096 
2097       auto_vec<ipcp_value<tree> *, 8> val_seeds;
2098 
2099       for (src_val = src_lat->values; src_val; src_val = src_val->next)
2100           {
2101             /* Now we do not use self-recursively generated value as propagation
2102                source, this is absolutely conservative, but could avoid explosion
2103                of lattice's value space, especially when one recursive function
2104                calls another recursive.  */
2105             if (src_val->self_recursion_generated_p ())
2106               {
2107                 ipcp_value_source<tree> *s;
2108 
2109                 /* If the lattice has already been propagated for the call site,
2110                      no need to do that again.  */
2111                 for (s = src_val->sources; s; s = s->next)
2112                     if (s->cs == cs)
2113                       return dest_lat->set_contains_variable ();
2114               }
2115             else
2116               val_seeds.safe_push (src_val);
2117           }
2118 
2119       gcc_assert ((int) val_seeds.length () <= param_ipa_cp_value_list_size);
2120 
2121       /* Recursively generate lattice values with a limited count.  */
2122       FOR_EACH_VEC_ELT (val_seeds, i, src_val)
2123           {
2124             for (int j = 1; j < max_recursive_depth; j++)
2125               {
2126                 tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2127                                                                  src_val, res_type);
2128                 if (!cstval
2129                       || !ipacp_value_safe_for_type (res_type, cstval))
2130                     break;
2131 
2132                 ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
2133                                                     src_offset, &src_val, j);
2134                 gcc_checking_assert (src_val);
2135               }
2136           }
2137       ret |= dest_lat->set_contains_variable ();
2138     }
2139   else
2140     for (src_val = src_lat->values; src_val; src_val = src_val->next)
2141       {
2142           /* Now we do not use self-recursively generated value as propagation
2143              source, otherwise it is easy to make value space of normal lattice
2144              overflow.  */
2145           if (src_val->self_recursion_generated_p ())
2146             {
2147               ret |= dest_lat->set_contains_variable ();
2148               continue;
2149             }
2150 
2151           tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2152                                                          src_val, res_type);
2153           if (cstval
2154               && ipacp_value_safe_for_type (res_type, cstval))
2155             ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
2156                                               src_offset);
2157           else
2158             ret |= dest_lat->set_contains_variable ();
2159       }
2160 
2161   return ret;
2162 }
2163 
2164 /* Propagate values through a pass-through jump function JFUNC associated with
2165    edge CS, taking values from SRC_LAT and putting them into DEST_LAT.  SRC_IDX
2166    is the index of the source parameter.  PARM_TYPE is the type of the
2167    parameter to which the result is passed.  */
2168 
2169 static bool
propagate_vals_across_pass_through(cgraph_edge * cs,ipa_jump_func * jfunc,ipcp_lattice<tree> * src_lat,ipcp_lattice<tree> * dest_lat,int src_idx,tree parm_type)2170 propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc,
2171                                             ipcp_lattice<tree> *src_lat,
2172                                             ipcp_lattice<tree> *dest_lat, int src_idx,
2173                                             tree parm_type)
2174 {
2175   return propagate_vals_across_arith_jfunc (cs,
2176                                         ipa_get_jf_pass_through_operation (jfunc),
2177                                         NULL_TREE,
2178                                         ipa_get_jf_pass_through_operand (jfunc),
2179                                         src_lat, dest_lat, -1, src_idx, parm_type);
2180 }
2181 
2182 /* Propagate values through an ancestor jump function JFUNC associated with
2183    edge CS, taking values from SRC_LAT and putting them into DEST_LAT.  SRC_IDX
2184    is the index of the source parameter.  */
2185 
2186 static bool
propagate_vals_across_ancestor(struct cgraph_edge * cs,struct ipa_jump_func * jfunc,ipcp_lattice<tree> * src_lat,ipcp_lattice<tree> * dest_lat,int src_idx,tree param_type)2187 propagate_vals_across_ancestor (struct cgraph_edge *cs,
2188                                         struct ipa_jump_func *jfunc,
2189                                         ipcp_lattice<tree> *src_lat,
2190                                         ipcp_lattice<tree> *dest_lat, int src_idx,
2191                                         tree param_type)
2192 {
2193   ipcp_value<tree> *src_val;
2194   bool ret = false;
2195 
2196   if (ipa_edge_within_scc (cs))
2197     return dest_lat->set_contains_variable ();
2198 
2199   for (src_val = src_lat->values; src_val; src_val = src_val->next)
2200     {
2201       tree t = ipa_get_jf_ancestor_result (jfunc, src_val->value);
2202 
2203       if (t && ipacp_value_safe_for_type (param_type, t))
2204           ret |= dest_lat->add_value (t, cs, src_val, src_idx);
2205       else
2206           ret |= dest_lat->set_contains_variable ();
2207     }
2208 
2209   return ret;
2210 }
2211 
2212 /* Propagate scalar values across jump function JFUNC that is associated with
2213    edge CS and put the values into DEST_LAT.  PARM_TYPE is the type of the
2214    parameter to which the result is passed.  */
2215 
2216 static bool
propagate_scalar_across_jump_function(struct cgraph_edge * cs,struct ipa_jump_func * jfunc,ipcp_lattice<tree> * dest_lat,tree param_type)2217 propagate_scalar_across_jump_function (struct cgraph_edge *cs,
2218                                                struct ipa_jump_func *jfunc,
2219                                                ipcp_lattice<tree> *dest_lat,
2220                                                tree param_type)
2221 {
2222   if (dest_lat->bottom)
2223     return false;
2224 
2225   if (jfunc->type == IPA_JF_CONST)
2226     {
2227       tree val = ipa_get_jf_constant (jfunc);
2228       if (ipacp_value_safe_for_type (param_type, val))
2229           return dest_lat->add_value (val, cs, NULL, 0);
2230       else
2231           return dest_lat->set_contains_variable ();
2232     }
2233   else if (jfunc->type == IPA_JF_PASS_THROUGH
2234              || jfunc->type == IPA_JF_ANCESTOR)
2235     {
2236       ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2237       ipcp_lattice<tree> *src_lat;
2238       int src_idx;
2239       bool ret;
2240 
2241       if (jfunc->type == IPA_JF_PASS_THROUGH)
2242           src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2243       else
2244           src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2245 
2246       src_lat = ipa_get_scalar_lat (caller_info, src_idx);
2247       if (src_lat->bottom)
2248           return dest_lat->set_contains_variable ();
2249 
2250       /* If we would need to clone the caller and cannot, do not propagate.  */
2251       if (!ipcp_versionable_function_p (cs->caller)
2252             && (src_lat->contains_variable
2253                 || (src_lat->values_count > 1)))
2254           return dest_lat->set_contains_variable ();
2255 
2256       if (jfunc->type == IPA_JF_PASS_THROUGH)
2257           ret = propagate_vals_across_pass_through (cs, jfunc, src_lat,
2258                                                               dest_lat, src_idx,
2259                                                               param_type);
2260       else
2261           ret = propagate_vals_across_ancestor (cs, jfunc, src_lat, dest_lat,
2262                                                         src_idx, param_type);
2263 
2264       if (src_lat->contains_variable)
2265           ret |= dest_lat->set_contains_variable ();
2266 
2267       return ret;
2268     }
2269 
2270   /* TODO: We currently do not handle member method pointers in IPA-CP (we only
2271      use it for indirect inlining), we should propagate them too.  */
2272   return dest_lat->set_contains_variable ();
2273 }
2274 
2275 /* Propagate scalar values across jump function JFUNC that is associated with
2276    edge CS and describes argument IDX and put the values into DEST_LAT.  */
2277 
2278 static bool
propagate_context_across_jump_function(cgraph_edge * cs,ipa_jump_func * jfunc,int idx,ipcp_lattice<ipa_polymorphic_call_context> * dest_lat)2279 propagate_context_across_jump_function (cgraph_edge *cs,
2280                                 ipa_jump_func *jfunc, int idx,
2281                                 ipcp_lattice<ipa_polymorphic_call_context> *dest_lat)
2282 {
2283   if (dest_lat->bottom)
2284     return false;
2285   ipa_edge_args *args = ipa_edge_args_sum->get (cs);
2286   bool ret = false;
2287   bool added_sth = false;
2288   bool type_preserved = true;
2289 
2290   ipa_polymorphic_call_context edge_ctx, *edge_ctx_ptr
2291     = ipa_get_ith_polymorhic_call_context (args, idx);
2292 
2293   if (edge_ctx_ptr)
2294     edge_ctx = *edge_ctx_ptr;
2295 
2296   if (jfunc->type == IPA_JF_PASS_THROUGH
2297       || jfunc->type == IPA_JF_ANCESTOR)
2298     {
2299       ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2300       int src_idx;
2301       ipcp_lattice<ipa_polymorphic_call_context> *src_lat;
2302 
2303       /* TODO: Once we figure out how to propagate speculations, it will
2304            probably be a good idea to switch to speculation if type_preserved is
2305            not set instead of punting.  */
2306       if (jfunc->type == IPA_JF_PASS_THROUGH)
2307           {
2308             if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
2309               goto prop_fail;
2310             type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
2311             src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2312           }
2313       else
2314           {
2315             type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
2316             src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2317           }
2318 
2319       src_lat = ipa_get_poly_ctx_lat (caller_info, src_idx);
2320       /* If we would need to clone the caller and cannot, do not propagate.  */
2321       if (!ipcp_versionable_function_p (cs->caller)
2322             && (src_lat->contains_variable
2323                 || (src_lat->values_count > 1)))
2324           goto prop_fail;
2325 
2326       ipcp_value<ipa_polymorphic_call_context> *src_val;
2327       for (src_val = src_lat->values; src_val; src_val = src_val->next)
2328           {
2329             ipa_polymorphic_call_context cur = src_val->value;
2330 
2331             if (!type_preserved)
2332               cur.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
2333             if (jfunc->type == IPA_JF_ANCESTOR)
2334               cur.offset_by (ipa_get_jf_ancestor_offset (jfunc));
2335             /* TODO: In cases we know how the context is going to be used,
2336                we can improve the result by passing proper OTR_TYPE.  */
2337             cur.combine_with (edge_ctx);
2338             if (!cur.useless_p ())
2339               {
2340                 if (src_lat->contains_variable
2341                       && !edge_ctx.equal_to (cur))
2342                     ret |= dest_lat->set_contains_variable ();
2343                 ret |= dest_lat->add_value (cur, cs, src_val, src_idx);
2344                 added_sth = true;
2345               }
2346           }
2347     }
2348 
2349  prop_fail:
2350   if (!added_sth)
2351     {
2352       if (!edge_ctx.useless_p ())
2353           ret |= dest_lat->add_value (edge_ctx, cs);
2354       else
2355           ret |= dest_lat->set_contains_variable ();
2356     }
2357 
2358   return ret;
2359 }
2360 
2361 /* Propagate bits across jfunc that is associated with
2362    edge cs and update dest_lattice accordingly.  */
2363 
2364 bool
propagate_bits_across_jump_function(cgraph_edge * cs,int idx,ipa_jump_func * jfunc,ipcp_bits_lattice * dest_lattice)2365 propagate_bits_across_jump_function (cgraph_edge *cs, int idx,
2366                                              ipa_jump_func *jfunc,
2367                                              ipcp_bits_lattice *dest_lattice)
2368 {
2369   if (dest_lattice->bottom_p ())
2370     return false;
2371 
2372   enum availability availability;
2373   cgraph_node *callee = cs->callee->function_symbol (&availability);
2374   ipa_node_params *callee_info = ipa_node_params_sum->get (callee);
2375   tree parm_type = ipa_get_type (callee_info, idx);
2376 
2377   /* For K&R C programs, ipa_get_type() could return NULL_TREE.  Avoid the
2378      transform for these cases.  Similarly, we can have bad type mismatches
2379      with LTO, avoid doing anything with those too.  */
2380   if (!parm_type
2381       || (!INTEGRAL_TYPE_P (parm_type) && !POINTER_TYPE_P (parm_type)))
2382     {
2383       if (dump_file && (dump_flags & TDF_DETAILS))
2384           fprintf (dump_file, "Setting dest_lattice to bottom, because type of "
2385                      "param %i of %s is NULL or unsuitable for bits propagation\n",
2386                      idx, cs->callee->dump_name ());
2387 
2388       return dest_lattice->set_to_bottom ();
2389     }
2390 
2391   unsigned precision = TYPE_PRECISION (parm_type);
2392   signop sgn = TYPE_SIGN (parm_type);
2393 
2394   if (jfunc->type == IPA_JF_PASS_THROUGH
2395       || jfunc->type == IPA_JF_ANCESTOR)
2396     {
2397       ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2398       tree operand = NULL_TREE;
2399       enum tree_code code;
2400       unsigned src_idx;
2401       bool keep_null = false;
2402 
2403       if (jfunc->type == IPA_JF_PASS_THROUGH)
2404           {
2405             code = ipa_get_jf_pass_through_operation (jfunc);
2406             src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2407             if (code != NOP_EXPR)
2408               operand = ipa_get_jf_pass_through_operand (jfunc);
2409           }
2410       else
2411           {
2412             code = POINTER_PLUS_EXPR;
2413             src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2414             unsigned HOST_WIDE_INT offset
2415               = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
2416             keep_null = (ipa_get_jf_ancestor_keep_null (jfunc) || !offset);
2417             operand = build_int_cstu (size_type_node, offset);
2418           }
2419 
2420       class ipcp_param_lattices *src_lats
2421           = ipa_get_parm_lattices (caller_info, src_idx);
2422 
2423       /* Try to propagate bits if src_lattice is bottom, but jfunc is known.
2424            for eg consider:
2425            int f(int x)
2426            {
2427              g (x & 0xff);
2428            }
2429            Assume lattice for x is bottom, however we can still propagate
2430            result of x & 0xff == 0xff, which gets computed during ccp1 pass
2431            and we store it in jump function during analysis stage.  */
2432 
2433       if (!src_lats->bits_lattice.bottom_p ())
2434           {
2435             bool drop_all_ones
2436               = keep_null && !src_lats->bits_lattice.known_nonzero_p ();
2437 
2438             return dest_lattice->meet_with (src_lats->bits_lattice, precision,
2439                                                     sgn, code, operand, drop_all_ones);
2440           }
2441     }
2442 
2443   if (jfunc->bits)
2444     return dest_lattice->meet_with (jfunc->bits->value, jfunc->bits->mask,
2445                                             precision);
2446   else
2447     return dest_lattice->set_to_bottom ();
2448 }
2449 
2450 /* Propagate value range across jump function JFUNC that is associated with
2451    edge CS with param of callee of PARAM_TYPE and update DEST_PLATS
2452    accordingly.  */
2453 
2454 static bool
propagate_vr_across_jump_function(cgraph_edge * cs,ipa_jump_func * jfunc,class ipcp_param_lattices * dest_plats,tree param_type)2455 propagate_vr_across_jump_function (cgraph_edge *cs, ipa_jump_func *jfunc,
2456                                            class ipcp_param_lattices *dest_plats,
2457                                            tree param_type)
2458 {
2459   ipcp_vr_lattice *dest_lat = &dest_plats->m_value_range;
2460 
2461   if (dest_lat->bottom_p ())
2462     return false;
2463 
2464   if (!param_type
2465       || (!INTEGRAL_TYPE_P (param_type)
2466             && !POINTER_TYPE_P (param_type)))
2467     return dest_lat->set_to_bottom ();
2468 
2469   if (jfunc->type == IPA_JF_PASS_THROUGH)
2470     {
2471       enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
2472       ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2473       int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2474       class ipcp_param_lattices *src_lats
2475           = ipa_get_parm_lattices (caller_info, src_idx);
2476       tree operand_type = ipa_get_type (caller_info, src_idx);
2477 
2478       if (src_lats->m_value_range.bottom_p ())
2479           return dest_lat->set_to_bottom ();
2480 
2481       value_range vr;
2482       if (TREE_CODE_CLASS (operation) == tcc_unary)
2483           ipa_vr_operation_and_type_effects (&vr,
2484                                                      &src_lats->m_value_range.m_vr,
2485                                                      operation, param_type,
2486                                                      operand_type);
2487       /* A crude way to prevent unbounded number of value range updates
2488            in SCC components.  We should allow limited number of updates within
2489            SCC, too.  */
2490       else if (!ipa_edge_within_scc (cs))
2491           {
2492             tree op = ipa_get_jf_pass_through_operand (jfunc);
2493             value_range op_vr (op, op);
2494             value_range op_res,res;
2495 
2496             range_fold_binary_expr (&op_res, operation, operand_type,
2497                                           &src_lats->m_value_range.m_vr, &op_vr);
2498             ipa_vr_operation_and_type_effects (&vr,
2499                                                        &op_res,
2500                                                        NOP_EXPR, param_type,
2501                                                        operand_type);
2502           }
2503       if (!vr.undefined_p () && !vr.varying_p ())
2504           {
2505             if (jfunc->m_vr)
2506               {
2507                 value_range jvr;
2508                 if (ipa_vr_operation_and_type_effects (&jvr, jfunc->m_vr,
2509                                                                  NOP_EXPR,
2510                                                                  param_type,
2511                                                                  jfunc->m_vr->type ()))
2512                     vr.intersect (jvr);
2513               }
2514             return dest_lat->meet_with (&vr);
2515           }
2516     }
2517   else if (jfunc->type == IPA_JF_CONST)
2518     {
2519       tree val = ipa_get_jf_constant (jfunc);
2520       if (TREE_CODE (val) == INTEGER_CST)
2521           {
2522             val = fold_convert (param_type, val);
2523             if (TREE_OVERFLOW_P (val))
2524               val = drop_tree_overflow (val);
2525 
2526             value_range tmpvr (val, val);
2527             return dest_lat->meet_with (&tmpvr);
2528           }
2529     }
2530 
2531   value_range vr;
2532   if (jfunc->m_vr
2533       && ipa_vr_operation_and_type_effects (&vr, jfunc->m_vr, NOP_EXPR,
2534                                                       param_type,
2535                                                       jfunc->m_vr->type ()))
2536     return dest_lat->meet_with (&vr);
2537   else
2538     return dest_lat->set_to_bottom ();
2539 }
2540 
2541 /* If DEST_PLATS already has aggregate items, check that aggs_by_ref matches
2542    NEW_AGGS_BY_REF and if not, mark all aggs as bottoms and return true (in all
2543    other cases, return false).  If there are no aggregate items, set
2544    aggs_by_ref to NEW_AGGS_BY_REF.  */
2545 
2546 static bool
set_check_aggs_by_ref(class ipcp_param_lattices * dest_plats,bool new_aggs_by_ref)2547 set_check_aggs_by_ref (class ipcp_param_lattices *dest_plats,
2548                            bool new_aggs_by_ref)
2549 {
2550   if (dest_plats->aggs)
2551     {
2552       if (dest_plats->aggs_by_ref != new_aggs_by_ref)
2553           {
2554             set_agg_lats_to_bottom (dest_plats);
2555             return true;
2556           }
2557     }
2558   else
2559     dest_plats->aggs_by_ref = new_aggs_by_ref;
2560   return false;
2561 }
2562 
2563 /* Walk aggregate lattices in DEST_PLATS from ***AGLAT on, until ***aglat is an
2564    already existing lattice for the given OFFSET and SIZE, marking all skipped
2565    lattices as containing variable and checking for overlaps.  If there is no
2566    already existing lattice for the OFFSET and VAL_SIZE, create one, initialize
2567    it with offset, size and contains_variable to PRE_EXISTING, and return true,
2568    unless there are too many already.  If there are two many, return false.  If
2569    there are overlaps turn whole DEST_PLATS to bottom and return false.  If any
2570    skipped lattices were newly marked as containing variable, set *CHANGE to
2571    true.  MAX_AGG_ITEMS is the maximum number of lattices.  */
2572 
2573 static bool
merge_agg_lats_step(class ipcp_param_lattices * dest_plats,HOST_WIDE_INT offset,HOST_WIDE_INT val_size,struct ipcp_agg_lattice *** aglat,bool pre_existing,bool * change,int max_agg_items)2574 merge_agg_lats_step (class ipcp_param_lattices *dest_plats,
2575                          HOST_WIDE_INT offset, HOST_WIDE_INT val_size,
2576                          struct ipcp_agg_lattice ***aglat,
2577                          bool pre_existing, bool *change, int max_agg_items)
2578 {
2579   gcc_checking_assert (offset >= 0);
2580 
2581   while (**aglat && (**aglat)->offset < offset)
2582     {
2583       if ((**aglat)->offset + (**aglat)->size > offset)
2584           {
2585             set_agg_lats_to_bottom (dest_plats);
2586             return false;
2587           }
2588       *change |= (**aglat)->set_contains_variable ();
2589       *aglat = &(**aglat)->next;
2590     }
2591 
2592   if (**aglat && (**aglat)->offset == offset)
2593     {
2594       if ((**aglat)->size != val_size)
2595           {
2596             set_agg_lats_to_bottom (dest_plats);
2597             return false;
2598           }
2599       gcc_assert (!(**aglat)->next
2600                       || (**aglat)->next->offset >= offset + val_size);
2601       return true;
2602     }
2603   else
2604     {
2605       struct ipcp_agg_lattice *new_al;
2606 
2607       if (**aglat && (**aglat)->offset < offset + val_size)
2608           {
2609             set_agg_lats_to_bottom (dest_plats);
2610             return false;
2611           }
2612       if (dest_plats->aggs_count == max_agg_items)
2613           return false;
2614       dest_plats->aggs_count++;
2615       new_al = ipcp_agg_lattice_pool.allocate ();
2616       memset (new_al, 0, sizeof (*new_al));
2617 
2618       new_al->offset = offset;
2619       new_al->size = val_size;
2620       new_al->contains_variable = pre_existing;
2621 
2622       new_al->next = **aglat;
2623       **aglat = new_al;
2624       return true;
2625     }
2626 }
2627 
2628 /* Set all AGLAT and all other aggregate lattices reachable by next pointers as
2629    containing an unknown value.  */
2630 
2631 static bool
set_chain_of_aglats_contains_variable(struct ipcp_agg_lattice * aglat)2632 set_chain_of_aglats_contains_variable (struct ipcp_agg_lattice *aglat)
2633 {
2634   bool ret = false;
2635   while (aglat)
2636     {
2637       ret |= aglat->set_contains_variable ();
2638       aglat = aglat->next;
2639     }
2640   return ret;
2641 }
2642 
2643 /* Merge existing aggregate lattices in SRC_PLATS to DEST_PLATS, subtracting
2644    DELTA_OFFSET.  CS is the call graph edge and SRC_IDX the index of the source
2645    parameter used for lattice value sources.  Return true if DEST_PLATS changed
2646    in any way.  */
2647 
2648 static bool
merge_aggregate_lattices(struct cgraph_edge * cs,class ipcp_param_lattices * dest_plats,class ipcp_param_lattices * src_plats,int src_idx,HOST_WIDE_INT offset_delta)2649 merge_aggregate_lattices (struct cgraph_edge *cs,
2650                                 class ipcp_param_lattices *dest_plats,
2651                                 class ipcp_param_lattices *src_plats,
2652                                 int src_idx, HOST_WIDE_INT offset_delta)
2653 {
2654   bool pre_existing = dest_plats->aggs != NULL;
2655   struct ipcp_agg_lattice **dst_aglat;
2656   bool ret = false;
2657 
2658   if (set_check_aggs_by_ref (dest_plats, src_plats->aggs_by_ref))
2659     return true;
2660   if (src_plats->aggs_bottom)
2661     return set_agg_lats_contain_variable (dest_plats);
2662   if (src_plats->aggs_contain_variable)
2663     ret |= set_agg_lats_contain_variable (dest_plats);
2664   dst_aglat = &dest_plats->aggs;
2665 
2666   int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
2667                                           param_ipa_max_agg_items);
2668   for (struct ipcp_agg_lattice *src_aglat = src_plats->aggs;
2669        src_aglat;
2670        src_aglat = src_aglat->next)
2671     {
2672       HOST_WIDE_INT new_offset = src_aglat->offset - offset_delta;
2673 
2674       if (new_offset < 0)
2675           continue;
2676       if (merge_agg_lats_step (dest_plats, new_offset, src_aglat->size,
2677                                      &dst_aglat, pre_existing, &ret, max_agg_items))
2678           {
2679             struct ipcp_agg_lattice *new_al = *dst_aglat;
2680 
2681             dst_aglat = &(*dst_aglat)->next;
2682             if (src_aglat->bottom)
2683               {
2684                 ret |= new_al->set_contains_variable ();
2685                 continue;
2686               }
2687             if (src_aglat->contains_variable)
2688               ret |= new_al->set_contains_variable ();
2689             for (ipcp_value<tree> *val = src_aglat->values;
2690                  val;
2691                  val = val->next)
2692               ret |= new_al->add_value (val->value, cs, val, src_idx,
2693                                               src_aglat->offset);
2694           }
2695       else if (dest_plats->aggs_bottom)
2696           return true;
2697     }
2698   ret |= set_chain_of_aglats_contains_variable (*dst_aglat);
2699   return ret;
2700 }
2701 
2702 /* Determine whether there is anything to propagate FROM SRC_PLATS through a
2703    pass-through JFUNC and if so, whether it has conform and conforms to the
2704    rules about propagating values passed by reference.  */
2705 
2706 static bool
agg_pass_through_permissible_p(class ipcp_param_lattices * src_plats,struct ipa_jump_func * jfunc)2707 agg_pass_through_permissible_p (class ipcp_param_lattices *src_plats,
2708                                         struct ipa_jump_func *jfunc)
2709 {
2710   return src_plats->aggs
2711     && (!src_plats->aggs_by_ref
2712           || ipa_get_jf_pass_through_agg_preserved (jfunc));
2713 }
2714 
2715 /* Propagate values through ITEM, jump function for a part of an aggregate,
2716    into corresponding aggregate lattice AGLAT.  CS is the call graph edge
2717    associated with the jump function.  Return true if AGLAT changed in any
2718    way.  */
2719 
2720 static bool
propagate_aggregate_lattice(struct cgraph_edge * cs,struct ipa_agg_jf_item * item,struct ipcp_agg_lattice * aglat)2721 propagate_aggregate_lattice (struct cgraph_edge *cs,
2722                                    struct ipa_agg_jf_item *item,
2723                                    struct ipcp_agg_lattice *aglat)
2724 {
2725   class ipa_node_params *caller_info;
2726   class ipcp_param_lattices *src_plats;
2727   struct ipcp_lattice<tree> *src_lat;
2728   HOST_WIDE_INT src_offset;
2729   int src_idx;
2730   tree load_type;
2731   bool ret;
2732 
2733   if (item->jftype == IPA_JF_CONST)
2734     {
2735       tree value = item->value.constant;
2736 
2737       gcc_checking_assert (is_gimple_ip_invariant (value));
2738       return aglat->add_value (value, cs, NULL, 0);
2739     }
2740 
2741   gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
2742                            || item->jftype == IPA_JF_LOAD_AGG);
2743 
2744   caller_info = ipa_node_params_sum->get (cs->caller);
2745   src_idx = item->value.pass_through.formal_id;
2746   src_plats = ipa_get_parm_lattices (caller_info, src_idx);
2747 
2748   if (item->jftype == IPA_JF_PASS_THROUGH)
2749     {
2750       load_type = NULL_TREE;
2751       src_lat = &src_plats->itself;
2752       src_offset = -1;
2753     }
2754   else
2755     {
2756       HOST_WIDE_INT load_offset = item->value.load_agg.offset;
2757       struct ipcp_agg_lattice *src_aglat;
2758 
2759       for (src_aglat = src_plats->aggs; src_aglat; src_aglat = src_aglat->next)
2760           if (src_aglat->offset >= load_offset)
2761             break;
2762 
2763       load_type = item->value.load_agg.type;
2764       if (!src_aglat
2765             || src_aglat->offset > load_offset
2766             || src_aglat->size != tree_to_shwi (TYPE_SIZE (load_type))
2767             || src_plats->aggs_by_ref != item->value.load_agg.by_ref)
2768           return aglat->set_contains_variable ();
2769 
2770       src_lat = src_aglat;
2771       src_offset = load_offset;
2772     }
2773 
2774   if (src_lat->bottom
2775       || (!ipcp_versionable_function_p (cs->caller)
2776             && !src_lat->is_single_const ()))
2777     return aglat->set_contains_variable ();
2778 
2779   ret = propagate_vals_across_arith_jfunc (cs,
2780                                                      item->value.pass_through.operation,
2781                                                      load_type,
2782                                                      item->value.pass_through.operand,
2783                                                      src_lat, aglat,
2784                                                      src_offset,
2785                                                      src_idx,
2786                                                      item->type);
2787 
2788   if (src_lat->contains_variable)
2789     ret |= aglat->set_contains_variable ();
2790 
2791   return ret;
2792 }
2793 
2794 /* Propagate scalar values across jump function JFUNC that is associated with
2795    edge CS and put the values into DEST_LAT.  */
2796 
2797 static bool
propagate_aggs_across_jump_function(struct cgraph_edge * cs,struct ipa_jump_func * jfunc,class ipcp_param_lattices * dest_plats)2798 propagate_aggs_across_jump_function (struct cgraph_edge *cs,
2799                                              struct ipa_jump_func *jfunc,
2800                                              class ipcp_param_lattices *dest_plats)
2801 {
2802   bool ret = false;
2803 
2804   if (dest_plats->aggs_bottom)
2805     return false;
2806 
2807   if (jfunc->type == IPA_JF_PASS_THROUGH
2808       && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
2809     {
2810       ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2811       int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2812       class ipcp_param_lattices *src_plats;
2813 
2814       src_plats = ipa_get_parm_lattices (caller_info, src_idx);
2815       if (agg_pass_through_permissible_p (src_plats, jfunc))
2816           {
2817             /* Currently we do not produce clobber aggregate jump
2818                functions, replace with merging when we do.  */
2819             gcc_assert (!jfunc->agg.items);
2820             ret |= merge_aggregate_lattices (cs, dest_plats, src_plats,
2821                                                      src_idx, 0);
2822             return ret;
2823           }
2824     }
2825   else if (jfunc->type == IPA_JF_ANCESTOR
2826              && ipa_get_jf_ancestor_agg_preserved (jfunc))
2827     {
2828       ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2829       int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2830       class ipcp_param_lattices *src_plats;
2831 
2832       src_plats = ipa_get_parm_lattices (caller_info, src_idx);
2833       if (src_plats->aggs && src_plats->aggs_by_ref)
2834           {
2835             /* Currently we do not produce clobber aggregate jump
2836                functions, replace with merging when we do.  */
2837             gcc_assert (!jfunc->agg.items);
2838             ret |= merge_aggregate_lattices (cs, dest_plats, src_plats, src_idx,
2839                                                      ipa_get_jf_ancestor_offset (jfunc));
2840           }
2841       else if (!src_plats->aggs_by_ref)
2842           ret |= set_agg_lats_to_bottom (dest_plats);
2843       else
2844           ret |= set_agg_lats_contain_variable (dest_plats);
2845       return ret;
2846     }
2847 
2848   if (jfunc->agg.items)
2849     {
2850       bool pre_existing = dest_plats->aggs != NULL;
2851       struct ipcp_agg_lattice **aglat = &dest_plats->aggs;
2852       struct ipa_agg_jf_item *item;
2853       int i;
2854 
2855       if (set_check_aggs_by_ref (dest_plats, jfunc->agg.by_ref))
2856           return true;
2857 
2858       int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
2859                                               param_ipa_max_agg_items);
2860       FOR_EACH_VEC_ELT (*jfunc->agg.items, i, item)
2861           {
2862             HOST_WIDE_INT val_size;
2863 
2864             if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
2865               continue;
2866             val_size = tree_to_shwi (TYPE_SIZE (item->type));
2867 
2868             if (merge_agg_lats_step (dest_plats, item->offset, val_size,
2869                                            &aglat, pre_existing, &ret, max_agg_items))
2870               {
2871                 ret |= propagate_aggregate_lattice (cs, item, *aglat);
2872                 aglat = &(*aglat)->next;
2873               }
2874             else if (dest_plats->aggs_bottom)
2875               return true;
2876           }
2877 
2878       ret |= set_chain_of_aglats_contains_variable (*aglat);
2879     }
2880   else
2881     ret |= set_agg_lats_contain_variable (dest_plats);
2882 
2883   return ret;
2884 }
2885 
2886 /* Return true if on the way cfrom CS->caller to the final (non-alias and
2887    non-thunk) destination, the call passes through a thunk.  */
2888 
2889 static bool
call_passes_through_thunk(cgraph_edge * cs)2890 call_passes_through_thunk (cgraph_edge *cs)
2891 {
2892   cgraph_node *alias_or_thunk = cs->callee;
2893   while (alias_or_thunk->alias)
2894     alias_or_thunk = alias_or_thunk->get_alias_target ();
2895   return alias_or_thunk->thunk;
2896 }
2897 
2898 /* Propagate constants from the caller to the callee of CS.  INFO describes the
2899    caller.  */
2900 
2901 static bool
propagate_constants_across_call(struct cgraph_edge * cs)2902 propagate_constants_across_call (struct cgraph_edge *cs)
2903 {
2904   class ipa_node_params *callee_info;
2905   enum availability availability;
2906   cgraph_node *callee;
2907   class ipa_edge_args *args;
2908   bool ret = false;
2909   int i, args_count, parms_count;
2910 
2911   callee = cs->callee->function_symbol (&availability);
2912   if (!callee->definition)
2913     return false;
2914   gcc_checking_assert (callee->has_gimple_body_p ());
2915   callee_info = ipa_node_params_sum->get (callee);
2916   if (!callee_info)
2917     return false;
2918 
2919   args = ipa_edge_args_sum->get (cs);
2920   parms_count = ipa_get_param_count (callee_info);
2921   if (parms_count == 0)
2922     return false;
2923   if (!args
2924       || !opt_for_fn (cs->caller->decl, flag_ipa_cp)
2925       || !opt_for_fn (cs->caller->decl, optimize))
2926     {
2927       for (i = 0; i < parms_count; i++)
2928           ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
2929                                                                                  i));
2930       return ret;
2931     }
2932   args_count = ipa_get_cs_argument_count (args);
2933 
2934   /* If this call goes through a thunk we must not propagate to the first (0th)
2935      parameter.  However, we might need to uncover a thunk from below a series
2936      of aliases first.  */
2937   if (call_passes_through_thunk (cs))
2938     {
2939       ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
2940                                                                              0));
2941       i = 1;
2942     }
2943   else
2944     i = 0;
2945 
2946   for (; (i < args_count) && (i < parms_count); i++)
2947     {
2948       struct ipa_jump_func *jump_func = ipa_get_ith_jump_func (args, i);
2949       class ipcp_param_lattices *dest_plats;
2950       tree param_type = ipa_get_type (callee_info, i);
2951 
2952       dest_plats = ipa_get_parm_lattices (callee_info, i);
2953       if (availability == AVAIL_INTERPOSABLE)
2954           ret |= set_all_contains_variable (dest_plats);
2955       else
2956           {
2957             ret |= propagate_scalar_across_jump_function (cs, jump_func,
2958                                                                       &dest_plats->itself,
2959                                                                       param_type);
2960             ret |= propagate_context_across_jump_function (cs, jump_func, i,
2961                                                                        &dest_plats->ctxlat);
2962             ret
2963               |= propagate_bits_across_jump_function (cs, i, jump_func,
2964                                                                 &dest_plats->bits_lattice);
2965             ret |= propagate_aggs_across_jump_function (cs, jump_func,
2966                                                                   dest_plats);
2967             if (opt_for_fn (callee->decl, flag_ipa_vrp))
2968               ret |= propagate_vr_across_jump_function (cs, jump_func,
2969                                                                   dest_plats, param_type);
2970             else
2971               ret |= dest_plats->m_value_range.set_to_bottom ();
2972           }
2973     }
2974   for (; i < parms_count; i++)
2975     ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info, i));
2976 
2977   return ret;
2978 }
2979 
2980 /* If an indirect edge IE can be turned into a direct one based on KNOWN_VALS
2981    KNOWN_CONTEXTS, KNOWN_AGGS or AGG_REPS return the destination.  The latter
2982    three can be NULL.  If AGG_REPS is not NULL, KNOWN_AGGS is ignored.  */
2983 
2984 static tree
ipa_get_indirect_edge_target_1(struct cgraph_edge * ie,const vec<tree> & known_csts,const vec<ipa_polymorphic_call_context> & known_contexts,const vec<ipa_agg_value_set> & known_aggs,struct ipa_agg_replacement_value * agg_reps,bool * speculative)2985 ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie,
2986                                         const vec<tree> &known_csts,
2987                                         const vec<ipa_polymorphic_call_context> &known_contexts,
2988                                         const vec<ipa_agg_value_set> &known_aggs,
2989                                         struct ipa_agg_replacement_value *agg_reps,
2990                                         bool *speculative)
2991 {
2992   int param_index = ie->indirect_info->param_index;
2993   HOST_WIDE_INT anc_offset;
2994   tree t = NULL;
2995   tree target = NULL;
2996 
2997   *speculative = false;
2998 
2999   if (param_index == -1)
3000     return NULL_TREE;
3001 
3002   if (!ie->indirect_info->polymorphic)
3003     {
3004       tree t = NULL;
3005 
3006       if (ie->indirect_info->agg_contents)
3007           {
3008             t = NULL;
3009             if (agg_reps && ie->indirect_info->guaranteed_unmodified)
3010               {
3011                 while (agg_reps)
3012                     {
3013                       if (agg_reps->index == param_index
3014                           && agg_reps->offset == ie->indirect_info->offset
3015                           && agg_reps->by_ref == ie->indirect_info->by_ref)
3016                         {
3017                           t = agg_reps->value;
3018                           break;
3019                         }
3020                       agg_reps = agg_reps->next;
3021                     }
3022               }
3023             if (!t)
3024               {
3025                 const ipa_agg_value_set *agg;
3026                 if (known_aggs.length () > (unsigned int) param_index)
3027                     agg = &known_aggs[param_index];
3028                 else
3029                     agg = NULL;
3030                 bool from_global_constant;
3031                 t = ipa_find_agg_cst_for_param (agg,
3032                                                         (unsigned) param_index
3033                                                              < known_csts.length ()
3034                                                         ? known_csts[param_index]
3035                                                         : NULL,
3036                                                         ie->indirect_info->offset,
3037                                                         ie->indirect_info->by_ref,
3038                                                         &from_global_constant);
3039                 if (t
3040                       && !from_global_constant
3041                       && !ie->indirect_info->guaranteed_unmodified)
3042                     t = NULL_TREE;
3043               }
3044           }
3045       else if ((unsigned) param_index < known_csts.length ())
3046           t = known_csts[param_index];
3047 
3048       if (t
3049             && TREE_CODE (t) == ADDR_EXPR
3050             && TREE_CODE (TREE_OPERAND (t, 0)) == FUNCTION_DECL)
3051           return TREE_OPERAND (t, 0);
3052       else
3053           return NULL_TREE;
3054     }
3055 
3056   if (!opt_for_fn (ie->caller->decl, flag_devirtualize))
3057     return NULL_TREE;
3058 
3059   gcc_assert (!ie->indirect_info->agg_contents);
3060   anc_offset = ie->indirect_info->offset;
3061 
3062   t = NULL;
3063 
3064   /* Try to work out value of virtual table pointer value in replacements.  */
3065   if (!t && agg_reps && !ie->indirect_info->by_ref)
3066     {
3067       while (agg_reps)
3068           {
3069             if (agg_reps->index == param_index
3070                 && agg_reps->offset == ie->indirect_info->offset
3071                 && agg_reps->by_ref)
3072               {
3073                 t = agg_reps->value;
3074                 break;
3075               }
3076             agg_reps = agg_reps->next;
3077           }
3078     }
3079 
3080   /* Try to work out value of virtual table pointer value in known
3081      aggregate values.  */
3082   if (!t && known_aggs.length () > (unsigned int) param_index
3083       && !ie->indirect_info->by_ref)
3084     {
3085       const ipa_agg_value_set *agg = &known_aggs[param_index];
3086       t = ipa_find_agg_cst_for_param (agg,
3087                                               (unsigned) param_index
3088                                                    < known_csts.length ()
3089                                               ? known_csts[param_index] : NULL,
3090                                               ie->indirect_info->offset, true);
3091     }
3092 
3093   /* If we found the virtual table pointer, lookup the target.  */
3094   if (t)
3095     {
3096       tree vtable;
3097       unsigned HOST_WIDE_INT offset;
3098       if (vtable_pointer_value_to_vtable (t, &vtable, &offset))
3099           {
3100             bool can_refer;
3101             target = gimple_get_virt_method_for_vtable (ie->indirect_info->otr_token,
3102                                                                   vtable, offset, &can_refer);
3103             if (can_refer)
3104               {
3105                 if (!target
3106                       || fndecl_built_in_p (target, BUILT_IN_UNREACHABLE)
3107                       || !possible_polymorphic_call_target_p
3108                            (ie, cgraph_node::get (target)))
3109                     {
3110                       /* Do not speculate builtin_unreachable, it is stupid!  */
3111                       if (ie->indirect_info->vptr_changed)
3112                         return NULL;
3113                       target = ipa_impossible_devirt_target (ie, target);
3114                     }
3115                 *speculative = ie->indirect_info->vptr_changed;
3116                 if (!*speculative)
3117                     return target;
3118               }
3119           }
3120     }
3121 
3122   /* Do we know the constant value of pointer?  */
3123   if (!t && (unsigned) param_index < known_csts.length ())
3124     t = known_csts[param_index];
3125 
3126   gcc_checking_assert (!t || TREE_CODE (t) != TREE_BINFO);
3127 
3128   ipa_polymorphic_call_context context;
3129   if (known_contexts.length () > (unsigned int) param_index)
3130     {
3131       context = known_contexts[param_index];
3132       context.offset_by (anc_offset);
3133       if (ie->indirect_info->vptr_changed)
3134           context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
3135                                                         ie->indirect_info->otr_type);
3136       if (t)
3137           {
3138             ipa_polymorphic_call_context ctx2 = ipa_polymorphic_call_context
3139               (t, ie->indirect_info->otr_type, anc_offset);
3140             if (!ctx2.useless_p ())
3141               context.combine_with (ctx2, ie->indirect_info->otr_type);
3142           }
3143     }
3144   else if (t)
3145     {
3146       context = ipa_polymorphic_call_context (t, ie->indirect_info->otr_type,
3147                                                         anc_offset);
3148       if (ie->indirect_info->vptr_changed)
3149           context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
3150                                                         ie->indirect_info->otr_type);
3151     }
3152   else
3153     return NULL_TREE;
3154 
3155   vec <cgraph_node *>targets;
3156   bool final;
3157 
3158   targets = possible_polymorphic_call_targets
3159     (ie->indirect_info->otr_type,
3160      ie->indirect_info->otr_token,
3161      context, &final);
3162   if (!final || targets.length () > 1)
3163     {
3164       struct cgraph_node *node;
3165       if (*speculative)
3166           return target;
3167       if (!opt_for_fn (ie->caller->decl, flag_devirtualize_speculatively)
3168             || ie->speculative || !ie->maybe_hot_p ())
3169           return NULL;
3170       node = try_speculative_devirtualization (ie->indirect_info->otr_type,
3171                                                          ie->indirect_info->otr_token,
3172                                                          context);
3173       if (node)
3174           {
3175             *speculative = true;
3176             target = node->decl;
3177           }
3178       else
3179           return NULL;
3180     }
3181   else
3182     {
3183       *speculative = false;
3184       if (targets.length () == 1)
3185           target = targets[0]->decl;
3186       else
3187           target = ipa_impossible_devirt_target (ie, NULL_TREE);
3188     }
3189 
3190   if (target && !possible_polymorphic_call_target_p (ie,
3191                                                                  cgraph_node::get (target)))
3192     {
3193       if (*speculative)
3194           return NULL;
3195       target = ipa_impossible_devirt_target (ie, target);
3196     }
3197 
3198   return target;
3199 }
3200 
3201 /* If an indirect edge IE can be turned into a direct one based on data in
3202    AVALS, return the destination.  Store into *SPECULATIVE a boolean determinig
3203    whether the discovered target is only speculative guess.  */
3204 
3205 tree
ipa_get_indirect_edge_target(struct cgraph_edge * ie,ipa_call_arg_values * avals,bool * speculative)3206 ipa_get_indirect_edge_target (struct cgraph_edge *ie,
3207                                     ipa_call_arg_values *avals,
3208                                     bool *speculative)
3209 {
3210   return ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
3211                                                    avals->m_known_contexts,
3212                                                    avals->m_known_aggs,
3213                                                    NULL, speculative);
3214 }
3215 
3216 /* The same functionality as above overloaded for ipa_auto_call_arg_values.  */
3217 
3218 tree
ipa_get_indirect_edge_target(struct cgraph_edge * ie,ipa_auto_call_arg_values * avals,bool * speculative)3219 ipa_get_indirect_edge_target (struct cgraph_edge *ie,
3220                                     ipa_auto_call_arg_values *avals,
3221                                     bool *speculative)
3222 {
3223   return ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
3224                                                    avals->m_known_contexts,
3225                                                    avals->m_known_aggs,
3226                                                    NULL, speculative);
3227 }
3228 
3229 /* Calculate devirtualization time bonus for NODE, assuming we know information
3230    about arguments stored in AVALS.  */
3231 
3232 static int
devirtualization_time_bonus(struct cgraph_node * node,ipa_auto_call_arg_values * avals)3233 devirtualization_time_bonus (struct cgraph_node *node,
3234                                    ipa_auto_call_arg_values *avals)
3235 {
3236   struct cgraph_edge *ie;
3237   int res = 0;
3238 
3239   for (ie = node->indirect_calls; ie; ie = ie->next_callee)
3240     {
3241       struct cgraph_node *callee;
3242       class ipa_fn_summary *isummary;
3243       enum availability avail;
3244       tree target;
3245       bool speculative;
3246 
3247       target = ipa_get_indirect_edge_target (ie, avals, &speculative);
3248       if (!target)
3249           continue;
3250 
3251       /* Only bare minimum benefit for clearly un-inlineable targets.  */
3252       res += 1;
3253       callee = cgraph_node::get (target);
3254       if (!callee || !callee->definition)
3255           continue;
3256       callee = callee->function_symbol (&avail);
3257       if (avail < AVAIL_AVAILABLE)
3258           continue;
3259       isummary = ipa_fn_summaries->get (callee);
3260       if (!isummary || !isummary->inlinable)
3261           continue;
3262 
3263       int size = ipa_size_summaries->get (callee)->size;
3264       /* FIXME: The values below need re-considering and perhaps also
3265            integrating into the cost metrics, at lest in some very basic way.  */
3266       int max_inline_insns_auto
3267           = opt_for_fn (callee->decl, param_max_inline_insns_auto);
3268       if (size <= max_inline_insns_auto / 4)
3269           res += 31 / ((int)speculative + 1);
3270       else if (size <= max_inline_insns_auto / 2)
3271           res += 15 / ((int)speculative + 1);
3272       else if (size <= max_inline_insns_auto
3273                  || DECL_DECLARED_INLINE_P (callee->decl))
3274           res += 7 / ((int)speculative + 1);
3275     }
3276 
3277   return res;
3278 }
3279 
3280 /* Return time bonus incurred because of hints stored in ESTIMATES.  */
3281 
3282 static int
hint_time_bonus(cgraph_node * node,const ipa_call_estimates & estimates)3283 hint_time_bonus (cgraph_node *node, const ipa_call_estimates &estimates)
3284 {
3285   int result = 0;
3286   ipa_hints hints = estimates.hints;
3287   if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride))
3288     result += opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3289 
3290   sreal bonus_for_one = opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3291 
3292   if (hints & INLINE_HINT_loop_iterations)
3293     result += (estimates.loops_with_known_iterations * bonus_for_one).to_int ();
3294 
3295   if (hints & INLINE_HINT_loop_stride)
3296     result += (estimates.loops_with_known_strides * bonus_for_one).to_int ();
3297 
3298   return result;
3299 }
3300 
3301 /* If there is a reason to penalize the function described by INFO in the
3302    cloning goodness evaluation, do so.  */
3303 
3304 static inline sreal
incorporate_penalties(cgraph_node * node,ipa_node_params * info,sreal evaluation)3305 incorporate_penalties (cgraph_node *node, ipa_node_params *info,
3306                            sreal evaluation)
3307 {
3308   if (info->node_within_scc && !info->node_is_self_scc)
3309     evaluation = (evaluation
3310                       * (100 - opt_for_fn (node->decl,
3311                                                param_ipa_cp_recursion_penalty))) / 100;
3312 
3313   if (info->node_calling_single_call)
3314     evaluation = (evaluation
3315                       * (100 - opt_for_fn (node->decl,
3316                                                param_ipa_cp_single_call_penalty)))
3317       / 100;
3318 
3319   return evaluation;
3320 }
3321 
3322 /* Return true if cloning NODE is a good idea, given the estimated TIME_BENEFIT
3323    and SIZE_COST and with the sum of frequencies of incoming edges to the
3324    potential new clone in FREQUENCIES.  */
3325 
3326 static bool
good_cloning_opportunity_p(struct cgraph_node * node,sreal time_benefit,sreal freq_sum,profile_count count_sum,int size_cost)3327 good_cloning_opportunity_p (struct cgraph_node *node, sreal time_benefit,
3328                                   sreal freq_sum, profile_count count_sum,
3329                                   int size_cost)
3330 {
3331   if (time_benefit == 0
3332       || !opt_for_fn (node->decl, flag_ipa_cp_clone)
3333       || node->optimize_for_size_p ())
3334     return false;
3335 
3336   gcc_assert (size_cost > 0);
3337 
3338   ipa_node_params *info = ipa_node_params_sum->get (node);
3339   int eval_threshold = opt_for_fn (node->decl, param_ipa_cp_eval_threshold);
3340   if (count_sum > profile_count::zero ())
3341     {
3342       gcc_assert (base_count > profile_count::zero ());
3343       sreal factor = count_sum.probability_in (base_count).to_sreal ();
3344       sreal evaluation = (time_benefit * factor) / size_cost;
3345       evaluation = incorporate_penalties (node, info, evaluation);
3346       evaluation *= 1000;
3347 
3348       if (dump_file && (dump_flags & TDF_DETAILS))
3349           {
3350             fprintf (dump_file, "     good_cloning_opportunity_p (time: %g, "
3351                        "size: %i, count_sum: ", time_benefit.to_double (),
3352                        size_cost);
3353             count_sum.dump (dump_file);
3354             fprintf (dump_file, "%s%s) -> evaluation: %.2f, threshold: %i\n",
3355                      info->node_within_scc
3356                        ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
3357                      info->node_calling_single_call ? ", single_call" : "",
3358                        evaluation.to_double (), eval_threshold);
3359           }
3360 
3361       return evaluation.to_int () >= eval_threshold;
3362     }
3363   else
3364     {
3365       sreal evaluation = (time_benefit * freq_sum) / size_cost;
3366       evaluation = incorporate_penalties (node, info, evaluation);
3367       evaluation *= 1000;
3368 
3369       if (dump_file && (dump_flags & TDF_DETAILS))
3370           fprintf (dump_file, "     good_cloning_opportunity_p (time: %g, "
3371                      "size: %i, freq_sum: %g%s%s) -> evaluation: %.2f, "
3372                      "threshold: %i\n",
3373                      time_benefit.to_double (), size_cost, freq_sum.to_double (),
3374                      info->node_within_scc
3375                        ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
3376                      info->node_calling_single_call ? ", single_call" : "",
3377                      evaluation.to_double (), eval_threshold);
3378 
3379       return evaluation.to_int () >= eval_threshold;
3380     }
3381 }
3382 
3383 /* Return all context independent values from aggregate lattices in PLATS in a
3384    vector.  Return NULL if there are none.  */
3385 
3386 static vec<ipa_agg_value>
context_independent_aggregate_values(class ipcp_param_lattices * plats)3387 context_independent_aggregate_values (class ipcp_param_lattices *plats)
3388 {
3389   vec<ipa_agg_value> res = vNULL;
3390 
3391   if (plats->aggs_bottom
3392       || plats->aggs_contain_variable
3393       || plats->aggs_count == 0)
3394     return vNULL;
3395 
3396   for (struct ipcp_agg_lattice *aglat = plats->aggs;
3397        aglat;
3398        aglat = aglat->next)
3399     if (aglat->is_single_const ())
3400       {
3401           struct ipa_agg_value item;
3402           item.offset = aglat->offset;
3403           item.value = aglat->values->value;
3404           res.safe_push (item);
3405       }
3406   return res;
3407 }
3408 
3409 /* Grow vectors in AVALS and fill them with information about values of
3410    parameters that are known to be independent of the context.  Only calculate
3411    m_known_aggs if CALCULATE_AGGS is true.  INFO describes the function.  If
3412    REMOVABLE_PARAMS_COST is non-NULL, the movement cost of all removable
3413    parameters will be stored in it.
3414 
3415    TODO: Also grow context independent value range vectors.  */
3416 
3417 static bool
gather_context_independent_values(class ipa_node_params * info,ipa_auto_call_arg_values * avals,bool calculate_aggs,int * removable_params_cost)3418 gather_context_independent_values (class ipa_node_params *info,
3419                                            ipa_auto_call_arg_values *avals,
3420                                            bool calculate_aggs,
3421                                            int *removable_params_cost)
3422 {
3423   int i, count = ipa_get_param_count (info);
3424   bool ret = false;
3425 
3426   avals->m_known_vals.safe_grow_cleared (count, true);
3427   avals->m_known_contexts.safe_grow_cleared (count, true);
3428   if (calculate_aggs)
3429     avals->m_known_aggs.safe_grow_cleared (count, true);
3430 
3431   if (removable_params_cost)
3432     *removable_params_cost = 0;
3433 
3434   for (i = 0; i < count; i++)
3435     {
3436       class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3437       ipcp_lattice<tree> *lat = &plats->itself;
3438 
3439       if (lat->is_single_const ())
3440           {
3441             ipcp_value<tree> *val = lat->values;
3442             gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
3443             avals->m_known_vals[i] = val->value;
3444             if (removable_params_cost)
3445               *removable_params_cost
3446                 += estimate_move_cost (TREE_TYPE (val->value), false);
3447             ret = true;
3448           }
3449       else if (removable_params_cost
3450                  && !ipa_is_param_used (info, i))
3451           *removable_params_cost
3452             += ipa_get_param_move_cost (info, i);
3453 
3454       if (!ipa_is_param_used (info, i))
3455           continue;
3456 
3457       ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3458       /* Do not account known context as reason for cloning.  We can see
3459            if it permits devirtualization.  */
3460       if (ctxlat->is_single_const ())
3461           avals->m_known_contexts[i] = ctxlat->values->value;
3462 
3463       if (calculate_aggs)
3464           {
3465             vec<ipa_agg_value> agg_items;
3466             struct ipa_agg_value_set *agg;
3467 
3468             agg_items = context_independent_aggregate_values (plats);
3469             agg = &avals->m_known_aggs[i];
3470             agg->items = agg_items;
3471             agg->by_ref = plats->aggs_by_ref;
3472             ret |= !agg_items.is_empty ();
3473           }
3474     }
3475 
3476   return ret;
3477 }
3478 
3479 /* Perform time and size measurement of NODE with the context given in AVALS,
3480    calculate the benefit compared to the node without specialization and store
3481    it into VAL.  Take into account REMOVABLE_PARAMS_COST of all
3482    context-independent or unused removable parameters and EST_MOVE_COST, the
3483    estimated movement of the considered parameter.  */
3484 
3485 static void
perform_estimation_of_a_value(cgraph_node * node,ipa_auto_call_arg_values * avals,int removable_params_cost,int est_move_cost,ipcp_value_base * val)3486 perform_estimation_of_a_value (cgraph_node *node,
3487                                      ipa_auto_call_arg_values *avals,
3488                                      int removable_params_cost, int est_move_cost,
3489                                      ipcp_value_base *val)
3490 {
3491   sreal time_benefit;
3492   ipa_call_estimates estimates;
3493 
3494   estimate_ipcp_clone_size_and_time (node, avals, &estimates);
3495 
3496   /* Extern inline functions have no cloning local time benefits because they
3497      will be inlined anyway.  The only reason to clone them is if it enables
3498      optimization in any of the functions they call.  */
3499   if (DECL_EXTERNAL (node->decl) && DECL_DECLARED_INLINE_P (node->decl))
3500     time_benefit = 0;
3501   else
3502     time_benefit = (estimates.nonspecialized_time - estimates.time)
3503       + (devirtualization_time_bonus (node, avals)
3504            + hint_time_bonus (node, estimates)
3505            + removable_params_cost + est_move_cost);
3506 
3507   int size = estimates.size;
3508   gcc_checking_assert (size >=0);
3509   /* The inliner-heuristics based estimates may think that in certain
3510      contexts some functions do not have any size at all but we want
3511      all specializations to have at least a tiny cost, not least not to
3512      divide by zero.  */
3513   if (size == 0)
3514     size = 1;
3515 
3516   val->local_time_benefit = time_benefit;
3517   val->local_size_cost = size;
3518 }
3519 
3520 /* Get the overall limit oof growth based on parameters extracted from growth.
3521    it does not really make sense to mix functions with different overall growth
3522    limits but it is possible and if it happens, we do not want to select one
3523    limit at random.  */
3524 
3525 static long
get_max_overall_size(cgraph_node * node)3526 get_max_overall_size (cgraph_node *node)
3527 {
3528   long max_new_size = orig_overall_size;
3529   long large_unit = opt_for_fn (node->decl, param_ipa_cp_large_unit_insns);
3530   if (max_new_size < large_unit)
3531     max_new_size = large_unit;
3532   int unit_growth = opt_for_fn (node->decl, param_ipa_cp_unit_growth);
3533   max_new_size += max_new_size * unit_growth / 100 + 1;
3534   return max_new_size;
3535 }
3536 
3537 /* Iterate over known values of parameters of NODE and estimate the local
3538    effects in terms of time and size they have.  */
3539 
3540 static void
estimate_local_effects(struct cgraph_node * node)3541 estimate_local_effects (struct cgraph_node *node)
3542 {
3543   ipa_node_params *info = ipa_node_params_sum->get (node);
3544   int i, count = ipa_get_param_count (info);
3545   bool always_const;
3546   int removable_params_cost;
3547 
3548   if (!count || !ipcp_versionable_function_p (node))
3549     return;
3550 
3551   if (dump_file && (dump_flags & TDF_DETAILS))
3552     fprintf (dump_file, "\nEstimating effects for %s.\n", node->dump_name ());
3553 
3554   ipa_auto_call_arg_values avals;
3555   always_const = gather_context_independent_values (info, &avals, true,
3556                                                                 &removable_params_cost);
3557   int devirt_bonus = devirtualization_time_bonus (node, &avals);
3558   if (always_const || devirt_bonus
3559       || (removable_params_cost && node->can_change_signature))
3560     {
3561       struct caller_statistics stats;
3562       ipa_call_estimates estimates;
3563 
3564       init_caller_stats (&stats);
3565       node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
3566                                                         false);
3567       estimate_ipcp_clone_size_and_time (node, &avals, &estimates);
3568       sreal time = estimates.nonspecialized_time - estimates.time;
3569       time += devirt_bonus;
3570       time += hint_time_bonus (node, estimates);
3571       time += removable_params_cost;
3572       int size = estimates.size - stats.n_calls * removable_params_cost;
3573 
3574       if (dump_file)
3575           fprintf (dump_file, " - context independent values, size: %i, "
3576                      "time_benefit: %f\n", size, (time).to_double ());
3577 
3578       if (size <= 0 || node->local)
3579           {
3580             info->do_clone_for_all_contexts = true;
3581 
3582             if (dump_file)
3583               fprintf (dump_file, "     Decided to specialize for all "
3584                          "known contexts, code not going to grow.\n");
3585           }
3586       else if (good_cloning_opportunity_p (node, time, stats.freq_sum,
3587                                                      stats.count_sum, size))
3588           {
3589             if (size + overall_size <= get_max_overall_size (node))
3590               {
3591                 info->do_clone_for_all_contexts = true;
3592                 overall_size += size;
3593 
3594                 if (dump_file)
3595                     fprintf (dump_file, "     Decided to specialize for all "
3596                                "known contexts, growth (to %li) deemed "
3597                                "beneficial.\n", overall_size);
3598               }
3599             else if (dump_file && (dump_flags & TDF_DETAILS))
3600               fprintf (dump_file, "  Not cloning for all contexts because "
3601                          "maximum unit size would be reached with %li.\n",
3602                          size + overall_size);
3603           }
3604       else if (dump_file && (dump_flags & TDF_DETAILS))
3605           fprintf (dump_file, "   Not cloning for all contexts because "
3606                      "!good_cloning_opportunity_p.\n");
3607 
3608     }
3609 
3610   for (i = 0; i < count; i++)
3611     {
3612       class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3613       ipcp_lattice<tree> *lat = &plats->itself;
3614       ipcp_value<tree> *val;
3615 
3616       if (lat->bottom
3617             || !lat->values
3618             || avals.m_known_vals[i])
3619           continue;
3620 
3621       for (val = lat->values; val; val = val->next)
3622           {
3623             gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
3624             avals.m_known_vals[i] = val->value;
3625 
3626             int emc = estimate_move_cost (TREE_TYPE (val->value), true);
3627             perform_estimation_of_a_value (node, &avals, removable_params_cost,
3628                                                    emc, val);
3629 
3630             if (dump_file && (dump_flags & TDF_DETAILS))
3631               {
3632                 fprintf (dump_file, " - estimates for value ");
3633                 print_ipcp_constant_value (dump_file, val->value);
3634                 fprintf (dump_file, " for ");
3635                 ipa_dump_param (dump_file, info, i);
3636                 fprintf (dump_file, ": time_benefit: %g, size: %i\n",
3637                            val->local_time_benefit.to_double (),
3638                            val->local_size_cost);
3639               }
3640           }
3641       avals.m_known_vals[i] = NULL_TREE;
3642     }
3643 
3644   for (i = 0; i < count; i++)
3645     {
3646       class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3647 
3648       if (!plats->virt_call)
3649           continue;
3650 
3651       ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3652       ipcp_value<ipa_polymorphic_call_context> *val;
3653 
3654       if (ctxlat->bottom
3655             || !ctxlat->values
3656             || !avals.m_known_contexts[i].useless_p ())
3657           continue;
3658 
3659       for (val = ctxlat->values; val; val = val->next)
3660           {
3661             avals.m_known_contexts[i] = val->value;
3662             perform_estimation_of_a_value (node, &avals, removable_params_cost,
3663                                                    0, val);
3664 
3665             if (dump_file && (dump_flags & TDF_DETAILS))
3666               {
3667                 fprintf (dump_file, " - estimates for polymorphic context ");
3668                 print_ipcp_constant_value (dump_file, val->value);
3669                 fprintf (dump_file, " for ");
3670                 ipa_dump_param (dump_file, info, i);
3671                 fprintf (dump_file, ": time_benefit: %g, size: %i\n",
3672                            val->local_time_benefit.to_double (),
3673                            val->local_size_cost);
3674               }
3675           }
3676       avals.m_known_contexts[i] = ipa_polymorphic_call_context ();
3677     }
3678 
3679   for (i = 0; i < count; i++)
3680     {
3681       class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3682 
3683       if (plats->aggs_bottom || !plats->aggs)
3684           continue;
3685 
3686       ipa_agg_value_set *agg = &avals.m_known_aggs[i];
3687       for (ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
3688           {
3689             ipcp_value<tree> *val;
3690             if (aglat->bottom || !aglat->values
3691                 /* If the following is true, the one value is in known_aggs.  */
3692                 || (!plats->aggs_contain_variable
3693                       && aglat->is_single_const ()))
3694               continue;
3695 
3696             for (val = aglat->values; val; val = val->next)
3697               {
3698                 struct ipa_agg_value item;
3699 
3700                 item.offset = aglat->offset;
3701                 item.value = val->value;
3702                 agg->items.safe_push (item);
3703 
3704                 perform_estimation_of_a_value (node, &avals,
3705                                                        removable_params_cost, 0, val);
3706 
3707                 if (dump_file && (dump_flags & TDF_DETAILS))
3708                     {
3709                       fprintf (dump_file, " - estimates for value ");
3710                       print_ipcp_constant_value (dump_file, val->value);
3711                       fprintf (dump_file, " for ");
3712                       ipa_dump_param (dump_file, info, i);
3713                       fprintf (dump_file, "[%soffset: " HOST_WIDE_INT_PRINT_DEC
3714                                  "]: time_benefit: %g, size: %i\n",
3715                                  plats->aggs_by_ref ? "ref " : "",
3716                                  aglat->offset,
3717                                  val->local_time_benefit.to_double (),
3718                                  val->local_size_cost);
3719                     }
3720 
3721                 agg->items.pop ();
3722               }
3723           }
3724     }
3725 }
3726 
3727 
3728 /* Add value CUR_VAL and all yet-unsorted values it is dependent on to the
3729    topological sort of values.  */
3730 
3731 template <typename valtype>
3732 void
add_val(ipcp_value<valtype> * cur_val)3733 value_topo_info<valtype>::add_val (ipcp_value<valtype> *cur_val)
3734 {
3735   ipcp_value_source<valtype> *src;
3736 
3737   if (cur_val->dfs)
3738     return;
3739 
3740   dfs_counter++;
3741   cur_val->dfs = dfs_counter;
3742   cur_val->low_link = dfs_counter;
3743 
3744   cur_val->topo_next = stack;
3745   stack = cur_val;
3746   cur_val->on_stack = true;
3747 
3748   for (src = cur_val->sources; src; src = src->next)
3749     if (src->val)
3750       {
3751           if (src->val->dfs == 0)
3752             {
3753               add_val (src->val);
3754               if (src->val->low_link < cur_val->low_link)
3755                 cur_val->low_link = src->val->low_link;
3756             }
3757           else if (src->val->on_stack
3758                      && src->val->dfs < cur_val->low_link)
3759             cur_val->low_link = src->val->dfs;
3760       }
3761 
3762   if (cur_val->dfs == cur_val->low_link)
3763     {
3764       ipcp_value<valtype> *v, *scc_list = NULL;
3765 
3766       do
3767           {
3768             v = stack;
3769             stack = v->topo_next;
3770             v->on_stack = false;
3771             v->scc_no = cur_val->dfs;
3772 
3773             v->scc_next = scc_list;
3774             scc_list = v;
3775           }
3776       while (v != cur_val);
3777 
3778       cur_val->topo_next = values_topo;
3779       values_topo = cur_val;
3780     }
3781 }
3782 
3783 /* Add all values in lattices associated with NODE to the topological sort if
3784    they are not there yet.  */
3785 
3786 static void
add_all_node_vals_to_toposort(cgraph_node * node,ipa_topo_info * topo)3787 add_all_node_vals_to_toposort (cgraph_node *node, ipa_topo_info *topo)
3788 {
3789   ipa_node_params *info = ipa_node_params_sum->get (node);
3790   int i, count = ipa_get_param_count (info);
3791 
3792   for (i = 0; i < count; i++)
3793     {
3794       class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3795       ipcp_lattice<tree> *lat = &plats->itself;
3796       struct ipcp_agg_lattice *aglat;
3797 
3798       if (!lat->bottom)
3799           {
3800             ipcp_value<tree> *val;
3801             for (val = lat->values; val; val = val->next)
3802               topo->constants.add_val (val);
3803           }
3804 
3805       if (!plats->aggs_bottom)
3806           for (aglat = plats->aggs; aglat; aglat = aglat->next)
3807             if (!aglat->bottom)
3808               {
3809                 ipcp_value<tree> *val;
3810                 for (val = aglat->values; val; val = val->next)
3811                     topo->constants.add_val (val);
3812               }
3813 
3814       ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3815       if (!ctxlat->bottom)
3816           {
3817             ipcp_value<ipa_polymorphic_call_context> *ctxval;
3818             for (ctxval = ctxlat->values; ctxval; ctxval = ctxval->next)
3819               topo->contexts.add_val (ctxval);
3820           }
3821     }
3822 }
3823 
3824 /* One pass of constants propagation along the call graph edges, from callers
3825    to callees (requires topological ordering in TOPO), iterate over strongly
3826    connected components.  */
3827 
3828 static void
propagate_constants_topo(class ipa_topo_info * topo)3829 propagate_constants_topo (class ipa_topo_info *topo)
3830 {
3831   int i;
3832 
3833   for (i = topo->nnodes - 1; i >= 0; i--)
3834     {
3835       unsigned j;
3836       struct cgraph_node *v, *node = topo->order[i];
3837       vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (node);
3838 
3839       /* First, iteratively propagate within the strongly connected component
3840            until all lattices stabilize.  */
3841       FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3842           if (v->has_gimple_body_p ())
3843             {
3844               if (opt_for_fn (v->decl, flag_ipa_cp)
3845                     && opt_for_fn (v->decl, optimize))
3846                 push_node_to_stack (topo, v);
3847               /* When V is not optimized, we can not push it to stack, but
3848                  still we need to set all its callees lattices to bottom.  */
3849               else
3850                 {
3851                     for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
3852                      propagate_constants_across_call (cs);
3853                 }
3854             }
3855 
3856       v = pop_node_from_stack (topo);
3857       while (v)
3858           {
3859             struct cgraph_edge *cs;
3860             class ipa_node_params *info = NULL;
3861             bool self_scc = true;
3862 
3863             for (cs = v->callees; cs; cs = cs->next_callee)
3864               if (ipa_edge_within_scc (cs))
3865                 {
3866                     cgraph_node *callee = cs->callee->function_symbol ();
3867 
3868                     if (v != callee)
3869                       self_scc = false;
3870 
3871                     if (!info)
3872                       {
3873                         info = ipa_node_params_sum->get (v);
3874                         info->node_within_scc = true;
3875                       }
3876 
3877                     if (propagate_constants_across_call (cs))
3878                       push_node_to_stack (topo, callee);
3879                 }
3880 
3881             if (info)
3882               info->node_is_self_scc = self_scc;
3883 
3884             v = pop_node_from_stack (topo);
3885           }
3886 
3887       /* Afterwards, propagate along edges leading out of the SCC, calculates
3888            the local effects of the discovered constants and all valid values to
3889            their topological sort.  */
3890       FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3891           if (v->has_gimple_body_p ()
3892               && opt_for_fn (v->decl, flag_ipa_cp)
3893               && opt_for_fn (v->decl, optimize))
3894             {
3895               struct cgraph_edge *cs;
3896 
3897               estimate_local_effects (v);
3898               add_all_node_vals_to_toposort (v, topo);
3899               for (cs = v->callees; cs; cs = cs->next_callee)
3900                 if (!ipa_edge_within_scc (cs))
3901                     propagate_constants_across_call (cs);
3902             }
3903       cycle_nodes.release ();
3904     }
3905 }
3906 
3907 /* Propagate the estimated effects of individual values along the topological
3908    from the dependent values to those they depend on.  */
3909 
3910 template <typename valtype>
3911 void
propagate_effects()3912 value_topo_info<valtype>::propagate_effects ()
3913 {
3914   ipcp_value<valtype> *base;
3915   hash_set<ipcp_value<valtype> *> processed_srcvals;
3916 
3917   for (base = values_topo; base; base = base->topo_next)
3918     {
3919       ipcp_value_source<valtype> *src;
3920       ipcp_value<valtype> *val;
3921       sreal time = 0;
3922       HOST_WIDE_INT size = 0;
3923 
3924       for (val = base; val; val = val->scc_next)
3925           {
3926             time = time + val->local_time_benefit + val->prop_time_benefit;
3927             size = size + val->local_size_cost + val->prop_size_cost;
3928           }
3929 
3930       for (val = base; val; val = val->scc_next)
3931           {
3932             processed_srcvals.empty ();
3933             for (src = val->sources; src; src = src->next)
3934               if (src->val
3935                     && src->cs->maybe_hot_p ())
3936                 {
3937                     if (!processed_srcvals.add (src->val))
3938                       {
3939                         HOST_WIDE_INT prop_size = size + src->val->prop_size_cost;
3940                         if (prop_size < INT_MAX)
3941                           src->val->prop_size_cost = prop_size;
3942                         else
3943                           continue;
3944                       }
3945 
3946                     int special_factor = 1;
3947                     if (val->same_scc (src->val))
3948                       special_factor
3949                         = opt_for_fn(src->cs->caller->decl,
3950                                          param_ipa_cp_recursive_freq_factor);
3951                     else if (val->self_recursion_generated_p ()
3952                                && (src->cs->callee->function_symbol ()
3953                                    == src->cs->caller))
3954                       {
3955                         int max_recur_gen_depth
3956                           = opt_for_fn(src->cs->caller->decl,
3957                                            param_ipa_cp_max_recursive_depth);
3958                         special_factor = max_recur_gen_depth
3959                           - val->self_recursion_generated_level + 1;
3960                       }
3961 
3962                     src->val->prop_time_benefit
3963                       += time * special_factor * src->cs->sreal_frequency ();
3964                 }
3965 
3966             if (size < INT_MAX)
3967               {
3968                 val->prop_time_benefit = time;
3969                 val->prop_size_cost = size;
3970               }
3971             else
3972               {
3973                 val->prop_time_benefit = 0;
3974                 val->prop_size_cost = 0;
3975               }
3976           }
3977     }
3978 }
3979 
3980 /* Callback for qsort to sort counts of all edges.  */
3981 
3982 static int
compare_edge_profile_counts(const void * a,const void * b)3983 compare_edge_profile_counts (const void *a, const void *b)
3984 {
3985   const profile_count *cnt1 = (const profile_count *) a;
3986   const profile_count *cnt2 = (const profile_count *) b;
3987 
3988   if (*cnt1 < *cnt2)
3989     return 1;
3990   if (*cnt1 > *cnt2)
3991     return -1;
3992   return 0;
3993 }
3994 
3995 
3996 /* Propagate constants, polymorphic contexts and their effects from the
3997    summaries interprocedurally.  */
3998 
3999 static void
ipcp_propagate_stage(class ipa_topo_info * topo)4000 ipcp_propagate_stage (class ipa_topo_info *topo)
4001 {
4002   struct cgraph_node *node;
4003 
4004   if (dump_file)
4005     fprintf (dump_file, "\n Propagating constants:\n\n");
4006 
4007   base_count = profile_count::uninitialized ();
4008 
4009   bool compute_count_base = false;
4010   unsigned base_count_pos_percent = 0;
4011   FOR_EACH_DEFINED_FUNCTION (node)
4012   {
4013     if (node->has_gimple_body_p ()
4014           && opt_for_fn (node->decl, flag_ipa_cp)
4015           && opt_for_fn (node->decl, optimize))
4016       {
4017         ipa_node_params *info = ipa_node_params_sum->get (node);
4018         determine_versionability (node, info);
4019 
4020           unsigned nlattices = ipa_get_param_count (info);
4021           void *chunk = XCNEWVEC (class ipcp_param_lattices, nlattices);
4022           info->lattices = new (chunk) ipcp_param_lattices[nlattices];
4023           initialize_node_lattices (node);
4024       }
4025     ipa_size_summary *s = ipa_size_summaries->get (node);
4026     if (node->definition && !node->alias && s != NULL)
4027       overall_size += s->self_size;
4028     if (node->count.ipa ().initialized_p ())
4029       {
4030           compute_count_base = true;
4031           unsigned pos_percent = opt_for_fn (node->decl,
4032                                                      param_ipa_cp_profile_count_base);
4033           base_count_pos_percent = MAX (base_count_pos_percent, pos_percent);
4034       }
4035   }
4036 
4037   if (compute_count_base)
4038     {
4039       auto_vec<profile_count> all_edge_counts;
4040       all_edge_counts.reserve_exact (symtab->edges_count);
4041       FOR_EACH_DEFINED_FUNCTION (node)
4042           for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
4043             {
4044               profile_count count = cs->count.ipa ();
4045               if (!(count > profile_count::zero ()))
4046                 continue;
4047 
4048               enum availability avail;
4049               cgraph_node *tgt
4050                 = cs->callee->function_or_virtual_thunk_symbol (&avail);
4051               ipa_node_params *info = ipa_node_params_sum->get (tgt);
4052               if (info && info->versionable)
4053                 all_edge_counts.quick_push (count);
4054             }
4055 
4056       if (!all_edge_counts.is_empty ())
4057           {
4058             gcc_assert (base_count_pos_percent <= 100);
4059             all_edge_counts.qsort (compare_edge_profile_counts);
4060 
4061             unsigned base_count_pos
4062               = ((all_edge_counts.length () * (base_count_pos_percent)) / 100);
4063             base_count = all_edge_counts[base_count_pos];
4064 
4065             if (dump_file)
4066               {
4067                 fprintf (dump_file, "\nSelected base_count from %u edges at "
4068                            "position %u, arriving at: ", all_edge_counts.length (),
4069                            base_count_pos);
4070                 base_count.dump (dump_file);
4071                 fprintf (dump_file, "\n");
4072               }
4073           }
4074       else if (dump_file)
4075           fprintf (dump_file, "\nNo candidates with non-zero call count found, "
4076                      "continuing as if without profile feedback.\n");
4077     }
4078 
4079   orig_overall_size = overall_size;
4080 
4081   if (dump_file)
4082     fprintf (dump_file, "\noverall_size: %li\n", overall_size);
4083 
4084   propagate_constants_topo (topo);
4085   if (flag_checking)
4086     ipcp_verify_propagated_values ();
4087   topo->constants.propagate_effects ();
4088   topo->contexts.propagate_effects ();
4089 
4090   if (dump_file)
4091     {
4092       fprintf (dump_file, "\nIPA lattices after all propagation:\n");
4093       print_all_lattices (dump_file, (dump_flags & TDF_DETAILS), true);
4094     }
4095 }
4096 
4097 /* Discover newly direct outgoing edges from NODE which is a new clone with
4098    known KNOWN_CSTS and make them direct.  */
4099 
4100 static void
ipcp_discover_new_direct_edges(struct cgraph_node * node,vec<tree> known_csts,vec<ipa_polymorphic_call_context> known_contexts,struct ipa_agg_replacement_value * aggvals)4101 ipcp_discover_new_direct_edges (struct cgraph_node *node,
4102                                         vec<tree> known_csts,
4103                                         vec<ipa_polymorphic_call_context>
4104                                         known_contexts,
4105                                         struct ipa_agg_replacement_value *aggvals)
4106 {
4107   struct cgraph_edge *ie, *next_ie;
4108   bool found = false;
4109 
4110   for (ie = node->indirect_calls; ie; ie = next_ie)
4111     {
4112       tree target;
4113       bool speculative;
4114 
4115       next_ie = ie->next_callee;
4116       target = ipa_get_indirect_edge_target_1 (ie, known_csts, known_contexts,
4117                                                          vNULL, aggvals, &speculative);
4118       if (target)
4119           {
4120             bool agg_contents = ie->indirect_info->agg_contents;
4121             bool polymorphic = ie->indirect_info->polymorphic;
4122             int param_index = ie->indirect_info->param_index;
4123             struct cgraph_edge *cs = ipa_make_edge_direct_to_target (ie, target,
4124                                                                                    speculative);
4125             found = true;
4126 
4127             if (cs && !agg_contents && !polymorphic)
4128               {
4129                 ipa_node_params *info = ipa_node_params_sum->get (node);
4130                 int c = ipa_get_controlled_uses (info, param_index);
4131                 if (c != IPA_UNDESCRIBED_USE
4132                       && !ipa_get_param_load_dereferenced (info, param_index))
4133                     {
4134                       struct ipa_ref *to_del;
4135 
4136                       c--;
4137                       ipa_set_controlled_uses (info, param_index, c);
4138                       if (dump_file && (dump_flags & TDF_DETAILS))
4139                         fprintf (dump_file, "     controlled uses count of param "
4140                                    "%i bumped down to %i\n", param_index, c);
4141                       if (c == 0
4142                           && (to_del = node->find_reference (cs->callee, NULL, 0,
4143                                                                        IPA_REF_ADDR)))
4144                         {
4145                           if (dump_file && (dump_flags & TDF_DETAILS))
4146                               fprintf (dump_file, "       and even removing its "
4147                                          "cloning-created reference\n");
4148                           to_del->remove_reference ();
4149                         }
4150                     }
4151               }
4152           }
4153     }
4154   /* Turning calls to direct calls will improve overall summary.  */
4155   if (found)
4156     ipa_update_overall_fn_summary (node);
4157 }
4158 
4159 class edge_clone_summary;
4160 static call_summary <edge_clone_summary *> *edge_clone_summaries = NULL;
4161 
4162 /* Edge clone summary.  */
4163 
4164 class edge_clone_summary
4165 {
4166 public:
4167   /* Default constructor.  */
edge_clone_summary()4168   edge_clone_summary (): prev_clone (NULL), next_clone (NULL) {}
4169 
4170   /* Default destructor.  */
~edge_clone_summary()4171   ~edge_clone_summary ()
4172   {
4173     if (prev_clone)
4174       edge_clone_summaries->get (prev_clone)->next_clone = next_clone;
4175     if (next_clone)
4176       edge_clone_summaries->get (next_clone)->prev_clone = prev_clone;
4177   }
4178 
4179   cgraph_edge *prev_clone;
4180   cgraph_edge *next_clone;
4181 };
4182 
4183 class edge_clone_summary_t:
4184   public call_summary <edge_clone_summary *>
4185 {
4186 public:
edge_clone_summary_t(symbol_table * symtab)4187   edge_clone_summary_t (symbol_table *symtab):
4188     call_summary <edge_clone_summary *> (symtab)
4189     {
4190       m_initialize_when_cloning = true;
4191     }
4192 
4193   virtual void duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4194                                 edge_clone_summary *src_data,
4195                                 edge_clone_summary *dst_data);
4196 };
4197 
4198 /* Edge duplication hook.  */
4199 
4200 void
duplicate(cgraph_edge * src_edge,cgraph_edge * dst_edge,edge_clone_summary * src_data,edge_clone_summary * dst_data)4201 edge_clone_summary_t::duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4202                                          edge_clone_summary *src_data,
4203                                          edge_clone_summary *dst_data)
4204 {
4205   if (src_data->next_clone)
4206     edge_clone_summaries->get (src_data->next_clone)->prev_clone = dst_edge;
4207   dst_data->prev_clone = src_edge;
4208   dst_data->next_clone = src_data->next_clone;
4209   src_data->next_clone = dst_edge;
4210 }
4211 
4212 /* Return true is CS calls DEST or its clone for all contexts.  When
4213    ALLOW_RECURSION_TO_CLONE is false, also return false for self-recursive
4214    edges from/to an all-context clone.  */
4215 
4216 static bool
calls_same_node_or_its_all_contexts_clone_p(cgraph_edge * cs,cgraph_node * dest,bool allow_recursion_to_clone)4217 calls_same_node_or_its_all_contexts_clone_p (cgraph_edge *cs, cgraph_node *dest,
4218                                                        bool allow_recursion_to_clone)
4219 {
4220   enum availability availability;
4221   cgraph_node *callee = cs->callee->function_symbol (&availability);
4222 
4223   if (availability <= AVAIL_INTERPOSABLE)
4224     return false;
4225   if (callee == dest)
4226     return true;
4227   if (!allow_recursion_to_clone && cs->caller == callee)
4228     return false;
4229 
4230   ipa_node_params *info = ipa_node_params_sum->get (callee);
4231   return info->is_all_contexts_clone && info->ipcp_orig_node == dest;
4232 }
4233 
4234 /* Return true if edge CS does bring about the value described by SRC to
4235    DEST_VAL of node DEST or its clone for all contexts.  */
4236 
4237 static bool
cgraph_edge_brings_value_p(cgraph_edge * cs,ipcp_value_source<tree> * src,cgraph_node * dest,ipcp_value<tree> * dest_val)4238 cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source<tree> *src,
4239                                   cgraph_node *dest, ipcp_value<tree> *dest_val)
4240 {
4241   ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
4242 
4243   if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, !src->val)
4244       || caller_info->node_dead)
4245     return false;
4246 
4247   if (!src->val)
4248     return true;
4249 
4250   if (caller_info->ipcp_orig_node)
4251     {
4252       tree t;
4253       if (src->offset == -1)
4254           t = caller_info->known_csts[src->index];
4255       else
4256           t = get_clone_agg_value (cs->caller, src->offset, src->index);
4257       return (t != NULL_TREE
4258                 && values_equal_for_ipcp_p (src->val->value, t));
4259     }
4260   else
4261     {
4262       if (src->val == dest_val)
4263           return true;
4264 
4265       struct ipcp_agg_lattice *aglat;
4266       class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
4267                                                                                  src->index);
4268       if (src->offset == -1)
4269           return (plats->itself.is_single_const ()
4270                     && values_equal_for_ipcp_p (src->val->value,
4271                                                       plats->itself.values->value));
4272       else
4273           {
4274             if (plats->aggs_bottom || plats->aggs_contain_variable)
4275               return false;
4276             for (aglat = plats->aggs; aglat; aglat = aglat->next)
4277               if (aglat->offset == src->offset)
4278                 return  (aglat->is_single_const ()
4279                            && values_equal_for_ipcp_p (src->val->value,
4280                                                                aglat->values->value));
4281           }
4282       return false;
4283     }
4284 }
4285 
4286 /* Return true if edge CS does bring about the value described by SRC to
4287    DST_VAL of node DEST or its clone for all contexts.  */
4288 
4289 static bool
cgraph_edge_brings_value_p(cgraph_edge * cs,ipcp_value_source<ipa_polymorphic_call_context> * src,cgraph_node * dest,ipcp_value<ipa_polymorphic_call_context> *)4290 cgraph_edge_brings_value_p (cgraph_edge *cs,
4291                                   ipcp_value_source<ipa_polymorphic_call_context> *src,
4292                                   cgraph_node *dest,
4293                                   ipcp_value<ipa_polymorphic_call_context> *)
4294 {
4295   ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
4296 
4297   if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, true)
4298       || caller_info->node_dead)
4299     return false;
4300   if (!src->val)
4301     return true;
4302 
4303   if (caller_info->ipcp_orig_node)
4304     return (caller_info->known_contexts.length () > (unsigned) src->index)
4305       && values_equal_for_ipcp_p (src->val->value,
4306                                           caller_info->known_contexts[src->index]);
4307 
4308   class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
4309                                                                            src->index);
4310   return plats->ctxlat.is_single_const ()
4311     && values_equal_for_ipcp_p (src->val->value,
4312                                         plats->ctxlat.values->value);
4313 }
4314 
4315 /* Get the next clone in the linked list of clones of an edge.  */
4316 
4317 static inline struct cgraph_edge *
get_next_cgraph_edge_clone(struct cgraph_edge * cs)4318 get_next_cgraph_edge_clone (struct cgraph_edge *cs)
4319 {
4320   edge_clone_summary *s = edge_clone_summaries->get (cs);
4321   return s != NULL ? s->next_clone : NULL;
4322 }
4323 
4324 /* Given VAL that is intended for DEST, iterate over all its sources and if any
4325    of them is viable and hot, return true.  In that case, for those that still
4326    hold, add their edge frequency and their number and cumulative profile
4327    counts of self-ecursive and other edges into *FREQUENCY, *CALLER_COUNT,
4328    REC_COUNT_SUM and NONREC_COUNT_SUM respectively.  */
4329 
4330 template <typename valtype>
4331 static bool
get_info_about_necessary_edges(ipcp_value<valtype> * val,cgraph_node * dest,sreal * freq_sum,int * caller_count,profile_count * rec_count_sum,profile_count * nonrec_count_sum)4332 get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
4333                                         sreal *freq_sum, int *caller_count,
4334                                         profile_count *rec_count_sum,
4335                                         profile_count *nonrec_count_sum)
4336 {
4337   ipcp_value_source<valtype> *src;
4338   sreal freq = 0;
4339   int count = 0;
4340   profile_count rec_cnt = profile_count::zero ();
4341   profile_count nonrec_cnt = profile_count::zero ();
4342   bool hot = false;
4343   bool non_self_recursive = false;
4344 
4345   for (src = val->sources; src; src = src->next)
4346     {
4347       struct cgraph_edge *cs = src->cs;
4348       while (cs)
4349           {
4350             if (cgraph_edge_brings_value_p (cs, src, dest, val))
4351               {
4352                 count++;
4353                 freq += cs->sreal_frequency ();
4354                 hot |= cs->maybe_hot_p ();
4355                 if (cs->caller != dest)
4356                     {
4357                       non_self_recursive = true;
4358                       if (cs->count.ipa ().initialized_p ())
4359                         rec_cnt += cs->count.ipa ();
4360                     }
4361                 else if (cs->count.ipa ().initialized_p ())
4362                   nonrec_cnt += cs->count.ipa ();
4363               }
4364             cs = get_next_cgraph_edge_clone (cs);
4365           }
4366     }
4367 
4368   /* If the only edges bringing a value are self-recursive ones, do not bother
4369      evaluating it.  */
4370   if (!non_self_recursive)
4371     return false;
4372 
4373   *freq_sum = freq;
4374   *caller_count = count;
4375   *rec_count_sum = rec_cnt;
4376   *nonrec_count_sum = nonrec_cnt;
4377 
4378   if (!hot && ipa_node_params_sum->get (dest)->node_within_scc)
4379     {
4380       struct cgraph_edge *cs;
4381 
4382       /* Cold non-SCC source edge could trigger hot recursive execution of
4383            function. Consider the case as hot and rely on following cost model
4384            computation to further select right one.  */
4385       for (cs = dest->callers; cs; cs = cs->next_caller)
4386           if (cs->caller == dest && cs->maybe_hot_p ())
4387             return true;
4388     }
4389 
4390   return hot;
4391 }
4392 
4393 /* Given a NODE, and a set of its CALLERS, try to adjust order of the callers
4394    to let a non-self-recursive caller be the first element.  Thus, we can
4395    simplify intersecting operations on values that arrive from all of these
4396    callers, especially when there exists self-recursive call.  Return true if
4397    this kind of adjustment is possible.  */
4398 
4399 static bool
adjust_callers_for_value_intersection(vec<cgraph_edge * > & callers,cgraph_node * node)4400 adjust_callers_for_value_intersection (vec<cgraph_edge *> &callers,
4401                                                cgraph_node *node)
4402 {
4403   for (unsigned i = 0; i < callers.length (); i++)
4404     {
4405       cgraph_edge *cs = callers[i];
4406 
4407       if (cs->caller != node)
4408           {
4409             if (i > 0)
4410               {
4411                 callers[i] = callers[0];
4412                 callers[0] = cs;
4413               }
4414             return true;
4415           }
4416     }
4417   return false;
4418 }
4419 
4420 /* Return a vector of incoming edges that do bring value VAL to node DEST.  It
4421    is assumed their number is known and equal to CALLER_COUNT.  */
4422 
4423 template <typename valtype>
4424 static vec<cgraph_edge *>
gather_edges_for_value(ipcp_value<valtype> * val,cgraph_node * dest,int caller_count)4425 gather_edges_for_value (ipcp_value<valtype> *val, cgraph_node *dest,
4426                               int caller_count)
4427 {
4428   ipcp_value_source<valtype> *src;
4429   vec<cgraph_edge *> ret;
4430 
4431   ret.create (caller_count);
4432   for (src = val->sources; src; src = src->next)
4433     {
4434       struct cgraph_edge *cs = src->cs;
4435       while (cs)
4436           {
4437             if (cgraph_edge_brings_value_p (cs, src, dest, val))
4438               ret.quick_push (cs);
4439             cs = get_next_cgraph_edge_clone (cs);
4440           }
4441     }
4442 
4443   if (caller_count > 1)
4444     adjust_callers_for_value_intersection (ret, dest);
4445 
4446   return ret;
4447 }
4448 
4449 /* Construct a replacement map for a know VALUE for a formal parameter PARAM.
4450    Return it or NULL if for some reason it cannot be created.  FORCE_LOAD_REF
4451    should be set to true when the reference created for the constant should be
4452    a load one and not an address one because the corresponding parameter p is
4453    only used as *p.  */
4454 
4455 static struct ipa_replace_map *
get_replacement_map(class ipa_node_params * info,tree value,int parm_num,bool force_load_ref)4456 get_replacement_map (class ipa_node_params *info, tree value, int parm_num,
4457                          bool force_load_ref)
4458 {
4459   struct ipa_replace_map *replace_map;
4460 
4461   replace_map = ggc_alloc<ipa_replace_map> ();
4462   if (dump_file)
4463     {
4464       fprintf (dump_file, "    replacing ");
4465       ipa_dump_param (dump_file, info, parm_num);
4466 
4467       fprintf (dump_file, " with const ");
4468       print_generic_expr (dump_file, value);
4469 
4470       if (force_load_ref)
4471           fprintf (dump_file, " - forcing load reference\n");
4472       else
4473           fprintf (dump_file, "\n");
4474     }
4475   replace_map->parm_num = parm_num;
4476   replace_map->new_tree = value;
4477   replace_map->force_load_ref = force_load_ref;
4478   return replace_map;
4479 }
4480 
4481 /* Dump new profiling counts of NODE.  SPEC is true when NODE is a specialzied
4482    one, otherwise it will be referred to as the original node.  */
4483 
4484 static void
dump_profile_updates(cgraph_node * node,bool spec)4485 dump_profile_updates (cgraph_node *node, bool spec)
4486 {
4487   if (spec)
4488     fprintf (dump_file, "     setting count of the specialized node %s to ",
4489                node->dump_name ());
4490   else
4491     fprintf (dump_file, "     setting count of the original node %s to ",
4492                node->dump_name ());
4493 
4494   node->count.dump (dump_file);
4495   fprintf (dump_file, "\n");
4496   for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
4497     {
4498       fprintf (dump_file, "       edge to %s has count ",
4499                  cs->callee->dump_name ());
4500       cs->count.dump (dump_file);
4501       fprintf (dump_file, "\n");
4502     }
4503 }
4504 
4505 /* With partial train run we do not want to assume that original's count is
4506    zero whenever we redurect all executed edges to clone.  Simply drop profile
4507    to local one in this case.  In eany case, return the new value.  ORIG_NODE
4508    is the original node and its count has not been updaed yet.  */
4509 
4510 profile_count
lenient_count_portion_handling(profile_count remainder,cgraph_node * orig_node)4511 lenient_count_portion_handling (profile_count remainder, cgraph_node *orig_node)
4512 {
4513   if (remainder.ipa_p () && !remainder.ipa ().nonzero_p ()
4514       && orig_node->count.ipa_p () && orig_node->count.ipa ().nonzero_p ()
4515       && opt_for_fn (orig_node->decl, flag_profile_partial_training))
4516     remainder = remainder.guessed_local ();
4517 
4518   return remainder;
4519 }
4520 
4521 /* Structure to sum counts coming from nodes other than the original node and
4522    its clones.  */
4523 
4524 struct gather_other_count_struct
4525 {
4526   cgraph_node *orig;
4527   profile_count other_count;
4528 };
4529 
4530 /* Worker callback of call_for_symbol_thunks_and_aliases summing the number of
4531    counts that come from non-self-recursive calls..  */
4532 
4533 static bool
gather_count_of_non_rec_edges(cgraph_node * node,void * data)4534 gather_count_of_non_rec_edges (cgraph_node *node, void *data)
4535 {
4536   gather_other_count_struct *desc = (gather_other_count_struct *) data;
4537   for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4538     if (cs->caller != desc->orig && cs->caller->clone_of != desc->orig)
4539       desc->other_count += cs->count.ipa ();
4540   return false;
4541 }
4542 
4543 /* Structure to help analyze if we need to boost counts of some clones of some
4544    non-recursive edges to match the new callee count.  */
4545 
4546 struct desc_incoming_count_struct
4547 {
4548   cgraph_node *orig;
4549   hash_set <cgraph_edge *> *processed_edges;
4550   profile_count count;
4551   unsigned unproc_orig_rec_edges;
4552 };
4553 
4554 /* Go over edges calling NODE and its thunks and gather information about
4555    incoming counts so that we know if we need to make any adjustments.  */
4556 
4557 static void
analyze_clone_icoming_counts(cgraph_node * node,desc_incoming_count_struct * desc)4558 analyze_clone_icoming_counts (cgraph_node *node,
4559                                     desc_incoming_count_struct *desc)
4560 {
4561   for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4562     if (cs->caller->thunk)
4563       {
4564           analyze_clone_icoming_counts (cs->caller, desc);
4565           continue;
4566       }
4567     else
4568       {
4569           if (cs->count.initialized_p ())
4570             desc->count += cs->count.ipa ();
4571           if (!desc->processed_edges->contains (cs)
4572               && cs->caller->clone_of == desc->orig)
4573             desc->unproc_orig_rec_edges++;
4574       }
4575 }
4576 
4577 /* If caller edge counts of a clone created for a self-recursive arithmetic
4578    jump function must be adjusted because it is coming from a the "seed" clone
4579    for the first value and so has been excessively scaled back as if it was not
4580    a recursive call, adjust it so that the incoming counts of NODE match its
4581    count. NODE is the node or its thunk.  */
4582 
4583 static void
adjust_clone_incoming_counts(cgraph_node * node,desc_incoming_count_struct * desc)4584 adjust_clone_incoming_counts (cgraph_node *node,
4585                                     desc_incoming_count_struct *desc)
4586 {
4587   for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4588     if (cs->caller->thunk)
4589       {
4590           adjust_clone_incoming_counts (cs->caller, desc);
4591           profile_count sum = profile_count::zero ();
4592           for (cgraph_edge *e = cs->caller->callers; e; e = e->next_caller)
4593             if (e->count.initialized_p ())
4594               sum += e->count.ipa ();
4595           cs->count = cs->count.combine_with_ipa_count (sum);
4596       }
4597     else if (!desc->processed_edges->contains (cs)
4598                && cs->caller->clone_of == desc->orig)
4599       {
4600           cs->count += desc->count;
4601           if (dump_file)
4602             {
4603               fprintf (dump_file, "       Adjusted count of an incoming edge of "
4604                          "a clone %s -> %s to ", cs->caller->dump_name (),
4605                          cs->callee->dump_name ());
4606               cs->count.dump (dump_file);
4607               fprintf (dump_file, "\n");
4608             }
4609       }
4610 }
4611 
4612 /* When ORIG_NODE has been cloned for values which have been generated fora
4613    self-recursive call as a result of an arithmetic pass-through
4614    jump-functions, adjust its count together with counts of all such clones in
4615    SELF_GEN_CLONES which also at this point contains ORIG_NODE itself.
4616 
4617    The function sums the counts of the original node and all its clones that
4618    cannot be attributed to a specific clone because it comes from a
4619    non-recursive edge.  This sum is then evenly divided between the clones and
4620    on top of that each one gets all the counts which can be attributed directly
4621    to it.  */
4622 
4623 static void
update_counts_for_self_gen_clones(cgraph_node * orig_node,const vec<cgraph_node * > & self_gen_clones)4624 update_counts_for_self_gen_clones (cgraph_node *orig_node,
4625                                            const vec<cgraph_node *> &self_gen_clones)
4626 {
4627   profile_count redist_sum = orig_node->count.ipa ();
4628   if (!(redist_sum > profile_count::zero ()))
4629     return;
4630 
4631   if (dump_file)
4632     fprintf (dump_file, "     Updating profile of self recursive clone "
4633                "series\n");
4634 
4635   gather_other_count_struct gocs;
4636   gocs.orig = orig_node;
4637   gocs.other_count = profile_count::zero ();
4638 
4639   auto_vec <profile_count, 8> other_edges_count;
4640   for (cgraph_node *n : self_gen_clones)
4641     {
4642       gocs.other_count = profile_count::zero ();
4643       n->call_for_symbol_thunks_and_aliases (gather_count_of_non_rec_edges,
4644                                                        &gocs, false);
4645       other_edges_count.safe_push (gocs.other_count);
4646       redist_sum -= gocs.other_count;
4647     }
4648 
4649   hash_set<cgraph_edge *> processed_edges;
4650   unsigned i = 0;
4651   for (cgraph_node *n : self_gen_clones)
4652     {
4653       profile_count orig_count = n->count;
4654       profile_count new_count
4655           = (redist_sum.apply_scale (1, self_gen_clones.length ())
4656              + other_edges_count[i]);
4657       new_count = lenient_count_portion_handling (new_count, orig_node);
4658       n->count = new_count;
4659       profile_count::adjust_for_ipa_scaling (&new_count, &orig_count);
4660       for (cgraph_edge *cs = n->callees; cs; cs = cs->next_callee)
4661           {
4662             cs->count = cs->count.apply_scale (new_count, orig_count);
4663             processed_edges.add (cs);
4664           }
4665       for (cgraph_edge *cs = n->indirect_calls; cs; cs = cs->next_callee)
4666           cs->count = cs->count.apply_scale (new_count, orig_count);
4667 
4668       i++;
4669     }
4670 
4671   /* There are still going to be edges to ORIG_NODE that have one or more
4672      clones coming from another node clone in SELF_GEN_CLONES and which we
4673      scaled by the same amount, which means that the total incoming sum of
4674      counts to ORIG_NODE will be too high, scale such edges back.  */
4675   for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
4676     {
4677       if (cs->callee->ultimate_alias_target () == orig_node)
4678           {
4679             unsigned den = 0;
4680             for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
4681               if (e->callee->ultimate_alias_target () == orig_node
4682                     && processed_edges.contains (e))
4683                 den++;
4684             if (den > 0)
4685               for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
4686                 if (e->callee->ultimate_alias_target () == orig_node
4687                       && processed_edges.contains (e))
4688                     e->count = e->count.apply_scale (1, den);
4689           }
4690     }
4691 
4692   /* Edges from the seeds of the valus generated for arithmetic jump-functions
4693      along self-recursive edges are likely to have fairly low count and so
4694      edges from them to nodes in the self_gen_clones do not correspond to the
4695      artificially distributed count of the nodes, the total sum of incoming
4696      edges to some clones might be too low.  Detect this situation and correct
4697      it.  */
4698   for (cgraph_node *n : self_gen_clones)
4699     {
4700       if (!(n->count.ipa () > profile_count::zero ()))
4701           continue;
4702 
4703       desc_incoming_count_struct desc;
4704       desc.orig = orig_node;
4705       desc.processed_edges = &processed_edges;
4706       desc.count = profile_count::zero ();
4707       desc.unproc_orig_rec_edges = 0;
4708       analyze_clone_icoming_counts (n, &desc);
4709 
4710       if (n->count.differs_from_p (desc.count))
4711           {
4712             if (n->count > desc.count
4713                 && desc.unproc_orig_rec_edges > 0)
4714               {
4715                 desc.count = n->count - desc.count;
4716                 desc.count
4717                     = desc.count.apply_scale (1, desc.unproc_orig_rec_edges);
4718                 adjust_clone_incoming_counts (n, &desc);
4719               }
4720             else if (dump_file)
4721               fprintf (dump_file,
4722                          "       Unable to fix up incoming counts for %s.\n",
4723                          n->dump_name ());
4724           }
4725     }
4726 
4727   if (dump_file)
4728     for (cgraph_node *n : self_gen_clones)
4729       dump_profile_updates (n, n != orig_node);
4730   return;
4731 }
4732 
4733 /* After a specialized NEW_NODE version of ORIG_NODE has been created, update
4734    their profile information to reflect this.  This function should not be used
4735    for clones generated for arithmetic pass-through jump functions on a
4736    self-recursive call graph edge, that situation is handled by
4737    update_counts_for_self_gen_clones.  */
4738 
4739 static void
update_profiling_info(struct cgraph_node * orig_node,struct cgraph_node * new_node)4740 update_profiling_info (struct cgraph_node *orig_node,
4741                            struct cgraph_node *new_node)
4742 {
4743   struct caller_statistics stats;
4744   profile_count new_sum;
4745   profile_count remainder, orig_node_count = orig_node->count.ipa ();
4746 
4747   if (!(orig_node_count > profile_count::zero ()))
4748     return;
4749 
4750   if (dump_file)
4751     {
4752       fprintf (dump_file, "     Updating profile from original count: ");
4753       orig_node_count.dump (dump_file);
4754       fprintf (dump_file, "\n");
4755     }
4756 
4757   init_caller_stats (&stats, new_node);
4758   new_node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
4759                                                         false);
4760   new_sum = stats.count_sum;
4761 
4762   if (new_sum > orig_node_count)
4763     {
4764       /* TODO: Perhaps this should be gcc_unreachable ()?  */
4765       remainder = profile_count::zero ().guessed_local ();
4766     }
4767   else if (stats.rec_count_sum.nonzero_p ())
4768     {
4769       int new_nonrec_calls = stats.n_nonrec_calls;
4770       /* There are self-recursive edges which are likely to bring in the
4771            majority of calls but which we must divide in between the original and
4772            new node.  */
4773       init_caller_stats (&stats, orig_node);
4774       orig_node->call_for_symbol_thunks_and_aliases (gather_caller_stats,
4775                                                                  &stats, false);
4776       int orig_nonrec_calls = stats.n_nonrec_calls;
4777       profile_count orig_nonrec_call_count = stats.count_sum;
4778 
4779       if (orig_node->local)
4780           {
4781             if (!orig_nonrec_call_count.nonzero_p ())
4782               {
4783                 if (dump_file)
4784                     fprintf (dump_file, "       The original is local and the only "
4785                                "incoming edges from non-dead callers with nonzero "
4786                                "counts are self-recursive, assuming it is cold.\n");
4787                 /* The NEW_NODE count and counts of all its outgoing edges
4788                      are still unmodified copies of ORIG_NODE's.  Just clear
4789                      the latter and bail out.  */
4790                 profile_count zero;
4791               if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
4792                 zero = profile_count::zero ().guessed_local ();
4793                 else
4794                     zero = profile_count::adjusted_zero ();
4795                 orig_node->count = zero;
4796                 for (cgraph_edge *cs = orig_node->callees;
4797                        cs;
4798                        cs = cs->next_callee)
4799                     cs->count = zero;
4800                 for (cgraph_edge *cs = orig_node->indirect_calls;
4801                        cs;
4802                        cs = cs->next_callee)
4803                     cs->count = zero;
4804                 return;
4805               }
4806           }
4807       else
4808           {
4809             /* Let's behave as if there was another caller that accounts for all
4810                the calls that were either indirect or from other compilation
4811                units. */
4812             orig_nonrec_calls++;
4813             profile_count pretend_caller_count
4814               = (orig_node_count - new_sum - orig_nonrec_call_count
4815                  - stats.rec_count_sum);
4816             orig_nonrec_call_count += pretend_caller_count;
4817           }
4818 
4819       /* Divide all "unexplained" counts roughly proportionally to sums of
4820            counts of non-recursive calls.
4821 
4822            We put rather arbitrary limits on how many counts we claim because the
4823            number of non-self-recursive incoming count is only a rough guideline
4824            and there are cases (such as mcf) where using it blindly just takes
4825            too many.  And if lattices are considered in the opposite order we
4826            could also take too few.  */
4827       profile_count unexp = orig_node_count - new_sum - orig_nonrec_call_count;
4828 
4829       int limit_den = 2 * (orig_nonrec_calls + new_nonrec_calls);
4830       profile_count new_part
4831           = MAX(MIN (unexp.apply_scale (new_sum,
4832                                               new_sum + orig_nonrec_call_count),
4833                        unexp.apply_scale (limit_den - 1, limit_den)),
4834                 unexp.apply_scale (new_nonrec_calls, limit_den));
4835       if (dump_file)
4836           {
4837             fprintf (dump_file, "       Claiming ");
4838             new_part.dump (dump_file);
4839             fprintf (dump_file, " of unexplained ");
4840             unexp.dump (dump_file);
4841             fprintf (dump_file, " counts because of self-recursive "
4842                        "calls\n");
4843           }
4844       new_sum += new_part;
4845       remainder = lenient_count_portion_handling (orig_node_count - new_sum,
4846                                                               orig_node);
4847     }
4848   else
4849     remainder = lenient_count_portion_handling (orig_node_count - new_sum,
4850                                                             orig_node);
4851 
4852   new_sum = orig_node_count.combine_with_ipa_count (new_sum);
4853   new_node->count = new_sum;
4854   orig_node->count = remainder;
4855 
4856   profile_count orig_new_node_count = orig_node_count;
4857   profile_count::adjust_for_ipa_scaling (&new_sum, &orig_new_node_count);
4858   for (cgraph_edge *cs = new_node->callees; cs; cs = cs->next_callee)
4859     cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
4860   for (cgraph_edge *cs = new_node->indirect_calls; cs; cs = cs->next_callee)
4861     cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
4862 
4863   profile_count::adjust_for_ipa_scaling (&remainder, &orig_node_count);
4864   for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
4865     cs->count = cs->count.apply_scale (remainder, orig_node_count);
4866   for (cgraph_edge *cs = orig_node->indirect_calls; cs; cs = cs->next_callee)
4867     cs->count = cs->count.apply_scale (remainder, orig_node_count);
4868 
4869   if (dump_file)
4870     {
4871       dump_profile_updates (new_node, true);
4872       dump_profile_updates (orig_node, false);
4873     }
4874 }
4875 
4876 /* Update the respective profile of specialized NEW_NODE and the original
4877    ORIG_NODE after additional edges with cumulative count sum REDIRECTED_SUM
4878    have been redirected to the specialized version.  */
4879 
4880 static void
update_specialized_profile(struct cgraph_node * new_node,struct cgraph_node * orig_node,profile_count redirected_sum)4881 update_specialized_profile (struct cgraph_node *new_node,
4882                                   struct cgraph_node *orig_node,
4883                                   profile_count redirected_sum)
4884 {
4885   struct cgraph_edge *cs;
4886   profile_count new_node_count, orig_node_count = orig_node->count.ipa ();
4887 
4888   if (dump_file)
4889     {
4890       fprintf (dump_file, "    the sum of counts of redirected  edges is ");
4891       redirected_sum.dump (dump_file);
4892       fprintf (dump_file, "\n    old ipa count of the original node is ");
4893       orig_node_count.dump (dump_file);
4894       fprintf (dump_file, "\n");
4895     }
4896   if (!(orig_node_count > profile_count::zero ()))
4897     return;
4898 
4899   new_node_count = new_node->count;
4900   new_node->count += redirected_sum;
4901   orig_node->count
4902     = lenient_count_portion_handling (orig_node->count - redirected_sum,
4903                                               orig_node);
4904 
4905   for (cs = new_node->callees; cs; cs = cs->next_callee)
4906     cs->count += cs->count.apply_scale (redirected_sum, new_node_count);
4907 
4908   for (cs = orig_node->callees; cs; cs = cs->next_callee)
4909     {
4910       profile_count dec = cs->count.apply_scale (redirected_sum,
4911                                                              orig_node_count);
4912       cs->count -= dec;
4913     }
4914 
4915   if (dump_file)
4916     {
4917       dump_profile_updates (new_node, true);
4918       dump_profile_updates (orig_node, false);
4919     }
4920 }
4921 
4922 static void adjust_references_in_caller (cgraph_edge *cs,
4923                                                    symtab_node *symbol, int index);
4924 
4925 /* Simple structure to pass a symbol and index (with same meaning as parameters
4926    of adjust_references_in_caller) through a void* parameter of a
4927    call_for_symbol_thunks_and_aliases callback. */
4928 struct symbol_and_index_together
4929 {
4930   symtab_node *symbol;
4931   int index;
4932 };
4933 
4934 /* Worker callback of call_for_symbol_thunks_and_aliases to recursively call
4935    adjust_references_in_caller on edges up in the call-graph, if necessary. */
4936 static bool
adjust_refs_in_act_callers(struct cgraph_node * node,void * data)4937 adjust_refs_in_act_callers (struct cgraph_node *node, void *data)
4938 {
4939   symbol_and_index_together *pack = (symbol_and_index_together *) data;
4940   for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4941     if (!cs->caller->thunk)
4942       adjust_references_in_caller (cs, pack->symbol, pack->index);
4943   return false;
4944 }
4945 
4946 /* At INDEX of a function being called by CS there is an ADDR_EXPR of a
4947    variable which is only dereferenced and which is represented by SYMBOL.  See
4948    if we can remove ADDR reference in callers assosiated witht the call. */
4949 
4950 static void
adjust_references_in_caller(cgraph_edge * cs,symtab_node * symbol,int index)4951 adjust_references_in_caller (cgraph_edge *cs, symtab_node *symbol, int index)
4952 {
4953   ipa_edge_args *args = ipa_edge_args_sum->get (cs);
4954   ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, index);
4955   if (jfunc->type == IPA_JF_CONST)
4956     {
4957       ipa_ref *to_del = cs->caller->find_reference (symbol, cs->call_stmt,
4958                                                                 cs->lto_stmt_uid,
4959                                                                 IPA_REF_ADDR);
4960       if (!to_del)
4961           return;
4962       to_del->remove_reference ();
4963       ipa_zap_jf_refdesc (jfunc);
4964       if (dump_file)
4965           fprintf (dump_file, "    Removed a reference from %s to %s.\n",
4966                      cs->caller->dump_name (), symbol->dump_name ());
4967       return;
4968     }
4969 
4970   if (jfunc->type != IPA_JF_PASS_THROUGH
4971       || ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR
4972       || ipa_get_jf_pass_through_refdesc_decremented (jfunc))
4973     return;
4974 
4975   int fidx = ipa_get_jf_pass_through_formal_id (jfunc);
4976   cgraph_node *caller = cs->caller;
4977   ipa_node_params *caller_info = ipa_node_params_sum->get (caller);
4978   /* TODO: This consistency check may be too big and not really
4979      that useful.  Consider removing it.  */
4980   tree cst;
4981   if (caller_info->ipcp_orig_node)
4982     cst = caller_info->known_csts[fidx];
4983   else
4984     {
4985       ipcp_lattice<tree> *lat = ipa_get_scalar_lat (caller_info, fidx);
4986       gcc_assert (lat->is_single_const ());
4987       cst = lat->values->value;
4988     }
4989   gcc_assert (TREE_CODE (cst) == ADDR_EXPR
4990                 && (symtab_node::get (get_base_address (TREE_OPERAND (cst, 0)))
4991                       == symbol));
4992 
4993   int cuses = ipa_get_controlled_uses (caller_info, fidx);
4994   if (cuses == IPA_UNDESCRIBED_USE)
4995     return;
4996   gcc_assert (cuses > 0);
4997   cuses--;
4998   ipa_set_controlled_uses (caller_info, fidx, cuses);
4999   ipa_set_jf_pass_through_refdesc_decremented (jfunc, true);
5000   if (dump_file && (dump_flags & TDF_DETAILS))
5001     fprintf (dump_file, "    Controlled uses of parameter %i of %s dropped "
5002                "to %i.\n", fidx, caller->dump_name (), cuses);
5003   if (cuses)
5004     return;
5005 
5006   if (caller_info->ipcp_orig_node)
5007     {
5008       /* Cloning machinery has created a reference here, we need to either
5009            remove it or change it to a read one.  */
5010       ipa_ref *to_del = caller->find_reference (symbol, NULL, 0, IPA_REF_ADDR);
5011       if (to_del)
5012           {
5013             to_del->remove_reference ();
5014             if (dump_file)
5015               fprintf (dump_file, "    Removed a reference from %s to %s.\n",
5016                          cs->caller->dump_name (), symbol->dump_name ());
5017             if (ipa_get_param_load_dereferenced (caller_info, fidx))
5018               {
5019                 caller->create_reference (symbol, IPA_REF_LOAD, NULL);
5020                 if (dump_file)
5021                     fprintf (dump_file,
5022                                "      ...and replaced it with LOAD one.\n");
5023               }
5024           }
5025     }
5026 
5027   symbol_and_index_together pack;
5028   pack.symbol = symbol;
5029   pack.index = fidx;
5030   if (caller->can_change_signature)
5031     caller->call_for_symbol_thunks_and_aliases (adjust_refs_in_act_callers,
5032                                                             &pack, true);
5033 }
5034 
5035 
5036 /* Return true if we would like to remove a parameter from NODE when cloning it
5037    with KNOWN_CSTS scalar constants.  */
5038 
5039 static bool
want_remove_some_param_p(cgraph_node * node,vec<tree> known_csts)5040 want_remove_some_param_p (cgraph_node *node, vec<tree> known_csts)
5041 {
5042   auto_vec<bool, 16> surviving;
5043   bool filled_vec = false;
5044   ipa_node_params *info = ipa_node_params_sum->get (node);
5045   int i, count = ipa_get_param_count (info);
5046 
5047   for (i = 0; i < count; i++)
5048     {
5049       if (!known_csts[i] && ipa_is_param_used (info, i))
5050        continue;
5051 
5052       if (!filled_vec)
5053        {
5054            clone_info *info = clone_info::get (node);
5055            if (!info || !info->param_adjustments)
5056            return true;
5057            info->param_adjustments->get_surviving_params (&surviving);
5058          filled_vec = true;
5059        }
5060       if (surviving.length() < (unsigned) i &&  surviving[i])
5061        return true;
5062     }
5063   return false;
5064 }
5065 
5066 /* Create a specialized version of NODE with known constants in KNOWN_CSTS,
5067    known contexts in KNOWN_CONTEXTS and known aggregate values in AGGVALS and
5068    redirect all edges in CALLERS to it.  */
5069 
5070 static struct cgraph_node *
create_specialized_node(struct cgraph_node * node,vec<tree> known_csts,vec<ipa_polymorphic_call_context> known_contexts,struct ipa_agg_replacement_value * aggvals,vec<cgraph_edge * > & callers)5071 create_specialized_node (struct cgraph_node *node,
5072                                vec<tree> known_csts,
5073                                vec<ipa_polymorphic_call_context> known_contexts,
5074                                struct ipa_agg_replacement_value *aggvals,
5075                                vec<cgraph_edge *> &callers)
5076 {
5077   ipa_node_params *new_info, *info = ipa_node_params_sum->get (node);
5078   vec<ipa_replace_map *, va_gc> *replace_trees = NULL;
5079   vec<ipa_adjusted_param, va_gc> *new_params = NULL;
5080   struct ipa_agg_replacement_value *av;
5081   struct cgraph_node *new_node;
5082   int i, count = ipa_get_param_count (info);
5083   clone_info *cinfo = clone_info::get (node);
5084   ipa_param_adjustments *old_adjustments = cinfo
5085                                                      ? cinfo->param_adjustments : NULL;
5086   ipa_param_adjustments *new_adjustments;
5087   gcc_assert (!info->ipcp_orig_node);
5088   gcc_assert (node->can_change_signature
5089                 || !old_adjustments);
5090 
5091   if (old_adjustments)
5092     {
5093       /* At the moment all IPA optimizations should use the number of
5094            parameters of the prevailing decl as the m_always_copy_start.
5095            Handling any other value would complicate the code below, so for the
5096            time bing let's only assert it is so.  */
5097       gcc_assert (old_adjustments->m_always_copy_start == count
5098                       || old_adjustments->m_always_copy_start < 0);
5099       int old_adj_count = vec_safe_length (old_adjustments->m_adj_params);
5100       for (i = 0; i < old_adj_count; i++)
5101           {
5102             ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
5103             if (!node->can_change_signature
5104                 || old_adj->op != IPA_PARAM_OP_COPY
5105                 || (!known_csts[old_adj->base_index]
5106                       && ipa_is_param_used (info, old_adj->base_index)))
5107               {
5108                 ipa_adjusted_param new_adj = *old_adj;
5109 
5110                 new_adj.prev_clone_adjustment = true;
5111                 new_adj.prev_clone_index = i;
5112                 vec_safe_push (new_params, new_adj);
5113               }
5114           }
5115       bool skip_return = old_adjustments->m_skip_return;
5116       new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
5117                                ipa_param_adjustments (new_params, count,
5118                                                             skip_return));
5119     }
5120   else if (node->can_change_signature
5121              && want_remove_some_param_p (node, known_csts))
5122     {
5123       ipa_adjusted_param adj;
5124       memset (&adj, 0, sizeof (adj));
5125       adj.op = IPA_PARAM_OP_COPY;
5126       for (i = 0; i < count; i++)
5127           if (!known_csts[i] && ipa_is_param_used (info, i))
5128             {
5129               adj.base_index = i;
5130               adj.prev_clone_index = i;
5131               vec_safe_push (new_params, adj);
5132             }
5133       new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
5134                                ipa_param_adjustments (new_params, count, false));
5135     }
5136   else
5137     new_adjustments = NULL;
5138 
5139   auto_vec<cgraph_edge *, 2> self_recursive_calls;
5140   for (i = callers.length () - 1; i >= 0; i--)
5141     {
5142       cgraph_edge *cs = callers[i];
5143       if (cs->caller == node)
5144           {
5145             self_recursive_calls.safe_push (cs);
5146             callers.unordered_remove (i);
5147           }
5148     }
5149   replace_trees = cinfo ? vec_safe_copy (cinfo->tree_map) : NULL;
5150   for (i = 0; i < count; i++)
5151     {
5152       tree t = known_csts[i];
5153       if (!t)
5154           continue;
5155 
5156       gcc_checking_assert (TREE_CODE (t) != TREE_BINFO);
5157 
5158       bool load_ref = false;
5159       symtab_node *ref_symbol;
5160       if (TREE_CODE (t) == ADDR_EXPR)
5161           {
5162             tree base = get_base_address (TREE_OPERAND (t, 0));
5163             if (TREE_CODE (base) == VAR_DECL
5164                 && ipa_get_controlled_uses (info, i) == 0
5165                 && ipa_get_param_load_dereferenced (info, i)
5166                 && (ref_symbol = symtab_node::get (base)))
5167               {
5168                 load_ref = true;
5169                 if (node->can_change_signature)
5170                     for (cgraph_edge *caller : callers)
5171                       adjust_references_in_caller (caller, ref_symbol, i);
5172               }
5173           }
5174 
5175       ipa_replace_map *replace_map = get_replacement_map (info, t, i, load_ref);
5176       if (replace_map)
5177           vec_safe_push (replace_trees, replace_map);
5178     }
5179 
5180   unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
5181                                      IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
5182                                          node->decl)));
5183   new_node = node->create_virtual_clone (callers, replace_trees,
5184                                                    new_adjustments, "constprop",
5185                                                    suffix_counter);
5186   suffix_counter++;
5187 
5188   bool have_self_recursive_calls = !self_recursive_calls.is_empty ();
5189   for (unsigned j = 0; j < self_recursive_calls.length (); j++)
5190     {
5191       cgraph_edge *cs = get_next_cgraph_edge_clone (self_recursive_calls[j]);
5192       /* Cloned edges can disappear during cloning as speculation can be
5193            resolved, check that we have one and that it comes from the last
5194            cloning.  */
5195       if (cs && cs->caller == new_node)
5196           cs->redirect_callee_duplicating_thunks (new_node);
5197       /* Any future code that would make more than one clone of an outgoing
5198            edge would confuse this mechanism, so let's check that does not
5199            happen.  */
5200       gcc_checking_assert (!cs
5201                                  || !get_next_cgraph_edge_clone (cs)
5202                                  || get_next_cgraph_edge_clone (cs)->caller != new_node);
5203     }
5204   if (have_self_recursive_calls)
5205     new_node->expand_all_artificial_thunks ();
5206 
5207   ipa_set_node_agg_value_chain (new_node, aggvals);
5208   for (av = aggvals; av; av = av->next)
5209     new_node->maybe_create_reference (av->value, NULL);
5210 
5211   if (dump_file && (dump_flags & TDF_DETAILS))
5212     {
5213       fprintf (dump_file, "     the new node is %s.\n", new_node->dump_name ());
5214       if (known_contexts.exists ())
5215           {
5216             for (i = 0; i < count; i++)
5217               if (!known_contexts[i].useless_p ())
5218                 {
5219                     fprintf (dump_file, "     known ctx %i is ", i);
5220                     known_contexts[i].dump (dump_file);
5221                 }
5222           }
5223       if (aggvals)
5224           ipa_dump_agg_replacement_values (dump_file, aggvals);
5225     }
5226 
5227   new_info = ipa_node_params_sum->get (new_node);
5228   new_info->ipcp_orig_node = node;
5229   new_node->ipcp_clone = true;
5230   new_info->known_csts = known_csts;
5231   new_info->known_contexts = known_contexts;
5232 
5233   ipcp_discover_new_direct_edges (new_node, known_csts, known_contexts, aggvals);
5234 
5235   return new_node;
5236 }
5237 
5238 /* Return true if JFUNC, which describes a i-th parameter of call CS, is a
5239    pass-through function to itself when the cgraph_node involved is not an
5240    IPA-CP clone.  When SIMPLE is true, further check if JFUNC is a simple
5241    no-operation pass-through.  */
5242 
5243 static bool
self_recursive_pass_through_p(cgraph_edge * cs,ipa_jump_func * jfunc,int i,bool simple=true)5244 self_recursive_pass_through_p (cgraph_edge *cs, ipa_jump_func *jfunc, int i,
5245                                      bool simple = true)
5246 {
5247   enum availability availability;
5248   if (cs->caller == cs->callee->function_symbol (&availability)
5249       && availability > AVAIL_INTERPOSABLE
5250       && jfunc->type == IPA_JF_PASS_THROUGH
5251       && (!simple || ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
5252       && ipa_get_jf_pass_through_formal_id (jfunc) == i
5253       && ipa_node_params_sum->get (cs->caller)
5254       && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
5255     return true;
5256   return false;
5257 }
5258 
5259 /* Return true if JFUNC, which describes a part of an aggregate represented or
5260    pointed to by the i-th parameter of call CS, is a pass-through function to
5261    itself when the cgraph_node involved is not an IPA-CP clone..  When
5262    SIMPLE is true, further check if JFUNC is a simple no-operation
5263    pass-through.  */
5264 
5265 static bool
self_recursive_agg_pass_through_p(cgraph_edge * cs,ipa_agg_jf_item * jfunc,int i,bool simple=true)5266 self_recursive_agg_pass_through_p (cgraph_edge *cs, ipa_agg_jf_item *jfunc,
5267                                            int i, bool simple = true)
5268 {
5269   enum availability availability;
5270   if (cs->caller == cs->callee->function_symbol (&availability)
5271       && availability > AVAIL_INTERPOSABLE
5272       && jfunc->jftype == IPA_JF_LOAD_AGG
5273       && jfunc->offset == jfunc->value.load_agg.offset
5274       && (!simple || jfunc->value.pass_through.operation == NOP_EXPR)
5275       && jfunc->value.pass_through.formal_id == i
5276       && useless_type_conversion_p (jfunc->value.load_agg.type, jfunc->type)
5277       && ipa_node_params_sum->get (cs->caller)
5278       && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
5279     return true;
5280   return false;
5281 }
5282 
5283 /* Given a NODE, and a subset of its CALLERS, try to populate blanks slots in
5284    KNOWN_CSTS with constants that are also known for all of the CALLERS.  */
5285 
5286 static void
find_more_scalar_values_for_callers_subset(struct cgraph_node * node,vec<tree> & known_csts,const vec<cgraph_edge * > & callers)5287 find_more_scalar_values_for_callers_subset (struct cgraph_node *node,
5288                                                       vec<tree> &known_csts,
5289                                                       const vec<cgraph_edge *> &callers)
5290 {
5291   ipa_node_params *info = ipa_node_params_sum->get (node);
5292   int i, count = ipa_get_param_count (info);
5293 
5294   for (i = 0; i < count; i++)
5295     {
5296       struct cgraph_edge *cs;
5297       tree newval = NULL_TREE;
5298       int j;
5299       bool first = true;
5300       tree type = ipa_get_type (info, i);
5301 
5302       if (ipa_get_scalar_lat (info, i)->bottom || known_csts[i])
5303           continue;
5304 
5305       FOR_EACH_VEC_ELT (callers, j, cs)
5306           {
5307             struct ipa_jump_func *jump_func;
5308             tree t;
5309 
5310             ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5311             if (!args
5312                 || i >= ipa_get_cs_argument_count (args)
5313                 || (i == 0
5314                       && call_passes_through_thunk (cs)))
5315               {
5316                 newval = NULL_TREE;
5317                 break;
5318               }
5319             jump_func = ipa_get_ith_jump_func (args, i);
5320 
5321             /* Besides simple pass-through jump function, arithmetic jump
5322                function could also introduce argument-direct-pass-through for
5323                self-feeding recursive call.  For example,
5324 
5325                   fn (int i)
5326                   {
5327                     fn (i & 1);
5328                   }
5329 
5330                Given that i is 0, recursive propagation via (i & 1) also gets
5331                0.  */
5332             if (self_recursive_pass_through_p (cs, jump_func, i, false))
5333               {
5334                 gcc_assert (newval);
5335                 t = ipa_get_jf_arith_result (
5336                                         ipa_get_jf_pass_through_operation (jump_func),
5337                                         newval,
5338                                         ipa_get_jf_pass_through_operand (jump_func),
5339                                         type);
5340               }
5341             else
5342               t = ipa_value_from_jfunc (ipa_node_params_sum->get (cs->caller),
5343                                               jump_func, type);
5344             if (!t
5345                 || (newval
5346                       && !values_equal_for_ipcp_p (t, newval))
5347                 || (!first && !newval))
5348               {
5349                 newval = NULL_TREE;
5350                 break;
5351               }
5352             else
5353               newval = t;
5354             first = false;
5355           }
5356 
5357       if (newval)
5358           {
5359             if (dump_file && (dump_flags & TDF_DETAILS))
5360               {
5361                 fprintf (dump_file, "    adding an extra known scalar value ");
5362                 print_ipcp_constant_value (dump_file, newval);
5363                 fprintf (dump_file, " for ");
5364                 ipa_dump_param (dump_file, info, i);
5365                 fprintf (dump_file, "\n");
5366               }
5367 
5368             known_csts[i] = newval;
5369           }
5370     }
5371 }
5372 
5373 /* Given a NODE and a subset of its CALLERS, try to populate plank slots in
5374    KNOWN_CONTEXTS with polymorphic contexts that are also known for all of the
5375    CALLERS.  */
5376 
5377 static void
find_more_contexts_for_caller_subset(cgraph_node * node,vec<ipa_polymorphic_call_context> * known_contexts,const vec<cgraph_edge * > & callers)5378 find_more_contexts_for_caller_subset (cgraph_node *node,
5379                                               vec<ipa_polymorphic_call_context>
5380                                               *known_contexts,
5381                                               const vec<cgraph_edge *> &callers)
5382 {
5383   ipa_node_params *info = ipa_node_params_sum->get (node);
5384   int i, count = ipa_get_param_count (info);
5385 
5386   for (i = 0; i < count; i++)
5387     {
5388       cgraph_edge *cs;
5389 
5390       if (ipa_get_poly_ctx_lat (info, i)->bottom
5391             || (known_contexts->exists ()
5392                 && !(*known_contexts)[i].useless_p ()))
5393           continue;
5394 
5395       ipa_polymorphic_call_context newval;
5396       bool first = true;
5397       int j;
5398 
5399       FOR_EACH_VEC_ELT (callers, j, cs)
5400           {
5401             ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5402             if (!args
5403                 || i >= ipa_get_cs_argument_count (args))
5404               return;
5405             ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
5406             ipa_polymorphic_call_context ctx;
5407             ctx = ipa_context_from_jfunc (ipa_node_params_sum->get (cs->caller),
5408                                                   cs, i, jfunc);
5409             if (first)
5410               {
5411                 newval = ctx;
5412                 first = false;
5413               }
5414             else
5415               newval.meet_with (ctx);
5416             if (newval.useless_p ())
5417               break;
5418           }
5419 
5420       if (!newval.useless_p ())
5421           {
5422             if (dump_file && (dump_flags & TDF_DETAILS))
5423               {
5424                 fprintf (dump_file, "    adding an extra known polymorphic "
5425                            "context ");
5426                 print_ipcp_constant_value (dump_file, newval);
5427                 fprintf (dump_file, " for ");
5428                 ipa_dump_param (dump_file, info, i);
5429                 fprintf (dump_file, "\n");
5430               }
5431 
5432             if (!known_contexts->exists ())
5433               known_contexts->safe_grow_cleared (ipa_get_param_count (info),
5434                                                          true);
5435             (*known_contexts)[i] = newval;
5436           }
5437 
5438     }
5439 }
5440 
5441 /* Go through PLATS and create a vector of values consisting of values and
5442    offsets (minus OFFSET) of lattices that contain only a single value.  */
5443 
5444 static vec<ipa_agg_value>
copy_plats_to_inter(class ipcp_param_lattices * plats,HOST_WIDE_INT offset)5445 copy_plats_to_inter (class ipcp_param_lattices *plats, HOST_WIDE_INT offset)
5446 {
5447   vec<ipa_agg_value> res = vNULL;
5448 
5449   if (!plats->aggs || plats->aggs_contain_variable || plats->aggs_bottom)
5450     return vNULL;
5451 
5452   for (struct ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
5453     if (aglat->is_single_const ())
5454       {
5455           struct ipa_agg_value ti;
5456           ti.offset = aglat->offset - offset;
5457           ti.value = aglat->values->value;
5458           res.safe_push (ti);
5459       }
5460   return res;
5461 }
5462 
5463 /* Intersect all values in INTER with single value lattices in PLATS (while
5464    subtracting OFFSET).  */
5465 
5466 static void
intersect_with_plats(class ipcp_param_lattices * plats,vec<ipa_agg_value> * inter,HOST_WIDE_INT offset)5467 intersect_with_plats (class ipcp_param_lattices *plats,
5468                           vec<ipa_agg_value> *inter,
5469                           HOST_WIDE_INT offset)
5470 {
5471   struct ipcp_agg_lattice *aglat;
5472   struct ipa_agg_value *item;
5473   int k;
5474 
5475   if (!plats->aggs || plats->aggs_contain_variable || plats->aggs_bottom)
5476     {
5477       inter->release ();
5478       return;
5479     }
5480 
5481   aglat = plats->aggs;
5482   FOR_EACH_VEC_ELT (*inter, k, item)
5483     {
5484       bool found = false;
5485       if (!item->value)
5486           continue;
5487       while (aglat)
5488           {
5489             if (aglat->offset - offset > item->offset)
5490               break;
5491             if (aglat->offset - offset == item->offset)
5492               {
5493                 if (aglat->is_single_const ())
5494                     {
5495                       tree value = aglat->values->value;
5496 
5497                       if (values_equal_for_ipcp_p (item->value, value))
5498                         found = true;
5499                     }
5500                 break;
5501               }
5502             aglat = aglat->next;
5503           }
5504       if (!found)
5505           item->value = NULL_TREE;
5506     }
5507 }
5508 
5509 /* Copy aggregate replacement values of NODE (which is an IPA-CP clone) to the
5510    vector result while subtracting OFFSET from the individual value offsets.  */
5511 
5512 static vec<ipa_agg_value>
agg_replacements_to_vector(struct cgraph_node * node,int index,HOST_WIDE_INT offset)5513 agg_replacements_to_vector (struct cgraph_node *node, int index,
5514                                   HOST_WIDE_INT offset)
5515 {
5516   struct ipa_agg_replacement_value *av;
5517   vec<ipa_agg_value> res = vNULL;
5518 
5519   for (av = ipa_get_agg_replacements_for_node (node); av; av = av->next)
5520     if (av->index == index
5521           && (av->offset - offset) >= 0)
5522     {
5523       struct ipa_agg_value item;
5524       gcc_checking_assert (av->value);
5525       item.offset = av->offset - offset;
5526       item.value = av->value;
5527       res.safe_push (item);
5528     }
5529 
5530   return res;
5531 }
5532 
5533 /* Intersect all values in INTER with those that we have already scheduled to
5534    be replaced in parameter number INDEX of NODE, which is an IPA-CP clone
5535    (while subtracting OFFSET).  */
5536 
5537 static void
intersect_with_agg_replacements(struct cgraph_node * node,int index,vec<ipa_agg_value> * inter,HOST_WIDE_INT offset)5538 intersect_with_agg_replacements (struct cgraph_node *node, int index,
5539                                          vec<ipa_agg_value> *inter,
5540                                          HOST_WIDE_INT offset)
5541 {
5542   struct ipa_agg_replacement_value *srcvals;
5543   struct ipa_agg_value *item;
5544   int i;
5545 
5546   srcvals = ipa_get_agg_replacements_for_node (node);
5547   if (!srcvals)
5548     {
5549       inter->release ();
5550       return;
5551     }
5552 
5553   FOR_EACH_VEC_ELT (*inter, i, item)
5554     {
5555       struct ipa_agg_replacement_value *av;
5556       bool found = false;
5557       if (!item->value)
5558           continue;
5559       for (av = srcvals; av; av = av->next)
5560           {
5561             gcc_checking_assert (av->value);
5562             if (av->index == index
5563                 && av->offset - offset == item->offset)
5564               {
5565                 if (values_equal_for_ipcp_p (item->value, av->value))
5566                     found = true;
5567                 break;
5568               }
5569           }
5570       if (!found)
5571           item->value = NULL_TREE;
5572     }
5573 }
5574 
5575 /* Intersect values in INTER with aggregate values that come along edge CS to
5576    parameter number INDEX and return it.  If INTER does not actually exist yet,
5577    copy all incoming values to it.  If we determine we ended up with no values
5578    whatsoever, return a released vector.  */
5579 
5580 static vec<ipa_agg_value>
intersect_aggregates_with_edge(struct cgraph_edge * cs,int index,vec<ipa_agg_value> inter)5581 intersect_aggregates_with_edge (struct cgraph_edge *cs, int index,
5582                                         vec<ipa_agg_value> inter)
5583 {
5584   struct ipa_jump_func *jfunc;
5585   jfunc = ipa_get_ith_jump_func (ipa_edge_args_sum->get (cs), index);
5586   if (jfunc->type == IPA_JF_PASS_THROUGH
5587       && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
5588     {
5589       ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
5590       int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
5591 
5592       if (caller_info->ipcp_orig_node)
5593           {
5594             struct cgraph_node *orig_node = caller_info->ipcp_orig_node;
5595             class ipcp_param_lattices *orig_plats;
5596             ipa_node_params *orig_info = ipa_node_params_sum->get (orig_node);
5597             orig_plats = ipa_get_parm_lattices (orig_info, src_idx);
5598             if (agg_pass_through_permissible_p (orig_plats, jfunc))
5599               {
5600                 if (!inter.exists ())
5601                     inter = agg_replacements_to_vector (cs->caller, src_idx, 0);
5602                 else
5603                     intersect_with_agg_replacements (cs->caller, src_idx,
5604                                                              &inter, 0);
5605                 return inter;
5606               }
5607           }
5608       else
5609           {
5610             class ipcp_param_lattices *src_plats;
5611             src_plats = ipa_get_parm_lattices (caller_info, src_idx);
5612             if (agg_pass_through_permissible_p (src_plats, jfunc))
5613               {
5614                 /* Currently we do not produce clobber aggregate jump
5615                      functions, adjust when we do.  */
5616                 gcc_checking_assert (!jfunc->agg.items);
5617                 if (!inter.exists ())
5618                     inter = copy_plats_to_inter (src_plats, 0);
5619                 else
5620                     intersect_with_plats (src_plats, &inter, 0);
5621                 return inter;
5622               }
5623           }
5624     }
5625   else if (jfunc->type == IPA_JF_ANCESTOR
5626              && ipa_get_jf_ancestor_agg_preserved (jfunc))
5627     {
5628       ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
5629       int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
5630       class ipcp_param_lattices *src_plats;
5631       HOST_WIDE_INT delta = ipa_get_jf_ancestor_offset (jfunc);
5632 
5633       if (caller_info->ipcp_orig_node)
5634           {
5635             if (!inter.exists ())
5636               inter = agg_replacements_to_vector (cs->caller, src_idx, delta);
5637             else
5638               intersect_with_agg_replacements (cs->caller, src_idx, &inter,
5639                                                        delta);
5640           }
5641       else
5642           {
5643             src_plats = ipa_get_parm_lattices (caller_info, src_idx);
5644             /* Currently we do not produce clobber aggregate jump
5645                functions, adjust when we do.  */
5646             gcc_checking_assert (!src_plats->aggs || !jfunc->agg.items);
5647             if (!inter.exists ())
5648               inter = copy_plats_to_inter (src_plats, delta);
5649             else
5650               intersect_with_plats (src_plats, &inter, delta);
5651           }
5652       return inter;
5653     }
5654 
5655   if (jfunc->agg.items)
5656     {
5657       ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
5658       struct ipa_agg_value *item;
5659       int k;
5660 
5661       if (!inter.exists ())
5662           for (unsigned i = 0; i < jfunc->agg.items->length (); i++)
5663             {
5664               struct ipa_agg_jf_item *agg_item = &(*jfunc->agg.items)[i];
5665               tree value = ipa_agg_value_from_node (caller_info, cs->caller,
5666                                                               agg_item);
5667               if (value)
5668                 {
5669                     struct ipa_agg_value agg_value;
5670 
5671                     agg_value.value = value;
5672                     agg_value.offset = agg_item->offset;
5673                     inter.safe_push (agg_value);
5674                 }
5675             }
5676       else
5677           FOR_EACH_VEC_ELT (inter, k, item)
5678             {
5679               int l = 0;
5680               bool found = false;
5681 
5682               if (!item->value)
5683                 continue;
5684 
5685               while ((unsigned) l < jfunc->agg.items->length ())
5686                 {
5687                     struct ipa_agg_jf_item *ti;
5688                     ti = &(*jfunc->agg.items)[l];
5689                     if (ti->offset > item->offset)
5690                       break;
5691                     if (ti->offset == item->offset)
5692                       {
5693                         tree value;
5694 
5695                         /* Besides simple pass-through aggregate jump function,
5696                            arithmetic aggregate jump function could also bring
5697                            same aggregate value as parameter passed-in for
5698                            self-feeding recursive call.  For example,
5699 
5700                              fn (int *i)
5701                                {
5702                                  int j = *i & 1;
5703                                  fn (&j);
5704                                }
5705 
5706                            Given that *i is 0, recursive propagation via (*i & 1)
5707                            also gets 0.  */
5708                         if (self_recursive_agg_pass_through_p (cs, ti, index,
5709                                                                          false))
5710                           value = ipa_get_jf_arith_result (
5711                                                   ti->value.pass_through.operation,
5712                                                   item->value,
5713                                                   ti->value.pass_through.operand,
5714                                                   ti->type);
5715                         else
5716                           value = ipa_agg_value_from_node (caller_info,
5717                                                                    cs->caller, ti);
5718 
5719                         if (value && values_equal_for_ipcp_p (item->value, value))
5720                           found = true;
5721                         break;
5722                       }
5723                     l++;
5724                 }
5725               if (!found)
5726                 item->value = NULL;
5727             }
5728     }
5729   else
5730     {
5731       inter.release ();
5732       return vNULL;
5733     }
5734   return inter;
5735 }
5736 
5737 /* Look at edges in CALLERS and collect all known aggregate values that arrive
5738    from all of them.  */
5739 
5740 static struct ipa_agg_replacement_value *
find_aggregate_values_for_callers_subset(struct cgraph_node * node,const vec<cgraph_edge * > & callers)5741 find_aggregate_values_for_callers_subset (struct cgraph_node *node,
5742                                                     const vec<cgraph_edge *> &callers)
5743 {
5744   ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5745   struct ipa_agg_replacement_value *res;
5746   struct ipa_agg_replacement_value **tail = &res;
5747   struct cgraph_edge *cs;
5748   int i, j, count = ipa_get_param_count (dest_info);
5749 
5750   FOR_EACH_VEC_ELT (callers, j, cs)
5751     {
5752       ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5753       if (!args)
5754           {
5755             count = 0;
5756             break;
5757           }
5758       int c = ipa_get_cs_argument_count (args);
5759       if (c < count)
5760           count = c;
5761     }
5762 
5763   for (i = 0; i < count; i++)
5764     {
5765       struct cgraph_edge *cs;
5766       vec<ipa_agg_value> inter = vNULL;
5767       struct ipa_agg_value *item;
5768       class ipcp_param_lattices *plats = ipa_get_parm_lattices (dest_info, i);
5769       int j;
5770 
5771       /* Among other things, the following check should deal with all by_ref
5772            mismatches.  */
5773       if (plats->aggs_bottom)
5774           continue;
5775 
5776       FOR_EACH_VEC_ELT (callers, j, cs)
5777           {
5778             struct ipa_jump_func *jfunc
5779               = ipa_get_ith_jump_func (ipa_edge_args_sum->get (cs), i);
5780             if (self_recursive_pass_through_p (cs, jfunc, i)
5781                 && (!plats->aggs_by_ref
5782                       || ipa_get_jf_pass_through_agg_preserved (jfunc)))
5783               continue;
5784             inter = intersect_aggregates_with_edge (cs, i, inter);
5785 
5786             if (!inter.exists ())
5787               goto next_param;
5788           }
5789 
5790       FOR_EACH_VEC_ELT (inter, j, item)
5791           {
5792             struct ipa_agg_replacement_value *v;
5793 
5794             if (!item->value)
5795               continue;
5796 
5797             v = ggc_alloc<ipa_agg_replacement_value> ();
5798             v->index = i;
5799             v->offset = item->offset;
5800             v->value = item->value;
5801             v->by_ref = plats->aggs_by_ref;
5802             *tail = v;
5803             tail = &v->next;
5804           }
5805 
5806     next_param:
5807       if (inter.exists ())
5808           inter.release ();
5809     }
5810   *tail = NULL;
5811   return res;
5812 }
5813 
5814 /* Determine whether CS also brings all scalar values that the NODE is
5815    specialized for.  */
5816 
5817 static bool
cgraph_edge_brings_all_scalars_for_node(struct cgraph_edge * cs,struct cgraph_node * node)5818 cgraph_edge_brings_all_scalars_for_node (struct cgraph_edge *cs,
5819                                                    struct cgraph_node *node)
5820 {
5821   ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5822   int count = ipa_get_param_count (dest_info);
5823   class ipa_node_params *caller_info;
5824   class ipa_edge_args *args;
5825   int i;
5826 
5827   caller_info = ipa_node_params_sum->get (cs->caller);
5828   args = ipa_edge_args_sum->get (cs);
5829   for (i = 0; i < count; i++)
5830     {
5831       struct ipa_jump_func *jump_func;
5832       tree val, t;
5833 
5834       val = dest_info->known_csts[i];
5835       if (!val)
5836           continue;
5837 
5838       if (i >= ipa_get_cs_argument_count (args))
5839           return false;
5840       jump_func = ipa_get_ith_jump_func (args, i);
5841       t = ipa_value_from_jfunc (caller_info, jump_func,
5842                                         ipa_get_type (dest_info, i));
5843       if (!t || !values_equal_for_ipcp_p (val, t))
5844           return false;
5845     }
5846   return true;
5847 }
5848 
5849 /* Determine whether CS also brings all aggregate values that NODE is
5850    specialized for.  */
5851 static bool
cgraph_edge_brings_all_agg_vals_for_node(struct cgraph_edge * cs,struct cgraph_node * node)5852 cgraph_edge_brings_all_agg_vals_for_node (struct cgraph_edge *cs,
5853                                                     struct cgraph_node *node)
5854 {
5855   struct ipa_agg_replacement_value *aggval;
5856   int i, ec, count;
5857 
5858   aggval = ipa_get_agg_replacements_for_node (node);
5859   if (!aggval)
5860     return true;
5861 
5862   ipa_node_params *clone_node_info = ipa_node_params_sum->get (node);
5863   count = ipa_get_param_count (clone_node_info);
5864   ec = ipa_get_cs_argument_count (ipa_edge_args_sum->get (cs));
5865   if (ec < count)
5866     for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next)
5867       if (aggval->index >= ec)
5868           return false;
5869 
5870   ipa_node_params *orig_node_info
5871     = ipa_node_params_sum->get (clone_node_info->ipcp_orig_node);
5872 
5873   for (i = 0; i < count; i++)
5874     {
5875       class ipcp_param_lattices *plats;
5876       bool interesting = false;
5877       for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next)
5878           if (aggval->index == i)
5879             {
5880               interesting = true;
5881               break;
5882             }
5883       if (!interesting)
5884           continue;
5885 
5886       plats = ipa_get_parm_lattices (orig_node_info, aggval->index);
5887       if (plats->aggs_bottom)
5888           return false;
5889 
5890       vec<ipa_agg_value> values = intersect_aggregates_with_edge (cs, i, vNULL);
5891       if (!values.exists ())
5892           return false;
5893 
5894       for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next)
5895           if (aggval->index == i)
5896             {
5897               struct ipa_agg_value *item;
5898               int j;
5899               bool found = false;
5900               FOR_EACH_VEC_ELT (values, j, item)
5901                 if (item->value
5902                       && item->offset == av->offset
5903                       && values_equal_for_ipcp_p (item->value, av->value))
5904                     {
5905                       found = true;
5906                       break;
5907                     }
5908               if (!found)
5909                 {
5910                     values.release ();
5911                     return false;
5912                 }
5913             }
5914       values.release ();
5915     }
5916   return true;
5917 }
5918 
5919 /* Given an original NODE and a VAL for which we have already created a
5920    specialized clone, look whether there are incoming edges that still lead
5921    into the old node but now also bring the requested value and also conform to
5922    all other criteria such that they can be redirected the special node.
5923    This function can therefore redirect the final edge in a SCC.  */
5924 
5925 template <typename valtype>
5926 static void
perhaps_add_new_callers(cgraph_node * node,ipcp_value<valtype> * val)5927 perhaps_add_new_callers (cgraph_node *node, ipcp_value<valtype> *val)
5928 {
5929   ipcp_value_source<valtype> *src;
5930   profile_count redirected_sum = profile_count::zero ();
5931 
5932   for (src = val->sources; src; src = src->next)
5933     {
5934       struct cgraph_edge *cs = src->cs;
5935       while (cs)
5936           {
5937             if (cgraph_edge_brings_value_p (cs, src, node, val)
5938                 && cgraph_edge_brings_all_scalars_for_node (cs, val->spec_node)
5939                 && cgraph_edge_brings_all_agg_vals_for_node (cs, val->spec_node))
5940               {
5941                 if (dump_file)
5942                     fprintf (dump_file, " - adding an extra caller %s of %s\n",
5943                                cs->caller->dump_name (),
5944                                val->spec_node->dump_name ());
5945 
5946                 cs->redirect_callee_duplicating_thunks (val->spec_node);
5947                 val->spec_node->expand_all_artificial_thunks ();
5948                 if (cs->count.ipa ().initialized_p ())
5949                   redirected_sum = redirected_sum + cs->count.ipa ();
5950               }
5951             cs = get_next_cgraph_edge_clone (cs);
5952           }
5953     }
5954 
5955   if (redirected_sum.nonzero_p ())
5956     update_specialized_profile (val->spec_node, node, redirected_sum);
5957 }
5958 
5959 /* Return true if KNOWN_CONTEXTS contain at least one useful context.  */
5960 
5961 static bool
known_contexts_useful_p(vec<ipa_polymorphic_call_context> known_contexts)5962 known_contexts_useful_p (vec<ipa_polymorphic_call_context> known_contexts)
5963 {
5964   ipa_polymorphic_call_context *ctx;
5965   int i;
5966 
5967   FOR_EACH_VEC_ELT (known_contexts, i, ctx)
5968     if (!ctx->useless_p ())
5969       return true;
5970   return false;
5971 }
5972 
5973 /* Return a copy of KNOWN_CSTS if it is not empty, otherwise return vNULL.  */
5974 
5975 static vec<ipa_polymorphic_call_context>
copy_useful_known_contexts(const vec<ipa_polymorphic_call_context> & known_contexts)5976 copy_useful_known_contexts (const vec<ipa_polymorphic_call_context> &known_contexts)
5977 {
5978   if (known_contexts_useful_p (known_contexts))
5979     return known_contexts.copy ();
5980   else
5981     return vNULL;
5982 }
5983 
5984 /* Copy known scalar values from AVALS into KNOWN_CSTS and modify the copy
5985    according to VAL and INDEX.  If non-empty, replace KNOWN_CONTEXTS with its
5986    copy too.  */
5987 
5988 static void
copy_known_vectors_add_val(ipa_auto_call_arg_values * avals,vec<tree> * known_csts,vec<ipa_polymorphic_call_context> * known_contexts,ipcp_value<tree> * val,int index)5989 copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
5990                                   vec<tree> *known_csts,
5991                                   vec<ipa_polymorphic_call_context> *known_contexts,
5992                                   ipcp_value<tree> *val, int index)
5993 {
5994   *known_csts = avals->m_known_vals.copy ();
5995   *known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
5996   (*known_csts)[index] = val->value;
5997 }
5998 
5999 /* Copy known scalar values from AVALS into KNOWN_CSTS.  Similarly, copy
6000    contexts to KNOWN_CONTEXTS and modify the copy according to VAL and
6001    INDEX.  */
6002 
6003 static void
copy_known_vectors_add_val(ipa_auto_call_arg_values * avals,vec<tree> * known_csts,vec<ipa_polymorphic_call_context> * known_contexts,ipcp_value<ipa_polymorphic_call_context> * val,int index)6004 copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
6005                                   vec<tree> *known_csts,
6006                                   vec<ipa_polymorphic_call_context> *known_contexts,
6007                                   ipcp_value<ipa_polymorphic_call_context> *val,
6008                                   int index)
6009 {
6010   *known_csts = avals->m_known_vals.copy ();
6011   *known_contexts = avals->m_known_contexts.copy ();
6012   (*known_contexts)[index] = val->value;
6013 }
6014 
6015 /* Return true if OFFSET indicates this was not an aggregate value or there is
6016    a replacement equivalent to VALUE, INDEX and OFFSET among those in the
6017    AGGVALS list.  */
6018 
6019 DEBUG_FUNCTION bool
ipcp_val_agg_replacement_ok_p(ipa_agg_replacement_value * aggvals,int index,HOST_WIDE_INT offset,tree value)6020 ipcp_val_agg_replacement_ok_p (ipa_agg_replacement_value *aggvals,
6021                                      int index, HOST_WIDE_INT offset, tree value)
6022 {
6023   if (offset == -1)
6024     return true;
6025 
6026   while (aggvals)
6027     {
6028       if (aggvals->index == index
6029             && aggvals->offset == offset
6030             && values_equal_for_ipcp_p (aggvals->value, value))
6031           return true;
6032       aggvals = aggvals->next;
6033     }
6034   return false;
6035 }
6036 
6037 /* Return true if offset is minus one because source of a polymorphic context
6038    cannot be an aggregate value.  */
6039 
6040 DEBUG_FUNCTION bool
ipcp_val_agg_replacement_ok_p(ipa_agg_replacement_value *,int,HOST_WIDE_INT offset,ipa_polymorphic_call_context)6041 ipcp_val_agg_replacement_ok_p (ipa_agg_replacement_value *,
6042                                      int , HOST_WIDE_INT offset,
6043                                      ipa_polymorphic_call_context)
6044 {
6045   return offset == -1;
6046 }
6047 
6048 /* Decide whether to create a special version of NODE for value VAL of
6049    parameter at the given INDEX.  If OFFSET is -1, the value is for the
6050    parameter itself, otherwise it is stored at the given OFFSET of the
6051    parameter.  AVALS describes the other already known values.  SELF_GEN_CLONES
6052    is a vector which contains clones created for self-recursive calls with an
6053    arithmetic pass-through jump function.  */
6054 
6055 template <typename valtype>
6056 static bool
decide_about_value(struct cgraph_node * node,int index,HOST_WIDE_INT offset,ipcp_value<valtype> * val,ipa_auto_call_arg_values * avals,vec<cgraph_node * > * self_gen_clones)6057 decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset,
6058                         ipcp_value<valtype> *val, ipa_auto_call_arg_values *avals,
6059                         vec<cgraph_node *> *self_gen_clones)
6060 {
6061   struct ipa_agg_replacement_value *aggvals;
6062   int caller_count;
6063   sreal freq_sum;
6064   profile_count count_sum, rec_count_sum;
6065   vec<cgraph_edge *> callers;
6066 
6067   if (val->spec_node)
6068     {
6069       perhaps_add_new_callers (node, val);
6070       return false;
6071     }
6072   else if (val->local_size_cost + overall_size > get_max_overall_size (node))
6073     {
6074       if (dump_file && (dump_flags & TDF_DETAILS))
6075           fprintf (dump_file, "   Ignoring candidate value because "
6076                      "maximum unit size would be reached with %li.\n",
6077                      val->local_size_cost + overall_size);
6078       return false;
6079     }
6080   else if (!get_info_about_necessary_edges (val, node, &freq_sum, &caller_count,
6081                                                       &rec_count_sum, &count_sum))
6082     return false;
6083 
6084   if (!dbg_cnt (ipa_cp_values))
6085     return false;
6086 
6087   if (val->self_recursion_generated_p ())
6088     {
6089       /* The edge counts in this case might not have been adjusted yet.
6090            Nevertleless, even if they were it would be only a guesswork which we
6091            can do now.  The recursive part of the counts can be derived from the
6092            count of the original node anyway.  */
6093       if (node->count.ipa ().nonzero_p ())
6094           {
6095             unsigned dem = self_gen_clones->length () + 1;
6096             rec_count_sum = node->count.ipa ().apply_scale (1, dem);
6097           }
6098       else
6099           rec_count_sum = profile_count::zero ();
6100     }
6101 
6102   /* get_info_about_necessary_edges only sums up ipa counts.  */
6103   count_sum += rec_count_sum;
6104 
6105   if (dump_file && (dump_flags & TDF_DETAILS))
6106     {
6107       fprintf (dump_file, " - considering value ");
6108       print_ipcp_constant_value (dump_file, val->value);
6109       fprintf (dump_file, " for ");
6110       ipa_dump_param (dump_file, ipa_node_params_sum->get (node), index);
6111       if (offset != -1)
6112           fprintf (dump_file, ", offset: " HOST_WIDE_INT_PRINT_DEC, offset);
6113       fprintf (dump_file, " (caller_count: %i)\n", caller_count);
6114     }
6115 
6116   if (!good_cloning_opportunity_p (node, val->local_time_benefit,
6117                                            freq_sum, count_sum,
6118                                            val->local_size_cost)
6119       && !good_cloning_opportunity_p (node, val->prop_time_benefit,
6120                                               freq_sum, count_sum, val->prop_size_cost))
6121     return false;
6122 
6123   if (dump_file)
6124     fprintf (dump_file, "  Creating a specialized node of %s.\n",
6125                node->dump_name ());
6126 
6127   vec<tree> known_csts;
6128   vec<ipa_polymorphic_call_context> known_contexts;
6129 
6130   callers = gather_edges_for_value (val, node, caller_count);
6131   if (offset == -1)
6132     copy_known_vectors_add_val (avals, &known_csts, &known_contexts, val, index);
6133   else
6134     {
6135       known_csts = avals->m_known_vals.copy ();
6136       known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
6137     }
6138   find_more_scalar_values_for_callers_subset (node, known_csts, callers);
6139   find_more_contexts_for_caller_subset (node, &known_contexts, callers);
6140   aggvals = find_aggregate_values_for_callers_subset (node, callers);
6141   gcc_checking_assert (ipcp_val_agg_replacement_ok_p (aggvals, index,
6142                                                                   offset, val->value));
6143   val->spec_node = create_specialized_node (node, known_csts, known_contexts,
6144                                                       aggvals, callers);
6145 
6146   if (val->self_recursion_generated_p ())
6147     self_gen_clones->safe_push (val->spec_node);
6148   else
6149     update_profiling_info (node, val->spec_node);
6150 
6151   callers.release ();
6152   overall_size += val->local_size_cost;
6153   if (dump_file && (dump_flags & TDF_DETAILS))
6154     fprintf (dump_file, "     overall size reached %li\n",
6155                overall_size);
6156 
6157   /* TODO: If for some lattice there is only one other known value
6158      left, make a special node for it too. */
6159 
6160   return true;
6161 }
6162 
6163 /* Decide whether and what specialized clones of NODE should be created.  */
6164 
6165 static bool
decide_whether_version_node(struct cgraph_node * node)6166 decide_whether_version_node (struct cgraph_node *node)
6167 {
6168   ipa_node_params *info = ipa_node_params_sum->get (node);
6169   int i, count = ipa_get_param_count (info);
6170   bool ret = false;
6171 
6172   if (count == 0)
6173     return false;
6174 
6175   if (dump_file && (dump_flags & TDF_DETAILS))
6176     fprintf (dump_file, "\nEvaluating opportunities for %s.\n",
6177                node->dump_name ());
6178 
6179   auto_vec <cgraph_node *, 9> self_gen_clones;
6180   ipa_auto_call_arg_values avals;
6181   gather_context_independent_values (info, &avals, false, NULL);
6182 
6183   for (i = 0; i < count;i++)
6184     {
6185       class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6186       ipcp_lattice<tree> *lat = &plats->itself;
6187       ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
6188 
6189       if (!lat->bottom
6190             && !avals.m_known_vals[i])
6191           {
6192             ipcp_value<tree> *val;
6193             for (val = lat->values; val; val = val->next)
6194               {
6195                 /* If some values generated for self-recursive calls with
6196                      arithmetic jump functions fall outside of the known
6197                      value_range for the parameter, we can skip them.  VR interface
6198                      supports this only for integers now.  */
6199                 if (TREE_CODE (val->value) == INTEGER_CST
6200                       && !plats->m_value_range.bottom_p ()
6201                       && !plats->m_value_range.m_vr.contains_p (val->value))
6202                     {
6203                       /* This can happen also if a constant present in the source
6204                          code falls outside of the range of parameter's type, so we
6205                          cannot assert.  */
6206                       if (dump_file && (dump_flags & TDF_DETAILS))
6207                         {
6208                           fprintf (dump_file, " - skipping%s value ",
6209                                      val->self_recursion_generated_p ()
6210                                      ? " self_recursion_generated" : "");
6211                           print_ipcp_constant_value (dump_file, val->value);
6212                           fprintf (dump_file, " because it is outside known "
6213                                      "value range.\n");
6214                         }
6215                       continue;
6216                     }
6217                 ret |= decide_about_value (node, i, -1, val, &avals,
6218                                                    &self_gen_clones);
6219               }
6220           }
6221 
6222       if (!plats->aggs_bottom)
6223           {
6224             struct ipcp_agg_lattice *aglat;
6225             ipcp_value<tree> *val;
6226             for (aglat = plats->aggs; aglat; aglat = aglat->next)
6227               if (!aglat->bottom && aglat->values
6228                     /* If the following is false, the one value has been considered
6229                        for cloning for all contexts.  */
6230                     && (plats->aggs_contain_variable
6231                         || !aglat->is_single_const ()))
6232                 for (val = aglat->values; val; val = val->next)
6233                     ret |= decide_about_value (node, i, aglat->offset, val, &avals,
6234                                                      &self_gen_clones);
6235           }
6236 
6237       if (!ctxlat->bottom
6238             && avals.m_known_contexts[i].useless_p ())
6239           {
6240             ipcp_value<ipa_polymorphic_call_context> *val;
6241             for (val = ctxlat->values; val; val = val->next)
6242               ret |= decide_about_value (node, i, -1, val, &avals,
6243                                                &self_gen_clones);
6244           }
6245     }
6246 
6247   if (!self_gen_clones.is_empty ())
6248     {
6249       self_gen_clones.safe_push (node);
6250       update_counts_for_self_gen_clones (node, self_gen_clones);
6251     }
6252 
6253   if (info->do_clone_for_all_contexts)
6254     {
6255       if (!dbg_cnt (ipa_cp_values))
6256           {
6257             info->do_clone_for_all_contexts = false;
6258             return ret;
6259           }
6260 
6261       struct cgraph_node *clone;
6262       auto_vec<cgraph_edge *> callers = node->collect_callers ();
6263 
6264       for (int i = callers.length () - 1; i >= 0; i--)
6265           {
6266             cgraph_edge *cs = callers[i];
6267             ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
6268 
6269             if (caller_info && caller_info->node_dead)
6270               callers.unordered_remove (i);
6271           }
6272 
6273       if (!adjust_callers_for_value_intersection (callers, node))
6274           {
6275             /* If node is not called by anyone, or all its caller edges are
6276                self-recursive, the node is not really in use, no need to do
6277                cloning.  */
6278             info->do_clone_for_all_contexts = false;
6279             return ret;
6280           }
6281 
6282       if (dump_file)
6283           fprintf (dump_file, " - Creating a specialized node of %s "
6284                      "for all known contexts.\n", node->dump_name ());
6285 
6286       vec<tree> known_csts = avals.m_known_vals.copy ();
6287       vec<ipa_polymorphic_call_context> known_contexts
6288           = copy_useful_known_contexts (avals.m_known_contexts);
6289       find_more_scalar_values_for_callers_subset (node, known_csts, callers);
6290       find_more_contexts_for_caller_subset (node, &known_contexts, callers);
6291       ipa_agg_replacement_value *aggvals
6292           = find_aggregate_values_for_callers_subset (node, callers);
6293 
6294       if (!known_contexts_useful_p (known_contexts))
6295           {
6296             known_contexts.release ();
6297             known_contexts = vNULL;
6298           }
6299       clone = create_specialized_node (node, known_csts, known_contexts,
6300                                                aggvals, callers);
6301       info->do_clone_for_all_contexts = false;
6302       ipa_node_params_sum->get (clone)->is_all_contexts_clone = true;
6303       ret = true;
6304     }
6305 
6306   return ret;
6307 }
6308 
6309 /* Transitively mark all callees of NODE within the same SCC as not dead.  */
6310 
6311 static void
spread_undeadness(struct cgraph_node * node)6312 spread_undeadness (struct cgraph_node *node)
6313 {
6314   struct cgraph_edge *cs;
6315 
6316   for (cs = node->callees; cs; cs = cs->next_callee)
6317     if (ipa_edge_within_scc (cs))
6318       {
6319           struct cgraph_node *callee;
6320           class ipa_node_params *info;
6321 
6322           callee = cs->callee->function_symbol (NULL);
6323           info = ipa_node_params_sum->get (callee);
6324 
6325           if (info && info->node_dead)
6326             {
6327               info->node_dead = 0;
6328               spread_undeadness (callee);
6329             }
6330       }
6331 }
6332 
6333 /* Return true if NODE has a caller from outside of its SCC that is not
6334    dead.  Worker callback for cgraph_for_node_and_aliases.  */
6335 
6336 static bool
has_undead_caller_from_outside_scc_p(struct cgraph_node * node,void * data ATTRIBUTE_UNUSED)6337 has_undead_caller_from_outside_scc_p (struct cgraph_node *node,
6338                                               void *data ATTRIBUTE_UNUSED)
6339 {
6340   struct cgraph_edge *cs;
6341 
6342   for (cs = node->callers; cs; cs = cs->next_caller)
6343     if (cs->caller->thunk
6344           && cs->caller->call_for_symbol_thunks_and_aliases
6345             (has_undead_caller_from_outside_scc_p, NULL, true))
6346       return true;
6347     else if (!ipa_edge_within_scc (cs))
6348       {
6349           ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
6350           if (!caller_info /* Unoptimized caller are like dead ones.  */
6351               || !caller_info->node_dead)
6352             return true;
6353       }
6354   return false;
6355 }
6356 
6357 
6358 /* Identify nodes within the same SCC as NODE which are no longer needed
6359    because of new clones and will be removed as unreachable.  */
6360 
6361 static void
identify_dead_nodes(struct cgraph_node * node)6362 identify_dead_nodes (struct cgraph_node *node)
6363 {
6364   struct cgraph_node *v;
6365   for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6366     if (v->local)
6367       {
6368           ipa_node_params *info = ipa_node_params_sum->get (v);
6369           if (info
6370               && !v->call_for_symbol_thunks_and_aliases
6371                 (has_undead_caller_from_outside_scc_p, NULL, true))
6372             info->node_dead = 1;
6373       }
6374 
6375   for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6376     {
6377       ipa_node_params *info = ipa_node_params_sum->get (v);
6378       if (info && !info->node_dead)
6379           spread_undeadness (v);
6380     }
6381 
6382   if (dump_file && (dump_flags & TDF_DETAILS))
6383     {
6384       for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6385           if (ipa_node_params_sum->get (v)
6386               && ipa_node_params_sum->get (v)->node_dead)
6387             fprintf (dump_file, "  Marking node as dead: %s.\n",
6388                        v->dump_name ());
6389     }
6390 }
6391 
6392 /* The decision stage.  Iterate over the topological order of call graph nodes
6393    TOPO and make specialized clones if deemed beneficial.  */
6394 
6395 static void
ipcp_decision_stage(class ipa_topo_info * topo)6396 ipcp_decision_stage (class ipa_topo_info *topo)
6397 {
6398   int i;
6399 
6400   if (dump_file)
6401     fprintf (dump_file, "\nIPA decision stage:\n\n");
6402 
6403   for (i = topo->nnodes - 1; i >= 0; i--)
6404     {
6405       struct cgraph_node *node = topo->order[i];
6406       bool change = false, iterate = true;
6407 
6408       while (iterate)
6409           {
6410             struct cgraph_node *v;
6411             iterate = false;
6412             for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6413               if (v->has_gimple_body_p ()
6414                     && ipcp_versionable_function_p (v))
6415                 iterate |= decide_whether_version_node (v);
6416 
6417             change |= iterate;
6418           }
6419       if (change)
6420           identify_dead_nodes (node);
6421     }
6422 }
6423 
6424 /* Look up all the bits information that we have discovered and copy it over
6425    to the transformation summary.  */
6426 
6427 static void
ipcp_store_bits_results(void)6428 ipcp_store_bits_results (void)
6429 {
6430   cgraph_node *node;
6431 
6432   FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6433     {
6434       ipa_node_params *info = ipa_node_params_sum->get (node);
6435       bool dumped_sth = false;
6436       bool found_useful_result = false;
6437 
6438       if (!opt_for_fn (node->decl, flag_ipa_bit_cp) || !info)
6439           {
6440             if (dump_file)
6441               fprintf (dump_file, "Not considering %s for ipa bitwise propagation "
6442                                         "; -fipa-bit-cp: disabled.\n",
6443                                         node->dump_name ());
6444             continue;
6445           }
6446 
6447       if (info->ipcp_orig_node)
6448           info = ipa_node_params_sum->get (info->ipcp_orig_node);
6449       if (!info->lattices)
6450           /* Newly expanded artificial thunks do not have lattices.  */
6451           continue;
6452 
6453       unsigned count = ipa_get_param_count (info);
6454       for (unsigned i = 0; i < count; i++)
6455           {
6456             ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6457             if (plats->bits_lattice.constant_p ())
6458               {
6459                 found_useful_result = true;
6460                 break;
6461               }
6462           }
6463 
6464       if (!found_useful_result)
6465           continue;
6466 
6467       ipcp_transformation_initialize ();
6468       ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
6469       vec_safe_reserve_exact (ts->bits, count);
6470 
6471       for (unsigned i = 0; i < count; i++)
6472           {
6473             ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6474             ipa_bits *jfbits;
6475 
6476             if (plats->bits_lattice.constant_p ())
6477               {
6478                 jfbits
6479                     = ipa_get_ipa_bits_for_value (plats->bits_lattice.get_value (),
6480                                                         plats->bits_lattice.get_mask ());
6481                 if (!dbg_cnt (ipa_cp_bits))
6482                     jfbits = NULL;
6483               }
6484             else
6485               jfbits = NULL;
6486 
6487             ts->bits->quick_push (jfbits);
6488             if (!dump_file || !jfbits)
6489               continue;
6490             if (!dumped_sth)
6491               {
6492                 fprintf (dump_file, "Propagated bits info for function %s:\n",
6493                            node->dump_name ());
6494                 dumped_sth = true;
6495               }
6496             fprintf (dump_file, " param %i: value = ", i);
6497             print_hex (jfbits->value, dump_file);
6498             fprintf (dump_file, ", mask = ");
6499             print_hex (jfbits->mask, dump_file);
6500             fprintf (dump_file, "\n");
6501           }
6502     }
6503 }
6504 
6505 /* Look up all VR information that we have discovered and copy it over
6506    to the transformation summary.  */
6507 
6508 static void
ipcp_store_vr_results(void)6509 ipcp_store_vr_results (void)
6510 {
6511   cgraph_node *node;
6512 
6513   FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6514     {
6515       ipa_node_params *info = ipa_node_params_sum->get (node);
6516       bool found_useful_result = false;
6517 
6518       if (!info || !opt_for_fn (node->decl, flag_ipa_vrp))
6519           {
6520             if (dump_file)
6521               fprintf (dump_file, "Not considering %s for VR discovery "
6522                          "and propagate; -fipa-ipa-vrp: disabled.\n",
6523                          node->dump_name ());
6524             continue;
6525           }
6526 
6527       if (info->ipcp_orig_node)
6528           info = ipa_node_params_sum->get (info->ipcp_orig_node);
6529       if (!info->lattices)
6530           /* Newly expanded artificial thunks do not have lattices.  */
6531           continue;
6532 
6533       unsigned count = ipa_get_param_count (info);
6534       for (unsigned i = 0; i < count; i++)
6535           {
6536             ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6537             if (!plats->m_value_range.bottom_p ()
6538                 && !plats->m_value_range.top_p ())
6539               {
6540                 found_useful_result = true;
6541                 break;
6542               }
6543           }
6544       if (!found_useful_result)
6545           continue;
6546 
6547       ipcp_transformation_initialize ();
6548       ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
6549       vec_safe_reserve_exact (ts->m_vr, count);
6550 
6551       for (unsigned i = 0; i < count; i++)
6552           {
6553             ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6554             ipa_vr vr;
6555 
6556             if (!plats->m_value_range.bottom_p ()
6557                 && !plats->m_value_range.top_p ()
6558                 && dbg_cnt (ipa_cp_vr))
6559               {
6560                 vr.known = true;
6561                 vr.type = plats->m_value_range.m_vr.kind ();
6562                 vr.min = wi::to_wide (plats->m_value_range.m_vr.min ());
6563                 vr.max = wi::to_wide (plats->m_value_range.m_vr.max ());
6564               }
6565             else
6566               {
6567                 vr.known = false;
6568                 vr.type = VR_VARYING;
6569                 vr.min = vr.max = wi::zero (INT_TYPE_SIZE);
6570               }
6571             ts->m_vr->quick_push (vr);
6572           }
6573     }
6574 }
6575 
6576 /* The IPCP driver.  */
6577 
6578 static unsigned int
ipcp_driver(void)6579 ipcp_driver (void)
6580 {
6581   class ipa_topo_info topo;
6582 
6583   if (edge_clone_summaries == NULL)
6584     edge_clone_summaries = new edge_clone_summary_t (symtab);
6585 
6586   ipa_check_create_node_params ();
6587   ipa_check_create_edge_args ();
6588   clone_num_suffixes = new hash_map<const char *, unsigned>;
6589 
6590   if (dump_file)
6591     {
6592       fprintf (dump_file, "\nIPA structures before propagation:\n");
6593       if (dump_flags & TDF_DETAILS)
6594           ipa_print_all_params (dump_file);
6595       ipa_print_all_jump_functions (dump_file);
6596     }
6597 
6598   /* Topological sort.  */
6599   build_toporder_info (&topo);
6600   /* Do the interprocedural propagation.  */
6601   ipcp_propagate_stage (&topo);
6602   /* Decide what constant propagation and cloning should be performed.  */
6603   ipcp_decision_stage (&topo);
6604   /* Store results of bits propagation.  */
6605   ipcp_store_bits_results ();
6606   /* Store results of value range propagation.  */
6607   ipcp_store_vr_results ();
6608 
6609   /* Free all IPCP structures.  */
6610   delete clone_num_suffixes;
6611   free_toporder_info (&topo);
6612   delete edge_clone_summaries;
6613   edge_clone_summaries = NULL;
6614   ipa_free_all_structures_after_ipa_cp ();
6615   if (dump_file)
6616     fprintf (dump_file, "\nIPA constant propagation end\n");
6617   return 0;
6618 }
6619 
6620 /* Initialization and computation of IPCP data structures.  This is the initial
6621    intraprocedural analysis of functions, which gathers information to be
6622    propagated later on.  */
6623 
6624 static void
ipcp_generate_summary(void)6625 ipcp_generate_summary (void)
6626 {
6627   struct cgraph_node *node;
6628 
6629   if (dump_file)
6630     fprintf (dump_file, "\nIPA constant propagation start:\n");
6631   ipa_register_cgraph_hooks ();
6632 
6633   FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6634     ipa_analyze_node (node);
6635 }
6636 
6637 namespace {
6638 
6639 const pass_data pass_data_ipa_cp =
6640 {
6641   IPA_PASS, /* type */
6642   "cp", /* name */
6643   OPTGROUP_NONE, /* optinfo_flags */
6644   TV_IPA_CONSTANT_PROP, /* tv_id */
6645   0, /* properties_required */
6646   0, /* properties_provided */
6647   0, /* properties_destroyed */
6648   0, /* todo_flags_start */
6649   ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
6650 };
6651 
6652 class pass_ipa_cp : public ipa_opt_pass_d
6653 {
6654 public:
pass_ipa_cp(gcc::context * ctxt)6655   pass_ipa_cp (gcc::context *ctxt)
6656     : ipa_opt_pass_d (pass_data_ipa_cp, ctxt,
6657                           ipcp_generate_summary, /* generate_summary */
6658                           NULL, /* write_summary */
6659                           NULL, /* read_summary */
6660                           ipcp_write_transformation_summaries, /*
6661                           write_optimization_summary */
6662                           ipcp_read_transformation_summaries, /*
6663                           read_optimization_summary */
6664                           NULL, /* stmt_fixup */
6665                           0, /* function_transform_todo_flags_start */
6666                           ipcp_transform_function, /* function_transform */
6667                           NULL) /* variable_transform */
6668   {}
6669 
6670   /* opt_pass methods: */
gate(function *)6671   virtual bool gate (function *)
6672     {
6673       /* FIXME: We should remove the optimize check after we ensure we never run
6674            IPA passes when not optimizing.  */
6675       return (flag_ipa_cp && optimize) || in_lto_p;
6676     }
6677 
execute(function *)6678   virtual unsigned int execute (function *) { return ipcp_driver (); }
6679 
6680 }; // class pass_ipa_cp
6681 
6682 } // anon namespace
6683 
6684 ipa_opt_pass_d *
make_pass_ipa_cp(gcc::context * ctxt)6685 make_pass_ipa_cp (gcc::context *ctxt)
6686 {
6687   return new pass_ipa_cp (ctxt);
6688 }
6689 
6690 /* Reset all state within ipa-cp.cc so that we can rerun the compiler
6691    within the same process.  For use by toplev::finalize.  */
6692 
6693 void
ipa_cp_cc_finalize(void)6694 ipa_cp_cc_finalize (void)
6695 {
6696   base_count = profile_count::uninitialized ();
6697   overall_size = 0;
6698   orig_overall_size = 0;
6699   ipcp_free_transformation_sum ();
6700 }
6701