/* Interprocedural constant propagation
   Copyright (C) 2005-2024 Free Software Foundation, Inc.

   Contributed by Razya Ladelsky <RAZYA@il.ibm.com> and Martin Jambor
   <mjambor@suse.cz>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* Interprocedural constant propagation (IPA-CP).

   The goal of this transformation is to

   1) discover functions which are always invoked with some arguments with the
      same known constant values and modify the functions so that the
      subsequent optimizations can take advantage of the knowledge, and

   2) partial specialization - create specialized versions of functions
      transformed in this way if some parameters are known constants only in
      certain contexts but the estimated tradeoff between speedup and growth
      in code size is deemed good (see the example sketched below).
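
   As a purely illustrative sketch (the function names below are made up and
   do not come from GCC or any real code base), consider:

     static int scale (int x, int factor) { return x * factor; }
     int f (int x) { return scale (x, 2); }
     int g (int x) { return scale (x, 2); }

   IPA-CP can discover that FACTOR is always 2 and use that knowledge either
   by optimizing SCALE itself or by creating a specialized clone (typically
   named something like scale.constprop.0) with FACTOR replaced by 2 and
   redirecting both calls to it.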

   The algorithm also propagates types and attempts to perform type based
   devirtualization.  Types are propagated much like constants.

   The algorithm basically consists of three stages.  In the first, functions
   are analyzed one at a time and jump functions are constructed for all known
   call-sites.  In the second, the pass propagates information from the jump
   functions across the calls to reveal what values are available at what call
   sites, performs estimations of effects of known values on functions and
   their callees, and finally decides what specialized extra versions should be
   created.  In the third, the special versions materialize and appropriate
   calls are redirected.

   The algorithm used is to a certain extent based on "Interprocedural Constant
   Propagation", by David Callahan, Keith D Cooper, Ken Kennedy, Linda Torczon,
   Comp86, pg 152-161 and "A Methodology for Procedure Cloning" by Keith D
   Cooper, Mary W. Hall, and Ken Kennedy.


   First stage - intraprocedural analysis
   =======================================

   This phase computes jump_function and modification flags.

   A jump function for a call-site represents the values passed as actual
   arguments of a given call-site.  In principle, there are three types of
   values:

   Pass through - the caller's formal parameter is passed as an actual
                  argument, plus an operation on it can be performed.
   Constant - a constant is passed as an actual argument.
   Unknown - neither of the above.
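
   For illustration only (a made-up fragment, not taken from GCC sources or
   any testcase):

     void foo (int a) { bar (a + 1, 7, a); }

   yields, for the call to BAR, an arithmetic pass-through jump function
   (operation PLUS_EXPR, operand 1) for the first argument, a constant jump
   function with value 7 for the second one and a simple pass-through for the
   third one.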

   All jump function types are described in detail in ipa-prop.h, together with
   the data structures that represent them and methods of accessing them.

   ipcp_generate_summary() is the main function of the first stage.

   Second stage - interprocedural analysis
   ========================================

   This stage is itself divided into two phases.  In the first, we propagate
   known values over the call graph, in the second, we make cloning decisions.
   It uses a different algorithm than the one in Callahan's original paper.

   First, we traverse the functions topologically from callers to callees and,
   for each strongly connected component (SCC), we propagate constants
   according to previously computed jump functions.  We also record what known
   values depend on other known values and estimate local effects.  Finally, we
   propagate cumulative information about these effects from dependent values
   to those on which they depend.

   Second, we again traverse the call graph in the same topological order and
   make clones for functions which we know are called with the same values in
   all contexts and decide about extra specialized clones of functions just for
   some contexts - these decisions are based on both local estimates and
   cumulative estimates propagated from callees.

   ipcp_propagate_stage() and ipcp_decision_stage() together constitute the
   second stage.

   Third phase - materialization of clones, call statement updates.
   ================================================================

   This stage is currently performed by call graph code (mainly in cgraphunit.cc
   and tree-inline.cc) according to instructions inserted into the call graph by
   the second stage.  */

#define INCLUDE_ALGORITHM
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple-expr.h"
#include "gimple.h"
#include "predict.h"
#include "sreal.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "symbol-summary.h"
#include "tree-vrp.h"
#include "ipa-cp.h"
#include "ipa-prop.h"
#include "tree-pretty-print.h"
#include "tree-inline.h"
#include "ipa-fnsummary.h"
#include "ipa-utils.h"
#include "tree-ssa-ccp.h"
#include "stringpool.h"
#include "attribs.h"
#include "dbgcnt.h"
#include "symtab-clones.h"
#include "gimple-range.h"


/* Allocation pools for values and their sources in ipa-cp.  */

object_allocator<ipcp_value<tree> > ipcp_cst_values_pool
  ("IPA-CP constant values");

object_allocator<ipcp_value<ipa_polymorphic_call_context> >
  ipcp_poly_ctx_values_pool ("IPA-CP polymorphic contexts");

object_allocator<ipcp_value_source<tree> > ipcp_sources_pool
  ("IPA-CP value sources");

object_allocator<ipcp_agg_lattice> ipcp_agg_lattice_pool
  ("IPA_CP aggregate lattices");

/* Base count to use in heuristics when using profile feedback.  */

static profile_count base_count;

/* Original overall size of the program.  */

static long overall_size, orig_overall_size;

/* Node name to unique clone suffix number map.  */
static hash_map<const char *, unsigned> *clone_num_suffixes;

/* Return the param lattices structure corresponding to the Ith formal
   parameter of the function described by INFO.  */
static inline class ipcp_param_lattices *
ipa_get_parm_lattices (class ipa_node_params *info, int i)
{
  gcc_assert (i >= 0 && i < ipa_get_param_count (info));
  gcc_checking_assert (!info->ipcp_orig_node);
  return &(info->lattices[i]);
}

/* Return the lattice corresponding to the scalar value of the Ith formal
   parameter of the function described by INFO.  */
static inline ipcp_lattice<tree> *
ipa_get_scalar_lat (class ipa_node_params *info, int i)
{
  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
  return &plats->itself;
}

/* Return the lattice corresponding to the polymorphic call context of the Ith
   formal parameter of the function described by INFO.  */
static inline ipcp_lattice<ipa_polymorphic_call_context> *
ipa_get_poly_ctx_lat (class ipa_node_params *info, int i)
{
  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
  return &plats->ctxlat;
}

/* Return whether LAT is a lattice with a single constant and without an
   undefined value.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::is_single_const ()
{
  if (bottom || contains_variable || values_count != 1)
    return false;
  else
    return true;
}

/* Return true iff X and Y should be considered equal values by IPA-CP.  */

bool
values_equal_for_ipcp_p (tree x, tree y)
{
  gcc_checking_assert (x != NULL_TREE && y != NULL_TREE);

  if (x == y)
    return true;

  if (TREE_CODE (x) == ADDR_EXPR
      && TREE_CODE (y) == ADDR_EXPR
      && (TREE_CODE (TREE_OPERAND (x, 0)) == CONST_DECL
          || (TREE_CODE (TREE_OPERAND (x, 0)) == VAR_DECL
              && DECL_IN_CONSTANT_POOL (TREE_OPERAND (x, 0))))
      && (TREE_CODE (TREE_OPERAND (y, 0)) == CONST_DECL
          || (TREE_CODE (TREE_OPERAND (y, 0)) == VAR_DECL
              && DECL_IN_CONSTANT_POOL (TREE_OPERAND (y, 0)))))
    return TREE_OPERAND (x, 0) == TREE_OPERAND (y, 0)
           || operand_equal_p (DECL_INITIAL (TREE_OPERAND (x, 0)),
                               DECL_INITIAL (TREE_OPERAND (y, 0)), 0);
  else
    return operand_equal_p (x, y, 0);
}

/* Print V which is extracted from a value in a lattice to F.  */

static void
print_ipcp_constant_value (FILE * f, ipa_polymorphic_call_context v)
{
  v.dump (f, false);
}

/* Print a lattice LAT to F.  */

template <typename valtype>
void
ipcp_lattice<valtype>::print (FILE * f, bool dump_sources, bool dump_benefits)
{
  ipcp_value<valtype> *val;
  bool prev = false;

  if (bottom)
    {
      fprintf (f, "BOTTOM\n");
      return;
    }

  if (!values_count && !contains_variable)
    {
      fprintf (f, "TOP\n");
      return;
    }

  if (contains_variable)
    {
      fprintf (f, "VARIABLE");
      prev = true;
      if (dump_benefits)
        fprintf (f, "\n");
    }

  for (val = values; val; val = val->next)
    {
      if (dump_benefits && prev)
        fprintf (f, " ");
      else if (!dump_benefits && prev)
        fprintf (f, ", ");
      else
        prev = true;

      print_ipcp_constant_value (f, val->value);

      if (dump_sources)
        {
          ipcp_value_source<valtype> *s;

          if (val->self_recursion_generated_p ())
            fprintf (f, " [self_gen(%i), from:",
                     val->self_recursion_generated_level);
          else
            fprintf (f, " [scc: %i, from:", val->scc_no);
          for (s = val->sources; s; s = s->next)
            fprintf (f, " %i(%f)", s->cs->caller->order,
                     s->cs->sreal_frequency ().to_double ());
          fprintf (f, "]");
        }

      if (dump_benefits)
        fprintf (f, " [loc_time: %g, loc_size: %i, "
                 "prop_time: %g, prop_size: %i]\n",
                 val->local_time_benefit.to_double (), val->local_size_cost,
                 val->prop_time_benefit.to_double (), val->prop_size_cost);
    }
  if (!dump_benefits)
    fprintf (f, "\n");
}

void
ipcp_bits_lattice::print (FILE *f)
{
  if (top_p ())
    fprintf (f, " Bits unknown (TOP)\n");
  else if (bottom_p ())
    fprintf (f, " Bits unusable (BOTTOM)\n");
  else
    {
      fprintf (f, " Bits: value = "); print_hex (get_value (), f);
      fprintf (f, ", mask = "); print_hex (get_mask (), f);
      fprintf (f, "\n");
    }
}

/* Print value range lattice to F.  */

void
ipcp_vr_lattice::print (FILE * f)
{
  m_vr.dump (f);
}
322
323/* Print all ipcp_lattices of all functions to F. */
324
325static void
326print_all_lattices (FILE * f, bool dump_sources, bool dump_benefits)
327{
328 struct cgraph_node *node;
329 int i, count;
330
331 fprintf (stream: f, format: "\nLattices:\n");
332 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
333 {
334 class ipa_node_params *info;
335
336 info = ipa_node_params_sum->get (node);
337 /* Skip unoptimized functions and constprop clones since we don't make
338 lattices for them. */
339 if (!info || info->ipcp_orig_node)
340 continue;
341 fprintf (stream: f, format: " Node: %s:\n", node->dump_name ());
342 count = ipa_get_param_count (info);
343 for (i = 0; i < count; i++)
344 {
345 struct ipcp_agg_lattice *aglat;
346 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
347 fprintf (stream: f, format: " param [%d]: ", i);
348 plats->itself.print (f, dump_sources, dump_benefits);
349 fprintf (stream: f, format: " ctxs: ");
350 plats->ctxlat.print (f, dump_sources, dump_benefits);
351 plats->bits_lattice.print (f);
352 fprintf (stream: f, format: " ");
353 plats->m_value_range.print (f);
354 fprintf (stream: f, format: "\n");
355 if (plats->virt_call)
356 fprintf (stream: f, format: " virt_call flag set\n");
357
358 if (plats->aggs_bottom)
359 {
360 fprintf (stream: f, format: " AGGS BOTTOM\n");
361 continue;
362 }
363 if (plats->aggs_contain_variable)
364 fprintf (stream: f, format: " AGGS VARIABLE\n");
365 for (aglat = plats->aggs; aglat; aglat = aglat->next)
366 {
367 fprintf (stream: f, format: " %soffset " HOST_WIDE_INT_PRINT_DEC ": ",
368 plats->aggs_by_ref ? "ref " : "", aglat->offset);
369 aglat->print (f, dump_sources, dump_benefits);
370 }
371 }
372 }
373}
374
375/* Determine whether it is at all technically possible to create clones of NODE
376 and store this information in the ipa_node_params structure associated
377 with NODE. */
378
379static void
380determine_versionability (struct cgraph_node *node,
381 class ipa_node_params *info)
382{
383 const char *reason = NULL;
384
385 /* There are a number of generic reasons functions cannot be versioned. We
386 also cannot remove parameters if there are type attributes such as fnspec
387 present. */
388 if (node->alias || node->thunk)
389 reason = "alias or thunk";
390 else if (!node->versionable)
391 reason = "not a tree_versionable_function";
392 else if (node->get_availability () <= AVAIL_INTERPOSABLE)
393 reason = "insufficient body availability";
394 else if (!opt_for_fn (node->decl, optimize)
395 || !opt_for_fn (node->decl, flag_ipa_cp))
396 reason = "non-optimized function";
397 else if (lookup_attribute (attr_name: "omp declare simd", DECL_ATTRIBUTES (node->decl)))
398 {
399 /* Ideally we should clone the SIMD clones themselves and create
400 vector copies of them, so IPA-cp and SIMD clones can happily
401 coexist, but that may not be worth the effort. */
402 reason = "function has SIMD clones";
403 }
404 else if (lookup_attribute (attr_name: "target_clones", DECL_ATTRIBUTES (node->decl)))
405 {
406 /* Ideally we should clone the target clones themselves and create
407 copies of them, so IPA-cp and target clones can happily
408 coexist, but that may not be worth the effort. */
409 reason = "function target_clones attribute";
410 }
  /* Don't clone decls local to a comdat group; doing so breaks the
     comdat-local property, and for C++ decloned constructors inlining is
     always better anyway.  */
413 else if (node->comdat_local_p ())
414 reason = "comdat-local function";
415 else if (node->calls_comdat_local)
416 {
417 /* TODO: call is versionable if we make sure that all
418 callers are inside of a comdat group. */
419 reason = "calls comdat-local function";
420 }
421
422 /* Functions calling BUILT_IN_VA_ARG_PACK and BUILT_IN_VA_ARG_PACK_LEN
423 work only when inlined. Cloning them may still lead to better code
424 because ipa-cp will not give up on cloning further. If the function is
425 external this however leads to wrong code because we may end up producing
426 offline copy of the function. */
427 if (DECL_EXTERNAL (node->decl))
428 for (cgraph_edge *edge = node->callees; !reason && edge;
429 edge = edge->next_callee)
430 if (fndecl_built_in_p (node: edge->callee->decl, klass: BUILT_IN_NORMAL))
431 {
432 if (DECL_FUNCTION_CODE (decl: edge->callee->decl) == BUILT_IN_VA_ARG_PACK)
433 reason = "external function which calls va_arg_pack";
434 if (DECL_FUNCTION_CODE (decl: edge->callee->decl)
435 == BUILT_IN_VA_ARG_PACK_LEN)
436 reason = "external function which calls va_arg_pack_len";
437 }
438
439 if (reason && dump_file && !node->alias && !node->thunk)
440 fprintf (stream: dump_file, format: "Function %s is not versionable, reason: %s.\n",
441 node->dump_name (), reason);
442
443 info->versionable = (reason == NULL);
444}
445
446/* Return true if it is at all technically possible to create clones of a
447 NODE. */
448
449static bool
450ipcp_versionable_function_p (struct cgraph_node *node)
451{
452 ipa_node_params *info = ipa_node_params_sum->get (node);
453 return info && info->versionable;
454}
455
456/* Structure holding accumulated information about callers of a node. */
457
458struct caller_statistics
459{
460 /* If requested (see below), self-recursive call counts are summed into this
461 field. */
462 profile_count rec_count_sum;
463 /* The sum of all ipa counts of all the other (non-recursive) calls. */
464 profile_count count_sum;
465 /* Sum of all frequencies for all calls. */
466 sreal freq_sum;
467 /* Number of calls and hot calls respectively. */
468 int n_calls, n_hot_calls;
469 /* If itself is set up, also count the number of non-self-recursive
470 calls. */
471 int n_nonrec_calls;
472 /* If non-NULL, this is the node itself and calls from it should have their
473 counts included in rec_count_sum and not count_sum. */
474 cgraph_node *itself;
475};
476
/* Initialize fields of STATS to zeroes and optionally set it up so that
   self-recursive call edges coming from ITSELF are counted separately.  */
479
480static inline void
481init_caller_stats (caller_statistics *stats, cgraph_node *itself = NULL)
482{
483 stats->rec_count_sum = profile_count::zero ();
484 stats->count_sum = profile_count::zero ();
485 stats->n_calls = 0;
486 stats->n_hot_calls = 0;
487 stats->n_nonrec_calls = 0;
488 stats->freq_sum = 0;
489 stats->itself = itself;
490}
491
492/* Worker callback of cgraph_for_node_and_aliases accumulating statistics of
493 non-thunk incoming edges to NODE. */
494
495static bool
496gather_caller_stats (struct cgraph_node *node, void *data)
497{
498 struct caller_statistics *stats = (struct caller_statistics *) data;
499 struct cgraph_edge *cs;
500
501 for (cs = node->callers; cs; cs = cs->next_caller)
502 if (!cs->caller->thunk)
503 {
504 ipa_node_params *info = ipa_node_params_sum->get (node: cs->caller);
505 if (info && info->node_dead)
506 continue;
507
508 if (cs->count.ipa ().initialized_p ())
509 {
510 if (stats->itself && stats->itself == cs->caller)
511 stats->rec_count_sum += cs->count.ipa ();
512 else
513 stats->count_sum += cs->count.ipa ();
514 }
515 stats->freq_sum += cs->sreal_frequency ();
516 stats->n_calls++;
517 if (stats->itself && stats->itself != cs->caller)
518 stats->n_nonrec_calls++;
519
520 if (cs->maybe_hot_p ())
521 stats->n_hot_calls ++;
522 }
523 return false;
524
525}
526
/* Return true if this NODE is a viable candidate for cloning.  */
528
529static bool
530ipcp_cloning_candidate_p (struct cgraph_node *node)
531{
532 struct caller_statistics stats;
533
534 gcc_checking_assert (node->has_gimple_body_p ());
535
536 if (!opt_for_fn (node->decl, flag_ipa_cp_clone))
537 {
538 if (dump_file)
539 fprintf (stream: dump_file, format: "Not considering %s for cloning; "
540 "-fipa-cp-clone disabled.\n",
541 node->dump_name ());
542 return false;
543 }
544
545 if (node->optimize_for_size_p ())
546 {
547 if (dump_file)
548 fprintf (stream: dump_file, format: "Not considering %s for cloning; "
549 "optimizing it for size.\n",
550 node->dump_name ());
551 return false;
552 }
553
554 init_caller_stats (stats: &stats);
555 node->call_for_symbol_thunks_and_aliases (callback: gather_caller_stats, data: &stats, include_overwritable: false);
556
557 if (ipa_size_summaries->get (node)->self_size < stats.n_calls)
558 {
559 if (dump_file)
560 fprintf (stream: dump_file, format: "Considering %s for cloning; code might shrink.\n",
561 node->dump_name ());
562 return true;
563 }
564
  /* When profile is available and the function is hot, propagate into it even
     if the calls seem cold; constant propagation can improve the function's
     speed significantly.  */
568 if (stats.count_sum > profile_count::zero ()
569 && node->count.ipa ().initialized_p ())
570 {
571 if (stats.count_sum > node->count.ipa ().apply_scale (num: 90, den: 100))
572 {
573 if (dump_file)
574 fprintf (stream: dump_file, format: "Considering %s for cloning; "
575 "usually called directly.\n",
576 node->dump_name ());
577 return true;
578 }
579 }
580 if (!stats.n_hot_calls)
581 {
582 if (dump_file)
583 fprintf (stream: dump_file, format: "Not considering %s for cloning; no hot calls.\n",
584 node->dump_name ());
585 return false;
586 }
587 if (dump_file)
588 fprintf (stream: dump_file, format: "Considering %s for cloning.\n",
589 node->dump_name ());
590 return true;
591}
592
593template <typename valtype>
594class value_topo_info
595{
596public:
597 /* Head of the linked list of topologically sorted values. */
598 ipcp_value<valtype> *values_topo;
599 /* Stack for creating SCCs, represented by a linked list too. */
600 ipcp_value<valtype> *stack;
601 /* Counter driving the algorithm in add_val_to_toposort. */
602 int dfs_counter;
603
604 value_topo_info () : values_topo (NULL), stack (NULL), dfs_counter (0)
605 {}
606 void add_val (ipcp_value<valtype> *cur_val);
607 void propagate_effects ();
608};
609
610/* Arrays representing a topological ordering of call graph nodes and a stack
611 of nodes used during constant propagation and also data required to perform
612 topological sort of values and propagation of benefits in the determined
613 order. */
614
615class ipa_topo_info
616{
617public:
618 /* Array with obtained topological order of cgraph nodes. */
619 struct cgraph_node **order;
620 /* Stack of cgraph nodes used during propagation within SCC until all values
621 in the SCC stabilize. */
622 struct cgraph_node **stack;
623 int nnodes, stack_top;
624
625 value_topo_info<tree> constants;
626 value_topo_info<ipa_polymorphic_call_context> contexts;
627
628 ipa_topo_info () : order(NULL), stack(NULL), nnodes(0), stack_top(0),
629 constants ()
630 {}
631};
632
/* Skip edges from and to nodes without ipa-cp enabled.  Ignore edges to
   symbols that are not available.  */
635
636static bool
637ignore_edge_p (cgraph_edge *e)
638{
639 enum availability avail;
640 cgraph_node *ultimate_target
641 = e->callee->function_or_virtual_thunk_symbol (avail: &avail, ref: e->caller);
642
643 return (avail <= AVAIL_INTERPOSABLE
644 || !opt_for_fn (ultimate_target->decl, optimize)
645 || !opt_for_fn (ultimate_target->decl, flag_ipa_cp));
646}
647
648/* Allocate the arrays in TOPO and topologically sort the nodes into order. */
649
650static void
651build_toporder_info (class ipa_topo_info *topo)
652{
653 topo->order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
654 topo->stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
655
656 gcc_checking_assert (topo->stack_top == 0);
657 topo->nnodes = ipa_reduced_postorder (topo->order, true,
658 ignore_edge: ignore_edge_p);
659}
660
661/* Free information about strongly connected components and the arrays in
662 TOPO. */
663
664static void
665free_toporder_info (class ipa_topo_info *topo)
666{
667 ipa_free_postorder_info ();
668 free (ptr: topo->order);
669 free (ptr: topo->stack);
670}
671
672/* Add NODE to the stack in TOPO, unless it is already there. */
673
674static inline void
675push_node_to_stack (class ipa_topo_info *topo, struct cgraph_node *node)
676{
677 ipa_node_params *info = ipa_node_params_sum->get (node);
678 if (info->node_enqueued)
679 return;
680 info->node_enqueued = 1;
681 topo->stack[topo->stack_top++] = node;
682}
683
684/* Pop a node from the stack in TOPO and return it or return NULL if the stack
685 is empty. */
686
687static struct cgraph_node *
688pop_node_from_stack (class ipa_topo_info *topo)
689{
690 if (topo->stack_top)
691 {
692 struct cgraph_node *node;
693 topo->stack_top--;
694 node = topo->stack[topo->stack_top];
695 ipa_node_params_sum->get (node)->node_enqueued = 0;
696 return node;
697 }
698 else
699 return NULL;
700}
701
702/* Set lattice LAT to bottom and return true if it previously was not set as
703 such. */
704
705template <typename valtype>
706inline bool
707ipcp_lattice<valtype>::set_to_bottom ()
708{
709 bool ret = !bottom;
710 bottom = true;
711 return ret;
712}
713
714/* Mark lattice as containing an unknown value and return true if it previously
715 was not marked as such. */
716
717template <typename valtype>
718inline bool
719ipcp_lattice<valtype>::set_contains_variable ()
720{
721 bool ret = !contains_variable;
722 contains_variable = true;
723 return ret;
724}
725
726/* Set all aggregate lattices in PLATS to bottom and return true if they were
727 not previously set as such. */
728
729static inline bool
730set_agg_lats_to_bottom (class ipcp_param_lattices *plats)
731{
732 bool ret = !plats->aggs_bottom;
733 plats->aggs_bottom = true;
734 return ret;
735}
736
737/* Mark all aggregate lattices in PLATS as containing an unknown value and
738 return true if they were not previously marked as such. */
739
740static inline bool
741set_agg_lats_contain_variable (class ipcp_param_lattices *plats)
742{
743 bool ret = !plats->aggs_contain_variable;
744 plats->aggs_contain_variable = true;
745 return ret;
746}
747
748bool
749ipcp_vr_lattice::meet_with (const ipcp_vr_lattice &other)
750{
751 return meet_with_1 (other_vr: other.m_vr);
752}
753
754/* Meet the current value of the lattice with the range described by
755 P_VR. */
756
757bool
758ipcp_vr_lattice::meet_with (const vrange &p_vr)
759{
760 return meet_with_1 (other_vr: p_vr);
761}
762
763/* Meet the current value of the lattice with the range described by
764 OTHER_VR. Return TRUE if anything changed. */
765
766bool
767ipcp_vr_lattice::meet_with_1 (const vrange &other_vr)
768{
769 if (bottom_p ())
770 return false;
771
772 if (other_vr.varying_p ())
773 return set_to_bottom ();
774
775 bool res;
776 if (flag_checking)
777 {
778 Value_Range save (m_vr);
779 res = m_vr.union_ (r: other_vr);
780 gcc_assert (res == (m_vr != save));
781 }
782 else
783 res = m_vr.union_ (r: other_vr);
784 return res;
785}
786
787/* Return true if value range information in the lattice is yet unknown. */
788
789bool
790ipcp_vr_lattice::top_p () const
791{
792 return m_vr.undefined_p ();
793}
794
795/* Return true if value range information in the lattice is known to be
796 unusable. */
797
798bool
799ipcp_vr_lattice::bottom_p () const
800{
801 return m_vr.varying_p ();
802}
803
804/* Set value range information in the lattice to bottom. Return true if it
805 previously was in a different state. */
806
807bool
808ipcp_vr_lattice::set_to_bottom ()
809{
810 if (m_vr.varying_p ())
811 return false;
812
813 /* Setting an unsupported type here forces the temporary to default
814 to unsupported_range, which can handle VARYING/DEFINED ranges,
815 but nothing else (union, intersect, etc). This allows us to set
816 bottoms on any ranges, and is safe as all users of the lattice
817 check for bottom first. */
818 m_vr.set_type (void_type_node);
819 m_vr.set_varying (void_type_node);
820
821 return true;
822}
823
824/* Set lattice value to bottom, if it already isn't the case. */
825
826bool
827ipcp_bits_lattice::set_to_bottom ()
828{
829 if (bottom_p ())
830 return false;
831 m_lattice_val = IPA_BITS_VARYING;
832 m_value = 0;
833 m_mask = -1;
834 return true;
835}
836
837/* Set to constant if it isn't already. Only meant to be called
838 when switching state from TOP. */
839
840bool
841ipcp_bits_lattice::set_to_constant (widest_int value, widest_int mask)
842{
843 gcc_assert (top_p ());
844 m_lattice_val = IPA_BITS_CONSTANT;
845 m_value = wi::bit_and (x: wi::bit_not (x: mask), y: value);
846 m_mask = mask;
847 return true;
848}
849
850/* Return true if any of the known bits are non-zero. */
851
852bool
853ipcp_bits_lattice::known_nonzero_p () const
854{
855 if (!constant_p ())
856 return false;
857 return wi::ne_p (x: wi::bit_and (x: wi::bit_not (x: m_mask), y: m_value), y: 0);
858}
859
860/* Convert operand to value, mask form. */
861
862void
863ipcp_bits_lattice::get_value_and_mask (tree operand, widest_int *valuep, widest_int *maskp)
864{
865 wide_int get_nonzero_bits (const_tree);
866
867 if (TREE_CODE (operand) == INTEGER_CST)
868 {
869 *valuep = wi::to_widest (t: operand);
870 *maskp = 0;
871 }
872 else
873 {
874 *valuep = 0;
875 *maskp = -1;
876 }
877}
878
/* Meet operation, similar to ccp_lattice_meet.  We xor the values so that any
   bit position where this->m_value and VALUE differ is dropped to varying.
   Return true if the mask changed.  This function assumes that the lattice
   value is in CONSTANT state.  If DROP_ALL_ONES, mask out any known bits with
   value one afterwards.  */
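
/* For illustration (an example made up for this comment, not taken from any
   testcase): meeting two fully known values 0b0100 and 0b0110 - both with
   mask 0, i.e. all bits known - yields mask 0b0010 and value 0b0100, so only
   bit 1 becomes unknown while the remaining bits stay known.  */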
884
885bool
886ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask,
887 unsigned precision, bool drop_all_ones)
888{
889 gcc_assert (constant_p ());
890
891 widest_int old_mask = m_mask;
892 m_mask = (m_mask | mask) | (m_value ^ value);
893 if (drop_all_ones)
894 m_mask |= m_value;
895 m_value &= ~m_mask;
896
897 if (wi::sext (x: m_mask, offset: precision) == -1)
898 return set_to_bottom ();
899
900 return m_mask != old_mask;
901}
902
/* Meet the bits lattice with operand
   described by <value, mask, sgn, precision>.  */
905
906bool
907ipcp_bits_lattice::meet_with (widest_int value, widest_int mask,
908 unsigned precision)
909{
910 if (bottom_p ())
911 return false;
912
913 if (top_p ())
914 {
915 if (wi::sext (x: mask, offset: precision) == -1)
916 return set_to_bottom ();
917 return set_to_constant (value, mask);
918 }
919
920 return meet_with_1 (value, mask, precision, drop_all_ones: false);
921}
922
923/* Meet bits lattice with the result of bit_value_binop (other, operand)
924 if code is binary operation or bit_value_unop (other) if code is unary op.
925 In the case when code is nop_expr, no adjustment is required. If
926 DROP_ALL_ONES, mask out any known bits with value one afterwards. */
927
928bool
929ipcp_bits_lattice::meet_with (ipcp_bits_lattice& other, unsigned precision,
930 signop sgn, enum tree_code code, tree operand,
931 bool drop_all_ones)
932{
933 if (other.bottom_p ())
934 return set_to_bottom ();
935
936 if (bottom_p () || other.top_p ())
937 return false;
938
939 widest_int adjusted_value, adjusted_mask;
940
941 if (TREE_CODE_CLASS (code) == tcc_binary)
942 {
943 tree type = TREE_TYPE (operand);
944 widest_int o_value, o_mask;
945 get_value_and_mask (operand, valuep: &o_value, maskp: &o_mask);
946
947 bit_value_binop (code, sgn, precision, &adjusted_value, &adjusted_mask,
948 sgn, precision, other.get_value (), other.get_mask (),
949 TYPE_SIGN (type), TYPE_PRECISION (type), o_value, o_mask);
950
951 if (wi::sext (x: adjusted_mask, offset: precision) == -1)
952 return set_to_bottom ();
953 }
954
955 else if (TREE_CODE_CLASS (code) == tcc_unary)
956 {
957 bit_value_unop (code, sgn, precision, &adjusted_value,
958 &adjusted_mask, sgn, precision, other.get_value (),
959 other.get_mask ());
960
961 if (wi::sext (x: adjusted_mask, offset: precision) == -1)
962 return set_to_bottom ();
963 }
964
965 else
966 return set_to_bottom ();
967
968 if (top_p ())
969 {
970 if (drop_all_ones)
971 {
972 adjusted_mask |= adjusted_value;
973 adjusted_value &= ~adjusted_mask;
974 }
975 if (wi::sext (x: adjusted_mask, offset: precision) == -1)
976 return set_to_bottom ();
977 return set_to_constant (value: adjusted_value, mask: adjusted_mask);
978 }
979 else
980 return meet_with_1 (value: adjusted_value, mask: adjusted_mask, precision,
981 drop_all_ones);
982}
983
984/* Dump the contents of the list to FILE. */
985
986void
987ipa_argagg_value_list::dump (FILE *f)
988{
989 bool comma = false;
990 for (const ipa_argagg_value &av : m_elts)
991 {
992 fprintf (stream: f, format: "%s %i[%u]=", comma ? "," : "",
993 av.index, av.unit_offset);
994 print_generic_expr (f, av.value);
995 if (av.by_ref)
996 fprintf (stream: f, format: "(by_ref)");
997 if (av.killed)
998 fprintf (stream: f, format: "(killed)");
999 comma = true;
1000 }
1001 fprintf (stream: f, format: "\n");
1002}
1003
1004/* Dump the contents of the list to stderr. */
1005
1006void
1007ipa_argagg_value_list::debug ()
1008{
1009 dump (stderr);
1010}
1011
1012/* Return the item describing a constant stored for INDEX at UNIT_OFFSET or
1013 NULL if there is no such constant. */
1014
1015const ipa_argagg_value *
1016ipa_argagg_value_list::get_elt (int index, unsigned unit_offset) const
1017{
1018 ipa_argagg_value key;
1019 key.index = index;
1020 key.unit_offset = unit_offset;
1021 const ipa_argagg_value *res
1022 = std::lower_bound (first: m_elts.begin (), last: m_elts.end (), val: key,
1023 comp: [] (const ipa_argagg_value &elt,
1024 const ipa_argagg_value &val)
1025 {
1026 if (elt.index < val.index)
1027 return true;
1028 if (elt.index > val.index)
1029 return false;
1030 if (elt.unit_offset < val.unit_offset)
1031 return true;
1032 return false;
1033 });
1034
1035 if (res == m_elts.end ()
1036 || res->index != index
1037 || res->unit_offset != unit_offset)
1038 res = nullptr;
1039
1040 /* TODO: perhaps remove the check (that the underlying array is indeed
1041 sorted) if it turns out it can be too slow? */
1042 if (!flag_checking)
1043 return res;
1044
1045 const ipa_argagg_value *slow_res = NULL;
1046 int prev_index = -1;
1047 unsigned prev_unit_offset = 0;
1048 for (const ipa_argagg_value &av : m_elts)
1049 {
1050 gcc_assert (prev_index < 0
1051 || prev_index < av.index
1052 || prev_unit_offset < av.unit_offset);
1053 prev_index = av.index;
1054 prev_unit_offset = av.unit_offset;
1055 if (av.index == index
1056 && av.unit_offset == unit_offset)
1057 slow_res = &av;
1058 }
1059 gcc_assert (res == slow_res);
1060
1061 return res;
1062}
1063
1064/* Return the first item describing a constant stored for parameter with INDEX,
1065 regardless of offset or reference, or NULL if there is no such constant. */
1066
1067const ipa_argagg_value *
1068ipa_argagg_value_list::get_elt_for_index (int index) const
1069{
1070 const ipa_argagg_value *res
1071 = std::lower_bound (first: m_elts.begin (), last: m_elts.end (), val: index,
1072 comp: [] (const ipa_argagg_value &elt, unsigned idx)
1073 {
1074 return elt.index < idx;
1075 });
1076 if (res == m_elts.end ()
1077 || res->index != index)
1078 res = nullptr;
1079 return res;
1080}
1081
1082/* Return the aggregate constant stored for INDEX at UNIT_OFFSET, not
1083 performing any check of whether value is passed by reference, or NULL_TREE
1084 if there is no such constant. */
1085
1086tree
1087ipa_argagg_value_list::get_value (int index, unsigned unit_offset) const
1088{
1089 const ipa_argagg_value *av = get_elt (index, unit_offset);
1090 return av ? av->value : NULL_TREE;
1091}
1092
1093/* Return the aggregate constant stored for INDEX at UNIT_OFFSET, if it is
1094 passed by reference or not according to BY_REF, or NULL_TREE if there is
1095 no such constant. */
1096
1097tree
1098ipa_argagg_value_list::get_value (int index, unsigned unit_offset,
1099 bool by_ref) const
1100{
1101 const ipa_argagg_value *av = get_elt (index, unit_offset);
1102 if (av && av->by_ref == by_ref)
1103 return av->value;
1104 return NULL_TREE;
1105}
1106
1107/* Return true if all elements present in OTHER are also present in this
1108 list. */
1109
1110bool
1111ipa_argagg_value_list::superset_of_p (const ipa_argagg_value_list &other) const
1112{
1113 unsigned j = 0;
1114 for (unsigned i = 0; i < other.m_elts.size (); i++)
1115 {
1116 unsigned other_index = other.m_elts[i].index;
1117 unsigned other_offset = other.m_elts[i].unit_offset;
1118
1119 while (j < m_elts.size ()
1120 && (m_elts[j].index < other_index
1121 || (m_elts[j].index == other_index
1122 && m_elts[j].unit_offset < other_offset)))
1123 j++;
1124
1125 if (j >= m_elts.size ()
1126 || m_elts[j].index != other_index
1127 || m_elts[j].unit_offset != other_offset
1128 || m_elts[j].by_ref != other.m_elts[i].by_ref
1129 || !m_elts[j].value
1130 || !values_equal_for_ipcp_p (x: m_elts[j].value, y: other.m_elts[i].value))
1131 return false;
1132 }
1133 return true;
1134}
1135
1136/* Push all items in this list that describe parameter SRC_INDEX into RES as
1137 ones describing DST_INDEX while subtracting UNIT_DELTA from their unit
1138 offsets but skip those which would end up with a negative offset. */
1139
1140void
1141ipa_argagg_value_list::push_adjusted_values (unsigned src_index,
1142 unsigned dest_index,
1143 unsigned unit_delta,
1144 vec<ipa_argagg_value> *res) const
1145{
1146 const ipa_argagg_value *av = get_elt_for_index (index: src_index);
1147 if (!av)
1148 return;
1149 unsigned prev_unit_offset = 0;
1150 bool first = true;
1151 for (; av < m_elts.end (); ++av)
1152 {
1153 if (av->index > src_index)
1154 return;
1155 if (av->index == src_index
1156 && (av->unit_offset >= unit_delta)
1157 && av->value)
1158 {
1159 ipa_argagg_value new_av;
1160 gcc_checking_assert (av->value);
1161 new_av.value = av->value;
1162 new_av.unit_offset = av->unit_offset - unit_delta;
1163 new_av.index = dest_index;
1164 new_av.by_ref = av->by_ref;
1165 gcc_assert (!av->killed);
1166 new_av.killed = false;
1167
1168 /* Quick check that the offsets we push are indeed increasing. */
1169 gcc_assert (first
1170 || new_av.unit_offset > prev_unit_offset);
1171 prev_unit_offset = new_av.unit_offset;
1172 first = false;
1173
1174 res->safe_push (obj: new_av);
1175 }
1176 }
1177}
1178
1179/* Push to RES information about single lattices describing aggregate values in
1180 PLATS as those describing parameter DEST_INDEX and the original offset minus
1181 UNIT_DELTA. Return true if any item has been pushed to RES. */
1182
1183static bool
1184push_agg_values_from_plats (ipcp_param_lattices *plats, int dest_index,
1185 unsigned unit_delta,
1186 vec<ipa_argagg_value> *res)
1187{
1188 if (plats->aggs_contain_variable)
1189 return false;
1190
1191 bool pushed_sth = false;
1192 bool first = true;
1193 unsigned prev_unit_offset = 0;
1194 for (struct ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
1195 if (aglat->is_single_const ()
1196 && (aglat->offset / BITS_PER_UNIT - unit_delta) >= 0)
1197 {
1198 ipa_argagg_value iav;
1199 iav.value = aglat->values->value;
1200 iav.unit_offset = aglat->offset / BITS_PER_UNIT - unit_delta;
1201 iav.index = dest_index;
1202 iav.by_ref = plats->aggs_by_ref;
1203 iav.killed = false;
1204
1205 gcc_assert (first
1206 || iav.unit_offset > prev_unit_offset);
1207 prev_unit_offset = iav.unit_offset;
1208 first = false;
1209
1210 pushed_sth = true;
1211 res->safe_push (obj: iav);
1212 }
1213 return pushed_sth;
1214}
1215
/* Turn all values in ELTS that are not present in OTHER into NULL_TREEs.
   Return the number of remaining valid entries.  */
1218
1219static unsigned
1220intersect_argaggs_with (vec<ipa_argagg_value> &elts,
1221 const vec<ipa_argagg_value> &other)
1222{
1223 unsigned valid_entries = 0;
1224 unsigned j = 0;
1225 for (unsigned i = 0; i < elts.length (); i++)
1226 {
1227 if (!elts[i].value)
1228 continue;
1229
1230 unsigned this_index = elts[i].index;
1231 unsigned this_offset = elts[i].unit_offset;
1232
1233 while (j < other.length ()
1234 && (other[j].index < this_index
1235 || (other[j].index == this_index
1236 && other[j].unit_offset < this_offset)))
1237 j++;
1238
1239 if (j >= other.length ())
1240 {
1241 elts[i].value = NULL_TREE;
1242 continue;
1243 }
1244
1245 if (other[j].index == this_index
1246 && other[j].unit_offset == this_offset
1247 && other[j].by_ref == elts[i].by_ref
1248 && other[j].value
1249 && values_equal_for_ipcp_p (x: other[j].value, y: elts[i].value))
1250 valid_entries++;
1251 else
1252 elts[i].value = NULL_TREE;
1253 }
1254 return valid_entries;
1255}
1256
/* Mark both aggregate and scalar lattices as containing an unknown variable,
   return true if any of them has not been marked as such so far.  */
1259
1260static inline bool
1261set_all_contains_variable (class ipcp_param_lattices *plats)
1262{
1263 bool ret;
1264 ret = plats->itself.set_contains_variable ();
1265 ret |= plats->ctxlat.set_contains_variable ();
1266 ret |= set_agg_lats_contain_variable (plats);
1267 ret |= plats->bits_lattice.set_to_bottom ();
1268 ret |= plats->m_value_range.set_to_bottom ();
1269 return ret;
1270}
1271
1272/* Worker of call_for_symbol_thunks_and_aliases, increment the integer DATA
1273 points to by the number of callers to NODE. */
1274
1275static bool
1276count_callers (cgraph_node *node, void *data)
1277{
1278 int *caller_count = (int *) data;
1279
1280 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
1281 /* Local thunks can be handled transparently, but if the thunk cannot
1282 be optimized out, count it as a real use. */
1283 if (!cs->caller->thunk || !cs->caller->local)
1284 ++*caller_count;
1285 return false;
1286}
1287
1288/* Worker of call_for_symbol_thunks_and_aliases, it is supposed to be called on
1289 the one caller of some other node. Set the caller's corresponding flag. */
1290
1291static bool
1292set_single_call_flag (cgraph_node *node, void *)
1293{
1294 cgraph_edge *cs = node->callers;
1295 /* Local thunks can be handled transparently, skip them. */
1296 while (cs && cs->caller->thunk && cs->caller->local)
1297 cs = cs->next_caller;
1298 if (cs)
1299 if (ipa_node_params* info = ipa_node_params_sum->get (node: cs->caller))
1300 {
1301 info->node_calling_single_call = true;
1302 return true;
1303 }
1304 return false;
1305}
1306
1307/* Initialize ipcp_lattices. */
1308
1309static void
1310initialize_node_lattices (struct cgraph_node *node)
1311{
1312 ipa_node_params *info = ipa_node_params_sum->get (node);
1313 struct cgraph_edge *ie;
1314 bool disable = false, variable = false;
1315 int i;
1316
1317 gcc_checking_assert (node->has_gimple_body_p ());
1318
1319 if (!ipa_get_param_count (info))
1320 disable = true;
1321 else if (node->local)
1322 {
1323 int caller_count = 0;
1324 node->call_for_symbol_thunks_and_aliases (callback: count_callers, data: &caller_count,
1325 include_overwritable: true);
1326 gcc_checking_assert (caller_count > 0);
1327 if (caller_count == 1)
1328 node->call_for_symbol_thunks_and_aliases (callback: set_single_call_flag,
1329 NULL, include_overwritable: true);
1330 }
1331 else
1332 {
1333 /* When cloning is allowed, we can assume that externally visible
1334 functions are not called. We will compensate this by cloning
1335 later. */
1336 if (ipcp_versionable_function_p (node)
1337 && ipcp_cloning_candidate_p (node))
1338 variable = true;
1339 else
1340 disable = true;
1341 }
1342
1343 if (dump_file && (dump_flags & TDF_DETAILS)
1344 && !node->alias && !node->thunk)
1345 {
1346 fprintf (stream: dump_file, format: "Initializing lattices of %s\n",
1347 node->dump_name ());
1348 if (disable || variable)
1349 fprintf (stream: dump_file, format: " Marking all lattices as %s\n",
1350 disable ? "BOTTOM" : "VARIABLE");
1351 }
1352
1353 auto_vec<bool, 16> surviving_params;
1354 bool pre_modified = false;
1355
1356 clone_info *cinfo = clone_info::get (node);
1357
1358 if (!disable && cinfo && cinfo->param_adjustments)
1359 {
      /* At the moment all IPA optimizations should use the number of
	 parameters of the prevailing decl as the m_always_copy_start.
	 Handling any other value would complicate the code below, so for the
	 time being let's only assert it is so.  */
1364 gcc_assert ((cinfo->param_adjustments->m_always_copy_start
1365 == ipa_get_param_count (info))
1366 || cinfo->param_adjustments->m_always_copy_start < 0);
1367
1368 pre_modified = true;
1369 cinfo->param_adjustments->get_surviving_params (surviving_params: &surviving_params);
1370
1371 if (dump_file && (dump_flags & TDF_DETAILS)
1372 && !node->alias && !node->thunk)
1373 {
1374 bool first = true;
1375 for (int j = 0; j < ipa_get_param_count (info); j++)
1376 {
1377 if (j < (int) surviving_params.length ()
1378 && surviving_params[j])
1379 continue;
1380 if (first)
1381 {
1382 fprintf (stream: dump_file,
1383 format: " The following parameters are dead on arrival:");
1384 first = false;
1385 }
1386 fprintf (stream: dump_file, format: " %u", j);
1387 }
1388 if (!first)
1389 fprintf (stream: dump_file, format: "\n");
1390 }
1391 }
1392
1393 for (i = 0; i < ipa_get_param_count (info); i++)
1394 {
1395 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
1396 tree type = ipa_get_type (info, i);
1397 if (disable
1398 || !ipa_get_type (info, i)
1399 || (pre_modified && (surviving_params.length () <= (unsigned) i
1400 || !surviving_params[i])))
1401 {
1402 plats->itself.set_to_bottom ();
1403 plats->ctxlat.set_to_bottom ();
1404 set_agg_lats_to_bottom (plats);
1405 plats->bits_lattice.set_to_bottom ();
1406 plats->m_value_range.init (type);
1407 plats->m_value_range.set_to_bottom ();
1408 }
1409 else
1410 {
1411 plats->m_value_range.init (type);
1412 if (variable)
1413 set_all_contains_variable (plats);
1414 }
1415 }
1416
1417 for (ie = node->indirect_calls; ie; ie = ie->next_callee)
1418 if (ie->indirect_info->polymorphic
1419 && ie->indirect_info->param_index >= 0)
1420 {
1421 gcc_checking_assert (ie->indirect_info->param_index >= 0);
1422 ipa_get_parm_lattices (info,
1423 i: ie->indirect_info->param_index)->virt_call = 1;
1424 }
1425}
1426
1427/* Return true if VALUE can be safely IPA-CP propagated to a parameter of type
1428 PARAM_TYPE. */
1429
1430static bool
1431ipacp_value_safe_for_type (tree param_type, tree value)
1432{
1433 tree val_type = TREE_TYPE (value);
1434 if (param_type == val_type
1435 || useless_type_conversion_p (param_type, val_type)
1436 || fold_convertible_p (param_type, value))
1437 return true;
1438 else
1439 return false;
1440}
1441
/* Return the result of a (possibly arithmetic) operation on the constant
   value INPUT.  OPERAND is the second operand of a binary operation.  RES_TYPE
   is the type of the parameter to which the result is passed.  Return
   NULL_TREE if that cannot be determined or be considered an
   interprocedural invariant.  */
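
/* As a purely illustrative example (numbers invented for this comment, not
   taken from any testcase): with OPCODE PLUS_EXPR, INPUT 7 and OPERAND 1 of a
   matching integer type, the function below folds the addition and returns
   the integer constant 8; with OPCODE NOP_EXPR it simply returns INPUT.  */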
1447
1448static tree
1449ipa_get_jf_arith_result (enum tree_code opcode, tree input, tree operand,
1450 tree res_type)
1451{
1452 tree res;
1453
1454 if (opcode == NOP_EXPR)
1455 return input;
1456 if (!is_gimple_ip_invariant (input))
1457 return NULL_TREE;
1458
1459 if (opcode == ASSERT_EXPR)
1460 {
1461 if (values_equal_for_ipcp_p (x: input, y: operand))
1462 return input;
1463 else
1464 return NULL_TREE;
1465 }
1466
1467 if (!res_type)
1468 {
1469 if (TREE_CODE_CLASS (opcode) == tcc_comparison)
1470 res_type = boolean_type_node;
1471 else if (expr_type_first_operand_type_p (opcode))
1472 res_type = TREE_TYPE (input);
1473 else
1474 return NULL_TREE;
1475 }
1476
1477 if (TREE_CODE_CLASS (opcode) == tcc_unary)
1478 res = fold_unary (opcode, res_type, input);
1479 else
1480 res = fold_binary (opcode, res_type, input, operand);
1481
1482 if (res && !is_gimple_ip_invariant (res))
1483 return NULL_TREE;
1484
1485 return res;
1486}
1487
1488/* Return the result of a (possibly arithmetic) pass through jump function
1489 JFUNC on the constant value INPUT. RES_TYPE is the type of the parameter
1490 to which the result is passed. Return NULL_TREE if that cannot be
1491 determined or be considered an interprocedural invariant. */
1492
1493static tree
1494ipa_get_jf_pass_through_result (struct ipa_jump_func *jfunc, tree input,
1495 tree res_type)
1496{
1497 return ipa_get_jf_arith_result (opcode: ipa_get_jf_pass_through_operation (jfunc),
1498 input,
1499 operand: ipa_get_jf_pass_through_operand (jfunc),
1500 res_type);
1501}
1502
1503/* Return the result of an ancestor jump function JFUNC on the constant value
1504 INPUT. Return NULL_TREE if that cannot be determined. */
1505
1506static tree
1507ipa_get_jf_ancestor_result (struct ipa_jump_func *jfunc, tree input)
1508{
1509 gcc_checking_assert (TREE_CODE (input) != TREE_BINFO);
1510 if (TREE_CODE (input) == ADDR_EXPR)
1511 {
1512 gcc_checking_assert (is_gimple_ip_invariant_address (input));
1513 poly_int64 off = ipa_get_jf_ancestor_offset (jfunc);
1514 if (known_eq (off, 0))
1515 return input;
1516 poly_int64 byte_offset = exact_div (a: off, BITS_PER_UNIT);
1517 return build1 (ADDR_EXPR, TREE_TYPE (input),
1518 fold_build2 (MEM_REF, TREE_TYPE (TREE_TYPE (input)), input,
1519 build_int_cst (ptr_type_node, byte_offset)));
1520 }
1521 else if (ipa_get_jf_ancestor_keep_null (jfunc)
1522 && zerop (input))
1523 return input;
1524 else
1525 return NULL_TREE;
1526}
1527
1528/* Determine whether JFUNC evaluates to a single known constant value and if
1529 so, return it. Otherwise return NULL. INFO describes the caller node or
1530 the one it is inlined to, so that pass-through jump functions can be
1531 evaluated. PARM_TYPE is the type of the parameter to which the result is
1532 passed. */
1533
1534tree
1535ipa_value_from_jfunc (class ipa_node_params *info, struct ipa_jump_func *jfunc,
1536 tree parm_type)
1537{
1538 if (jfunc->type == IPA_JF_CONST)
1539 return ipa_get_jf_constant (jfunc);
1540 else if (jfunc->type == IPA_JF_PASS_THROUGH
1541 || jfunc->type == IPA_JF_ANCESTOR)
1542 {
1543 tree input;
1544 int idx;
1545
1546 if (jfunc->type == IPA_JF_PASS_THROUGH)
1547 idx = ipa_get_jf_pass_through_formal_id (jfunc);
1548 else
1549 idx = ipa_get_jf_ancestor_formal_id (jfunc);
1550
1551 if (info->ipcp_orig_node)
1552 input = info->known_csts[idx];
1553 else
1554 {
1555 ipcp_lattice<tree> *lat;
1556
1557 if (info->lattices.is_empty ()
1558 || idx >= ipa_get_param_count (info))
1559 return NULL_TREE;
1560 lat = ipa_get_scalar_lat (info, i: idx);
1561 if (!lat->is_single_const ())
1562 return NULL_TREE;
1563 input = lat->values->value;
1564 }
1565
1566 if (!input)
1567 return NULL_TREE;
1568
1569 if (jfunc->type == IPA_JF_PASS_THROUGH)
1570 return ipa_get_jf_pass_through_result (jfunc, input, res_type: parm_type);
1571 else
1572 return ipa_get_jf_ancestor_result (jfunc, input);
1573 }
1574 else
1575 return NULL_TREE;
1576}
1577
1578/* Determine whether JFUNC evaluates to single known polymorphic context, given
1579 that INFO describes the caller node or the one it is inlined to, CS is the
1580 call graph edge corresponding to JFUNC and CSIDX index of the described
1581 parameter. */
1582
1583ipa_polymorphic_call_context
1584ipa_context_from_jfunc (ipa_node_params *info, cgraph_edge *cs, int csidx,
1585 ipa_jump_func *jfunc)
1586{
1587 ipa_edge_args *args = ipa_edge_args_sum->get (edge: cs);
1588 ipa_polymorphic_call_context ctx;
1589 ipa_polymorphic_call_context *edge_ctx
1590 = cs ? ipa_get_ith_polymorhic_call_context (args, i: csidx) : NULL;
1591
1592 if (edge_ctx && !edge_ctx->useless_p ())
1593 ctx = *edge_ctx;
1594
1595 if (jfunc->type == IPA_JF_PASS_THROUGH
1596 || jfunc->type == IPA_JF_ANCESTOR)
1597 {
1598 ipa_polymorphic_call_context srcctx;
1599 int srcidx;
1600 bool type_preserved = true;
1601 if (jfunc->type == IPA_JF_PASS_THROUGH)
1602 {
1603 if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
1604 return ctx;
1605 type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
1606 srcidx = ipa_get_jf_pass_through_formal_id (jfunc);
1607 }
1608 else
1609 {
1610 type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
1611 srcidx = ipa_get_jf_ancestor_formal_id (jfunc);
1612 }
1613 if (info->ipcp_orig_node)
1614 {
1615 if (info->known_contexts.exists ())
1616 srcctx = info->known_contexts[srcidx];
1617 }
1618 else
1619 {
1620 if (info->lattices.is_empty ()
1621 || srcidx >= ipa_get_param_count (info))
1622 return ctx;
1623 ipcp_lattice<ipa_polymorphic_call_context> *lat;
1624 lat = ipa_get_poly_ctx_lat (info, i: srcidx);
1625 if (!lat->is_single_const ())
1626 return ctx;
1627 srcctx = lat->values->value;
1628 }
1629 if (srcctx.useless_p ())
1630 return ctx;
1631 if (jfunc->type == IPA_JF_ANCESTOR)
1632 srcctx.offset_by (off: ipa_get_jf_ancestor_offset (jfunc));
1633 if (!type_preserved)
1634 srcctx.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
1635 srcctx.combine_with (ctx);
1636 return srcctx;
1637 }
1638
1639 return ctx;
1640}
1641
1642/* Emulate effects of unary OPERATION and/or conversion from SRC_TYPE to
1643 DST_TYPE on value range in SRC_VR and store it to DST_VR. Return true if
1644 the result is a range that is not VARYING nor UNDEFINED. */
1645
1646static bool
1647ipa_vr_operation_and_type_effects (vrange &dst_vr,
1648 const vrange &src_vr,
1649 enum tree_code operation,
1650 tree dst_type, tree src_type)
1651{
1652 if (!irange::supports_p (type: dst_type) || !irange::supports_p (type: src_type))
1653 return false;
1654
1655 range_op_handler handler (operation);
1656 if (!handler)
1657 return false;
1658
1659 Value_Range varying (dst_type);
1660 varying.set_varying (dst_type);
1661
1662 return (handler.operand_check_p (dst_type, src_type, dst_type)
1663 && handler.fold_range (r&: dst_vr, type: dst_type, lh: src_vr, rh: varying)
1664 && !dst_vr.varying_p ()
1665 && !dst_vr.undefined_p ());
1666}
1667
1668/* Same as above, but the SRC_VR argument is an IPA_VR which must
1669 first be extracted onto a vrange. */
1670
1671static bool
1672ipa_vr_operation_and_type_effects (vrange &dst_vr,
1673 const ipa_vr &src_vr,
1674 enum tree_code operation,
1675 tree dst_type, tree src_type)
1676{
1677 Value_Range tmp;
1678 src_vr.get_vrange (tmp);
1679 return ipa_vr_operation_and_type_effects (dst_vr, src_vr: tmp, operation,
1680 dst_type, src_type);
1681}
1682
1683/* Determine range of JFUNC given that INFO describes the caller node or
1684 the one it is inlined to, CS is the call graph edge corresponding to JFUNC
1685 and PARM_TYPE of the parameter. */
1686
1687void
1688ipa_value_range_from_jfunc (vrange &vr,
1689 ipa_node_params *info, cgraph_edge *cs,
1690 ipa_jump_func *jfunc, tree parm_type)
1691{
1692 vr.set_undefined ();
1693
1694 if (jfunc->m_vr)
1695 ipa_vr_operation_and_type_effects (dst_vr&: vr,
1696 src_vr: *jfunc->m_vr,
1697 operation: NOP_EXPR, dst_type: parm_type,
1698 src_type: jfunc->m_vr->type ());
1699 if (vr.singleton_p ())
1700 return;
1701 if (jfunc->type == IPA_JF_PASS_THROUGH)
1702 {
1703 int idx;
1704 ipcp_transformation *sum
1705 = ipcp_get_transformation_summary (node: cs->caller->inlined_to
1706 ? cs->caller->inlined_to
1707 : cs->caller);
1708 if (!sum || !sum->m_vr)
1709 return;
1710
1711 idx = ipa_get_jf_pass_through_formal_id (jfunc);
1712
1713 if (!(*sum->m_vr)[idx].known_p ())
1714 return;
1715 tree vr_type = ipa_get_type (info, i: idx);
1716 Value_Range srcvr;
1717 (*sum->m_vr)[idx].get_vrange (srcvr);
1718
1719 enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
1720
1721 if (TREE_CODE_CLASS (operation) == tcc_unary)
1722 {
1723 Value_Range res (vr_type);
1724
1725 if (ipa_vr_operation_and_type_effects (dst_vr&: res,
1726 src_vr: srcvr,
1727 operation, dst_type: parm_type,
1728 src_type: vr_type))
1729 vr.intersect (res);
1730 }
1731 else
1732 {
1733 Value_Range op_res (vr_type);
1734 Value_Range res (vr_type);
1735 tree op = ipa_get_jf_pass_through_operand (jfunc);
1736 Value_Range op_vr (vr_type);
1737 range_op_handler handler (operation);
1738
1739 ipa_range_set_and_normalize (r&: op_vr, val: op);
1740
1741 if (!handler
1742 || !op_res.supports_type_p (type: vr_type)
1743 || !handler.fold_range (r&: op_res, type: vr_type, lh: srcvr, rh: op_vr))
1744 op_res.set_varying (vr_type);
1745
1746 if (ipa_vr_operation_and_type_effects (dst_vr&: res,
1747 src_vr: op_res,
1748 operation: NOP_EXPR, dst_type: parm_type,
1749 src_type: vr_type))
1750 vr.intersect (res);
1751 }
1752 }
1753}
1754
/* Determine whether ITEM, a jump function for an aggregate part, evaluates to
   a single known constant value and if so, return it.  Otherwise return NULL.
   NODE and INFO describe the caller node or the one it is inlined to, and its
   related info.  */
1759
1760tree
1761ipa_agg_value_from_jfunc (ipa_node_params *info, cgraph_node *node,
1762 const ipa_agg_jf_item *item)
1763{
1764 tree value = NULL_TREE;
1765 int src_idx;
1766
1767 if (item->offset < 0
1768 || item->jftype == IPA_JF_UNKNOWN
1769 || item->offset >= (HOST_WIDE_INT) UINT_MAX * BITS_PER_UNIT)
1770 return NULL_TREE;
1771
1772 if (item->jftype == IPA_JF_CONST)
1773 return item->value.constant;
1774
1775 gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
1776 || item->jftype == IPA_JF_LOAD_AGG);
1777
1778 src_idx = item->value.pass_through.formal_id;
1779
1780 if (info->ipcp_orig_node)
1781 {
1782 if (item->jftype == IPA_JF_PASS_THROUGH)
1783 value = info->known_csts[src_idx];
1784 else if (ipcp_transformation *ts = ipcp_get_transformation_summary (node))
1785 {
1786 ipa_argagg_value_list avl (ts);
1787 value = avl.get_value (index: src_idx,
1788 unit_offset: item->value.load_agg.offset / BITS_PER_UNIT,
1789 by_ref: item->value.load_agg.by_ref);
1790 }
1791 }
1792 else if (!info->lattices.is_empty ())
1793 {
1794 class ipcp_param_lattices *src_plats
1795 = ipa_get_parm_lattices (info, i: src_idx);
1796
1797 if (item->jftype == IPA_JF_PASS_THROUGH)
1798 {
1799 struct ipcp_lattice<tree> *lat = &src_plats->itself;
1800
1801 if (!lat->is_single_const ())
1802 return NULL_TREE;
1803
1804 value = lat->values->value;
1805 }
1806 else if (src_plats->aggs
1807 && !src_plats->aggs_bottom
1808 && !src_plats->aggs_contain_variable
1809 && src_plats->aggs_by_ref == item->value.load_agg.by_ref)
1810 {
1811 struct ipcp_agg_lattice *aglat;
1812
1813 for (aglat = src_plats->aggs; aglat; aglat = aglat->next)
1814 {
1815 if (aglat->offset > item->value.load_agg.offset)
1816 break;
1817
1818 if (aglat->offset == item->value.load_agg.offset)
1819 {
1820 if (aglat->is_single_const ())
1821 value = aglat->values->value;
1822 break;
1823 }
1824 }
1825 }
1826 }
1827
1828 if (!value)
1829 return NULL_TREE;
1830
1831 if (item->jftype == IPA_JF_LOAD_AGG)
1832 {
1833 tree load_type = item->value.load_agg.type;
1834 tree value_type = TREE_TYPE (value);
1835
1836 /* Ensure value type is compatible with load type. */
1837 if (!useless_type_conversion_p (load_type, value_type))
1838 return NULL_TREE;
1839 }
1840
1841 return ipa_get_jf_arith_result (opcode: item->value.pass_through.operation,
1842 input: value,
1843 operand: item->value.pass_through.operand,
1844 res_type: item->type);
1845}
1846
/* Process all items in AGG_JFUNC relative to caller (or the node the original
   caller is inlined to) NODE, which is described by INFO, and push the results
   to RES as describing values passed in parameter DST_INDEX.  */
1850
1851void
1852ipa_push_agg_values_from_jfunc (ipa_node_params *info, cgraph_node *node,
1853 ipa_agg_jump_function *agg_jfunc,
1854 unsigned dst_index,
1855 vec<ipa_argagg_value> *res)
1856{
1857 unsigned prev_unit_offset = 0;
1858 bool first = true;
1859
1860 for (const ipa_agg_jf_item &item : agg_jfunc->items)
1861 {
1862 tree value = ipa_agg_value_from_jfunc (info, node, item: &item);
1863 if (!value)
1864 continue;
1865
1866 ipa_argagg_value iav;
1867 iav.value = value;
1868 iav.unit_offset = item.offset / BITS_PER_UNIT;
1869 iav.index = dst_index;
1870 iav.by_ref = agg_jfunc->by_ref;
1871 iav.killed = 0;
1872
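      /* Items of an aggregate jump function are sorted by offset, so the
         values pushed to RES end up sorted by unit_offset as well, which is
         what its users expect.  */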
1873 gcc_assert (first
1874 || iav.unit_offset > prev_unit_offset);
1875 prev_unit_offset = iav.unit_offset;
1876 first = false;
1877
1878 res->safe_push (obj: iav);
1879 }
1880}
1881
1882/* If checking is enabled, verify that no lattice is in the TOP state, i.e. not
1883 bottom, not containing a variable component and without any known value at
1884 the same time. */
1885
1886DEBUG_FUNCTION void
1887ipcp_verify_propagated_values (void)
1888{
1889 struct cgraph_node *node;
1890
1891 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
1892 {
1893 ipa_node_params *info = ipa_node_params_sum->get (node);
1894 if (!opt_for_fn (node->decl, flag_ipa_cp)
1895 || !opt_for_fn (node->decl, optimize))
1896 continue;
1897 int i, count = ipa_get_param_count (info);
1898
1899 for (i = 0; i < count; i++)
1900 {
1901 ipcp_lattice<tree> *lat = ipa_get_scalar_lat (info, i);
1902
1903 if (!lat->bottom
1904 && !lat->contains_variable
1905 && lat->values_count == 0)
1906 {
1907 if (dump_file)
1908 {
1909 symtab->dump (f: dump_file);
1910 fprintf (stream: dump_file, format: "\nIPA lattices after constant "
1911 "propagation, before gcc_unreachable:\n");
1912 print_all_lattices (f: dump_file, dump_sources: true, dump_benefits: false);
1913 }
1914
1915 gcc_unreachable ();
1916 }
1917 }
1918 }
1919}
1920
1921/* Return true iff X and Y should be considered equal contexts by IPA-CP. */
1922
1923static bool
1924values_equal_for_ipcp_p (ipa_polymorphic_call_context x,
1925 ipa_polymorphic_call_context y)
1926{
1927 return x.equal_to (x: y);
1928}
1929
1930
/* Add a new value source to the value represented by THIS, marking that a
   value comes from edge CS and (if the underlying jump function is a
   pass-through or an ancestor one) from a caller value SRC_VAL of a caller
   parameter described by SRC_IDX.  OFFSET is negative if the source was the
   scalar value of the parameter itself, otherwise it is the offset within an
   aggregate.  */
1936
1937template <typename valtype>
1938void
1939ipcp_value<valtype>::add_source (cgraph_edge *cs, ipcp_value *src_val,
1940 int src_idx, HOST_WIDE_INT offset)
1941{
1942 ipcp_value_source<valtype> *src;
1943
1944 src = new (ipcp_sources_pool.allocate ()) ipcp_value_source<valtype>;
1945 src->offset = offset;
1946 src->cs = cs;
1947 src->val = src_val;
1948 src->index = src_idx;
1949
1950 src->next = sources;
1951 sources = src;
1952}
1953
/* Allocate a new ipcp_value holding a tree constant, initialize its value to
   CST and its self-recursion generation level to SAME_LAT_GEN_LEVEL, and clear
   all other fields.  */
1956
1957static ipcp_value<tree> *
1958allocate_and_init_ipcp_value (tree cst, unsigned same_lat_gen_level)
1959{
1960 ipcp_value<tree> *val;
1961
1962 val = new (ipcp_cst_values_pool.allocate ()) ipcp_value<tree>();
1963 val->value = cst;
1964 val->self_recursion_generated_level = same_lat_gen_level;
1965 return val;
1966}
1967
/* Allocate a new ipcp_value holding a polymorphic context, initialize its
   value to CTX and its self-recursion generation level to SAME_LAT_GEN_LEVEL,
   and clear all other fields.  */
1970
1971static ipcp_value<ipa_polymorphic_call_context> *
1972allocate_and_init_ipcp_value (ipa_polymorphic_call_context ctx,
1973 unsigned same_lat_gen_level)
1974{
1975 ipcp_value<ipa_polymorphic_call_context> *val;
1976
1977 val = new (ipcp_poly_ctx_values_pool.allocate ())
1978 ipcp_value<ipa_polymorphic_call_context>();
1979 val->value = ctx;
1980 val->self_recursion_generated_level = same_lat_gen_level;
1981 return val;
1982}
1983
/* Try to add NEWVAL to the lattice, potentially creating a new ipcp_value for
   it.  CS, SRC_VAL, SRC_IDX and OFFSET are meant for add_source and have the
   same meaning there.  OFFSET of -1 means the source is scalar and not a part
   of an aggregate.  If non-NULL, VAL_P records the address of the existing or
   newly added ipcp_value.
1989
1990 If the value is generated for a self-recursive call as a result of an
1991 arithmetic pass-through jump-function acting on a value in the same lattice,
1992 SAME_LAT_GEN_LEVEL must be the length of such chain, otherwise it must be
1993 zero. If it is non-zero, PARAM_IPA_CP_VALUE_LIST_SIZE limit is ignored. */
1994
1995template <typename valtype>
1996bool
1997ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
1998 ipcp_value<valtype> *src_val,
1999 int src_idx, HOST_WIDE_INT offset,
2000 ipcp_value<valtype> **val_p,
2001 unsigned same_lat_gen_level)
2002{
2003 ipcp_value<valtype> *val, *last_val = NULL;
2004
2005 if (val_p)
2006 *val_p = NULL;
2007
2008 if (bottom)
2009 return false;
2010
2011 for (val = values; val; last_val = val, val = val->next)
2012 if (values_equal_for_ipcp_p (val->value, newval))
2013 {
2014 if (val_p)
2015 *val_p = val;
2016
2017 if (val->self_recursion_generated_level < same_lat_gen_level)
2018 val->self_recursion_generated_level = same_lat_gen_level;
2019
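        /* Within an SCC the same edge can bring in the same value repeatedly
           as propagation iterates, so avoid adding duplicate sources that
           would only grow the source list.  */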
2020 if (ipa_edge_within_scc (cs))
2021 {
2022 ipcp_value_source<valtype> *s;
2023 for (s = val->sources; s; s = s->next)
2024 if (s->cs == cs && s->val == src_val)
2025 break;
2026 if (s)
2027 return false;
2028 }
2029
2030 val->add_source (cs, src_val, src_idx, offset);
2031 return false;
2032 }
2033
2034 if (!same_lat_gen_level && values_count >= opt_for_fn (cs->callee->decl,
2035 param_ipa_cp_value_list_size))
2036 {
2037 /* We can only free sources, not the values themselves, because sources
2038 of other values in this SCC might point to them. */
2039 for (val = values; val; val = val->next)
2040 {
2041 while (val->sources)
2042 {
2043 ipcp_value_source<valtype> *src = val->sources;
2044 val->sources = src->next;
2045 ipcp_sources_pool.remove (object: (ipcp_value_source<tree>*)src);
2046 }
2047 }
2048 values = NULL;
2049 return set_to_bottom ();
2050 }
2051
2052 values_count++;
2053 val = allocate_and_init_ipcp_value (newval, same_lat_gen_level);
2054 val->add_source (cs, src_val, src_idx, offset);
2055 val->next = NULL;
2056
  /* Add the new value to the end of the value list, which can reduce the
     number of propagation iterations for recursive functions.  */
2059 if (last_val)
2060 last_val->next = val;
2061 else
2062 values = val;
2063
2064 if (val_p)
2065 *val_p = val;
2066
2067 return true;
2068}
2069
/* A helper function that returns the result of the operation specified by
   OPCODE on the value of SRC_VAL.  If non-NULL, OPND1_TYPE is the expected
   type for the value of SRC_VAL.  If the operation is binary, OPND2 is a
   constant value acting as its second operand.  If non-NULL, RES_TYPE is the
   expected type of the result.  */
2075
2076static tree
2077get_val_across_arith_op (enum tree_code opcode,
2078 tree opnd1_type,
2079 tree opnd2,
2080 ipcp_value<tree> *src_val,
2081 tree res_type)
2082{
2083 tree opnd1 = src_val->value;
2084
  /* Skip source values that are incompatible with the specified type.  */
2086 if (opnd1_type
2087 && !useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1)))
2088 return NULL_TREE;
2089
2090 return ipa_get_jf_arith_result (opcode, input: opnd1, operand: opnd2, res_type);
2091}
2092
/* Propagate values through an arithmetic transformation described by a jump
   function associated with edge CS, taking values from SRC_LAT and putting
   them into DEST_LAT.  OPND1_TYPE is the expected type for the values in
   SRC_LAT.  OPND2 is a constant value if the transformation is a binary
   operation.  SRC_OFFSET specifies the offset within an aggregate if SRC_LAT
   describes the lattice of a part of the aggregate.  SRC_IDX is the index of
   the source parameter.  RES_TYPE is the type of the result being propagated.
   Return true if DEST_LAT changed.  */
2101
2102static bool
2103propagate_vals_across_arith_jfunc (cgraph_edge *cs,
2104 enum tree_code opcode,
2105 tree opnd1_type,
2106 tree opnd2,
2107 ipcp_lattice<tree> *src_lat,
2108 ipcp_lattice<tree> *dest_lat,
2109 HOST_WIDE_INT src_offset,
2110 int src_idx,
2111 tree res_type)
2112{
2113 ipcp_value<tree> *src_val;
2114 bool ret = false;
2115
  /* Due to circular dependencies, propagating within an SCC through an
     arithmetic transformation would create an infinite number of values.
     However, for a self-feeding recursive function we allow propagation up to
     a limited depth, which enables a simple kind of recursive function
     versioning.  In all other scenarios we simply drop the lattice to
     bottom.  */
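  /* As an illustration (a hypothetical example, not taken from these sources),
     in a self-feeding recursive function such as

       static int foo (int i) { ... return foo (i + 1); ... }

     the value of I at the recursive call site is computed from I itself, so
     the source and destination lattice coincide and every propagation step
     could produce yet another constant.  The code below therefore caps the
     number of such self-recursively generated values at
     param_ipa_cp_max_recursive_depth.  */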
2121 if (opcode != NOP_EXPR && ipa_edge_within_scc (cs))
2122 {
2123 int i;
2124
2125 int max_recursive_depth = opt_for_fn(cs->caller->decl,
2126 param_ipa_cp_max_recursive_depth);
2127 if (src_lat != dest_lat || max_recursive_depth < 1)
2128 return dest_lat->set_contains_variable ();
2129
      /* No benefit if the probability of recursive execution is low.  */
2131 if (cs->sreal_frequency () * 100
2132 <= ((sreal) 1) * opt_for_fn (cs->caller->decl,
2133 param_ipa_cp_min_recursive_probability))
2134 return dest_lat->set_contains_variable ();
2135
2136 auto_vec<ipcp_value<tree> *, 8> val_seeds;
2137
2138 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2139 {
          /* For now we do not use self-recursively generated values as
             propagation sources.  This is overly conservative but it avoids an
             explosion of the lattice value space, especially when one
             recursive function calls another recursive one.  */
2144 if (src_val->self_recursion_generated_p ())
2145 {
2146 ipcp_value_source<tree> *s;
2147
2148 /* If the lattice has already been propagated for the call site,
2149 no need to do that again. */
2150 for (s = src_val->sources; s; s = s->next)
2151 if (s->cs == cs)
2152 return dest_lat->set_contains_variable ();
2153 }
2154 else
2155 val_seeds.safe_push (obj: src_val);
2156 }
2157
2158 gcc_assert ((int) val_seeds.length () <= param_ipa_cp_value_list_size);
2159
2160 /* Recursively generate lattice values with a limited count. */
2161 FOR_EACH_VEC_ELT (val_seeds, i, src_val)
2162 {
2163 for (int j = 1; j < max_recursive_depth; j++)
2164 {
2165 tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2166 src_val, res_type);
2167 if (!cstval
2168 || !ipacp_value_safe_for_type (param_type: res_type, value: cstval))
2169 break;
2170
2171 ret |= dest_lat->add_value (newval: cstval, cs, src_val, src_idx,
2172 offset: src_offset, val_p: &src_val, same_lat_gen_level: j);
2173 gcc_checking_assert (src_val);
2174 }
2175 }
2176 ret |= dest_lat->set_contains_variable ();
2177 }
2178 else
2179 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2180 {
      /* For now we do not use self-recursively generated values as propagation
         sources, otherwise the value space of a normal lattice could easily
         overflow.  */
2184 if (src_val->self_recursion_generated_p ())
2185 {
2186 ret |= dest_lat->set_contains_variable ();
2187 continue;
2188 }
2189
2190 tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2191 src_val, res_type);
2192 if (cstval
2193 && ipacp_value_safe_for_type (param_type: res_type, value: cstval))
2194 ret |= dest_lat->add_value (newval: cstval, cs, src_val, src_idx,
2195 offset: src_offset);
2196 else
2197 ret |= dest_lat->set_contains_variable ();
2198 }
2199
2200 return ret;
2201}
2202
2203/* Propagate values through a pass-through jump function JFUNC associated with
2204 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2205 is the index of the source parameter. PARM_TYPE is the type of the
2206 parameter to which the result is passed. */
2207
2208static bool
2209propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc,
2210 ipcp_lattice<tree> *src_lat,
2211 ipcp_lattice<tree> *dest_lat, int src_idx,
2212 tree parm_type)
2213{
2214 return propagate_vals_across_arith_jfunc (cs,
2215 opcode: ipa_get_jf_pass_through_operation (jfunc),
2216 NULL_TREE,
2217 opnd2: ipa_get_jf_pass_through_operand (jfunc),
2218 src_lat, dest_lat, src_offset: -1, src_idx, res_type: parm_type);
2219}
2220
2221/* Propagate values through an ancestor jump function JFUNC associated with
2222 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2223 is the index of the source parameter. */
2224
2225static bool
2226propagate_vals_across_ancestor (struct cgraph_edge *cs,
2227 struct ipa_jump_func *jfunc,
2228 ipcp_lattice<tree> *src_lat,
2229 ipcp_lattice<tree> *dest_lat, int src_idx,
2230 tree param_type)
2231{
2232 ipcp_value<tree> *src_val;
2233 bool ret = false;
2234
2235 if (ipa_edge_within_scc (cs))
2236 return dest_lat->set_contains_variable ();
2237
2238 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2239 {
2240 tree t = ipa_get_jf_ancestor_result (jfunc, input: src_val->value);
2241
2242 if (t && ipacp_value_safe_for_type (param_type, value: t))
2243 ret |= dest_lat->add_value (newval: t, cs, src_val, src_idx);
2244 else
2245 ret |= dest_lat->set_contains_variable ();
2246 }
2247
2248 return ret;
2249}
2250
2251/* Propagate scalar values across jump function JFUNC that is associated with
2252 edge CS and put the values into DEST_LAT. PARM_TYPE is the type of the
2253 parameter to which the result is passed. */
2254
2255static bool
2256propagate_scalar_across_jump_function (struct cgraph_edge *cs,
2257 struct ipa_jump_func *jfunc,
2258 ipcp_lattice<tree> *dest_lat,
2259 tree param_type)
2260{
2261 if (dest_lat->bottom)
2262 return false;
2263
2264 if (jfunc->type == IPA_JF_CONST)
2265 {
2266 tree val = ipa_get_jf_constant (jfunc);
2267 if (ipacp_value_safe_for_type (param_type, value: val))
2268 return dest_lat->add_value (newval: val, cs, NULL, src_idx: 0);
2269 else
2270 return dest_lat->set_contains_variable ();
2271 }
2272 else if (jfunc->type == IPA_JF_PASS_THROUGH
2273 || jfunc->type == IPA_JF_ANCESTOR)
2274 {
2275 ipa_node_params *caller_info = ipa_node_params_sum->get (node: cs->caller);
2276 ipcp_lattice<tree> *src_lat;
2277 int src_idx;
2278 bool ret;
2279
2280 if (jfunc->type == IPA_JF_PASS_THROUGH)
2281 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2282 else
2283 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2284
2285 src_lat = ipa_get_scalar_lat (info: caller_info, i: src_idx);
2286 if (src_lat->bottom)
2287 return dest_lat->set_contains_variable ();
2288
2289 /* If we would need to clone the caller and cannot, do not propagate. */
2290 if (!ipcp_versionable_function_p (node: cs->caller)
2291 && (src_lat->contains_variable
2292 || (src_lat->values_count > 1)))
2293 return dest_lat->set_contains_variable ();
2294
2295 if (jfunc->type == IPA_JF_PASS_THROUGH)
2296 ret = propagate_vals_across_pass_through (cs, jfunc, src_lat,
2297 dest_lat, src_idx,
2298 parm_type: param_type);
2299 else
2300 ret = propagate_vals_across_ancestor (cs, jfunc, src_lat, dest_lat,
2301 src_idx, param_type);
2302
2303 if (src_lat->contains_variable)
2304 ret |= dest_lat->set_contains_variable ();
2305
2306 return ret;
2307 }
2308
  /* TODO: We currently do not handle member method pointers in IPA-CP (we only
     use them for indirect inlining); we should propagate them too.  */
2311 return dest_lat->set_contains_variable ();
2312}
2313
/* Propagate polymorphic call contexts across jump function JFUNC that is
   associated with edge CS and describes argument IDX, and put the contexts
   into DEST_LAT.  */
2316
2317static bool
2318propagate_context_across_jump_function (cgraph_edge *cs,
2319 ipa_jump_func *jfunc, int idx,
2320 ipcp_lattice<ipa_polymorphic_call_context> *dest_lat)
2321{
2322 if (dest_lat->bottom)
2323 return false;
2324 ipa_edge_args *args = ipa_edge_args_sum->get (edge: cs);
2325 bool ret = false;
2326 bool added_sth = false;
2327 bool type_preserved = true;
2328
2329 ipa_polymorphic_call_context edge_ctx, *edge_ctx_ptr
2330 = ipa_get_ith_polymorhic_call_context (args, i: idx);
2331
2332 if (edge_ctx_ptr)
2333 edge_ctx = *edge_ctx_ptr;
2334
2335 if (jfunc->type == IPA_JF_PASS_THROUGH
2336 || jfunc->type == IPA_JF_ANCESTOR)
2337 {
2338 ipa_node_params *caller_info = ipa_node_params_sum->get (node: cs->caller);
2339 int src_idx;
2340 ipcp_lattice<ipa_polymorphic_call_context> *src_lat;
2341
2342 /* TODO: Once we figure out how to propagate speculations, it will
2343 probably be a good idea to switch to speculation if type_preserved is
2344 not set instead of punting. */
2345 if (jfunc->type == IPA_JF_PASS_THROUGH)
2346 {
2347 if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
2348 goto prop_fail;
2349 type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
2350 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2351 }
2352 else
2353 {
2354 type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
2355 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2356 }
2357
2358 src_lat = ipa_get_poly_ctx_lat (info: caller_info, i: src_idx);
2359 /* If we would need to clone the caller and cannot, do not propagate. */
2360 if (!ipcp_versionable_function_p (node: cs->caller)
2361 && (src_lat->contains_variable
2362 || (src_lat->values_count > 1)))
2363 goto prop_fail;
2364
2365 ipcp_value<ipa_polymorphic_call_context> *src_val;
2366 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2367 {
2368 ipa_polymorphic_call_context cur = src_val->value;
2369
2370 if (!type_preserved)
2371 cur.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
2372 if (jfunc->type == IPA_JF_ANCESTOR)
2373 cur.offset_by (off: ipa_get_jf_ancestor_offset (jfunc));
2374 /* TODO: In cases we know how the context is going to be used,
2375 we can improve the result by passing proper OTR_TYPE. */
2376 cur.combine_with (edge_ctx);
2377 if (!cur.useless_p ())
2378 {
2379 if (src_lat->contains_variable
2380 && !edge_ctx.equal_to (x: cur))
2381 ret |= dest_lat->set_contains_variable ();
2382 ret |= dest_lat->add_value (newval: cur, cs, src_val, src_idx);
2383 added_sth = true;
2384 }
2385 }
2386 }
2387
2388 prop_fail:
2389 if (!added_sth)
2390 {
2391 if (!edge_ctx.useless_p ())
2392 ret |= dest_lat->add_value (newval: edge_ctx, cs);
2393 else
2394 ret |= dest_lat->set_contains_variable ();
2395 }
2396
2397 return ret;
2398}
2399
/* Propagate known bits across jump function JFUNC, which is associated with
   edge CS and describes argument IDX, and update DEST_LATTICE accordingly.  */
2402
2403bool
2404propagate_bits_across_jump_function (cgraph_edge *cs, int idx,
2405 ipa_jump_func *jfunc,
2406 ipcp_bits_lattice *dest_lattice)
2407{
2408 if (dest_lattice->bottom_p ())
2409 return false;
2410
2411 enum availability availability;
2412 cgraph_node *callee = cs->callee->function_symbol (avail: &availability);
2413 ipa_node_params *callee_info = ipa_node_params_sum->get (node: callee);
2414 tree parm_type = ipa_get_type (info: callee_info, i: idx);
2415
2416 /* For K&R C programs, ipa_get_type() could return NULL_TREE. Avoid the
2417 transform for these cases. Similarly, we can have bad type mismatches
2418 with LTO, avoid doing anything with those too. */
2419 if (!parm_type
2420 || (!INTEGRAL_TYPE_P (parm_type) && !POINTER_TYPE_P (parm_type)))
2421 {
2422 if (dump_file && (dump_flags & TDF_DETAILS))
2423 fprintf (stream: dump_file, format: "Setting dest_lattice to bottom, because type of "
2424 "param %i of %s is NULL or unsuitable for bits propagation\n",
2425 idx, cs->callee->dump_name ());
2426
2427 return dest_lattice->set_to_bottom ();
2428 }
2429
2430 unsigned precision = TYPE_PRECISION (parm_type);
2431 signop sgn = TYPE_SIGN (parm_type);
2432
2433 if (jfunc->type == IPA_JF_PASS_THROUGH
2434 || jfunc->type == IPA_JF_ANCESTOR)
2435 {
2436 ipa_node_params *caller_info = ipa_node_params_sum->get (node: cs->caller);
2437 tree operand = NULL_TREE;
2438 enum tree_code code;
2439 unsigned src_idx;
2440 bool keep_null = false;
2441
2442 if (jfunc->type == IPA_JF_PASS_THROUGH)
2443 {
2444 code = ipa_get_jf_pass_through_operation (jfunc);
2445 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2446 if (code != NOP_EXPR)
2447 operand = ipa_get_jf_pass_through_operand (jfunc);
2448 }
2449 else
2450 {
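          /* An ancestor jump function shifts the pointer by a known byte
             offset, so for the purpose of bit propagation it behaves like a
             POINTER_PLUS_EXPR with that offset as the second operand.  */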
2451 code = POINTER_PLUS_EXPR;
2452 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2453 unsigned HOST_WIDE_INT offset
2454 = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
2455 keep_null = (ipa_get_jf_ancestor_keep_null (jfunc) || !offset);
2456 operand = build_int_cstu (size_type_node, offset);
2457 }
2458
2459 class ipcp_param_lattices *src_lats
2460 = ipa_get_parm_lattices (info: caller_info, i: src_idx);
2461
    /* Try to propagate bits even if the source lattice is bottom, as long as
       the jump function itself is known.  For example, consider:

         int f (int x)
         {
           g (x & 0xff);
         }

       Even if the lattice for x is bottom, we can still propagate the known
       bits of x & 0xff (only the low eight bits can be nonzero), which get
       computed during the ccp1 pass and stored in the jump function during
       the analysis stage.  */
2471
2472 if (!src_lats->bits_lattice.bottom_p ())
2473 {
2474 bool drop_all_ones
2475 = keep_null && !src_lats->bits_lattice.known_nonzero_p ();
2476
2477 return dest_lattice->meet_with (other&: src_lats->bits_lattice, precision,
2478 sgn, code, operand, drop_all_ones);
2479 }
2480 }
2481
2482 Value_Range vr (parm_type);
2483 if (jfunc->m_vr)
2484 {
2485 jfunc->m_vr->get_vrange (vr);
2486 if (!vr.undefined_p () && !vr.varying_p ())
2487 {
2488 irange &r = as_a <irange> (v&: vr);
2489 irange_bitmask bm = r.get_bitmask ();
2490 widest_int mask
2491 = widest_int::from (x: bm.mask (), TYPE_SIGN (parm_type));
2492 widest_int value
2493 = widest_int::from (x: bm.value (), TYPE_SIGN (parm_type));
2494 return dest_lattice->meet_with (value, mask, precision);
2495 }
2496 }
2497 return dest_lattice->set_to_bottom ();
2498}
2499
2500/* Propagate value range across jump function JFUNC that is associated with
2501 edge CS with param of callee of PARAM_TYPE and update DEST_PLATS
2502 accordingly. */
2503
2504static bool
2505propagate_vr_across_jump_function (cgraph_edge *cs, ipa_jump_func *jfunc,
2506 class ipcp_param_lattices *dest_plats,
2507 tree param_type)
2508{
2509 ipcp_vr_lattice *dest_lat = &dest_plats->m_value_range;
2510
2511 if (dest_lat->bottom_p ())
2512 return false;
2513
2514 if (!param_type
2515 || (!INTEGRAL_TYPE_P (param_type)
2516 && !POINTER_TYPE_P (param_type)))
2517 return dest_lat->set_to_bottom ();
2518
2519 if (jfunc->type == IPA_JF_PASS_THROUGH)
2520 {
2521 enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
2522 ipa_node_params *caller_info = ipa_node_params_sum->get (node: cs->caller);
2523 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2524 class ipcp_param_lattices *src_lats
2525 = ipa_get_parm_lattices (info: caller_info, i: src_idx);
2526 tree operand_type = ipa_get_type (info: caller_info, i: src_idx);
2527
2528 if (src_lats->m_value_range.bottom_p ())
2529 return dest_lat->set_to_bottom ();
2530
2531 Value_Range vr (operand_type);
2532 if (TREE_CODE_CLASS (operation) == tcc_unary)
2533 ipa_vr_operation_and_type_effects (dst_vr&: vr,
2534 src_vr: src_lats->m_value_range.m_vr,
2535 operation, dst_type: param_type,
2536 src_type: operand_type);
      /* A crude way to prevent an unbounded number of value range updates in
         SCC components.  We should allow a limited number of updates within
         an SCC, too.  */
2540 else if (!ipa_edge_within_scc (cs))
2541 {
2542 tree op = ipa_get_jf_pass_through_operand (jfunc);
2543 Value_Range op_vr (TREE_TYPE (op));
2544 Value_Range op_res (operand_type);
2545 range_op_handler handler (operation);
2546
2547 ipa_range_set_and_normalize (r&: op_vr, val: op);
2548
2549 if (!handler
2550 || !op_res.supports_type_p (type: operand_type)
2551 || !handler.fold_range (r&: op_res, type: operand_type,
2552 lh: src_lats->m_value_range.m_vr, rh: op_vr))
2553 op_res.set_varying (operand_type);
2554
2555 ipa_vr_operation_and_type_effects (dst_vr&: vr,
2556 src_vr: op_res,
2557 operation: NOP_EXPR, dst_type: param_type,
2558 src_type: operand_type);
2559 }
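      /* If the jump function itself also records a range, the result cannot be
         wider than it, so intersect the two before updating the destination
         lattice.  */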
2560 if (!vr.undefined_p () && !vr.varying_p ())
2561 {
2562 if (jfunc->m_vr)
2563 {
2564 Value_Range jvr (param_type);
2565 if (ipa_vr_operation_and_type_effects (dst_vr&: jvr, src_vr: *jfunc->m_vr,
2566 operation: NOP_EXPR,
2567 dst_type: param_type,
2568 src_type: jfunc->m_vr->type ()))
2569 vr.intersect (r: jvr);
2570 }
2571 return dest_lat->meet_with (p_vr: vr);
2572 }
2573 }
2574 else if (jfunc->type == IPA_JF_CONST)
2575 {
2576 tree val = ipa_get_jf_constant (jfunc);
2577 if (TREE_CODE (val) == INTEGER_CST)
2578 {
2579 val = fold_convert (param_type, val);
2580 if (TREE_OVERFLOW_P (val))
2581 val = drop_tree_overflow (val);
2582
2583 Value_Range tmpvr (val, val);
2584 return dest_lat->meet_with (p_vr: tmpvr);
2585 }
2586 }
2587
2588 Value_Range vr (param_type);
2589 if (jfunc->m_vr
2590 && ipa_vr_operation_and_type_effects (dst_vr&: vr, src_vr: *jfunc->m_vr, operation: NOP_EXPR,
2591 dst_type: param_type,
2592 src_type: jfunc->m_vr->type ()))
2593 return dest_lat->meet_with (p_vr: vr);
2594 else
2595 return dest_lat->set_to_bottom ();
2596}
2597
2598/* If DEST_PLATS already has aggregate items, check that aggs_by_ref matches
2599 NEW_AGGS_BY_REF and if not, mark all aggs as bottoms and return true (in all
2600 other cases, return false). If there are no aggregate items, set
2601 aggs_by_ref to NEW_AGGS_BY_REF. */
2602
2603static bool
2604set_check_aggs_by_ref (class ipcp_param_lattices *dest_plats,
2605 bool new_aggs_by_ref)
2606{
2607 if (dest_plats->aggs)
2608 {
2609 if (dest_plats->aggs_by_ref != new_aggs_by_ref)
2610 {
2611 set_agg_lats_to_bottom (dest_plats);
2612 return true;
2613 }
2614 }
2615 else
2616 dest_plats->aggs_by_ref = new_aggs_by_ref;
2617 return false;
2618}
2619
/* Walk aggregate lattices in DEST_PLATS from **AGLAT on, until **AGLAT is an
   already existing lattice for the given OFFSET and VAL_SIZE, marking all
   skipped lattices as containing variable and checking for overlaps.  If there
   is no already existing lattice for OFFSET and VAL_SIZE, create one,
   initialize its offset and size, set its contains_variable flag to
   PRE_EXISTING, and return true, unless there are too many lattices already,
   in which case return false.  If there are overlaps, turn the whole
   DEST_PLATS to bottom and return false.  If any skipped lattices were newly
   marked as containing variable, set *CHANGE to true.  MAX_AGG_ITEMS is the
   maximum allowed number of lattices.  */
2629
2630static bool
2631merge_agg_lats_step (class ipcp_param_lattices *dest_plats,
2632 HOST_WIDE_INT offset, HOST_WIDE_INT val_size,
2633 struct ipcp_agg_lattice ***aglat,
2634 bool pre_existing, bool *change, int max_agg_items)
2635{
2636 gcc_checking_assert (offset >= 0);
2637
2638 while (**aglat && (**aglat)->offset < offset)
2639 {
2640 if ((**aglat)->offset + (**aglat)->size > offset)
2641 {
2642 set_agg_lats_to_bottom (dest_plats);
2643 return false;
2644 }
2645 *change |= (**aglat)->set_contains_variable ();
2646 *aglat = &(**aglat)->next;
2647 }
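  /* At this point **AGLAT is either NULL or the first lattice whose offset is
     at least OFFSET.  */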
2648
2649 if (**aglat && (**aglat)->offset == offset)
2650 {
2651 if ((**aglat)->size != val_size)
2652 {
2653 set_agg_lats_to_bottom (dest_plats);
2654 return false;
2655 }
2656 gcc_assert (!(**aglat)->next
2657 || (**aglat)->next->offset >= offset + val_size);
2658 return true;
2659 }
2660 else
2661 {
2662 struct ipcp_agg_lattice *new_al;
2663
2664 if (**aglat && (**aglat)->offset < offset + val_size)
2665 {
2666 set_agg_lats_to_bottom (dest_plats);
2667 return false;
2668 }
2669 if (dest_plats->aggs_count == max_agg_items)
2670 return false;
2671 dest_plats->aggs_count++;
2672 new_al = ipcp_agg_lattice_pool.allocate ();
2673
2674 new_al->offset = offset;
2675 new_al->size = val_size;
2676 new_al->contains_variable = pre_existing;
2677
2678 new_al->next = **aglat;
2679 **aglat = new_al;
2680 return true;
2681 }
2682}
2683
/* Mark AGLAT and all other aggregate lattices reachable from it via next
   pointers as containing an unknown value.  */
2686
2687static bool
2688set_chain_of_aglats_contains_variable (struct ipcp_agg_lattice *aglat)
2689{
2690 bool ret = false;
2691 while (aglat)
2692 {
2693 ret |= aglat->set_contains_variable ();
2694 aglat = aglat->next;
2695 }
2696 return ret;
2697}
2698
/* Merge existing aggregate lattices in SRC_PLATS into DEST_PLATS, subtracting
   OFFSET_DELTA.  CS is the call graph edge and SRC_IDX the index of the source
   parameter used for lattice value sources.  Return true if DEST_PLATS changed
   in any way.  */
2703
2704static bool
2705merge_aggregate_lattices (struct cgraph_edge *cs,
2706 class ipcp_param_lattices *dest_plats,
2707 class ipcp_param_lattices *src_plats,
2708 int src_idx, HOST_WIDE_INT offset_delta)
2709{
2710 bool pre_existing = dest_plats->aggs != NULL;
2711 struct ipcp_agg_lattice **dst_aglat;
2712 bool ret = false;
2713
2714 if (set_check_aggs_by_ref (dest_plats, new_aggs_by_ref: src_plats->aggs_by_ref))
2715 return true;
2716 if (src_plats->aggs_bottom)
2717 return set_agg_lats_contain_variable (dest_plats);
2718 if (src_plats->aggs_contain_variable)
2719 ret |= set_agg_lats_contain_variable (dest_plats);
2720 dst_aglat = &dest_plats->aggs;
2721
2722 int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
2723 param_ipa_max_agg_items);
2724 for (struct ipcp_agg_lattice *src_aglat = src_plats->aggs;
2725 src_aglat;
2726 src_aglat = src_aglat->next)
2727 {
2728 HOST_WIDE_INT new_offset = src_aglat->offset - offset_delta;
2729
2730 if (new_offset < 0)
2731 continue;
2732 if (merge_agg_lats_step (dest_plats, offset: new_offset, val_size: src_aglat->size,
2733 aglat: &dst_aglat, pre_existing, change: &ret, max_agg_items))
2734 {
2735 struct ipcp_agg_lattice *new_al = *dst_aglat;
2736
2737 dst_aglat = &(*dst_aglat)->next;
2738 if (src_aglat->bottom)
2739 {
2740 ret |= new_al->set_contains_variable ();
2741 continue;
2742 }
2743 if (src_aglat->contains_variable)
2744 ret |= new_al->set_contains_variable ();
2745 for (ipcp_value<tree> *val = src_aglat->values;
2746 val;
2747 val = val->next)
2748 ret |= new_al->add_value (newval: val->value, cs, src_val: val, src_idx,
2749 offset: src_aglat->offset);
2750 }
2751 else if (dest_plats->aggs_bottom)
2752 return true;
2753 }
2754 ret |= set_chain_of_aglats_contains_variable (*dst_aglat);
2755 return ret;
2756}
2757
/* Determine whether there is anything to propagate from SRC_PLATS through a
   pass-through JFUNC and if so, whether it conforms to the rules about
   propagating values passed by reference.  */
2761
2762static bool
2763agg_pass_through_permissible_p (class ipcp_param_lattices *src_plats,
2764 struct ipa_jump_func *jfunc)
2765{
2766 return src_plats->aggs
2767 && (!src_plats->aggs_by_ref
2768 || ipa_get_jf_pass_through_agg_preserved (jfunc));
2769}
2770
2771/* Propagate values through ITEM, jump function for a part of an aggregate,
2772 into corresponding aggregate lattice AGLAT. CS is the call graph edge
2773 associated with the jump function. Return true if AGLAT changed in any
2774 way. */
2775
2776static bool
2777propagate_aggregate_lattice (struct cgraph_edge *cs,
2778 struct ipa_agg_jf_item *item,
2779 struct ipcp_agg_lattice *aglat)
2780{
2781 class ipa_node_params *caller_info;
2782 class ipcp_param_lattices *src_plats;
2783 struct ipcp_lattice<tree> *src_lat;
2784 HOST_WIDE_INT src_offset;
2785 int src_idx;
2786 tree load_type;
2787 bool ret;
2788
2789 if (item->jftype == IPA_JF_CONST)
2790 {
2791 tree value = item->value.constant;
2792
2793 gcc_checking_assert (is_gimple_ip_invariant (value));
2794 return aglat->add_value (newval: value, cs, NULL, src_idx: 0);
2795 }
2796
2797 gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
2798 || item->jftype == IPA_JF_LOAD_AGG);
2799
2800 caller_info = ipa_node_params_sum->get (node: cs->caller);
2801 src_idx = item->value.pass_through.formal_id;
2802 src_plats = ipa_get_parm_lattices (info: caller_info, i: src_idx);
2803
2804 if (item->jftype == IPA_JF_PASS_THROUGH)
2805 {
2806 load_type = NULL_TREE;
2807 src_lat = &src_plats->itself;
2808 src_offset = -1;
2809 }
2810 else
2811 {
2812 HOST_WIDE_INT load_offset = item->value.load_agg.offset;
2813 struct ipcp_agg_lattice *src_aglat;
2814
2815 for (src_aglat = src_plats->aggs; src_aglat; src_aglat = src_aglat->next)
2816 if (src_aglat->offset >= load_offset)
2817 break;
2818
2819 load_type = item->value.load_agg.type;
2820 if (!src_aglat
2821 || src_aglat->offset > load_offset
2822 || src_aglat->size != tree_to_shwi (TYPE_SIZE (load_type))
2823 || src_plats->aggs_by_ref != item->value.load_agg.by_ref)
2824 return aglat->set_contains_variable ();
2825
2826 src_lat = src_aglat;
2827 src_offset = load_offset;
2828 }
2829
2830 if (src_lat->bottom
2831 || (!ipcp_versionable_function_p (node: cs->caller)
2832 && !src_lat->is_single_const ()))
2833 return aglat->set_contains_variable ();
2834
2835 ret = propagate_vals_across_arith_jfunc (cs,
2836 opcode: item->value.pass_through.operation,
2837 opnd1_type: load_type,
2838 opnd2: item->value.pass_through.operand,
2839 src_lat, dest_lat: aglat,
2840 src_offset,
2841 src_idx,
2842 res_type: item->type);
2843
2844 if (src_lat->contains_variable)
2845 ret |= aglat->set_contains_variable ();
2846
2847 return ret;
2848}
2849
/* Propagate aggregate values across jump function JFUNC that is associated
   with edge CS and put the values into DEST_PLATS.  */
2852
2853static bool
2854propagate_aggs_across_jump_function (struct cgraph_edge *cs,
2855 struct ipa_jump_func *jfunc,
2856 class ipcp_param_lattices *dest_plats)
2857{
2858 bool ret = false;
2859
2860 if (dest_plats->aggs_bottom)
2861 return false;
2862
2863 if (jfunc->type == IPA_JF_PASS_THROUGH
2864 && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
2865 {
2866 ipa_node_params *caller_info = ipa_node_params_sum->get (node: cs->caller);
2867 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2868 class ipcp_param_lattices *src_plats;
2869
2870 src_plats = ipa_get_parm_lattices (info: caller_info, i: src_idx);
2871 if (agg_pass_through_permissible_p (src_plats, jfunc))
2872 {
2873 /* Currently we do not produce clobber aggregate jump
2874 functions, replace with merging when we do. */
2875 gcc_assert (!jfunc->agg.items);
2876 ret |= merge_aggregate_lattices (cs, dest_plats, src_plats,
2877 src_idx, offset_delta: 0);
2878 return ret;
2879 }
2880 }
2881 else if (jfunc->type == IPA_JF_ANCESTOR
2882 && ipa_get_jf_ancestor_agg_preserved (jfunc))
2883 {
2884 ipa_node_params *caller_info = ipa_node_params_sum->get (node: cs->caller);
2885 int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2886 class ipcp_param_lattices *src_plats;
2887
2888 src_plats = ipa_get_parm_lattices (info: caller_info, i: src_idx);
2889 if (src_plats->aggs && src_plats->aggs_by_ref)
2890 {
2891 /* Currently we do not produce clobber aggregate jump
2892 functions, replace with merging when we do. */
2893 gcc_assert (!jfunc->agg.items);
2894 ret |= merge_aggregate_lattices (cs, dest_plats, src_plats, src_idx,
2895 offset_delta: ipa_get_jf_ancestor_offset (jfunc));
2896 }
2897 else if (!src_plats->aggs_by_ref)
2898 ret |= set_agg_lats_to_bottom (dest_plats);
2899 else
2900 ret |= set_agg_lats_contain_variable (dest_plats);
2901 return ret;
2902 }
2903
2904 if (jfunc->agg.items)
2905 {
2906 bool pre_existing = dest_plats->aggs != NULL;
2907 struct ipcp_agg_lattice **aglat = &dest_plats->aggs;
2908 struct ipa_agg_jf_item *item;
2909 int i;
2910
2911 if (set_check_aggs_by_ref (dest_plats, new_aggs_by_ref: jfunc->agg.by_ref))
2912 return true;
2913
2914 int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
2915 param_ipa_max_agg_items);
2916 FOR_EACH_VEC_ELT (*jfunc->agg.items, i, item)
2917 {
2918 HOST_WIDE_INT val_size;
2919
2920 if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
2921 continue;
2922 val_size = tree_to_shwi (TYPE_SIZE (item->type));
2923
2924 if (merge_agg_lats_step (dest_plats, offset: item->offset, val_size,
2925 aglat: &aglat, pre_existing, change: &ret, max_agg_items))
2926 {
2927 ret |= propagate_aggregate_lattice (cs, item, aglat: *aglat);
2928 aglat = &(*aglat)->next;
2929 }
2930 else if (dest_plats->aggs_bottom)
2931 return true;
2932 }
2933
2934 ret |= set_chain_of_aglats_contains_variable (*aglat);
2935 }
2936 else
2937 ret |= set_agg_lats_contain_variable (dest_plats);
2938
2939 return ret;
2940}
2941
/* Return true if on the way from CS->caller to the final (non-alias and
   non-thunk) destination, the call passes through a thunk.  */
2944
2945static bool
2946call_passes_through_thunk (cgraph_edge *cs)
2947{
2948 cgraph_node *alias_or_thunk = cs->callee;
2949 while (alias_or_thunk->alias)
2950 alias_or_thunk = alias_or_thunk->get_alias_target ();
2951 return alias_or_thunk->thunk;
2952}
2953
/* Propagate constants from the caller to the callee of CS.  Return true if
   any lattice of the callee has changed.  */
2956
2957static bool
2958propagate_constants_across_call (struct cgraph_edge *cs)
2959{
2960 class ipa_node_params *callee_info;
2961 enum availability availability;
2962 cgraph_node *callee;
2963 class ipa_edge_args *args;
2964 bool ret = false;
2965 int i, args_count, parms_count;
2966
2967 callee = cs->callee->function_symbol (avail: &availability);
2968 if (!callee->definition)
2969 return false;
2970 gcc_checking_assert (callee->has_gimple_body_p ());
2971 callee_info = ipa_node_params_sum->get (node: callee);
2972 if (!callee_info)
2973 return false;
2974
2975 args = ipa_edge_args_sum->get (edge: cs);
2976 parms_count = ipa_get_param_count (info: callee_info);
2977 if (parms_count == 0)
2978 return false;
2979 if (!args
2980 || !opt_for_fn (cs->caller->decl, flag_ipa_cp)
2981 || !opt_for_fn (cs->caller->decl, optimize))
2982 {
2983 for (i = 0; i < parms_count; i++)
2984 ret |= set_all_contains_variable (ipa_get_parm_lattices (info: callee_info,
2985 i));
2986 return ret;
2987 }
2988 args_count = ipa_get_cs_argument_count (args);
2989
2990 /* If this call goes through a thunk we must not propagate to the first (0th)
2991 parameter. However, we might need to uncover a thunk from below a series
2992 of aliases first. */
2993 if (call_passes_through_thunk (cs))
2994 {
2995 ret |= set_all_contains_variable (ipa_get_parm_lattices (info: callee_info,
2996 i: 0));
2997 i = 1;
2998 }
2999 else
3000 i = 0;
3001
3002 for (; (i < args_count) && (i < parms_count); i++)
3003 {
3004 struct ipa_jump_func *jump_func = ipa_get_ith_jump_func (args, i);
3005 class ipcp_param_lattices *dest_plats;
3006 tree param_type = ipa_get_type (info: callee_info, i);
3007
3008 dest_plats = ipa_get_parm_lattices (info: callee_info, i);
3009 if (availability == AVAIL_INTERPOSABLE)
3010 ret |= set_all_contains_variable (dest_plats);
3011 else
3012 {
3013 ret |= propagate_scalar_across_jump_function (cs, jfunc: jump_func,
3014 dest_lat: &dest_plats->itself,
3015 param_type);
3016 ret |= propagate_context_across_jump_function (cs, jfunc: jump_func, idx: i,
3017 dest_lat: &dest_plats->ctxlat);
3018 ret
3019 |= propagate_bits_across_jump_function (cs, idx: i, jfunc: jump_func,
3020 dest_lattice: &dest_plats->bits_lattice);
3021 ret |= propagate_aggs_across_jump_function (cs, jfunc: jump_func,
3022 dest_plats);
3023 if (opt_for_fn (callee->decl, flag_ipa_vrp))
3024 ret |= propagate_vr_across_jump_function (cs, jfunc: jump_func,
3025 dest_plats, param_type);
3026 else
3027 ret |= dest_plats->m_value_range.set_to_bottom ();
3028 }
3029 }
3030 for (; i < parms_count; i++)
3031 ret |= set_all_contains_variable (ipa_get_parm_lattices (info: callee_info, i));
3032
3033 return ret;
3034}
3035
/* If an indirect edge IE can be turned into a direct one based on constants
   in KNOWN_CSTS, polymorphic call contexts in KNOWN_CONTEXTS and aggregate
   values in AVS, return the destination.  Store into *SPECULATIVE whether the
   discovered target is only a speculative guess.  */
3040
3041static tree
3042ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie,
3043 const vec<tree> &known_csts,
3044 const vec<ipa_polymorphic_call_context> &known_contexts,
3045 const ipa_argagg_value_list &avs,
3046 bool *speculative)
3047{
3048 int param_index = ie->indirect_info->param_index;
3049 HOST_WIDE_INT anc_offset;
3050 tree t = NULL;
3051 tree target = NULL;
3052
3053 *speculative = false;
3054
3055 if (param_index == -1)
3056 return NULL_TREE;
3057
3058 if (!ie->indirect_info->polymorphic)
3059 {
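      /* For a simple (non-polymorphic) indirect call, the call can be made
         direct if the pointer argument (or the relevant piece of an aggregate
         passed in it) is known to be the address of a function.  */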
3060 tree t = NULL;
3061
3062 if (ie->indirect_info->agg_contents)
3063 {
3064 t = NULL;
3065 if ((unsigned) param_index < known_csts.length ()
3066 && known_csts[param_index])
3067 t = ipa_find_agg_cst_from_init (scalar: known_csts[param_index],
3068 offset: ie->indirect_info->offset,
3069 by_ref: ie->indirect_info->by_ref);
3070
3071 if (!t && ie->indirect_info->guaranteed_unmodified)
3072 t = avs.get_value (index: param_index,
3073 unit_offset: ie->indirect_info->offset / BITS_PER_UNIT,
3074 by_ref: ie->indirect_info->by_ref);
3075 }
3076 else if ((unsigned) param_index < known_csts.length ())
3077 t = known_csts[param_index];
3078
3079 if (t
3080 && TREE_CODE (t) == ADDR_EXPR
3081 && TREE_CODE (TREE_OPERAND (t, 0)) == FUNCTION_DECL)
3082 return TREE_OPERAND (t, 0);
3083 else
3084 return NULL_TREE;
3085 }
3086
3087 if (!opt_for_fn (ie->caller->decl, flag_devirtualize))
3088 return NULL_TREE;
3089
3090 gcc_assert (!ie->indirect_info->agg_contents);
3091 gcc_assert (!ie->indirect_info->by_ref);
3092 anc_offset = ie->indirect_info->offset;
3093
3094 t = NULL;
3095
3096 if ((unsigned) param_index < known_csts.length ()
3097 && known_csts[param_index])
3098 t = ipa_find_agg_cst_from_init (scalar: known_csts[param_index],
3099 offset: ie->indirect_info->offset, by_ref: true);
3100
  /* Try to work out the value of the virtual table pointer in replacements or
     known aggregate values.  */
3103 if (!t)
3104 t = avs.get_value (index: param_index,
3105 unit_offset: ie->indirect_info->offset / BITS_PER_UNIT,
3106 by_ref: true);
3107
3108 /* If we found the virtual table pointer, lookup the target. */
3109 if (t)
3110 {
3111 tree vtable;
3112 unsigned HOST_WIDE_INT offset;
3113 if (vtable_pointer_value_to_vtable (t, &vtable, &offset))
3114 {
3115 bool can_refer;
3116 target = gimple_get_virt_method_for_vtable (ie->indirect_info->otr_token,
3117 vtable, offset, can_refer: &can_refer);
3118 if (can_refer)
3119 {
3120 if (!target
3121 || fndecl_built_in_p (node: target, name1: BUILT_IN_UNREACHABLE)
3122 || !possible_polymorphic_call_target_p
3123 (e: ie, n: cgraph_node::get (decl: target)))
3124 {
3125 /* Do not speculate builtin_unreachable, it is stupid! */
3126 if (ie->indirect_info->vptr_changed)
3127 return NULL;
3128 target = ipa_impossible_devirt_target (ie, target);
3129 }
3130 *speculative = ie->indirect_info->vptr_changed;
3131 if (!*speculative)
3132 return target;
3133 }
3134 }
3135 }
3136
  /* Do we know the constant value of the pointer?  */
3138 if (!t && (unsigned) param_index < known_csts.length ())
3139 t = known_csts[param_index];
3140
3141 gcc_checking_assert (!t || TREE_CODE (t) != TREE_BINFO);
3142
3143 ipa_polymorphic_call_context context;
3144 if (known_contexts.length () > (unsigned int) param_index)
3145 {
3146 context = known_contexts[param_index];
3147 context.offset_by (off: anc_offset);
3148 if (ie->indirect_info->vptr_changed)
3149 context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
3150 otr_type: ie->indirect_info->otr_type);
3151 if (t)
3152 {
3153 ipa_polymorphic_call_context ctx2 = ipa_polymorphic_call_context
3154 (t, ie->indirect_info->otr_type, anc_offset);
3155 if (!ctx2.useless_p ())
3156 context.combine_with (ctx2, otr_type: ie->indirect_info->otr_type);
3157 }
3158 }
3159 else if (t)
3160 {
3161 context = ipa_polymorphic_call_context (t, ie->indirect_info->otr_type,
3162 anc_offset);
3163 if (ie->indirect_info->vptr_changed)
3164 context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
3165 otr_type: ie->indirect_info->otr_type);
3166 }
3167 else
3168 return NULL_TREE;
3169
3170 vec <cgraph_node *>targets;
3171 bool final;
3172
3173 targets = possible_polymorphic_call_targets
3174 (ie->indirect_info->otr_type,
3175 ie->indirect_info->otr_token,
3176 context, copletep: &final);
3177 if (!final || targets.length () > 1)
3178 {
3179 struct cgraph_node *node;
3180 if (*speculative)
3181 return target;
3182 if (!opt_for_fn (ie->caller->decl, flag_devirtualize_speculatively)
3183 || ie->speculative || !ie->maybe_hot_p ())
3184 return NULL;
3185 node = try_speculative_devirtualization (ie->indirect_info->otr_type,
3186 ie->indirect_info->otr_token,
3187 context);
3188 if (node)
3189 {
3190 *speculative = true;
3191 target = node->decl;
3192 }
3193 else
3194 return NULL;
3195 }
3196 else
3197 {
3198 *speculative = false;
3199 if (targets.length () == 1)
3200 target = targets[0]->decl;
3201 else
3202 target = ipa_impossible_devirt_target (ie, NULL_TREE);
3203 }
3204
3205 if (target && !possible_polymorphic_call_target_p (e: ie,
3206 n: cgraph_node::get (decl: target)))
3207 {
3208 if (*speculative)
3209 return NULL;
3210 target = ipa_impossible_devirt_target (ie, target);
3211 }
3212
3213 return target;
3214}
3215
/* If an indirect edge IE can be turned into a direct one based on data in
   AVALS, return the destination.  Store into *SPECULATIVE a boolean
   determining whether the discovered target is only a speculative guess.  */
3219
3220tree
3221ipa_get_indirect_edge_target (struct cgraph_edge *ie,
3222 ipa_call_arg_values *avals,
3223 bool *speculative)
3224{
3225 ipa_argagg_value_list avl (avals);
3226 return ipa_get_indirect_edge_target_1 (ie, known_csts: avals->m_known_vals,
3227 known_contexts: avals->m_known_contexts,
3228 avs: avl, speculative);
3229}
3230
3231/* Calculate devirtualization time bonus for NODE, assuming we know information
3232 about arguments stored in AVALS. */
3233
3234static int
3235devirtualization_time_bonus (struct cgraph_node *node,
3236 ipa_auto_call_arg_values *avals)
3237{
3238 struct cgraph_edge *ie;
3239 int res = 0;
3240
3241 for (ie = node->indirect_calls; ie; ie = ie->next_callee)
3242 {
3243 struct cgraph_node *callee;
3244 class ipa_fn_summary *isummary;
3245 enum availability avail;
3246 tree target;
3247 bool speculative;
3248
3249 ipa_argagg_value_list avl (avals);
3250 target = ipa_get_indirect_edge_target_1 (ie, known_csts: avals->m_known_vals,
3251 known_contexts: avals->m_known_contexts,
3252 avs: avl, speculative: &speculative);
3253 if (!target)
3254 continue;
3255
3256 /* Only bare minimum benefit for clearly un-inlineable targets. */
3257 res += 1;
3258 callee = cgraph_node::get (decl: target);
3259 if (!callee || !callee->definition)
3260 continue;
3261 callee = callee->function_symbol (avail: &avail);
3262 if (avail < AVAIL_AVAILABLE)
3263 continue;
3264 isummary = ipa_fn_summaries->get (node: callee);
3265 if (!isummary || !isummary->inlinable)
3266 continue;
3267
3268 int size = ipa_size_summaries->get (node: callee)->size;
      /* FIXME: The values below need re-considering and perhaps also
         integrating into the cost metrics, at least in some very basic way.  */
3271 int max_inline_insns_auto
3272 = opt_for_fn (callee->decl, param_max_inline_insns_auto);
3273 if (size <= max_inline_insns_auto / 4)
3274 res += 31 / ((int)speculative + 1);
3275 else if (size <= max_inline_insns_auto / 2)
3276 res += 15 / ((int)speculative + 1);
3277 else if (size <= max_inline_insns_auto
3278 || DECL_DECLARED_INLINE_P (callee->decl))
3279 res += 7 / ((int)speculative + 1);
3280 }
3281
3282 return res;
3283}
3284
3285/* Return time bonus incurred because of hints stored in ESTIMATES. */
3286
3287static int
3288hint_time_bonus (cgraph_node *node, const ipa_call_estimates &estimates)
3289{
3290 int result = 0;
3291 ipa_hints hints = estimates.hints;
3292 if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride))
3293 result += opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3294
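  /* On top of the flat bonus above, add a per-loop bonus for every loop with a
     known number of iterations or a known stride.  */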
3295 sreal bonus_for_one = opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3296
3297 if (hints & INLINE_HINT_loop_iterations)
3298 result += (estimates.loops_with_known_iterations * bonus_for_one).to_int ();
3299
3300 if (hints & INLINE_HINT_loop_stride)
3301 result += (estimates.loops_with_known_strides * bonus_for_one).to_int ();
3302
3303 return result;
3304}
3305
3306/* If there is a reason to penalize the function described by INFO in the
3307 cloning goodness evaluation, do so. */
3308
3309static inline sreal
3310incorporate_penalties (cgraph_node *node, ipa_node_params *info,
3311 sreal evaluation)
3312{
3313 if (info->node_within_scc && !info->node_is_self_scc)
3314 evaluation = (evaluation
3315 * (100 - opt_for_fn (node->decl,
3316 param_ipa_cp_recursion_penalty))) / 100;
3317
3318 if (info->node_calling_single_call)
3319 evaluation = (evaluation
3320 * (100 - opt_for_fn (node->decl,
3321 param_ipa_cp_single_call_penalty)))
3322 / 100;
3323
3324 return evaluation;
3325}
3326
/* Return true if cloning NODE is a good idea, given the estimated TIME_BENEFIT
   and SIZE_COST, with FREQ_SUM and COUNT_SUM being the sums of frequencies and
   profile counts of incoming edges to the potential new clone.  */
3330
3331static bool
3332good_cloning_opportunity_p (struct cgraph_node *node, sreal time_benefit,
3333 sreal freq_sum, profile_count count_sum,
3334 int size_cost)
3335{
3336 if (time_benefit == 0
3337 || !opt_for_fn (node->decl, flag_ipa_cp_clone)
3338 || node->optimize_for_size_p ())
3339 return false;
3340
3341 gcc_assert (size_cost > 0);
3342
3343 ipa_node_params *info = ipa_node_params_sum->get (node);
3344 int eval_threshold = opt_for_fn (node->decl, param_ipa_cp_eval_threshold);
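  /* The evaluation computed below is essentially the time benefit weighted by
     relative execution frequency (or profile weight) divided by the size cost,
     scaled by 1000 and reduced by the SCC and single-call penalties; it is
     then compared against param_ipa_cp_eval_threshold.  */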
3345 if (count_sum.nonzero_p ())
3346 {
3347 gcc_assert (base_count.nonzero_p ());
3348 sreal factor = count_sum.probability_in (overall: base_count).to_sreal ();
3349 sreal evaluation = (time_benefit * factor) / size_cost;
3350 evaluation = incorporate_penalties (node, info, evaluation);
3351 evaluation *= 1000;
3352
3353 if (dump_file && (dump_flags & TDF_DETAILS))
3354 {
3355 fprintf (stream: dump_file, format: " good_cloning_opportunity_p (time: %g, "
3356 "size: %i, count_sum: ", time_benefit.to_double (),
3357 size_cost);
3358 count_sum.dump (f: dump_file);
3359 fprintf (stream: dump_file, format: "%s%s) -> evaluation: %.2f, threshold: %i\n",
3360 info->node_within_scc
3361 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
3362 info->node_calling_single_call ? ", single_call" : "",
3363 evaluation.to_double (), eval_threshold);
3364 }
3365
3366 return evaluation.to_int () >= eval_threshold;
3367 }
3368 else
3369 {
3370 sreal evaluation = (time_benefit * freq_sum) / size_cost;
3371 evaluation = incorporate_penalties (node, info, evaluation);
3372 evaluation *= 1000;
3373
3374 if (dump_file && (dump_flags & TDF_DETAILS))
3375 fprintf (stream: dump_file, format: " good_cloning_opportunity_p (time: %g, "
3376 "size: %i, freq_sum: %g%s%s) -> evaluation: %.2f, "
3377 "threshold: %i\n",
3378 time_benefit.to_double (), size_cost, freq_sum.to_double (),
3379 info->node_within_scc
3380 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
3381 info->node_calling_single_call ? ", single_call" : "",
3382 evaluation.to_double (), eval_threshold);
3383
3384 return evaluation.to_int () >= eval_threshold;
3385 }
3386}
3387
3388/* Grow vectors in AVALS and fill them with information about values of
3389 parameters that are known to be independent of the context. Only calculate
3390 m_known_aggs if CALCULATE_AGGS is true. INFO describes the function. If
3391 REMOVABLE_PARAMS_COST is non-NULL, the movement cost of all removable
3392 parameters will be stored in it.
3393
3394 TODO: Also grow context independent value range vectors. */
3395
3396static bool
3397gather_context_independent_values (class ipa_node_params *info,
3398 ipa_auto_call_arg_values *avals,
3399 bool calculate_aggs,
3400 int *removable_params_cost)
3401{
3402 int i, count = ipa_get_param_count (info);
3403 bool ret = false;
3404
3405 avals->m_known_vals.safe_grow_cleared (len: count, exact: true);
3406 avals->m_known_contexts.safe_grow_cleared (len: count, exact: true);
3407
3408 if (removable_params_cost)
3409 *removable_params_cost = 0;
3410
3411 for (i = 0; i < count; i++)
3412 {
3413 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3414 ipcp_lattice<tree> *lat = &plats->itself;
3415
3416 if (lat->is_single_const ())
3417 {
3418 ipcp_value<tree> *val = lat->values;
3419 gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
3420 avals->m_known_vals[i] = val->value;
3421 if (removable_params_cost)
3422 *removable_params_cost
3423 += estimate_move_cost (TREE_TYPE (val->value), false);
3424 ret = true;
3425 }
3426 else if (removable_params_cost
3427 && !ipa_is_param_used (info, i))
3428 *removable_params_cost
3429 += ipa_get_param_move_cost (info, i);
3430
3431 if (!ipa_is_param_used (info, i))
3432 continue;
3433
3434 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
      /* Do not account a known context as a reason for cloning on its own; it
         is recorded only so that we can see whether it permits
         devirtualization.  */
3437 if (ctxlat->is_single_const ())
3438 avals->m_known_contexts[i] = ctxlat->values->value;
3439
3440 if (calculate_aggs)
3441 ret |= push_agg_values_from_plats (plats, dest_index: i, unit_delta: 0, res: &avals->m_known_aggs);
3442 }
3443
3444 return ret;
3445}
3446
/* Perform time and size measurement of NODE with the context given in AVALS,
   calculate the benefit compared to the node without specialization and store
   it into VAL.  Take into account REMOVABLE_PARAMS_COST of all
   context-independent or unused removable parameters and EST_MOVE_COST, the
   estimated movement cost of the considered parameter.  */
3452
3453static void
3454perform_estimation_of_a_value (cgraph_node *node,
3455 ipa_auto_call_arg_values *avals,
3456 int removable_params_cost, int est_move_cost,
3457 ipcp_value_base *val)
3458{
3459 sreal time_benefit;
3460 ipa_call_estimates estimates;
3461
3462 estimate_ipcp_clone_size_and_time (node, avals, estimates: &estimates);
3463
3464 /* Extern inline functions have no cloning local time benefits because they
3465 will be inlined anyway. The only reason to clone them is if it enables
3466 optimization in any of the functions they call. */
3467 if (DECL_EXTERNAL (node->decl) && DECL_DECLARED_INLINE_P (node->decl))
3468 time_benefit = 0;
3469 else
3470 time_benefit = (estimates.nonspecialized_time - estimates.time)
3471 + (devirtualization_time_bonus (node, avals)
3472 + hint_time_bonus (node, estimates)
3473 + removable_params_cost + est_move_cost);
3474
3475 int size = estimates.size;
  gcc_checking_assert (size >= 0);
  /* The inliner-heuristics based estimates may think that in certain contexts
     some functions do not have any size at all, but we want all
     specializations to have at least a tiny cost, if only to avoid dividing
     by zero.  */
3481 if (size == 0)
3482 size = 1;
3483
3484 val->local_time_benefit = time_benefit;
3485 val->local_size_cost = size;
3486}
3487
/* Get the overall limit on unit growth based on growth-related parameters of
   NODE.  It does not really make sense to mix functions with different overall
   growth limits, but it is possible, and if it happens, we do not want to
   select one limit at random.  */
3492
3493static long
3494get_max_overall_size (cgraph_node *node)
3495{
3496 long max_new_size = orig_overall_size;
3497 long large_unit = opt_for_fn (node->decl, param_ipa_cp_large_unit_insns);
3498 if (max_new_size < large_unit)
3499 max_new_size = large_unit;
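  /* On top of that base, allow the unit to grow by param_ipa_cp_unit_growth
     percent.  */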
3500 int unit_growth = opt_for_fn (node->decl, param_ipa_cp_unit_growth);
3501 max_new_size += max_new_size * unit_growth / 100 + 1;
3502 return max_new_size;
3503}
3504
3505/* Return true if NODE should be cloned just for a parameter removal, possibly
3506 dumping a reason if not. */
3507
3508static bool
3509clone_for_param_removal_p (cgraph_node *node)
3510{
3511 if (!node->can_change_signature)
3512 {
3513 if (dump_file && (dump_flags & TDF_DETAILS))
3514 fprintf (stream: dump_file, format: " Not considering cloning to remove parameters, "
3515 "function cannot change signature.\n");
3516 return false;
3517 }
3518 if (node->can_be_local_p ())
3519 {
3520 if (dump_file && (dump_flags & TDF_DETAILS))
3521 fprintf (stream: dump_file, format: " Not considering cloning to remove parameters, "
3522 "IPA-SRA can do it potentially better.\n");
3523 return false;
3524 }
3525 return true;
3526}
3527
3528/* Iterate over known values of parameters of NODE and estimate the local
3529 effects in terms of time and size they have. */
3530
3531static void
3532estimate_local_effects (struct cgraph_node *node)
3533{
3534 ipa_node_params *info = ipa_node_params_sum->get (node);
3535 int count = ipa_get_param_count (info);
3536 bool always_const;
3537 int removable_params_cost;
3538
3539 if (!count || !ipcp_versionable_function_p (node))
3540 return;
3541
3542 if (dump_file && (dump_flags & TDF_DETAILS))
3543 fprintf (stream: dump_file, format: "\nEstimating effects for %s.\n", node->dump_name ());
3544
3545 ipa_auto_call_arg_values avals;
3546 always_const = gather_context_independent_values (info, avals: &avals, calculate_aggs: true,
3547 removable_params_cost: &removable_params_cost);
3548 int devirt_bonus = devirtualization_time_bonus (node, avals: &avals);
3549 if (always_const || devirt_bonus
3550 || (removable_params_cost && clone_for_param_removal_p (node)))
3551 {
3552 struct caller_statistics stats;
3553 ipa_call_estimates estimates;
3554
3555 init_caller_stats (stats: &stats);
3556 node->call_for_symbol_thunks_and_aliases (callback: gather_caller_stats, data: &stats,
3557 include_overwritable: false);
3558 estimate_ipcp_clone_size_and_time (node, avals: &avals, estimates: &estimates);
3559 sreal time = estimates.nonspecialized_time - estimates.time;
3560 time += devirt_bonus;
3561 time += hint_time_bonus (node, estimates);
3562 time += removable_params_cost;
3563 int size = estimates.size - stats.n_calls * removable_params_cost;
3564
3565 if (dump_file)
3566 fprintf (stream: dump_file, format: " - context independent values, size: %i, "
3567 "time_benefit: %f\n", size, (time).to_double ());
3568
3569 if (size <= 0 || node->local)
3570 {
3571 info->do_clone_for_all_contexts = true;
3572
3573 if (dump_file)
3574 fprintf (stream: dump_file, format: " Decided to specialize for all "
3575 "known contexts, code not going to grow.\n");
3576 }
3577 else if (good_cloning_opportunity_p (node, time_benefit: time, freq_sum: stats.freq_sum,
3578 count_sum: stats.count_sum, size_cost: size))
3579 {
3580 if (size + overall_size <= get_max_overall_size (node))
3581 {
3582 info->do_clone_for_all_contexts = true;
3583 overall_size += size;
3584
3585 if (dump_file)
3586 fprintf (stream: dump_file, format: " Decided to specialize for all "
3587 "known contexts, growth (to %li) deemed "
3588 "beneficial.\n", overall_size);
3589 }
3590 else if (dump_file && (dump_flags & TDF_DETAILS))
3591 fprintf (stream: dump_file, format: " Not cloning for all contexts because "
3592 "maximum unit size would be reached with %li.\n",
3593 size + overall_size);
3594 }
3595 else if (dump_file && (dump_flags & TDF_DETAILS))
3596 fprintf (stream: dump_file, format: " Not cloning for all contexts because "
3597 "!good_cloning_opportunity_p.\n");
3598
3599 }
3600
3601 for (int i = 0; i < count; i++)
3602 {
3603 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3604 ipcp_lattice<tree> *lat = &plats->itself;
3605 ipcp_value<tree> *val;
3606
3607 if (lat->bottom
3608 || !lat->values
3609 || avals.m_known_vals[i])
3610 continue;
3611
3612 for (val = lat->values; val; val = val->next)
3613 {
3614 gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
3615 avals.m_known_vals[i] = val->value;
3616
3617 int emc = estimate_move_cost (TREE_TYPE (val->value), true);
3618 perform_estimation_of_a_value (node, avals: &avals, removable_params_cost,
3619 est_move_cost: emc, val);
3620
3621 if (dump_file && (dump_flags & TDF_DETAILS))
3622 {
3623 fprintf (stream: dump_file, format: " - estimates for value ");
3624 print_ipcp_constant_value (f: dump_file, v: val->value);
3625 fprintf (stream: dump_file, format: " for ");
3626 ipa_dump_param (dump_file, info, i);
3627 fprintf (stream: dump_file, format: ": time_benefit: %g, size: %i\n",
3628 val->local_time_benefit.to_double (),
3629 val->local_size_cost);
3630 }
3631 }
3632 avals.m_known_vals[i] = NULL_TREE;
3633 }
3634
3635 for (int i = 0; i < count; i++)
3636 {
3637 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3638
3639 if (!plats->virt_call)
3640 continue;
3641
3642 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3643 ipcp_value<ipa_polymorphic_call_context> *val;
3644
3645 if (ctxlat->bottom
3646 || !ctxlat->values
3647 || !avals.m_known_contexts[i].useless_p ())
3648 continue;
3649
3650 for (val = ctxlat->values; val; val = val->next)
3651 {
3652 avals.m_known_contexts[i] = val->value;
3653 perform_estimation_of_a_value (node, avals: &avals, removable_params_cost,
3654 est_move_cost: 0, val);
3655
3656 if (dump_file && (dump_flags & TDF_DETAILS))
3657 {
3658 fprintf (stream: dump_file, format: " - estimates for polymorphic context ");
3659 print_ipcp_constant_value (f: dump_file, v: val->value);
3660 fprintf (stream: dump_file, format: " for ");
3661 ipa_dump_param (dump_file, info, i);
3662 fprintf (stream: dump_file, format: ": time_benefit: %g, size: %i\n",
3663 val->local_time_benefit.to_double (),
3664 val->local_size_cost);
3665 }
3666 }
3667 avals.m_known_contexts[i] = ipa_polymorphic_call_context ();
3668 }
3669
3670 unsigned all_ctx_len = avals.m_known_aggs.length ();
3671 auto_vec<ipa_argagg_value, 32> all_ctx;
3672 all_ctx.reserve_exact (nelems: all_ctx_len);
3673 all_ctx.splice (src: avals.m_known_aggs);
3674 avals.m_known_aggs.safe_grow_cleared (len: all_ctx_len + 1);
3675
3676 unsigned j = 0;
3677 for (int index = 0; index < count; index++)
3678 {
3679 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i: index);
3680
3681 if (plats->aggs_bottom || !plats->aggs)
3682 continue;
3683
3684 for (ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
3685 {
3686 ipcp_value<tree> *val;
3687 if (aglat->bottom || !aglat->values
3688 /* If the following is true, the one value is already part of all
3689 context estimations. */
3690 || (!plats->aggs_contain_variable
3691 && aglat->is_single_const ()))
3692 continue;
3693
3694 unsigned unit_offset = aglat->offset / BITS_PER_UNIT;
3695 while (j < all_ctx_len
3696 && (all_ctx[j].index < index
3697 || (all_ctx[j].index == index
3698 && all_ctx[j].unit_offset < unit_offset)))
3699 {
3700 avals.m_known_aggs[j] = all_ctx[j];
3701 j++;
3702 }
3703
3704 for (unsigned k = j; k < all_ctx_len; k++)
3705 avals.m_known_aggs[k+1] = all_ctx[k];
3706
3707 for (val = aglat->values; val; val = val->next)
3708 {
3709 avals.m_known_aggs[j].value = val->value;
3710 avals.m_known_aggs[j].unit_offset = unit_offset;
3711 avals.m_known_aggs[j].index = index;
3712 avals.m_known_aggs[j].by_ref = plats->aggs_by_ref;
3713 avals.m_known_aggs[j].killed = false;
3714
3715 perform_estimation_of_a_value (node, avals: &avals,
3716 removable_params_cost, est_move_cost: 0, val);
3717
3718 if (dump_file && (dump_flags & TDF_DETAILS))
3719 {
3720 fprintf (stream: dump_file, format: " - estimates for value ");
3721 print_ipcp_constant_value (f: dump_file, v: val->value);
3722 fprintf (stream: dump_file, format: " for ");
3723 ipa_dump_param (dump_file, info, i: index);
3724 fprintf (stream: dump_file, format: "[%soffset: " HOST_WIDE_INT_PRINT_DEC
3725 "]: time_benefit: %g, size: %i\n",
3726 plats->aggs_by_ref ? "ref " : "",
3727 aglat->offset,
3728 val->local_time_benefit.to_double (),
3729 val->local_size_cost);
3730 }
3731 }
3732 }
3733 }
3734}
3735
3736
3737/* Add value CUR_VAL and all yet-unsorted values it is dependent on to the
3738 topological sort of values. */
3739
3740template <typename valtype>
3741void
3742value_topo_info<valtype>::add_val (ipcp_value<valtype> *cur_val)
3743{
3744 ipcp_value_source<valtype> *src;
3745
3746 if (cur_val->dfs)
3747 return;
3748
3749 dfs_counter++;
3750 cur_val->dfs = dfs_counter;
3751 cur_val->low_link = dfs_counter;
3752
3753 cur_val->topo_next = stack;
3754 stack = cur_val;
3755 cur_val->on_stack = true;
3756
3757 for (src = cur_val->sources; src; src = src->next)
3758 if (src->val)
3759 {
3760 if (src->val->dfs == 0)
3761 {
3762 add_val (cur_val: src->val);
3763 if (src->val->low_link < cur_val->low_link)
3764 cur_val->low_link = src->val->low_link;
3765 }
3766 else if (src->val->on_stack
3767 && src->val->dfs < cur_val->low_link)
3768 cur_val->low_link = src->val->dfs;
3769 }
3770
3771 if (cur_val->dfs == cur_val->low_link)
3772 {
3773 ipcp_value<valtype> *v, *scc_list = NULL;
3774
3775 do
3776 {
3777 v = stack;
3778 stack = v->topo_next;
3779 v->on_stack = false;
3780 v->scc_no = cur_val->dfs;
3781
3782 v->scc_next = scc_list;
3783 scc_list = v;
3784 }
3785 while (v != cur_val);
3786
3787 cur_val->topo_next = values_topo;
3788 values_topo = cur_val;
3789 }
3790}
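
/* The function above is essentially one step of Tarjan's algorithm for finding
   strongly connected components, run over the dependency graph of IPA-CP
   values.  As a small example, if value A is a source of value B and B is in
   turn a source of A (as can happen with values flowing around a recursive
   call), both receive the same scc_no, are chained together through scc_next
   and the whole component is prepended to values_topo as one entry.  */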
3791
3792/* Add all values in lattices associated with NODE to the topological sort if
3793 they are not there yet. */
3794
3795static void
3796add_all_node_vals_to_toposort (cgraph_node *node, ipa_topo_info *topo)
3797{
3798 ipa_node_params *info = ipa_node_params_sum->get (node);
3799 int i, count = ipa_get_param_count (info);
3800
3801 for (i = 0; i < count; i++)
3802 {
3803 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3804 ipcp_lattice<tree> *lat = &plats->itself;
3805 struct ipcp_agg_lattice *aglat;
3806
3807 if (!lat->bottom)
3808 {
3809 ipcp_value<tree> *val;
3810 for (val = lat->values; val; val = val->next)
3811 topo->constants.add_val (cur_val: val);
3812 }
3813
3814 if (!plats->aggs_bottom)
3815 for (aglat = plats->aggs; aglat; aglat = aglat->next)
3816 if (!aglat->bottom)
3817 {
3818 ipcp_value<tree> *val;
3819 for (val = aglat->values; val; val = val->next)
3820 topo->constants.add_val (cur_val: val);
3821 }
3822
3823 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3824 if (!ctxlat->bottom)
3825 {
3826 ipcp_value<ipa_polymorphic_call_context> *ctxval;
3827 for (ctxval = ctxlat->values; ctxval; ctxval = ctxval->next)
3828 topo->contexts.add_val (cur_val: ctxval);
3829 }
3830 }
3831}
3832
/* One pass of constant propagation along the call graph edges, from callers
   to callees (requires topological ordering in TOPO), iterating over strongly
   connected components.  */
3836
3837static void
3838propagate_constants_topo (class ipa_topo_info *topo)
3839{
3840 int i;
3841
3842 for (i = topo->nnodes - 1; i >= 0; i--)
3843 {
3844 unsigned j;
3845 struct cgraph_node *v, *node = topo->order[i];
3846 vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (node);
3847
3848 /* First, iteratively propagate within the strongly connected component
3849 until all lattices stabilize. */
3850 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3851 if (v->has_gimple_body_p ())
3852 {
3853 if (opt_for_fn (v->decl, flag_ipa_cp)
3854 && opt_for_fn (v->decl, optimize))
3855 push_node_to_stack (topo, node: v);
	  /* When V is not optimized, we cannot push it to the stack, but we
	     still need to set the lattices of all its callees to bottom.  */
3858 else
3859 {
3860 for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
3861 propagate_constants_across_call (cs);
3862 }
3863 }
3864
3865 v = pop_node_from_stack (topo);
3866 while (v)
3867 {
3868 struct cgraph_edge *cs;
3869 class ipa_node_params *info = NULL;
3870 bool self_scc = true;
3871
3872 for (cs = v->callees; cs; cs = cs->next_callee)
3873 if (ipa_edge_within_scc (cs))
3874 {
3875 cgraph_node *callee = cs->callee->function_symbol ();
3876
3877 if (v != callee)
3878 self_scc = false;
3879
3880 if (!info)
3881 {
3882 info = ipa_node_params_sum->get (node: v);
3883 info->node_within_scc = true;
3884 }
3885
3886 if (propagate_constants_across_call (cs))
3887 push_node_to_stack (topo, node: callee);
3888 }
3889
3890 if (info)
3891 info->node_is_self_scc = self_scc;
3892
3893 v = pop_node_from_stack (topo);
3894 }
3895
      /* Afterwards, propagate along the edges leading out of the SCC,
	 calculate the local effects of the discovered constants and add all
	 valid values to their topological sort.  */
3899 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3900 if (v->has_gimple_body_p ()
3901 && opt_for_fn (v->decl, flag_ipa_cp)
3902 && opt_for_fn (v->decl, optimize))
3903 {
3904 struct cgraph_edge *cs;
3905
3906 estimate_local_effects (node: v);
3907 add_all_node_vals_to_toposort (node: v, topo);
3908 for (cs = v->callees; cs; cs = cs->next_callee)
3909 if (!ipa_edge_within_scc (cs))
3910 propagate_constants_across_call (cs);
3911 }
3912 cycle_nodes.release ();
3913 }
3914}
3915
/* Propagate the estimated effects of individual values along the topological
   order, from the dependent values to those they depend on.  */
3918
3919template <typename valtype>
3920void
3921value_topo_info<valtype>::propagate_effects ()
3922{
3923 ipcp_value<valtype> *base;
3924 hash_set<ipcp_value<valtype> *> processed_srcvals;
3925
3926 for (base = values_topo; base; base = base->topo_next)
3927 {
3928 ipcp_value_source<valtype> *src;
3929 ipcp_value<valtype> *val;
3930 sreal time = 0;
3931 HOST_WIDE_INT size = 0;
3932
3933 for (val = base; val; val = val->scc_next)
3934 {
3935 time = time + val->local_time_benefit + val->prop_time_benefit;
3936 size = size + val->local_size_cost + val->prop_size_cost;
3937 }
3938
3939 for (val = base; val; val = val->scc_next)
3940 {
3941 processed_srcvals.empty ();
3942 for (src = val->sources; src; src = src->next)
3943 if (src->val
3944 && src->cs->maybe_hot_p ())
3945 {
3946 if (!processed_srcvals.add (src->val))
3947 {
3948 HOST_WIDE_INT prop_size = size + src->val->prop_size_cost;
3949 if (prop_size < INT_MAX)
3950 src->val->prop_size_cost = prop_size;
3951 else
3952 continue;
3953 }
3954
3955 int special_factor = 1;
3956 if (val->same_scc (src->val))
3957 special_factor
3958 = opt_for_fn(src->cs->caller->decl,
3959 param_ipa_cp_recursive_freq_factor);
3960 else if (val->self_recursion_generated_p ()
3961 && (src->cs->callee->function_symbol ()
3962 == src->cs->caller))
3963 {
3964 int max_recur_gen_depth
3965 = opt_for_fn(src->cs->caller->decl,
3966 param_ipa_cp_max_recursive_depth);
3967 special_factor = max_recur_gen_depth
3968 - val->self_recursion_generated_level + 1;
3969 }
3970
3971 src->val->prop_time_benefit
3972 += time * special_factor * src->cs->sreal_frequency ();
3973 }
3974
3975 if (size < INT_MAX)
3976 {
3977 val->prop_time_benefit = time;
3978 val->prop_size_cost = size;
3979 }
3980 else
3981 {
3982 val->prop_time_benefit = 0;
3983 val->prop_size_cost = 0;
3984 }
3985 }
3986 }
3987}
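
/* A rough sketch of the propagation above: the local and already propagated
   time benefits and size costs of all values forming one SCC are first summed
   into TIME and SIZE.  These sums are then added to the propagated benefit of
   every source value reachable over a maybe-hot edge, with the time scaled by
   the edge frequency and possibly by a recursion-related factor, so that a
   value whose discovery enables further specialization deeper in the call
   graph is deemed correspondingly more valuable.  */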
3988
/* Callback for qsort to sort profile counts of all edges in descending
   order.  */
3990
3991static int
3992compare_edge_profile_counts (const void *a, const void *b)
3993{
3994 const profile_count *cnt1 = (const profile_count *) a;
3995 const profile_count *cnt2 = (const profile_count *) b;
3996
3997 if (*cnt1 < *cnt2)
3998 return 1;
3999 if (*cnt1 > *cnt2)
4000 return -1;
4001 return 0;
4002}
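
/* Since the array is thus sorted in descending order, an index computed as a
   percentage into it (as done in ipcp_propagate_stage below) selects a count
   such that roughly that percentage of candidate edges are at least as
   hot.  */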
4003
4004
4005/* Propagate constants, polymorphic contexts and their effects from the
4006 summaries interprocedurally. */
4007
4008static void
4009ipcp_propagate_stage (class ipa_topo_info *topo)
4010{
4011 struct cgraph_node *node;
4012
4013 if (dump_file)
4014 fprintf (stream: dump_file, format: "\n Propagating constants:\n\n");
4015
4016 base_count = profile_count::uninitialized ();
4017
4018 bool compute_count_base = false;
4019 unsigned base_count_pos_percent = 0;
4020 FOR_EACH_DEFINED_FUNCTION (node)
4021 {
4022 if (node->has_gimple_body_p ()
4023 && opt_for_fn (node->decl, flag_ipa_cp)
4024 && opt_for_fn (node->decl, optimize))
4025 {
4026 ipa_node_params *info = ipa_node_params_sum->get (node);
4027 determine_versionability (node, info);
4028
4029 unsigned nlattices = ipa_get_param_count (info);
4030 info->lattices.safe_grow_cleared (len: nlattices, exact: true);
4031 initialize_node_lattices (node);
4032 }
4033 ipa_size_summary *s = ipa_size_summaries->get (node);
4034 if (node->definition && !node->alias && s != NULL)
4035 overall_size += s->self_size;
4036 if (node->count.ipa ().initialized_p ())
4037 {
4038 compute_count_base = true;
4039 unsigned pos_percent = opt_for_fn (node->decl,
4040 param_ipa_cp_profile_count_base);
4041 base_count_pos_percent = MAX (base_count_pos_percent, pos_percent);
4042 }
4043 }
4044
4045 if (compute_count_base)
4046 {
4047 auto_vec<profile_count> all_edge_counts;
4048 all_edge_counts.reserve_exact (nelems: symtab->edges_count);
4049 FOR_EACH_DEFINED_FUNCTION (node)
4050 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
4051 {
4052 profile_count count = cs->count.ipa ();
4053 if (!count.nonzero_p ())
4054 continue;
4055
4056 enum availability avail;
4057 cgraph_node *tgt
4058 = cs->callee->function_or_virtual_thunk_symbol (avail: &avail);
4059 ipa_node_params *info = ipa_node_params_sum->get (node: tgt);
4060 if (info && info->versionable)
4061 all_edge_counts.quick_push (obj: count);
4062 }
4063
4064 if (!all_edge_counts.is_empty ())
4065 {
4066 gcc_assert (base_count_pos_percent <= 100);
4067 all_edge_counts.qsort (compare_edge_profile_counts);
4068
4069 unsigned base_count_pos
4070 = ((all_edge_counts.length () * (base_count_pos_percent)) / 100);
4071 base_count = all_edge_counts[base_count_pos];
4072
4073 if (dump_file)
4074 {
4075 fprintf (stream: dump_file, format: "\nSelected base_count from %u edges at "
4076 "position %u, arriving at: ", all_edge_counts.length (),
4077 base_count_pos);
4078 base_count.dump (f: dump_file);
4079 fprintf (stream: dump_file, format: "\n");
4080 }
4081 }
4082 else if (dump_file)
4083 fprintf (stream: dump_file, format: "\nNo candidates with non-zero call count found, "
4084 "continuing as if without profile feedback.\n");
4085 }
4086
4087 orig_overall_size = overall_size;
4088
4089 if (dump_file)
4090 fprintf (stream: dump_file, format: "\noverall_size: %li\n", overall_size);
4091
4092 propagate_constants_topo (topo);
4093 if (flag_checking)
4094 ipcp_verify_propagated_values ();
4095 topo->constants.propagate_effects ();
4096 topo->contexts.propagate_effects ();
4097
4098 if (dump_file)
4099 {
4100 fprintf (stream: dump_file, format: "\nIPA lattices after all propagation:\n");
4101 print_all_lattices (f: dump_file, dump_sources: (dump_flags & TDF_DETAILS), dump_benefits: true);
4102 }
4103}
4104
4105/* Discover newly direct outgoing edges from NODE which is a new clone with
4106 known KNOWN_CSTS and make them direct. */
4107
4108static void
4109ipcp_discover_new_direct_edges (struct cgraph_node *node,
4110 vec<tree> known_csts,
4111 vec<ipa_polymorphic_call_context>
4112 known_contexts,
4113 vec<ipa_argagg_value, va_gc> *aggvals)
4114{
4115 struct cgraph_edge *ie, *next_ie;
4116 bool found = false;
4117
4118 for (ie = node->indirect_calls; ie; ie = next_ie)
4119 {
4120 tree target;
4121 bool speculative;
4122
4123 next_ie = ie->next_callee;
4124 ipa_argagg_value_list avs (aggvals);
4125 target = ipa_get_indirect_edge_target_1 (ie, known_csts, known_contexts,
4126 avs, speculative: &speculative);
4127 if (target)
4128 {
4129 bool agg_contents = ie->indirect_info->agg_contents;
4130 bool polymorphic = ie->indirect_info->polymorphic;
4131 int param_index = ie->indirect_info->param_index;
4132 struct cgraph_edge *cs = ipa_make_edge_direct_to_target (ie, target,
4133 speculative);
4134 found = true;
4135
4136 if (cs && !agg_contents && !polymorphic)
4137 {
4138 ipa_node_params *info = ipa_node_params_sum->get (node);
4139 int c = ipa_get_controlled_uses (info, i: param_index);
4140 if (c != IPA_UNDESCRIBED_USE
4141 && !ipa_get_param_load_dereferenced (info, i: param_index))
4142 {
4143 struct ipa_ref *to_del;
4144
4145 c--;
4146 ipa_set_controlled_uses (info, i: param_index, val: c);
4147 if (dump_file && (dump_flags & TDF_DETAILS))
4148 fprintf (stream: dump_file, format: " controlled uses count of param "
4149 "%i bumped down to %i\n", param_index, c);
4150 if (c == 0
4151 && (to_del = node->find_reference (referred_node: cs->callee, NULL, lto_stmt_uid: 0,
4152 use_type: IPA_REF_ADDR)))
4153 {
4154 if (dump_file && (dump_flags & TDF_DETAILS))
4155 fprintf (stream: dump_file, format: " and even removing its "
4156 "cloning-created reference\n");
4157 to_del->remove_reference ();
4158 }
4159 }
4160 }
4161 }
4162 }
  /* Turning indirect calls into direct ones will improve the overall
     summary.  */
4164 if (found)
4165 ipa_update_overall_fn_summary (node);
4166}
4167
4168class edge_clone_summary;
4169static call_summary <edge_clone_summary *> *edge_clone_summaries = NULL;
4170
4171/* Edge clone summary. */
4172
4173class edge_clone_summary
4174{
4175public:
4176 /* Default constructor. */
4177 edge_clone_summary (): prev_clone (NULL), next_clone (NULL) {}
4178
4179 /* Default destructor. */
4180 ~edge_clone_summary ()
4181 {
4182 if (prev_clone)
4183 edge_clone_summaries->get (edge: prev_clone)->next_clone = next_clone;
4184 if (next_clone)
4185 edge_clone_summaries->get (edge: next_clone)->prev_clone = prev_clone;
4186 }
4187
4188 cgraph_edge *prev_clone;
4189 cgraph_edge *next_clone;
4190};
4191
4192class edge_clone_summary_t:
4193 public call_summary <edge_clone_summary *>
4194{
4195public:
4196 edge_clone_summary_t (symbol_table *symtab):
4197 call_summary <edge_clone_summary *> (symtab)
4198 {
4199 m_initialize_when_cloning = true;
4200 }
4201
4202 void duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4203 edge_clone_summary *src_data,
4204 edge_clone_summary *dst_data) final override;
4205};
4206
4207/* Edge duplication hook. */
4208
4209void
4210edge_clone_summary_t::duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4211 edge_clone_summary *src_data,
4212 edge_clone_summary *dst_data)
4213{
4214 if (src_data->next_clone)
4215 edge_clone_summaries->get (edge: src_data->next_clone)->prev_clone = dst_edge;
4216 dst_data->prev_clone = src_edge;
4217 dst_data->next_clone = src_data->next_clone;
4218 src_data->next_clone = dst_edge;
4219}
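
/* The summaries above maintain, for every call graph edge, a doubly linked
   list of all edges created by cloning it, so that get_next_cgraph_edge_clone
   below can visit each clone of an edge when looking for the edges that bring
   a particular value to a node.  */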
4220
/* Return true if CS calls DEST or its clone for all contexts.  When
   ALLOW_RECURSION_TO_CLONE is false, also return false for self-recursive
   edges from/to an all-context clone.  */
4224
4225static bool
4226calls_same_node_or_its_all_contexts_clone_p (cgraph_edge *cs, cgraph_node *dest,
4227 bool allow_recursion_to_clone)
4228{
4229 enum availability availability;
4230 cgraph_node *callee = cs->callee->function_symbol (avail: &availability);
4231
4232 if (availability <= AVAIL_INTERPOSABLE)
4233 return false;
4234 if (callee == dest)
4235 return true;
4236 if (!allow_recursion_to_clone && cs->caller == callee)
4237 return false;
4238
4239 ipa_node_params *info = ipa_node_params_sum->get (node: callee);
4240 return info->is_all_contexts_clone && info->ipcp_orig_node == dest;
4241}
4242
4243/* Return true if edge CS does bring about the value described by SRC to
4244 DEST_VAL of node DEST or its clone for all contexts. */
4245
4246static bool
4247cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source<tree> *src,
4248 cgraph_node *dest, ipcp_value<tree> *dest_val)
4249{
4250 ipa_node_params *caller_info = ipa_node_params_sum->get (node: cs->caller);
4251
4252 if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, allow_recursion_to_clone: !src->val)
4253 || caller_info->node_dead)
4254 return false;
4255
4256 if (!src->val)
4257 return true;
4258
4259 if (caller_info->ipcp_orig_node)
4260 {
4261 tree t = NULL_TREE;
4262 if (src->offset == -1)
4263 t = caller_info->known_csts[src->index];
4264 else if (ipcp_transformation *ts
4265 = ipcp_get_transformation_summary (node: cs->caller))
4266 {
4267 ipa_argagg_value_list avl (ts);
4268 t = avl.get_value (index: src->index, unit_offset: src->offset / BITS_PER_UNIT);
4269 }
4270 return (t != NULL_TREE
4271 && values_equal_for_ipcp_p (x: src->val->value, y: t));
4272 }
4273 else
4274 {
4275 if (src->val == dest_val)
4276 return true;
4277
4278 struct ipcp_agg_lattice *aglat;
4279 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info: caller_info,
4280 i: src->index);
4281 if (src->offset == -1)
4282 return (plats->itself.is_single_const ()
4283 && values_equal_for_ipcp_p (x: src->val->value,
4284 y: plats->itself.values->value));
4285 else
4286 {
4287 if (plats->aggs_bottom || plats->aggs_contain_variable)
4288 return false;
4289 for (aglat = plats->aggs; aglat; aglat = aglat->next)
4290 if (aglat->offset == src->offset)
4291 return (aglat->is_single_const ()
4292 && values_equal_for_ipcp_p (x: src->val->value,
4293 y: aglat->values->value));
4294 }
4295 return false;
4296 }
4297}
4298
4299/* Return true if edge CS does bring about the value described by SRC to
4300 DST_VAL of node DEST or its clone for all contexts. */
4301
4302static bool
4303cgraph_edge_brings_value_p (cgraph_edge *cs,
4304 ipcp_value_source<ipa_polymorphic_call_context> *src,
4305 cgraph_node *dest,
4306 ipcp_value<ipa_polymorphic_call_context> *)
4307{
4308 ipa_node_params *caller_info = ipa_node_params_sum->get (node: cs->caller);
4309
4310 if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, allow_recursion_to_clone: true)
4311 || caller_info->node_dead)
4312 return false;
4313 if (!src->val)
4314 return true;
4315
4316 if (caller_info->ipcp_orig_node)
4317 return (caller_info->known_contexts.length () > (unsigned) src->index)
4318 && values_equal_for_ipcp_p (x: src->val->value,
4319 y: caller_info->known_contexts[src->index]);
4320
4321 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info: caller_info,
4322 i: src->index);
4323 return plats->ctxlat.is_single_const ()
4324 && values_equal_for_ipcp_p (x: src->val->value,
4325 y: plats->ctxlat.values->value);
4326}
4327
4328/* Get the next clone in the linked list of clones of an edge. */
4329
4330static inline struct cgraph_edge *
4331get_next_cgraph_edge_clone (struct cgraph_edge *cs)
4332{
4333 edge_clone_summary *s = edge_clone_summaries->get (edge: cs);
4334 return s != NULL ? s->next_clone : NULL;
4335}
4336
/* Given VAL that is intended for DEST, iterate over all its sources and if any
   of them is viable and hot, return true.  In that case, for those that still
   hold, add their edge frequencies and their number together with the
   cumulative profile counts of self-recursive and other edges into *FREQ_SUM,
   *CALLER_COUNT, *REC_COUNT_SUM and *NONREC_COUNT_SUM respectively.  */
4342
4343template <typename valtype>
4344static bool
4345get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
4346 sreal *freq_sum, int *caller_count,
4347 profile_count *rec_count_sum,
4348 profile_count *nonrec_count_sum)
4349{
4350 ipcp_value_source<valtype> *src;
4351 sreal freq = 0;
4352 int count = 0;
4353 profile_count rec_cnt = profile_count::zero ();
4354 profile_count nonrec_cnt = profile_count::zero ();
4355 bool hot = false;
4356 bool non_self_recursive = false;
4357
4358 for (src = val->sources; src; src = src->next)
4359 {
4360 struct cgraph_edge *cs = src->cs;
4361 while (cs)
4362 {
4363 if (cgraph_edge_brings_value_p (cs, src, dest, val))
4364 {
4365 count++;
4366 freq += cs->sreal_frequency ();
4367 hot |= cs->maybe_hot_p ();
	      if (cs->caller != dest)
		{
		  non_self_recursive = true;
		  if (cs->count.ipa ().initialized_p ())
		    nonrec_cnt += cs->count.ipa ();
		}
	      else if (cs->count.ipa ().initialized_p ())
		rec_cnt += cs->count.ipa ();
4376 }
4377 cs = get_next_cgraph_edge_clone (cs);
4378 }
4379 }
4380
4381 /* If the only edges bringing a value are self-recursive ones, do not bother
4382 evaluating it. */
4383 if (!non_self_recursive)
4384 return false;
4385
4386 *freq_sum = freq;
4387 *caller_count = count;
4388 *rec_count_sum = rec_cnt;
4389 *nonrec_count_sum = nonrec_cnt;
4390
4391 if (!hot && ipa_node_params_sum->get (node: dest)->node_within_scc)
4392 {
4393 struct cgraph_edge *cs;
4394
      /* A cold non-SCC source edge could still trigger hot recursive execution
	 of the function.  Consider the case as hot and rely on the following
	 cost model computation to further select the right one.  */
4398 for (cs = dest->callers; cs; cs = cs->next_caller)
4399 if (cs->caller == dest && cs->maybe_hot_p ())
4400 return true;
4401 }
4402
4403 return hot;
4404}
4405
4406/* Given a NODE, and a set of its CALLERS, try to adjust order of the callers
4407 to let a non-self-recursive caller be the first element. Thus, we can
4408 simplify intersecting operations on values that arrive from all of these
   callers, especially when there exists a self-recursive call.  Return true if
4410 this kind of adjustment is possible. */
4411
4412static bool
4413adjust_callers_for_value_intersection (vec<cgraph_edge *> &callers,
4414 cgraph_node *node)
4415{
4416 for (unsigned i = 0; i < callers.length (); i++)
4417 {
4418 cgraph_edge *cs = callers[i];
4419
4420 if (cs->caller != node)
4421 {
4422 if (i > 0)
4423 {
4424 callers[i] = callers[0];
4425 callers[0] = cs;
4426 }
4427 return true;
4428 }
4429 }
4430 return false;
4431}
4432
4433/* Return a vector of incoming edges that do bring value VAL to node DEST. It
4434 is assumed their number is known and equal to CALLER_COUNT. */
4435
4436template <typename valtype>
4437static vec<cgraph_edge *>
4438gather_edges_for_value (ipcp_value<valtype> *val, cgraph_node *dest,
4439 int caller_count)
4440{
4441 ipcp_value_source<valtype> *src;
4442 vec<cgraph_edge *> ret;
4443
4444 ret.create (nelems: caller_count);
4445 for (src = val->sources; src; src = src->next)
4446 {
4447 struct cgraph_edge *cs = src->cs;
4448 while (cs)
4449 {
4450 if (cgraph_edge_brings_value_p (cs, src, dest, val))
4451 ret.quick_push (obj: cs);
4452 cs = get_next_cgraph_edge_clone (cs);
4453 }
4454 }
4455
4456 if (caller_count > 1)
4457 adjust_callers_for_value_intersection (callers&: ret, node: dest);
4458
4459 return ret;
4460}
4461
/* Construct a replacement map for a known VALUE of a formal parameter with
   index PARM_NUM.  Return it or NULL if for some reason it cannot be created.
   FORCE_LOAD_REF should be set to true when the reference created for the
   constant should be a load one and not an address one because the
   corresponding parameter p is only used as *p.  */
4467
4468static struct ipa_replace_map *
4469get_replacement_map (class ipa_node_params *info, tree value, int parm_num,
4470 bool force_load_ref)
4471{
4472 struct ipa_replace_map *replace_map;
4473
4474 replace_map = ggc_alloc<ipa_replace_map> ();
4475 if (dump_file)
4476 {
4477 fprintf (stream: dump_file, format: " replacing ");
4478 ipa_dump_param (dump_file, info, i: parm_num);
4479
4480 fprintf (stream: dump_file, format: " with const ");
4481 print_generic_expr (dump_file, value);
4482
4483 if (force_load_ref)
4484 fprintf (stream: dump_file, format: " - forcing load reference\n");
4485 else
4486 fprintf (stream: dump_file, format: "\n");
4487 }
4488 replace_map->parm_num = parm_num;
4489 replace_map->new_tree = value;
4490 replace_map->force_load_ref = force_load_ref;
4491 return replace_map;
4492}
4493
/* Dump new profiling counts of NODE.  SPEC is true when NODE is a specialized
   one, otherwise it will be referred to as the original node.  */
4496
4497static void
4498dump_profile_updates (cgraph_node *node, bool spec)
4499{
4500 if (spec)
4501 fprintf (stream: dump_file, format: " setting count of the specialized node %s to ",
4502 node->dump_name ());
4503 else
4504 fprintf (stream: dump_file, format: " setting count of the original node %s to ",
4505 node->dump_name ());
4506
4507 node->count.dump (f: dump_file);
4508 fprintf (stream: dump_file, format: "\n");
4509 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
4510 {
4511 fprintf (stream: dump_file, format: " edge to %s has count ",
4512 cs->callee->dump_name ());
4513 cs->count.dump (f: dump_file);
4514 fprintf (stream: dump_file, format: "\n");
4515 }
4516}
4517
/* With a partial training run we do not want to assume that the original's
   count is zero whenever we redirect all executed edges to the clone.  Simply
   drop the profile to a local one in this case.  In any case, return the new
   value.  ORIG_NODE is the original node and its count has not been updated
   yet.  */
4522
4523profile_count
4524lenient_count_portion_handling (profile_count remainder, cgraph_node *orig_node)
4525{
4526 if (remainder.ipa_p () && !remainder.ipa ().nonzero_p ()
4527 && orig_node->count.ipa_p () && orig_node->count.ipa ().nonzero_p ()
4528 && opt_for_fn (orig_node->decl, flag_profile_partial_training))
4529 remainder = remainder.guessed_local ();
4530
4531 return remainder;
4532}
4533
4534/* Structure to sum counts coming from nodes other than the original node and
4535 its clones. */
4536
4537struct gather_other_count_struct
4538{
4539 cgraph_node *orig;
4540 profile_count other_count;
4541};
4542
/* Worker callback of call_for_symbol_thunks_and_aliases summing the profile
   counts that come from non-self-recursive calls.  */
4545
4546static bool
4547gather_count_of_non_rec_edges (cgraph_node *node, void *data)
4548{
4549 gather_other_count_struct *desc = (gather_other_count_struct *) data;
4550 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4551 if (cs->caller != desc->orig && cs->caller->clone_of != desc->orig)
4552 desc->other_count += cs->count.ipa ();
4553 return false;
4554}
4555
4556/* Structure to help analyze if we need to boost counts of some clones of some
4557 non-recursive edges to match the new callee count. */
4558
4559struct desc_incoming_count_struct
4560{
4561 cgraph_node *orig;
4562 hash_set <cgraph_edge *> *processed_edges;
4563 profile_count count;
4564 unsigned unproc_orig_rec_edges;
4565};
4566
4567/* Go over edges calling NODE and its thunks and gather information about
4568 incoming counts so that we know if we need to make any adjustments. */
4569
4570static void
4571analyze_clone_icoming_counts (cgraph_node *node,
4572 desc_incoming_count_struct *desc)
4573{
4574 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4575 if (cs->caller->thunk)
4576 {
4577 analyze_clone_icoming_counts (node: cs->caller, desc);
4578 continue;
4579 }
4580 else
4581 {
4582 if (cs->count.initialized_p ())
4583 desc->count += cs->count.ipa ();
4584 if (!desc->processed_edges->contains (k: cs)
4585 && cs->caller->clone_of == desc->orig)
4586 desc->unproc_orig_rec_edges++;
4587 }
4588}
4589
/* If the caller edge counts of a clone created for a self-recursive arithmetic
   jump function must be adjusted because they come from the "seed" clone for
   the first value and so have been excessively scaled back as if they were not
   recursive calls, adjust them so that the incoming counts of NODE match its
   count.  NODE is the node or its thunk.  */
4595
4596static void
4597adjust_clone_incoming_counts (cgraph_node *node,
4598 desc_incoming_count_struct *desc)
4599{
4600 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4601 if (cs->caller->thunk)
4602 {
4603 adjust_clone_incoming_counts (node: cs->caller, desc);
4604 profile_count sum = profile_count::zero ();
4605 for (cgraph_edge *e = cs->caller->callers; e; e = e->next_caller)
4606 if (e->count.initialized_p ())
4607 sum += e->count.ipa ();
4608 cs->count = cs->count.combine_with_ipa_count (ipa: sum);
4609 }
4610 else if (!desc->processed_edges->contains (k: cs)
4611 && cs->caller->clone_of == desc->orig)
4612 {
4613 cs->count += desc->count;
4614 if (dump_file)
4615 {
4616 fprintf (stream: dump_file, format: " Adjusted count of an incoming edge of "
4617 "a clone %s -> %s to ", cs->caller->dump_name (),
4618 cs->callee->dump_name ());
4619 cs->count.dump (f: dump_file);
4620 fprintf (stream: dump_file, format: "\n");
4621 }
4622 }
4623}
4624
/* When ORIG_NODE has been cloned for values which have been generated for a
   self-recursive call as a result of arithmetic pass-through jump functions,
   adjust its count together with counts of all such clones in SELF_GEN_CLONES
   which at this point also contains ORIG_NODE itself.
4629
4630 The function sums the counts of the original node and all its clones that
4631 cannot be attributed to a specific clone because it comes from a
4632 non-recursive edge. This sum is then evenly divided between the clones and
4633 on top of that each one gets all the counts which can be attributed directly
4634 to it. */
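
/* A hypothetical example: if the portion of ORIG_NODE's count which cannot be
   attributed to any particular clone amounts to 900 and SELF_GEN_CLONES holds
   three nodes (ORIG_NODE and two clones), each of them is given 300 of that
   sum, plus whatever counts arrive on edges that can be attributed to it
   directly.  The numbers are only illustrative.  */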
4635
4636static void
4637update_counts_for_self_gen_clones (cgraph_node *orig_node,
4638 const vec<cgraph_node *> &self_gen_clones)
4639{
4640 profile_count redist_sum = orig_node->count.ipa ();
4641 if (!(redist_sum > profile_count::zero ()))
4642 return;
4643
4644 if (dump_file)
4645 fprintf (stream: dump_file, format: " Updating profile of self recursive clone "
4646 "series\n");
4647
4648 gather_other_count_struct gocs;
4649 gocs.orig = orig_node;
4650 gocs.other_count = profile_count::zero ();
4651
4652 auto_vec <profile_count, 8> other_edges_count;
4653 for (cgraph_node *n : self_gen_clones)
4654 {
4655 gocs.other_count = profile_count::zero ();
4656 n->call_for_symbol_thunks_and_aliases (callback: gather_count_of_non_rec_edges,
4657 data: &gocs, include_overwritable: false);
4658 other_edges_count.safe_push (obj: gocs.other_count);
4659 redist_sum -= gocs.other_count;
4660 }
4661
4662 hash_set<cgraph_edge *> processed_edges;
4663 unsigned i = 0;
4664 for (cgraph_node *n : self_gen_clones)
4665 {
4666 profile_count orig_count = n->count;
4667 profile_count new_count
4668 = (redist_sum / self_gen_clones.length () + other_edges_count[i]);
4669 new_count = lenient_count_portion_handling (remainder: new_count, orig_node);
4670 n->count = new_count;
4671 profile_count::adjust_for_ipa_scaling (num: &new_count, den: &orig_count);
4672 for (cgraph_edge *cs = n->callees; cs; cs = cs->next_callee)
4673 {
4674 cs->count = cs->count.apply_scale (num: new_count, den: orig_count);
4675 processed_edges.add (k: cs);
4676 }
4677 for (cgraph_edge *cs = n->indirect_calls; cs; cs = cs->next_callee)
4678 cs->count = cs->count.apply_scale (num: new_count, den: orig_count);
4679
4680 i++;
4681 }
4682
  /* There are still going to be edges to ORIG_NODE that have one or more
     clones coming from another node clone in SELF_GEN_CLONES and which we
     scaled by the same amount, which means that the total incoming sum of
     counts to ORIG_NODE will be too high; scale such edges back.  */
4687 for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
4688 {
4689 if (cs->callee->ultimate_alias_target () == orig_node)
4690 {
4691 unsigned den = 0;
4692 for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (cs: e))
4693 if (e->callee->ultimate_alias_target () == orig_node
4694 && processed_edges.contains (k: e))
4695 den++;
4696 if (den > 0)
4697 for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (cs: e))
4698 if (e->callee->ultimate_alias_target () == orig_node
4699 && processed_edges.contains (k: e))
4700 e->count /= den;
4701 }
4702 }
4703
  /* Edges from the seeds of the values generated for arithmetic jump
     functions along self-recursive edges are likely to have fairly low counts,
     and so edges from them to nodes in the self_gen_clones do not correspond
     to the artificially distributed count of the nodes.  Therefore the total
     sum of incoming edges to some clones might be too low.  Detect this
     situation and correct it.  */
4710 for (cgraph_node *n : self_gen_clones)
4711 {
4712 if (!(n->count.ipa () > profile_count::zero ()))
4713 continue;
4714
4715 desc_incoming_count_struct desc;
4716 desc.orig = orig_node;
4717 desc.processed_edges = &processed_edges;
4718 desc.count = profile_count::zero ();
4719 desc.unproc_orig_rec_edges = 0;
4720 analyze_clone_icoming_counts (node: n, desc: &desc);
4721
4722 if (n->count.differs_from_p (other: desc.count))
4723 {
4724 if (n->count > desc.count
4725 && desc.unproc_orig_rec_edges > 0)
4726 {
4727 desc.count = n->count - desc.count;
	      desc.count /= desc.unproc_orig_rec_edges;
4729 adjust_clone_incoming_counts (node: n, desc: &desc);
4730 }
4731 else if (dump_file)
4732 fprintf (stream: dump_file,
4733 format: " Unable to fix up incoming counts for %s.\n",
4734 n->dump_name ());
4735 }
4736 }
4737
4738 if (dump_file)
4739 for (cgraph_node *n : self_gen_clones)
4740 dump_profile_updates (node: n, spec: n != orig_node);
4741 return;
4742}
4743
4744/* After a specialized NEW_NODE version of ORIG_NODE has been created, update
4745 their profile information to reflect this. This function should not be used
4746 for clones generated for arithmetic pass-through jump functions on a
4747 self-recursive call graph edge, that situation is handled by
4748 update_counts_for_self_gen_clones. */
4749
4750static void
4751update_profiling_info (struct cgraph_node *orig_node,
4752 struct cgraph_node *new_node)
4753{
4754 struct caller_statistics stats;
4755 profile_count new_sum;
4756 profile_count remainder, orig_node_count = orig_node->count.ipa ();
4757
4758 if (!(orig_node_count > profile_count::zero ()))
4759 return;
4760
4761 if (dump_file)
4762 {
4763 fprintf (stream: dump_file, format: " Updating profile from original count: ");
4764 orig_node_count.dump (f: dump_file);
4765 fprintf (stream: dump_file, format: "\n");
4766 }
4767
4768 init_caller_stats (stats: &stats, itself: new_node);
4769 new_node->call_for_symbol_thunks_and_aliases (callback: gather_caller_stats, data: &stats,
4770 include_overwritable: false);
4771 new_sum = stats.count_sum;
4772
4773 bool orig_edges_processed = false;
4774 if (new_sum > orig_node_count)
4775 {
      /* TODO: Profile has already gone astray, keep what we have but lower it
	 to global0 category.  */
4778 remainder = orig_node->count.global0 ();
4779
4780 for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
4781 cs->count = cs->count.global0 ();
4782 for (cgraph_edge *cs = orig_node->indirect_calls;
4783 cs;
4784 cs = cs->next_callee)
4785 cs->count = cs->count.global0 ();
4786 orig_edges_processed = true;
4787 }
4788 else if (stats.rec_count_sum.nonzero_p ())
4789 {
4790 int new_nonrec_calls = stats.n_nonrec_calls;
4791 /* There are self-recursive edges which are likely to bring in the
4792 majority of calls but which we must divide in between the original and
4793 new node. */
4794 init_caller_stats (stats: &stats, itself: orig_node);
4795 orig_node->call_for_symbol_thunks_and_aliases (callback: gather_caller_stats,
4796 data: &stats, include_overwritable: false);
4797 int orig_nonrec_calls = stats.n_nonrec_calls;
4798 profile_count orig_nonrec_call_count = stats.count_sum;
4799
4800 if (orig_node->local)
4801 {
4802 if (!orig_nonrec_call_count.nonzero_p ())
4803 {
4804 if (dump_file)
4805 fprintf (stream: dump_file, format: " The original is local and the only "
4806 "incoming edges from non-dead callers with nonzero "
4807 "counts are self-recursive, assuming it is cold.\n");
4808 /* The NEW_NODE count and counts of all its outgoing edges
4809 are still unmodified copies of ORIG_NODE's. Just clear
4810 the latter and bail out. */
4811 profile_count zero;
4812 if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
4813 zero = profile_count::zero ().guessed_local ();
4814 else
4815 zero = profile_count::adjusted_zero ();
4816 orig_node->count = zero;
4817 for (cgraph_edge *cs = orig_node->callees;
4818 cs;
4819 cs = cs->next_callee)
4820 cs->count = zero;
4821 for (cgraph_edge *cs = orig_node->indirect_calls;
4822 cs;
4823 cs = cs->next_callee)
4824 cs->count = zero;
4825 return;
4826 }
4827 }
4828 else
4829 {
4830 /* Let's behave as if there was another caller that accounts for all
4831 the calls that were either indirect or from other compilation
4832 units. */
4833 orig_nonrec_calls++;
4834 profile_count pretend_caller_count
4835 = (orig_node_count - new_sum - orig_nonrec_call_count
4836 - stats.rec_count_sum);
4837 orig_nonrec_call_count += pretend_caller_count;
4838 }
4839
4840 /* Divide all "unexplained" counts roughly proportionally to sums of
4841 counts of non-recursive calls.
4842
	 We put rather arbitrary limits on how many counts we claim because the
	 sum of non-self-recursive incoming counts is only a rough guideline
	 and there are cases (such as mcf) where using it blindly just takes
4846 too many. And if lattices are considered in the opposite order we
4847 could also take too few. */
4848 profile_count unexp = orig_node_count - new_sum - orig_nonrec_call_count;
4849
4850 int limit_den = 2 * (orig_nonrec_calls + new_nonrec_calls);
4851 profile_count new_part
4852 = MAX(MIN (unexp.apply_scale (new_sum,
4853 new_sum + orig_nonrec_call_count),
4854 unexp.apply_scale (limit_den - 1, limit_den)),
4855 unexp.apply_scale (new_nonrec_calls, limit_den));
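      /* Illustrative example with made-up numbers: if unexp is 1000, new_sum
	 is 300, orig_nonrec_call_count is 100 and there are two non-recursive
	 calls into the original node and two into the new one (so limit_den
	 is 8), the proportional share is 1000 * 300 / 400 = 750, the upper
	 clamp 1000 * 7 / 8 = 875 and the lower clamp 1000 * 2 / 8 = 250, so
	 750 counts are claimed for the new node.  */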
4856 if (dump_file)
4857 {
4858 fprintf (stream: dump_file, format: " Claiming ");
4859 new_part.dump (f: dump_file);
4860 fprintf (stream: dump_file, format: " of unexplained ");
4861 unexp.dump (f: dump_file);
4862 fprintf (stream: dump_file, format: " counts because of self-recursive "
4863 "calls\n");
4864 }
4865 new_sum += new_part;
4866 remainder = lenient_count_portion_handling (remainder: orig_node_count - new_sum,
4867 orig_node);
4868 }
4869 else
4870 remainder = lenient_count_portion_handling (remainder: orig_node_count - new_sum,
4871 orig_node);
4872
4873 new_sum = orig_node_count.combine_with_ipa_count (ipa: new_sum);
4874 new_node->count = new_sum;
4875 orig_node->count = remainder;
4876
4877 profile_count orig_new_node_count = orig_node_count;
4878 profile_count::adjust_for_ipa_scaling (num: &new_sum, den: &orig_new_node_count);
4879 for (cgraph_edge *cs = new_node->callees; cs; cs = cs->next_callee)
4880 cs->count = cs->count.apply_scale (num: new_sum, den: orig_new_node_count);
4881 for (cgraph_edge *cs = new_node->indirect_calls; cs; cs = cs->next_callee)
4882 cs->count = cs->count.apply_scale (num: new_sum, den: orig_new_node_count);
4883
4884 if (!orig_edges_processed)
4885 {
4886 profile_count::adjust_for_ipa_scaling (num: &remainder, den: &orig_node_count);
4887 for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
4888 cs->count = cs->count.apply_scale (num: remainder, den: orig_node_count);
4889 for (cgraph_edge *cs = orig_node->indirect_calls;
4890 cs;
4891 cs = cs->next_callee)
4892 cs->count = cs->count.apply_scale (num: remainder, den: orig_node_count);
4893 }
4894
4895 if (dump_file)
4896 {
4897 dump_profile_updates (node: new_node, spec: true);
4898 dump_profile_updates (node: orig_node, spec: false);
4899 }
4900}
4901
4902/* Update the respective profile of specialized NEW_NODE and the original
4903 ORIG_NODE after additional edges with cumulative count sum REDIRECTED_SUM
4904 have been redirected to the specialized version. */
4905
4906static void
4907update_specialized_profile (struct cgraph_node *new_node,
4908 struct cgraph_node *orig_node,
4909 profile_count redirected_sum)
4910{
4911 struct cgraph_edge *cs;
4912 profile_count new_node_count, orig_node_count = orig_node->count.ipa ();
4913
4914 if (dump_file)
4915 {
4916 fprintf (stream: dump_file, format: " the sum of counts of redirected edges is ");
4917 redirected_sum.dump (f: dump_file);
4918 fprintf (stream: dump_file, format: "\n old ipa count of the original node is ");
4919 orig_node_count.dump (f: dump_file);
4920 fprintf (stream: dump_file, format: "\n");
4921 }
4922 if (!(orig_node_count > profile_count::zero ()))
4923 return;
4924
4925 new_node_count = new_node->count;
4926 new_node->count += redirected_sum;
4927 orig_node->count
4928 = lenient_count_portion_handling (remainder: orig_node->count - redirected_sum,
4929 orig_node);
4930
4931 for (cs = new_node->callees; cs; cs = cs->next_callee)
4932 cs->count += cs->count.apply_scale (num: redirected_sum, den: new_node_count);
4933
4934 for (cs = orig_node->callees; cs; cs = cs->next_callee)
4935 {
4936 profile_count dec = cs->count.apply_scale (num: redirected_sum,
4937 den: orig_node_count);
4938 cs->count -= dec;
4939 }
4940
4941 if (dump_file)
4942 {
4943 dump_profile_updates (node: new_node, spec: true);
4944 dump_profile_updates (node: orig_node, spec: false);
4945 }
4946}
4947
4948static void adjust_references_in_caller (cgraph_edge *cs,
4949 symtab_node *symbol, int index);
4950
4951/* Simple structure to pass a symbol and index (with same meaning as parameters
4952 of adjust_references_in_caller) through a void* parameter of a
4953 call_for_symbol_thunks_and_aliases callback. */
4954struct symbol_and_index_together
4955{
4956 symtab_node *symbol;
4957 int index;
4958};
4959
4960/* Worker callback of call_for_symbol_thunks_and_aliases to recursively call
4961 adjust_references_in_caller on edges up in the call-graph, if necessary. */
4962static bool
4963adjust_refs_in_act_callers (struct cgraph_node *node, void *data)
4964{
4965 symbol_and_index_together *pack = (symbol_and_index_together *) data;
4966 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4967 if (!cs->caller->thunk)
4968 adjust_references_in_caller (cs, symbol: pack->symbol, index: pack->index);
4969 return false;
4970}
4971
/* At INDEX of a function being called by CS there is an ADDR_EXPR of a
   variable which is only dereferenced and which is represented by SYMBOL.  See
   if we can remove the ADDR reference in callers associated with the call.  */
4975
4976static void
4977adjust_references_in_caller (cgraph_edge *cs, symtab_node *symbol, int index)
4978{
4979 ipa_edge_args *args = ipa_edge_args_sum->get (edge: cs);
4980 ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i: index);
4981 if (jfunc->type == IPA_JF_CONST)
4982 {
4983 ipa_ref *to_del = cs->caller->find_reference (referred_node: symbol, stmt: cs->call_stmt,
4984 lto_stmt_uid: cs->lto_stmt_uid,
4985 use_type: IPA_REF_ADDR);
4986 if (!to_del)
4987 return;
4988 to_del->remove_reference ();
4989 ipa_zap_jf_refdesc (jfunc);
4990 if (dump_file)
4991 fprintf (stream: dump_file, format: " Removed a reference from %s to %s.\n",
4992 cs->caller->dump_name (), symbol->dump_name ());
4993 return;
4994 }
4995
4996 if (jfunc->type != IPA_JF_PASS_THROUGH
4997 || ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR
4998 || ipa_get_jf_pass_through_refdesc_decremented (jfunc))
4999 return;
5000
5001 int fidx = ipa_get_jf_pass_through_formal_id (jfunc);
5002 cgraph_node *caller = cs->caller;
5003 ipa_node_params *caller_info = ipa_node_params_sum->get (node: caller);
5004 /* TODO: This consistency check may be too big and not really
5005 that useful. Consider removing it. */
5006 tree cst;
5007 if (caller_info->ipcp_orig_node)
5008 cst = caller_info->known_csts[fidx];
5009 else
5010 {
5011 ipcp_lattice<tree> *lat = ipa_get_scalar_lat (info: caller_info, i: fidx);
5012 gcc_assert (lat->is_single_const ());
5013 cst = lat->values->value;
5014 }
5015 gcc_assert (TREE_CODE (cst) == ADDR_EXPR
5016 && (symtab_node::get (get_base_address (TREE_OPERAND (cst, 0)))
5017 == symbol));
5018
5019 int cuses = ipa_get_controlled_uses (info: caller_info, i: fidx);
5020 if (cuses == IPA_UNDESCRIBED_USE)
5021 return;
5022 gcc_assert (cuses > 0);
5023 cuses--;
5024 ipa_set_controlled_uses (info: caller_info, i: fidx, val: cuses);
5025 ipa_set_jf_pass_through_refdesc_decremented (jfunc, value: true);
5026 if (dump_file && (dump_flags & TDF_DETAILS))
5027 fprintf (stream: dump_file, format: " Controlled uses of parameter %i of %s dropped "
5028 "to %i.\n", fidx, caller->dump_name (), cuses);
5029 if (cuses)
5030 return;
5031
5032 if (caller_info->ipcp_orig_node)
5033 {
5034 /* Cloning machinery has created a reference here, we need to either
5035 remove it or change it to a read one. */
5036 ipa_ref *to_del = caller->find_reference (referred_node: symbol, NULL, lto_stmt_uid: 0, use_type: IPA_REF_ADDR);
5037 if (to_del)
5038 {
5039 to_del->remove_reference ();
5040 if (dump_file)
5041 fprintf (stream: dump_file, format: " Removed a reference from %s to %s.\n",
5042 cs->caller->dump_name (), symbol->dump_name ());
5043 if (ipa_get_param_load_dereferenced (info: caller_info, i: fidx))
5044 {
5045 caller->create_reference (referred_node: symbol, use_type: IPA_REF_LOAD, NULL);
5046 if (dump_file)
5047 fprintf (stream: dump_file,
5048 format: " ...and replaced it with LOAD one.\n");
5049 }
5050 }
5051 }
5052
5053 symbol_and_index_together pack;
5054 pack.symbol = symbol;
5055 pack.index = fidx;
5056 if (caller->can_change_signature)
5057 caller->call_for_symbol_thunks_and_aliases (callback: adjust_refs_in_act_callers,
5058 data: &pack, include_overwritable: true);
5059}
5060
5061
5062/* Return true if we would like to remove a parameter from NODE when cloning it
5063 with KNOWN_CSTS scalar constants. */
5064
5065static bool
5066want_remove_some_param_p (cgraph_node *node, vec<tree> known_csts)
5067{
5068 auto_vec<bool, 16> surviving;
5069 bool filled_vec = false;
5070 ipa_node_params *info = ipa_node_params_sum->get (node);
5071 int i, count = ipa_get_param_count (info);
5072
5073 for (i = 0; i < count; i++)
5074 {
5075 if (!known_csts[i] && ipa_is_param_used (info, i))
5076 continue;
5077
5078 if (!filled_vec)
5079 {
5080 clone_info *info = clone_info::get (node);
5081 if (!info || !info->param_adjustments)
5082 return true;
5083 info->param_adjustments->get_surviving_params (surviving_params: &surviving);
5084 filled_vec = true;
5085 }
      if (surviving.length () > (unsigned) i && surviving[i])
	return true;
5088 }
5089 return false;
5090}
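
/* For example, if a node has three parameters and the second one is passed a
   known constant from all relevant callers (or is unused) while it has not
   already been removed by previous parameter adjustments, the function above
   returns true because a new clone could at least drop that parameter.  */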
5091
5092/* Create a specialized version of NODE with known constants in KNOWN_CSTS,
5093 known contexts in KNOWN_CONTEXTS and known aggregate values in AGGVALS and
5094 redirect all edges in CALLERS to it. */
5095
5096static struct cgraph_node *
5097create_specialized_node (struct cgraph_node *node,
5098 vec<tree> known_csts,
5099 vec<ipa_polymorphic_call_context> known_contexts,
5100 vec<ipa_argagg_value, va_gc> *aggvals,
5101 vec<cgraph_edge *> &callers)
5102{
5103 ipa_node_params *new_info, *info = ipa_node_params_sum->get (node);
5104 vec<ipa_replace_map *, va_gc> *replace_trees = NULL;
5105 vec<ipa_adjusted_param, va_gc> *new_params = NULL;
5106 struct cgraph_node *new_node;
5107 int i, count = ipa_get_param_count (info);
5108 clone_info *cinfo = clone_info::get (node);
5109 ipa_param_adjustments *old_adjustments = cinfo
5110 ? cinfo->param_adjustments : NULL;
5111 ipa_param_adjustments *new_adjustments;
5112 gcc_assert (!info->ipcp_orig_node);
5113 gcc_assert (node->can_change_signature
5114 || !old_adjustments);
5115
5116 if (old_adjustments)
5117 {
5118 /* At the moment all IPA optimizations should use the number of
5119 parameters of the prevailing decl as the m_always_copy_start.
	 Handling any other value would complicate the code below, so for the
	 time being let's only assert it is so.  */
5122 gcc_assert (old_adjustments->m_always_copy_start == count
5123 || old_adjustments->m_always_copy_start < 0);
5124 int old_adj_count = vec_safe_length (v: old_adjustments->m_adj_params);
5125 for (i = 0; i < old_adj_count; i++)
5126 {
5127 ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
5128 if (!node->can_change_signature
5129 || old_adj->op != IPA_PARAM_OP_COPY
5130 || (!known_csts[old_adj->base_index]
5131 && ipa_is_param_used (info, i: old_adj->base_index)))
5132 {
5133 ipa_adjusted_param new_adj = *old_adj;
5134
5135 new_adj.prev_clone_adjustment = true;
5136 new_adj.prev_clone_index = i;
5137 vec_safe_push (v&: new_params, obj: new_adj);
5138 }
5139 }
5140 bool skip_return = old_adjustments->m_skip_return;
5141 new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
5142 ipa_param_adjustments (new_params, count,
5143 skip_return));
5144 }
5145 else if (node->can_change_signature
5146 && want_remove_some_param_p (node, known_csts))
5147 {
5148 ipa_adjusted_param adj;
5149 memset (s: &adj, c: 0, n: sizeof (adj));
5150 adj.op = IPA_PARAM_OP_COPY;
5151 for (i = 0; i < count; i++)
5152 if (!known_csts[i] && ipa_is_param_used (info, i))
5153 {
5154 adj.base_index = i;
5155 adj.prev_clone_index = i;
5156 vec_safe_push (v&: new_params, obj: adj);
5157 }
5158 new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
5159 ipa_param_adjustments (new_params, count, false));
5160 }
5161 else
5162 new_adjustments = NULL;
5163
5164 auto_vec<cgraph_edge *, 2> self_recursive_calls;
5165 for (i = callers.length () - 1; i >= 0; i--)
5166 {
5167 cgraph_edge *cs = callers[i];
5168 if (cs->caller == node)
5169 {
5170 self_recursive_calls.safe_push (obj: cs);
5171 callers.unordered_remove (ix: i);
5172 }
5173 }
5174 replace_trees = cinfo ? vec_safe_copy (src: cinfo->tree_map) : NULL;
5175 for (i = 0; i < count; i++)
5176 {
5177 tree t = known_csts[i];
5178 if (!t)
5179 continue;
5180
5181 gcc_checking_assert (TREE_CODE (t) != TREE_BINFO);
5182
5183 bool load_ref = false;
5184 symtab_node *ref_symbol;
5185 if (TREE_CODE (t) == ADDR_EXPR)
5186 {
5187 tree base = get_base_address (TREE_OPERAND (t, 0));
5188 if (TREE_CODE (base) == VAR_DECL
5189 && ipa_get_controlled_uses (info, i) == 0
5190 && ipa_get_param_load_dereferenced (info, i)
5191 && (ref_symbol = symtab_node::get (decl: base)))
5192 {
5193 load_ref = true;
5194 if (node->can_change_signature)
5195 for (cgraph_edge *caller : callers)
5196 adjust_references_in_caller (cs: caller, symbol: ref_symbol, index: i);
5197 }
5198 }
5199
5200 ipa_replace_map *replace_map = get_replacement_map (info, value: t, parm_num: i, force_load_ref: load_ref);
5201 if (replace_map)
5202 vec_safe_push (v&: replace_trees, obj: replace_map);
5203 }
5204
5205 unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
5206 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
5207 node->decl)));
5208 new_node = node->create_virtual_clone (redirect_callers: callers, tree_map: replace_trees,
5209 param_adjustments: new_adjustments, suffix: "constprop",
5210 num_suffix: suffix_counter);
5211 suffix_counter++;
5212
5213 bool have_self_recursive_calls = !self_recursive_calls.is_empty ();
5214 for (unsigned j = 0; j < self_recursive_calls.length (); j++)
5215 {
5216 cgraph_edge *cs = get_next_cgraph_edge_clone (cs: self_recursive_calls[j]);
5217 /* Cloned edges can disappear during cloning as speculation can be
5218 resolved; check that we have one and that it comes from the last
5219 cloning. */
5220 if (cs && cs->caller == new_node)
5221 cs->redirect_callee_duplicating_thunks (n: new_node);
5222 /* Any future code that would make more than one clone of an outgoing
5223 edge would confuse this mechanism, so let's check that this does not
5224 happen. */
5225 gcc_checking_assert (!cs
5226 || !get_next_cgraph_edge_clone (cs)
5227 || get_next_cgraph_edge_clone (cs)->caller != new_node);
5228 }
5229 if (have_self_recursive_calls)
5230 new_node->expand_all_artificial_thunks ();
5231
5232 ipa_set_node_agg_value_chain (node: new_node, aggs: aggvals);
5233 for (const ipa_argagg_value &av : aggvals)
5234 new_node->maybe_create_reference (val: av.value, NULL);
5235
5236 if (dump_file && (dump_flags & TDF_DETAILS))
5237 {
5238 fprintf (stream: dump_file, format: " the new node is %s.\n", new_node->dump_name ());
5239 if (known_contexts.exists ())
5240 {
5241 for (i = 0; i < count; i++)
5242 if (!known_contexts[i].useless_p ())
5243 {
5244 fprintf (stream: dump_file, format: " known ctx %i is ", i);
5245 known_contexts[i].dump (f: dump_file);
5246 }
5247 }
5248 if (aggvals)
5249 {
5250 fprintf (stream: dump_file, format: " Aggregate replacements:");
5251 ipa_argagg_value_list avs (aggvals);
5252 avs.dump (f: dump_file);
5253 }
5254 }
5255
5256 new_info = ipa_node_params_sum->get (node: new_node);
5257 new_info->ipcp_orig_node = node;
5258 new_node->ipcp_clone = true;
5259 new_info->known_csts = known_csts;
5260 new_info->known_contexts = known_contexts;
5261
5262 ipcp_discover_new_direct_edges (node: new_node, known_csts, known_contexts,
5263 aggvals);
5264
5265 return new_node;
5266}
5267
5268/* Return true if JFUNC, which describes the i-th parameter of call CS, is a
5269 pass-through function to itself when the cgraph_node involved is not an
5270 IPA-CP clone. When SIMPLE is true, further check if JFUNC is a simple
5271 no-operation pass-through. */
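/* A minimal sketch of the simple case this tests for (hypothetical user
   code):

     fn (int i)
     {
       ...
       fn (i);
     }

   The self-recursive call passes parameter 0 through unchanged, i.e. a
   no-operation pass-through, so any constant known for I in the caller also
   holds across the recursive edge.  */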
5272
5273static bool
5274self_recursive_pass_through_p (cgraph_edge *cs, ipa_jump_func *jfunc, int i,
5275 bool simple = true)
5276{
5277 enum availability availability;
5278 if (cs->caller == cs->callee->function_symbol (avail: &availability)
5279 && availability > AVAIL_INTERPOSABLE
5280 && jfunc->type == IPA_JF_PASS_THROUGH
5281 && (!simple || ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
5282 && ipa_get_jf_pass_through_formal_id (jfunc) == i
5283 && ipa_node_params_sum->get (node: cs->caller)
5284 && !ipa_node_params_sum->get (node: cs->caller)->ipcp_orig_node)
5285 return true;
5286 return false;
5287}
5288
5289/* Return true if JFUNC, which describes a part of an aggregate represented or
5290 pointed to by the i-th parameter of call CS, is a pass-through function to
5291 itself when the cgraph_node involved is not an IPA-CP clone. When
5292 SIMPLE is true, further check if JFUNC is a simple no-operation
5293 pass-through. */
5294
5295static bool
5296self_recursive_agg_pass_through_p (const cgraph_edge *cs,
5297 const ipa_agg_jf_item *jfunc,
5298 int i, bool simple = true)
5299{
5300 enum availability availability;
5301 if (cs->caller == cs->callee->function_symbol (avail: &availability)
5302 && availability > AVAIL_INTERPOSABLE
5303 && jfunc->jftype == IPA_JF_LOAD_AGG
5304 && jfunc->offset == jfunc->value.load_agg.offset
5305 && (!simple || jfunc->value.pass_through.operation == NOP_EXPR)
5306 && jfunc->value.pass_through.formal_id == i
5307 && useless_type_conversion_p (jfunc->value.load_agg.type, jfunc->type)
5308 && ipa_node_params_sum->get (node: cs->caller)
5309 && !ipa_node_params_sum->get (node: cs->caller)->ipcp_orig_node)
5310 return true;
5311 return false;
5312}
5313
5314/* Given a NODE and a subset of its CALLERS, try to populate blank slots in
5315 KNOWN_CSTS with constants that are also known for all of the CALLERS. */
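/* For instance (a purely hypothetical caller subset):

     foo (int a) { target (4, a); }
     bar (int b) { target (4, b); }

   A still-blank slot for the first parameter of target can be filled with 4
   because both callers agree on it, whereas the slot for the second
   parameter has to stay blank.  */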
5316
5317static void
5318find_more_scalar_values_for_callers_subset (struct cgraph_node *node,
5319 vec<tree> &known_csts,
5320 const vec<cgraph_edge *> &callers)
5321{
5322 ipa_node_params *info = ipa_node_params_sum->get (node);
5323 int i, count = ipa_get_param_count (info);
5324
5325 for (i = 0; i < count; i++)
5326 {
5327 struct cgraph_edge *cs;
5328 tree newval = NULL_TREE;
5329 int j;
5330 bool first = true;
5331 tree type = ipa_get_type (info, i);
5332
5333 if (ipa_get_scalar_lat (info, i)->bottom || known_csts[i])
5334 continue;
5335
5336 FOR_EACH_VEC_ELT (callers, j, cs)
5337 {
5338 struct ipa_jump_func *jump_func;
5339 tree t;
5340
5341 ipa_edge_args *args = ipa_edge_args_sum->get (edge: cs);
5342 if (!args
5343 || i >= ipa_get_cs_argument_count (args)
5344 || (i == 0
5345 && call_passes_through_thunk (cs)))
5346 {
5347 newval = NULL_TREE;
5348 break;
5349 }
5350 jump_func = ipa_get_ith_jump_func (args, i);
5351
5352 /* Besides simple pass-through jump function, arithmetic jump
5353 function could also introduce argument-direct-pass-through for
5354 self-feeding recursive call. For example,
5355
5356 fn (int i)
5357 {
5358 fn (i & 1);
5359 }
5360
5361 Given that i is 0, recursive propagation via (i & 1) also gets
5362 0. */
5363 if (self_recursive_pass_through_p (cs, jfunc: jump_func, i, simple: false))
5364 {
5365 gcc_assert (newval);
5366 t = ipa_get_jf_arith_result (
5367 opcode: ipa_get_jf_pass_through_operation (jfunc: jump_func),
5368 input: newval,
5369 operand: ipa_get_jf_pass_through_operand (jfunc: jump_func),
5370 res_type: type);
5371 }
5372 else
5373 t = ipa_value_from_jfunc (info: ipa_node_params_sum->get (node: cs->caller),
5374 jfunc: jump_func, parm_type: type);
5375 if (!t
5376 || (newval
5377 && !values_equal_for_ipcp_p (x: t, y: newval))
5378 || (!first && !newval))
5379 {
5380 newval = NULL_TREE;
5381 break;
5382 }
5383 else
5384 newval = t;
5385 first = false;
5386 }
5387
5388 if (newval)
5389 {
5390 if (dump_file && (dump_flags & TDF_DETAILS))
5391 {
5392 fprintf (stream: dump_file, format: " adding an extra known scalar value ");
5393 print_ipcp_constant_value (f: dump_file, v: newval);
5394 fprintf (stream: dump_file, format: " for ");
5395 ipa_dump_param (dump_file, info, i);
5396 fprintf (stream: dump_file, format: "\n");
5397 }
5398
5399 known_csts[i] = newval;
5400 }
5401 }
5402}
5403
5404/* Given a NODE and a subset of its CALLERS, try to populate blank slots in
5405 KNOWN_CONTEXTS with polymorphic contexts that are also known for all of the
5406 CALLERS. */
5407
5408static void
5409find_more_contexts_for_caller_subset (cgraph_node *node,
5410 vec<ipa_polymorphic_call_context>
5411 *known_contexts,
5412 const vec<cgraph_edge *> &callers)
5413{
5414 ipa_node_params *info = ipa_node_params_sum->get (node);
5415 int i, count = ipa_get_param_count (info);
5416
5417 for (i = 0; i < count; i++)
5418 {
5419 cgraph_edge *cs;
5420
5421 if (ipa_get_poly_ctx_lat (info, i)->bottom
5422 || (known_contexts->exists ()
5423 && !(*known_contexts)[i].useless_p ()))
5424 continue;
5425
5426 ipa_polymorphic_call_context newval;
5427 bool first = true;
5428 int j;
5429
5430 FOR_EACH_VEC_ELT (callers, j, cs)
5431 {
5432 ipa_edge_args *args = ipa_edge_args_sum->get (edge: cs);
5433 if (!args
5434 || i >= ipa_get_cs_argument_count (args))
5435 return;
5436 ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
5437 ipa_polymorphic_call_context ctx;
5438 ctx = ipa_context_from_jfunc (info: ipa_node_params_sum->get (node: cs->caller),
5439 cs, csidx: i, jfunc);
5440 if (first)
5441 {
5442 newval = ctx;
5443 first = false;
5444 }
5445 else
5446 newval.meet_with (ctx);
5447 if (newval.useless_p ())
5448 break;
5449 }
5450
5451 if (!newval.useless_p ())
5452 {
5453 if (dump_file && (dump_flags & TDF_DETAILS))
5454 {
5455 fprintf (stream: dump_file, format: " adding an extra known polymorphic "
5456 "context ");
5457 print_ipcp_constant_value (f: dump_file, v: newval);
5458 fprintf (stream: dump_file, format: " for ");
5459 ipa_dump_param (dump_file, info, i);
5460 fprintf (stream: dump_file, format: "\n");
5461 }
5462
5463 if (!known_contexts->exists ())
5464 known_contexts->safe_grow_cleared (len: ipa_get_param_count (info),
5465 exact: true);
5466 (*known_contexts)[i] = newval;
5467 }
5468
5469 }
5470}
5471
5472/* Push all aggregate values coming along edge CS for parameter number INDEX to
5473 RES. If INTERIM is non-NULL, it contains the current interim state of
5474 collected aggregate values which can be used to compute values passed over
5475 self-recursive edges.
5476
5477 This is basically one iteration of push_agg_values_from_edge over one
5478 parameter, which allows for simpler early returns. */
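/* A hypothetical source-level situation this handles (unit offsets assume a
   32-bit int):

     struct S { int a, b; };

     caller ()
     {
       struct S s = { 1, 2 };
       callee (&s);
     }

   For the corresponding parameter of callee, the values 1 and 2 at unit
   offsets 0 and 4 would be pushed to RES, provided the jump function built
   for the call describes them.  */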
5479
5480static void
5481push_agg_values_for_index_from_edge (struct cgraph_edge *cs, int index,
5482 vec<ipa_argagg_value> *res,
5483 const ipa_argagg_value_list *interim)
5484{
5485 bool agg_values_from_caller = false;
5486 bool agg_jf_preserved = false;
5487 unsigned unit_delta = UINT_MAX;
5488 int src_idx = -1;
5489 ipa_jump_func *jfunc = ipa_get_ith_jump_func (args: ipa_edge_args_sum->get (edge: cs),
5490 i: index);
5491
5492 if (jfunc->type == IPA_JF_PASS_THROUGH
5493 && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
5494 {
5495 agg_values_from_caller = true;
5496 agg_jf_preserved = ipa_get_jf_pass_through_agg_preserved (jfunc);
5497 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
5498 unit_delta = 0;
5499 }
5500 else if (jfunc->type == IPA_JF_ANCESTOR
5501 && ipa_get_jf_ancestor_agg_preserved (jfunc))
5502 {
5503 agg_values_from_caller = true;
5504 agg_jf_preserved = true;
5505 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
5506 unit_delta = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
5507 }
5508
5509 ipa_node_params *caller_info = ipa_node_params_sum->get (node: cs->caller);
5510 if (agg_values_from_caller)
5511 {
5512 if (caller_info->ipcp_orig_node)
5513 {
5514 struct cgraph_node *orig_node = caller_info->ipcp_orig_node;
5515 ipcp_transformation *ts
5516 = ipcp_get_transformation_summary (node: cs->caller);
5517 ipa_node_params *orig_info = ipa_node_params_sum->get (node: orig_node);
5518 ipcp_param_lattices *orig_plats
5519 = ipa_get_parm_lattices (info: orig_info, i: src_idx);
5520 if (ts
5521 && orig_plats->aggs
5522 && (agg_jf_preserved || !orig_plats->aggs_by_ref))
5523 {
5524 ipa_argagg_value_list src (ts);
5525 src.push_adjusted_values (src_index: src_idx, dest_index: index, unit_delta, res);
5526 return;
5527 }
5528 }
5529 else
5530 {
5531 ipcp_param_lattices *src_plats
5532 = ipa_get_parm_lattices (info: caller_info, i: src_idx);
5533 if (src_plats->aggs
5534 && !src_plats->aggs_bottom
5535 && (agg_jf_preserved || !src_plats->aggs_by_ref))
5536 {
5537 if (interim && self_recursive_pass_through_p (cs, jfunc, i: index))
5538 {
5539 interim->push_adjusted_values (src_index: src_idx, dest_index: index, unit_delta,
5540 res);
5541 return;
5542 }
5543 if (!src_plats->aggs_contain_variable)
5544 {
5545 push_agg_values_from_plats (plats: src_plats, dest_index: index, unit_delta,
5546 res);
5547 return;
5548 }
5549 }
5550 }
5551 }
5552
5553 if (!jfunc->agg.items)
5554 return;
5555 bool first = true;
5556 unsigned prev_unit_offset = 0;
5557 for (const ipa_agg_jf_item &agg_jf : *jfunc->agg.items)
5558 {
5559 tree value, srcvalue;
5560 /* Besides simple pass-through aggregate jump function, arithmetic
5561 aggregate jump function could also bring same aggregate value as
5562 parameter passed-in for self-feeding recursive call. For example,
5563
5564 fn (int *i)
5565 {
5566 int j = *i & 1;
5567 fn (&j);
5568 }
5569
5570 Given that *i is 0, recursive propagation via (*i & 1) also gets 0. */
5571 if (interim
5572 && self_recursive_agg_pass_through_p (cs, jfunc: &agg_jf, i: index, simple: false)
5573 && (srcvalue = interim->get_value(index,
5574 unit_offset: agg_jf.offset / BITS_PER_UNIT)))
5575 value = ipa_get_jf_arith_result (opcode: agg_jf.value.pass_through.operation,
5576 input: srcvalue,
5577 operand: agg_jf.value.pass_through.operand,
5578 res_type: agg_jf.type);
5579 else
5580 value = ipa_agg_value_from_jfunc (info: caller_info, node: cs->caller,
5581 item: &agg_jf);
5582 if (value)
5583 {
5584 struct ipa_argagg_value iav;
5585 iav.value = value;
5586 iav.unit_offset = agg_jf.offset / BITS_PER_UNIT;
5587 iav.index = index;
5588 iav.by_ref = jfunc->agg.by_ref;
5589 iav.killed = false;
5590
5591 gcc_assert (first
5592 || iav.unit_offset > prev_unit_offset);
5593 prev_unit_offset = iav.unit_offset;
5594 first = false;
5595
5596 res->safe_push (obj: iav);
5597 }
5598 }
5599 return;
5600}
5601
5602/* Push all aggregate values coming along edge CS to RES. DEST_INFO is the
5603 description of ultimate callee of CS or the one it was cloned from (the
5604 summary where lattices are). If INTERIM is non-NULL, it contains the
5605 current interim state of collected aggregate values which can be used to
5606 compute values passed over self-recursive edges (if OPTIMIZE_SELF_RECURSION
5607 is true) and to skip values which clearly will not be part of intersection
5608 with INTERIM. */
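/* Note on the skipping below: if, hypothetically, INTERIM only contains
   entries for parameter 1, then parameters 0 and 2 are not examined at all,
   because whatever they bring cannot survive an intersection with INTERIM
   anyway.  */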
5609
5610static void
5611push_agg_values_from_edge (struct cgraph_edge *cs,
5612 ipa_node_params *dest_info,
5613 vec<ipa_argagg_value> *res,
5614 const ipa_argagg_value_list *interim,
5615 bool optimize_self_recursion)
5616{
5617 ipa_edge_args *args = ipa_edge_args_sum->get (edge: cs);
5618 if (!args)
5619 return;
5620
5621 int count = MIN (ipa_get_param_count (dest_info),
5622 ipa_get_cs_argument_count (args));
5623
5624 unsigned interim_index = 0;
5625 for (int index = 0; index < count; index++)
5626 {
5627 if (interim)
5628 {
5629 while (interim_index < interim->m_elts.size ()
5630 && interim->m_elts[interim_index].value
5631 && interim->m_elts[interim_index].index < index)
5632 interim_index++;
5633 if (interim_index >= interim->m_elts.size ()
5634 || interim->m_elts[interim_index].index > index)
5635 continue;
5636 }
5637
5638 ipcp_param_lattices *plats = ipa_get_parm_lattices (info: dest_info, i: index);
5639 if (!ipa_is_param_used (info: dest_info, i: index)
5640 || plats->aggs_bottom)
5641 continue;
5642 push_agg_values_for_index_from_edge (cs, index, res,
5643 interim: optimize_self_recursion ? interim
5644 : NULL);
5645 }
5646}
5647
5648
5649/* Look at edges in CALLERS and collect all known aggregate values that arrive
5650 from all of them. Return nullptr if there are none. */
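/* In effect the result is an intersection: if, hypothetically, one caller
   passes an aggregate with the values {1, 2} in two fields and another
   caller passes {1, 3}, only the value 1 at its offset survives
   intersect_argaggs_with and ends up in the returned vector.  */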
5651
5652static struct vec<ipa_argagg_value, va_gc> *
5653find_aggregate_values_for_callers_subset (struct cgraph_node *node,
5654 const vec<cgraph_edge *> &callers)
5655{
5656 ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5657 if (dest_info->ipcp_orig_node)
5658 dest_info = ipa_node_params_sum->get (node: dest_info->ipcp_orig_node);
5659
5660 /* gather_edges_for_value puts a non-recursive call into the first element of
5661 callers if it can. */
5662 auto_vec<ipa_argagg_value, 32> interim;
5663 push_agg_values_from_edge (cs: callers[0], dest_info, res: &interim, NULL, optimize_self_recursion: true);
5664
5665 unsigned valid_entries = interim.length ();
5666 if (!valid_entries)
5667 return nullptr;
5668
5669 unsigned caller_count = callers.length();
5670 for (unsigned i = 1; i < caller_count; i++)
5671 {
5672 auto_vec<ipa_argagg_value, 32> last;
5673 ipa_argagg_value_list avs (&interim);
5674 push_agg_values_from_edge (cs: callers[i], dest_info, res: &last, interim: &avs, optimize_self_recursion: true);
5675
5676 valid_entries = intersect_argaggs_with (elts&: interim, other: last);
5677 if (!valid_entries)
5678 return nullptr;
5679 }
5680
5681 vec<ipa_argagg_value, va_gc> *res = NULL;
5682 vec_safe_reserve_exact (v&: res, nelems: valid_entries);
5683 for (const ipa_argagg_value &av : interim)
5684 if (av.value)
5685 res->quick_push(obj: av);
5686 gcc_checking_assert (res->length () == valid_entries);
5687 return res;
5688}
5689
5690/* Determine whether CS also brings all scalar values that the NODE is
5691 specialized for. */
5692
5693static bool
5694cgraph_edge_brings_all_scalars_for_node (struct cgraph_edge *cs,
5695 struct cgraph_node *node)
5696{
5697 ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5698 int count = ipa_get_param_count (info: dest_info);
5699 class ipa_node_params *caller_info;
5700 class ipa_edge_args *args;
5701 int i;
5702
5703 caller_info = ipa_node_params_sum->get (node: cs->caller);
5704 args = ipa_edge_args_sum->get (edge: cs);
5705 for (i = 0; i < count; i++)
5706 {
5707 struct ipa_jump_func *jump_func;
5708 tree val, t;
5709
5710 val = dest_info->known_csts[i];
5711 if (!val)
5712 continue;
5713
5714 if (i >= ipa_get_cs_argument_count (args))
5715 return false;
5716 jump_func = ipa_get_ith_jump_func (args, i);
5717 t = ipa_value_from_jfunc (info: caller_info, jfunc: jump_func,
5718 parm_type: ipa_get_type (info: dest_info, i));
5719 if (!t || !values_equal_for_ipcp_p (x: val, y: t))
5720 return false;
5721 }
5722 return true;
5723}
5724
5725/* Determine whether CS also brings all aggregate values that NODE is
5726 specialized for. */
5727
5728static bool
5729cgraph_edge_brings_all_agg_vals_for_node (struct cgraph_edge *cs,
5730 struct cgraph_node *node)
5731{
5732 ipcp_transformation *ts = ipcp_get_transformation_summary (node);
5733 if (!ts || vec_safe_is_empty (v: ts->m_agg_values))
5734 return true;
5735
5736 const ipa_argagg_value_list existing (ts->m_agg_values);
5737 auto_vec<ipa_argagg_value, 32> edge_values;
5738 ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5739 gcc_checking_assert (dest_info->ipcp_orig_node);
5740 dest_info = ipa_node_params_sum->get (node: dest_info->ipcp_orig_node);
5741 push_agg_values_from_edge (cs, dest_info, res: &edge_values, interim: &existing, optimize_self_recursion: false);
5742 const ipa_argagg_value_list avl (&edge_values);
5743 return avl.superset_of_p (other: existing);
5744}
5745
5746/* Given an original NODE and a VAL for which we have already created a
5747 specialized clone, look whether there are incoming edges that still lead
5748 into the old node but now also bring the requested value and also conform to
5749 all other criteria such that they can be redirected to the specialized node.
5750 This function can therefore redirect the final edge in an SCC. */
5751
5752template <typename valtype>
5753static void
5754perhaps_add_new_callers (cgraph_node *node, ipcp_value<valtype> *val)
5755{
5756 ipcp_value_source<valtype> *src;
5757 profile_count redirected_sum = profile_count::zero ();
5758
5759 for (src = val->sources; src; src = src->next)
5760 {
5761 struct cgraph_edge *cs = src->cs;
5762 while (cs)
5763 {
5764 if (cgraph_edge_brings_value_p (cs, src, node, val)
5765 && cgraph_edge_brings_all_scalars_for_node (cs, val->spec_node)
5766 && cgraph_edge_brings_all_agg_vals_for_node (cs, val->spec_node))
5767 {
5768 if (dump_file)
5769 fprintf (dump_file, " - adding an extra caller %s of %s\n",
5770 cs->caller->dump_name (),
5771 val->spec_node->dump_name ());
5772
5773 cs->redirect_callee_duplicating_thunks (n: val->spec_node);
5774 val->spec_node->expand_all_artificial_thunks ();
5775 if (cs->count.ipa ().initialized_p ())
5776 redirected_sum = redirected_sum + cs->count.ipa ();
5777 }
5778 cs = get_next_cgraph_edge_clone (cs);
5779 }
5780 }
5781
5782 if (redirected_sum.nonzero_p ())
5783 update_specialized_profile (val->spec_node, node, redirected_sum);
5784}
5785
5786/* Return true if KNOWN_CONTEXTS contain at least one useful context. */
5787
5788static bool
5789known_contexts_useful_p (vec<ipa_polymorphic_call_context> known_contexts)
5790{
5791 ipa_polymorphic_call_context *ctx;
5792 int i;
5793
5794 FOR_EACH_VEC_ELT (known_contexts, i, ctx)
5795 if (!ctx->useless_p ())
5796 return true;
5797 return false;
5798}
5799
5800/* Return a copy of KNOWN_CONTEXTS if it contains a useful context, otherwise return vNULL. */
5801
5802static vec<ipa_polymorphic_call_context>
5803copy_useful_known_contexts (const vec<ipa_polymorphic_call_context> &known_contexts)
5804{
5805 if (known_contexts_useful_p (known_contexts))
5806 return known_contexts.copy ();
5807 else
5808 return vNULL;
5809}
5810
5811/* Copy known scalar values from AVALS into KNOWN_CSTS and modify the copy
5812 according to VAL and INDEX. If non-empty, replace KNOWN_CONTEXTS with its
5813 copy too. */
5814
5815static void
5816copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
5817 vec<tree> *known_csts,
5818 vec<ipa_polymorphic_call_context> *known_contexts,
5819 ipcp_value<tree> *val, int index)
5820{
5821 *known_csts = avals->m_known_vals.copy ();
5822 *known_contexts = copy_useful_known_contexts (known_contexts: avals->m_known_contexts);
5823 (*known_csts)[index] = val->value;
5824}
5825
5826/* Copy known scalar values from AVALS into KNOWN_CSTS. Similarly, copy
5827 contexts to KNOWN_CONTEXTS and modify the copy according to VAL and
5828 INDEX. */
5829
5830static void
5831copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
5832 vec<tree> *known_csts,
5833 vec<ipa_polymorphic_call_context> *known_contexts,
5834 ipcp_value<ipa_polymorphic_call_context> *val,
5835 int index)
5836{
5837 *known_csts = avals->m_known_vals.copy ();
5838 *known_contexts = avals->m_known_contexts.copy ();
5839 (*known_contexts)[index] = val->value;
5840}
5841
5842/* Return true if OFFSET indicates this was not an aggregate value or there is
5843 a replacement equivalent to VALUE, INDEX and OFFSET among those in the
5844 AGGVALS list. */
5845
5846DEBUG_FUNCTION bool
5847ipcp_val_agg_replacement_ok_p (vec<ipa_argagg_value, va_gc> *aggvals,
5848 int index, HOST_WIDE_INT offset, tree value)
5849{
5850 if (offset == -1)
5851 return true;
5852
5853 const ipa_argagg_value_list avl (aggvals);
5854 tree v = avl.get_value (index, unit_offset: offset / BITS_PER_UNIT);
5855 return v && values_equal_for_ipcp_p (x: v, y: value);
5856}
5857
5858/* Return true if OFFSET is minus one; the source of a polymorphic context
5859 cannot be an aggregate value. */
5860
5861DEBUG_FUNCTION bool
5862ipcp_val_agg_replacement_ok_p (vec<ipa_argagg_value, va_gc> *,
5863 int , HOST_WIDE_INT offset,
5864 ipa_polymorphic_call_context)
5865{
5866 return offset == -1;
5867}
5868
5869/* Decide whether to create a special version of NODE for value VAL of
5870 parameter at the given INDEX. If OFFSET is -1, the value is for the
5871 parameter itself, otherwise it is stored at the given OFFSET of the
5872 parameter. AVALS describes the other already known values. SELF_GEN_CLONES
5873 is a vector which contains clones created for self-recursive calls with an
5874 arithmetic pass-through jump function. */
5875
5876template <typename valtype>
5877static bool
5878decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset,
5879 ipcp_value<valtype> *val, ipa_auto_call_arg_values *avals,
5880 vec<cgraph_node *> *self_gen_clones)
5881{
5882 int caller_count;
5883 sreal freq_sum;
5884 profile_count count_sum, rec_count_sum;
5885 vec<cgraph_edge *> callers;
5886
5887 if (val->spec_node)
5888 {
5889 perhaps_add_new_callers (node, val);
5890 return false;
5891 }
5892 else if (val->local_size_cost + overall_size > get_max_overall_size (node))
5893 {
5894 if (dump_file && (dump_flags & TDF_DETAILS))
5895 fprintf (dump_file, " Ignoring candidate value because "
5896 "maximum unit size would be reached with %li.\n",
5897 val->local_size_cost + overall_size);
5898 return false;
5899 }
5900 else if (!get_info_about_necessary_edges (val, node, &freq_sum, &caller_count,
5901 &rec_count_sum, &count_sum))
5902 return false;
5903
5904 if (!dbg_cnt (index: ipa_cp_values))
5905 return false;
5906
5907 if (val->self_recursion_generated_p ())
5908 {
5909 /* The edge counts in this case might not have been adjusted yet.
5910 Nevertheless, even if they were, it would be only guesswork which we
5911 can do now. The recursive part of the counts can be derived from the
5912 count of the original node anyway. */
5913 if (node->count.ipa ().nonzero_p ())
5914 {
5915 unsigned dem = self_gen_clones->length () + 1;
5916 rec_count_sum = node->count.ipa () / dem;
5917 }
5918 else
5919 rec_count_sum = profile_count::zero ();
5920 }
5921
5922 /* get_info_about_necessary_edges only sums up ipa counts. */
5923 count_sum += rec_count_sum;
5924
5925 if (dump_file && (dump_flags & TDF_DETAILS))
5926 {
5927 fprintf (stream: dump_file, format: " - considering value ");
5928 print_ipcp_constant_value (dump_file, val->value);
5929 fprintf (stream: dump_file, format: " for ");
5930 ipa_dump_param (dump_file, info: ipa_node_params_sum->get (node), i: index);
5931 if (offset != -1)
5932 fprintf (stream: dump_file, format: ", offset: " HOST_WIDE_INT_PRINT_DEC, offset);
5933 fprintf (stream: dump_file, format: " (caller_count: %i)\n", caller_count);
5934 }
5935
5936 if (!good_cloning_opportunity_p (node, val->local_time_benefit,
5937 freq_sum, count_sum,
5938 val->local_size_cost)
5939 && !good_cloning_opportunity_p (node, val->prop_time_benefit,
5940 freq_sum, count_sum, val->prop_size_cost))
5941 return false;
5942
5943 if (dump_file)
5944 fprintf (stream: dump_file, format: " Creating a specialized node of %s.\n",
5945 node->dump_name ());
5946
5947 vec<tree> known_csts;
5948 vec<ipa_polymorphic_call_context> known_contexts;
5949
5950 callers = gather_edges_for_value (val, node, caller_count);
5951 if (offset == -1)
5952 copy_known_vectors_add_val (avals, &known_csts, &known_contexts, val, index);
5953 else
5954 {
5955 known_csts = avals->m_known_vals.copy ();
5956 known_contexts = copy_useful_known_contexts (known_contexts: avals->m_known_contexts);
5957 }
5958 find_more_scalar_values_for_callers_subset (node, known_csts, callers);
5959 find_more_contexts_for_caller_subset (node, known_contexts: &known_contexts, callers);
5960 vec<ipa_argagg_value, va_gc> *aggvals
5961 = find_aggregate_values_for_callers_subset (node, callers);
5962 gcc_checking_assert (ipcp_val_agg_replacement_ok_p (aggvals, index,
5963 offset, val->value));
5964 val->spec_node = create_specialized_node (node, known_csts, known_contexts,
5965 aggvals, callers);
5966
5967 if (val->self_recursion_generated_p ())
5968 self_gen_clones->safe_push (obj: val->spec_node);
5969 else
5970 update_profiling_info (node, val->spec_node);
5971
5972 callers.release ();
5973 overall_size += val->local_size_cost;
5974 if (dump_file && (dump_flags & TDF_DETAILS))
5975 fprintf (stream: dump_file, format: " overall size reached %li\n",
5976 overall_size);
5977
5978 /* TODO: If for some lattice there is only one other known value
5979 left, make a special node for it too. */
5980
5981 return true;
5982}
5983
5984/* Like irange::contains_p(), but convert VAL to the range of R if
5985 necessary. */
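/* For example (a hedged illustration), if R describes the unsigned char
   range [0, 10] and VAL is the int constant 3, VAL is converted to unsigned
   char before the containment test; an int constant such as 300, which does
   not fit into unsigned char, makes the function return false without any
   conversion.  */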
5986
5987static inline bool
5988ipa_range_contains_p (const vrange &r, tree val)
5989{
5990 if (r.undefined_p ())
5991 return false;
5992
5993 tree type = r.type ();
5994 if (!wi::fits_to_tree_p (x: wi::to_wide (t: val), type))
5995 return false;
5996
5997 val = fold_convert (type, val);
5998 return r.contains_p (cst: val);
5999}
6000
6001/* Decide whether and what specialized clones of NODE should be created. */
6002
6003static bool
6004decide_whether_version_node (struct cgraph_node *node)
6005{
6006 ipa_node_params *info = ipa_node_params_sum->get (node);
6007 int i, count = ipa_get_param_count (info);
6008 bool ret = false;
6009
6010 if (count == 0)
6011 return false;
6012
6013 if (dump_file && (dump_flags & TDF_DETAILS))
6014 fprintf (stream: dump_file, format: "\nEvaluating opportunities for %s.\n",
6015 node->dump_name ());
6016
6017 auto_vec <cgraph_node *, 9> self_gen_clones;
6018 ipa_auto_call_arg_values avals;
6019 gather_context_independent_values (info, avals: &avals, calculate_aggs: false, NULL);
6020
6021 for (i = 0; i < count; i++)
6022 {
6023 if (!ipa_is_param_used (info, i))
6024 continue;
6025
6026 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6027 ipcp_lattice<tree> *lat = &plats->itself;
6028 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
6029
6030 if (!lat->bottom
6031 && !avals.m_known_vals[i])
6032 {
6033 ipcp_value<tree> *val;
6034 for (val = lat->values; val; val = val->next)
6035 {
6036 /* If some values generated for self-recursive calls with
6037 arithmetic jump functions fall outside of the known
6038 range for the parameter, we can skip them. */
6039 if (TREE_CODE (val->value) == INTEGER_CST
6040 && !plats->m_value_range.bottom_p ()
6041 && !ipa_range_contains_p (r: plats->m_value_range.m_vr,
6042 val: val->value))
6043 {
6044 /* This can happen also if a constant present in the source
6045 code falls outside of the range of the parameter's type, so we
6046 cannot assert. */
6047 if (dump_file && (dump_flags & TDF_DETAILS))
6048 {
6049 fprintf (stream: dump_file, format: " - skipping%s value ",
6050 val->self_recursion_generated_p ()
6051 ? " self_recursion_generated" : "");
6052 print_ipcp_constant_value (f: dump_file, v: val->value);
6053 fprintf (stream: dump_file, format: " because it is outside known "
6054 "value range.\n");
6055 }
6056 continue;
6057 }
6058 ret |= decide_about_value (node, index: i, offset: -1, val, avals: &avals,
6059 self_gen_clones: &self_gen_clones);
6060 }
6061 }
6062
6063 if (!plats->aggs_bottom)
6064 {
6065 struct ipcp_agg_lattice *aglat;
6066 ipcp_value<tree> *val;
6067 for (aglat = plats->aggs; aglat; aglat = aglat->next)
6068 if (!aglat->bottom && aglat->values
6069 /* If the following is false, the one value has been considered
6070 for cloning for all contexts. */
6071 && (plats->aggs_contain_variable
6072 || !aglat->is_single_const ()))
6073 for (val = aglat->values; val; val = val->next)
6074 ret |= decide_about_value (node, index: i, offset: aglat->offset, val, avals: &avals,
6075 self_gen_clones: &self_gen_clones);
6076 }
6077
6078 if (!ctxlat->bottom
6079 && avals.m_known_contexts[i].useless_p ())
6080 {
6081 ipcp_value<ipa_polymorphic_call_context> *val;
6082 for (val = ctxlat->values; val; val = val->next)
6083 ret |= decide_about_value (node, index: i, offset: -1, val, avals: &avals,
6084 self_gen_clones: &self_gen_clones);
6085 }
6086 }
6087
6088 if (!self_gen_clones.is_empty ())
6089 {
6090 self_gen_clones.safe_push (obj: node);
6091 update_counts_for_self_gen_clones (orig_node: node, self_gen_clones);
6092 }
6093
6094 if (info->do_clone_for_all_contexts)
6095 {
6096 if (!dbg_cnt (index: ipa_cp_values))
6097 {
6098 info->do_clone_for_all_contexts = false;
6099 return ret;
6100 }
6101
6102 struct cgraph_node *clone;
6103 auto_vec<cgraph_edge *> callers = node->collect_callers ();
6104
6105 for (int i = callers.length () - 1; i >= 0; i--)
6106 {
6107 cgraph_edge *cs = callers[i];
6108 ipa_node_params *caller_info = ipa_node_params_sum->get (node: cs->caller);
6109
6110 if (caller_info && caller_info->node_dead)
6111 callers.unordered_remove (ix: i);
6112 }
6113
6114 if (!adjust_callers_for_value_intersection (callers, node))
6115 {
6116 /* If node is not called by anyone, or all its caller edges are
6117 self-recursive, the node is not really in use; there is no need to do
6118 cloning. */
6119 info->do_clone_for_all_contexts = false;
6120 return ret;
6121 }
6122
6123 if (dump_file)
6124 fprintf (stream: dump_file, format: " - Creating a specialized node of %s "
6125 "for all known contexts.\n", node->dump_name ());
6126
6127 vec<tree> known_csts = avals.m_known_vals.copy ();
6128 vec<ipa_polymorphic_call_context> known_contexts
6129 = copy_useful_known_contexts (known_contexts: avals.m_known_contexts);
6130 find_more_scalar_values_for_callers_subset (node, known_csts, callers);
6131 find_more_contexts_for_caller_subset (node, known_contexts: &known_contexts, callers);
6132 vec<ipa_argagg_value, va_gc> *aggvals
6133 = find_aggregate_values_for_callers_subset (node, callers);
6134
6135 if (!known_contexts_useful_p (known_contexts))
6136 {
6137 known_contexts.release ();
6138 known_contexts = vNULL;
6139 }
6140 clone = create_specialized_node (node, known_csts, known_contexts,
6141 aggvals, callers);
6142 info->do_clone_for_all_contexts = false;
6143 ipa_node_params_sum->get (node: clone)->is_all_contexts_clone = true;
6144 ret = true;
6145 }
6146
6147 return ret;
6148}
6149
6150/* Transitively mark all callees of NODE within the same SCC as not dead. */
6151
6152static void
6153spread_undeadness (struct cgraph_node *node)
6154{
6155 struct cgraph_edge *cs;
6156
6157 for (cs = node->callees; cs; cs = cs->next_callee)
6158 if (ipa_edge_within_scc (cs))
6159 {
6160 struct cgraph_node *callee;
6161 class ipa_node_params *info;
6162
6163 callee = cs->callee->function_symbol (NULL);
6164 info = ipa_node_params_sum->get (node: callee);
6165
6166 if (info && info->node_dead)
6167 {
6168 info->node_dead = 0;
6169 spread_undeadness (node: callee);
6170 }
6171 }
6172}
6173
6174/* Return true if NODE has a caller from outside of its SCC that is not
6175 dead. Worker callback for cgraph_for_node_and_aliases. */
6176
6177static bool
6178has_undead_caller_from_outside_scc_p (struct cgraph_node *node,
6179 void *data ATTRIBUTE_UNUSED)
6180{
6181 struct cgraph_edge *cs;
6182
6183 for (cs = node->callers; cs; cs = cs->next_caller)
6184 if (cs->caller->thunk
6185 && cs->caller->call_for_symbol_thunks_and_aliases
6186 (callback: has_undead_caller_from_outside_scc_p, NULL, include_overwritable: true))
6187 return true;
6188 else if (!ipa_edge_within_scc (cs))
6189 {
6190 ipa_node_params *caller_info = ipa_node_params_sum->get (node: cs->caller);
6191 if (!caller_info /* Unoptimized callers are like dead ones. */
6192 || !caller_info->node_dead)
6193 return true;
6194 }
6195 return false;
6196}
6197
6198
6199/* Identify nodes within the same SCC as NODE which are no longer needed
6200 because of new clones and will be removed as unreachable. */
6201
6202static void
6203identify_dead_nodes (struct cgraph_node *node)
6204{
6205 struct cgraph_node *v;
6206 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6207 if (v->local)
6208 {
6209 ipa_node_params *info = ipa_node_params_sum->get (node: v);
6210 if (info
6211 && !v->call_for_symbol_thunks_and_aliases
6212 (callback: has_undead_caller_from_outside_scc_p, NULL, include_overwritable: true))
6213 info->node_dead = 1;
6214 }
6215
6216 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6217 {
6218 ipa_node_params *info = ipa_node_params_sum->get (node: v);
6219 if (info && !info->node_dead)
6220 spread_undeadness (node: v);
6221 }
6222
6223 if (dump_file && (dump_flags & TDF_DETAILS))
6224 {
6225 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6226 if (ipa_node_params_sum->get (node: v)
6227 && ipa_node_params_sum->get (node: v)->node_dead)
6228 fprintf (stream: dump_file, format: " Marking node as dead: %s.\n",
6229 v->dump_name ());
6230 }
6231}
6232
6233/* The decision stage. Iterate over the topological order of call graph nodes
6234 TOPO and make specialized clones if deemed beneficial. */
6235
6236static void
6237ipcp_decision_stage (class ipa_topo_info *topo)
6238{
6239 int i;
6240
6241 if (dump_file)
6242 fprintf (stream: dump_file, format: "\nIPA decision stage:\n\n");
6243
6244 for (i = topo->nnodes - 1; i >= 0; i--)
6245 {
6246 struct cgraph_node *node = topo->order[i];
6247 bool change = false, iterate = true;
6248
6249 while (iterate)
6250 {
6251 struct cgraph_node *v;
6252 iterate = false;
6253 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6254 if (v->has_gimple_body_p ()
6255 && ipcp_versionable_function_p (node: v))
6256 iterate |= decide_whether_version_node (node: v);
6257
6258 change |= iterate;
6259 }
6260 if (change)
6261 identify_dead_nodes (node);
6262 }
6263}
6264
6265/* Look up all VR and bits information that we have discovered and copy it
6266 over to the transformation summary. */
6267
6268static void
6269ipcp_store_vr_results (void)
6270{
6271 cgraph_node *node;
6272
6273 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6274 {
6275 ipa_node_params *info = ipa_node_params_sum->get (node);
6276 bool dumped_sth = false;
6277 bool found_useful_result = false;
6278 bool do_vr = true;
6279 bool do_bits = true;
6280
6281 if (!info || !opt_for_fn (node->decl, flag_ipa_vrp))
6282 {
6283 if (dump_file)
6284 fprintf (stream: dump_file, format: "Not considering %s for VR discovery "
6285 "and propagate; -fipa-ipa-vrp: disabled.\n",
6286 node->dump_name ());
6287 do_vr = false;
6288 }
6289 if (!info || !opt_for_fn (node->decl, flag_ipa_bit_cp))
6290 {
6291 if (dump_file)
6292 fprintf (stream: dump_file, format: "Not considering %s for ipa bitwise "
6293 "propagation ; -fipa-bit-cp: disabled.\n",
6294 node->dump_name ());
6295 do_bits = false;
6296 }
6297 if (!do_bits && !do_vr)
6298 continue;
6299
6300 if (info->ipcp_orig_node)
6301 info = ipa_node_params_sum->get (node: info->ipcp_orig_node);
6302 if (info->lattices.is_empty ())
6303 /* Newly expanded artificial thunks do not have lattices. */
6304 continue;
6305
6306 unsigned count = ipa_get_param_count (info);
6307 for (unsigned i = 0; i < count; i++)
6308 {
6309 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6310 if (do_vr
6311 && !plats->m_value_range.bottom_p ()
6312 && !plats->m_value_range.top_p ())
6313 {
6314 found_useful_result = true;
6315 break;
6316 }
6317 if (do_bits && plats->bits_lattice.constant_p ())
6318 {
6319 found_useful_result = true;
6320 break;
6321 }
6322 }
6323 if (!found_useful_result)
6324 continue;
6325
6326 ipcp_transformation_initialize ();
6327 ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
6328 vec_safe_reserve_exact (v&: ts->m_vr, nelems: count);
6329
6330 for (unsigned i = 0; i < count; i++)
6331 {
6332 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6333 ipcp_bits_lattice *bits = NULL;
6334
6335 if (do_bits
6336 && plats->bits_lattice.constant_p ()
6337 && dbg_cnt (index: ipa_cp_bits))
6338 bits = &plats->bits_lattice;
6339
6340 if (do_vr
6341 && !plats->m_value_range.bottom_p ()
6342 && !plats->m_value_range.top_p ()
6343 && dbg_cnt (index: ipa_cp_vr))
6344 {
6345 if (bits)
6346 {
6347 Value_Range tmp = plats->m_value_range.m_vr;
6348 tree type = ipa_get_type (info, i);
6349 irange &r = as_a<irange> (v&: tmp);
6350 irange_bitmask bm (wide_int::from (x: bits->get_value (),
6351 TYPE_PRECISION (type),
6352 TYPE_SIGN (type)),
6353 wide_int::from (x: bits->get_mask (),
6354 TYPE_PRECISION (type),
6355 TYPE_SIGN (type)));
6356 r.update_bitmask (bm);
6357 ipa_vr vr (tmp);
6358 ts->m_vr->quick_push (obj: vr);
6359 }
6360 else
6361 {
6362 ipa_vr vr (plats->m_value_range.m_vr);
6363 ts->m_vr->quick_push (obj: vr);
6364 }
6365 }
6366 else if (bits)
6367 {
6368 tree type = ipa_get_type (info, i);
6369 Value_Range tmp;
6370 tmp.set_varying (type);
6371 irange &r = as_a<irange> (v&: tmp);
6372 irange_bitmask bm (wide_int::from (x: bits->get_value (),
6373 TYPE_PRECISION (type),
6374 TYPE_SIGN (type)),
6375 wide_int::from (x: bits->get_mask (),
6376 TYPE_PRECISION (type),
6377 TYPE_SIGN (type)));
6378 r.update_bitmask (bm);
6379 ipa_vr vr (tmp);
6380 ts->m_vr->quick_push (obj: vr);
6381 }
6382 else
6383 {
6384 ipa_vr vr;
6385 ts->m_vr->quick_push (obj: vr);
6386 }
6387
6388 if (!dump_file || !bits)
6389 continue;
6390
6391 if (!dumped_sth)
6392 {
6393 fprintf (stream: dump_file, format: "Propagated bits info for function %s:\n",
6394 node->dump_name ());
6395 dumped_sth = true;
6396 }
6397 fprintf (stream: dump_file, format: " param %i: value = ", i);
6398 print_hex (wi: bits->get_value (), file: dump_file);
6399 fprintf (stream: dump_file, format: ", mask = ");
6400 print_hex (wi: bits->get_mask (), file: dump_file);
6401 fprintf (stream: dump_file, format: "\n");
6402 }
6403 }
6404}
6405
6406/* The IPCP driver. */
6407
6408static unsigned int
6409ipcp_driver (void)
6410{
6411 class ipa_topo_info topo;
6412
6413 if (edge_clone_summaries == NULL)
6414 edge_clone_summaries = new edge_clone_summary_t (symtab);
6415
6416 ipa_check_create_node_params ();
6417 ipa_check_create_edge_args ();
6418 clone_num_suffixes = new hash_map<const char *, unsigned>;
6419
6420 if (dump_file)
6421 {
6422 fprintf (stream: dump_file, format: "\nIPA structures before propagation:\n");
6423 if (dump_flags & TDF_DETAILS)
6424 ipa_print_all_params (dump_file);
6425 ipa_print_all_jump_functions (f: dump_file);
6426 }
6427
6428 /* Topological sort. */
6429 build_toporder_info (topo: &topo);
6430 /* Do the interprocedural propagation. */
6431 ipcp_propagate_stage (topo: &topo);
6432 /* Decide what constant propagation and cloning should be performed. */
6433 ipcp_decision_stage (topo: &topo);
6434 /* Store results of value range and bits propagation. */
6435 ipcp_store_vr_results ();
6436
6437 /* Free all IPCP structures. */
6438 delete clone_num_suffixes;
6439 free_toporder_info (topo: &topo);
6440 delete edge_clone_summaries;
6441 edge_clone_summaries = NULL;
6442 ipa_free_all_structures_after_ipa_cp ();
6443 if (dump_file)
6444 fprintf (stream: dump_file, format: "\nIPA constant propagation end\n");
6445 return 0;
6446}
6447
6448/* Initialization and computation of IPCP data structures. This is the initial
6449 intraprocedural analysis of functions, which gathers information to be
6450 propagated later on. */
6451
6452static void
6453ipcp_generate_summary (void)
6454{
6455 struct cgraph_node *node;
6456
6457 if (dump_file)
6458 fprintf (stream: dump_file, format: "\nIPA constant propagation start:\n");
6459 ipa_register_cgraph_hooks ();
6460
6461 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6462 ipa_analyze_node (node);
6463}
6464
6465namespace {
6466
6467const pass_data pass_data_ipa_cp =
6468{
6469 .type: IPA_PASS, /* type */
6470 .name: "cp", /* name */
6471 .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */
6472 .tv_id: TV_IPA_CONSTANT_PROP, /* tv_id */
6473 .properties_required: 0, /* properties_required */
6474 .properties_provided: 0, /* properties_provided */
6475 .properties_destroyed: 0, /* properties_destroyed */
6476 .todo_flags_start: 0, /* todo_flags_start */
6477 .todo_flags_finish: ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
6478};
6479
6480class pass_ipa_cp : public ipa_opt_pass_d
6481{
6482public:
6483 pass_ipa_cp (gcc::context *ctxt)
6484 : ipa_opt_pass_d (pass_data_ipa_cp, ctxt,
6485 ipcp_generate_summary, /* generate_summary */
6486 NULL, /* write_summary */
6487 NULL, /* read_summary */
6488 ipcp_write_transformation_summaries, /*
6489 write_optimization_summary */
6490 ipcp_read_transformation_summaries, /*
6491 read_optimization_summary */
6492 NULL, /* stmt_fixup */
6493 0, /* function_transform_todo_flags_start */
6494 ipcp_transform_function, /* function_transform */
6495 NULL) /* variable_transform */
6496 {}
6497
6498 /* opt_pass methods: */
6499 bool gate (function *) final override
6500 {
6501 /* FIXME: We should remove the optimize check after we ensure we never run
6502 IPA passes when not optimizing. */
6503 return (flag_ipa_cp && optimize) || in_lto_p;
6504 }
6505
6506 unsigned int execute (function *) final override { return ipcp_driver (); }
6507
6508}; // class pass_ipa_cp
6509
6510} // anon namespace
6511
6512ipa_opt_pass_d *
6513make_pass_ipa_cp (gcc::context *ctxt)
6514{
6515 return new pass_ipa_cp (ctxt);
6516}
6517
6518/* Reset all state within ipa-cp.cc so that we can rerun the compiler
6519 within the same process. For use by toplev::finalize. */
6520
6521void
6522ipa_cp_cc_finalize (void)
6523{
6524 base_count = profile_count::uninitialized ();
6525 overall_size = 0;
6526 orig_overall_size = 0;
6527 ipcp_free_transformation_sum ();
6528}
6529
6530/* Given PARAM which must be a parameter of function FNDECL described by THIS,
6531 return its index in the DECL_ARGUMENTS chain, using a pre-computed
6532 DECL_UID-sorted vector if available (which is pre-computed only if there are
6533 many parameters). Can return -1 if PARAM is the static chain, which is
6534 not represented among DECL_ARGUMENTS. */
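/* As a hypothetical example, for

     int f (int a, int b);

   calling this with the PARM_DECL of B yields 1, while calling it with the
   decl of a static chain that is not among DECL_ARGUMENTS yields -1.  */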
6535
6536int
6537ipcp_transformation::get_param_index (const_tree fndecl, const_tree param) const
6538{
6539 gcc_assert (TREE_CODE (param) == PARM_DECL);
6540 if (m_uid_to_idx)
6541 {
6542 unsigned puid = DECL_UID (param);
6543 const ipa_uid_to_idx_map_elt *res
6544 = std::lower_bound (first: m_uid_to_idx->begin(), last: m_uid_to_idx->end (), val: puid,
6545 comp: [] (const ipa_uid_to_idx_map_elt &elt, unsigned uid)
6546 {
6547 return elt.uid < uid;
6548 });
6549 if (res == m_uid_to_idx->end ()
6550 || res->uid != puid)
6551 {
6552 gcc_assert (DECL_STATIC_CHAIN (fndecl));
6553 return -1;
6554 }
6555 return res->index;
6556 }
6557
6558 unsigned index = 0;
6559 for (tree p = DECL_ARGUMENTS (fndecl); p; p = DECL_CHAIN (p), index++)
6560 if (p == param)
6561 return (int) index;
6562
6563 gcc_assert (DECL_STATIC_CHAIN (fndecl));
6564 return -1;
6565}
6566
6567/* Helper function to qsort a vector of ipa_uid_to_idx_map_elt elements
6568 according to the uid. */
6569
6570static int
6571compare_uids (const void *a, const void *b)
6572{
6573 const ipa_uid_to_idx_map_elt *e1 = (const ipa_uid_to_idx_map_elt *) a;
6574 const ipa_uid_to_idx_map_elt *e2 = (const ipa_uid_to_idx_map_elt *) b;
6575 if (e1->uid < e2->uid)
6576 return -1;
6577 if (e1->uid > e2->uid)
6578 return 1;
6579 gcc_unreachable ();
6580}
6581
6582/* Assuming THIS describes FNDECL and it has sufficiently many parameters to
6583 justify the overhead, create a DECL_UID-sorted vector to speed up mapping
6584 from parameters to their indices in DECL_ARGUMENTS chain. */
6585
6586void
6587ipcp_transformation::maybe_create_parm_idx_map (tree fndecl)
6588{
6589 int c = count_formal_params (fndecl);
6590 if (c < 32)
6591 return;
6592
6593 m_uid_to_idx = NULL;
6594 vec_safe_reserve (v&: m_uid_to_idx, nelems: c, exact: true);
6595 unsigned index = 0;
6596 for (tree p = DECL_ARGUMENTS (fndecl); p; p = DECL_CHAIN (p), index++)
6597 {
6598 ipa_uid_to_idx_map_elt elt;
6599 elt.uid = DECL_UID (p);
6600 elt.index = index;
6601 m_uid_to_idx->quick_push (obj: elt);
6602 }
6603 m_uid_to_idx->qsort (compare_uids);
6604}
6605
