/* Basic IPA optimizations based on profile.
   Copyright (C) 2003-2024 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* The ipa-profile pass implements the following analyses, propagating profile
   information inter-procedurally.

   - Count histogram construction.  This is a histogram analyzing how much
     time is spent executing statements with a given execution count read
     from profile feedback.  This histogram is complete only with LTO,
     otherwise it contains information only about the current unit.

     The information is used to set hot/cold thresholds.
   - Next, speculative indirect call resolution is performed: the local
     profile pass assigns a profile-id to each function and provides us with a
     histogram specifying the most common target.  We look up the callgraph
     node corresponding to the target and produce a speculative call.

     This call may or may not survive through IPA optimization based on the
     decision of the inliner.
   - Finally we propagate the following flags: unlikely executed, executed
     once, executed at startup and executed at exit.  These flags are used to
     control code size/performance thresholds and code placement (by producing
     .text.unlikely/.text.hot/.text.startup/.text.exit subsections).  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "cgraph.h"
#include "data-streamer.h"
#include "gimple-iterator.h"
#include "ipa-utils.h"
#include "profile.h"
#include "value-prof.h"
#include "tree-inline.h"
#include "symbol-summary.h"
#include "tree-vrp.h"
#include "sreal.h"
#include "ipa-cp.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"

/* Entry in the histogram.  */

struct histogram_entry
{
  gcov_type count;
  int time;
  int size;
};

/* Histogram of profile values.
   The histogram is represented as an ordered vector of entries allocated via
   histogram_pool.  During construction a separate hashtable is kept to look
   up duplicate entries.  */

vec<histogram_entry *> histogram;
static object_allocator<histogram_entry> histogram_pool ("IPA histogram");

/* Hashtable support for looking up histogram entries by their execution
   count.  */

struct histogram_hash : nofree_ptr_hash <histogram_entry>
{
  static inline hashval_t hash (const histogram_entry *);
  static inline int equal (const histogram_entry *, const histogram_entry *);
};

inline hashval_t
histogram_hash::hash (const histogram_entry *val)
{
  return val->count;
}

inline int
histogram_hash::equal (const histogram_entry *val, const histogram_entry *val2)
{
  return val->count == val2->count;
}

/* Account TIME and SIZE executed COUNT times into HISTOGRAM.
   HASHTABLE is the side hash table kept to avoid duplicate entries.  */

static void
account_time_size (hash_table<histogram_hash> *hashtable,
                   vec<histogram_entry *> &histogram,
                   gcov_type count, int time, int size)
{
  histogram_entry key = {count, 0, 0};
  histogram_entry **val = hashtable->find_slot (&key, INSERT);

  if (!*val)
    {
      *val = histogram_pool.allocate ();
      **val = key;
      histogram.safe_push (*val);
    }
  (*val)->time += time;
  (*val)->size += size;
}

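/* qsort comparator ordering histogram entries by decreasing execution
   count.  */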
int
cmp_counts (const void *v1, const void *v2)
{
  const histogram_entry *h1 = *(const histogram_entry * const *) v1;
  const histogram_entry *h2 = *(const histogram_entry * const *) v2;
  if (h1->count < h2->count)
    return 1;
  if (h1->count > h2->count)
    return -1;
  return 0;
}

/* Dump HISTOGRAM to FILE.  */

static void
dump_histogram (FILE *file, vec<histogram_entry *> histogram)
{
  unsigned int i;
  gcov_type overall_time = 0, cumulated_time = 0, cumulated_size = 0,
            overall_size = 0;

  fprintf (file, "Histogram:\n");
  for (i = 0; i < histogram.length (); i++)
    {
      overall_time += histogram[i]->count * histogram[i]->time;
      overall_size += histogram[i]->size;
    }
  if (!overall_time)
    overall_time = 1;
  if (!overall_size)
    overall_size = 1;
  for (i = 0; i < histogram.length (); i++)
    {
      cumulated_time += histogram[i]->count * histogram[i]->time;
      cumulated_size += histogram[i]->size;
      fprintf (file, " %" PRId64": time:%i (%2.2f) size:%i (%2.2f)\n",
               (int64_t) histogram[i]->count,
               histogram[i]->time,
               cumulated_time * 100.0 / overall_time,
               histogram[i]->size,
               cumulated_size * 100.0 / overall_size);
    }
}

/* Structure containing speculative target information from profile.  */

struct speculative_call_target
{
  speculative_call_target (unsigned int id = 0, int prob = 0)
    : target_id (id), target_probability (prob)
  {
  }

  /* Profile_id of target obtained from profile.  */
  unsigned int target_id;
  /* Probability that call will land in function with target_id.  */
  unsigned int target_probability;
};

class speculative_call_summary
{
public:
  speculative_call_summary () : speculative_call_targets ()
  {}

  auto_vec<speculative_call_target> speculative_call_targets;

  void dump (FILE *f);

};

/* Class to manage call summaries.  */

class ipa_profile_call_summaries
  : public call_summary<speculative_call_summary *>
{
public:
  ipa_profile_call_summaries (symbol_table *table)
    : call_summary<speculative_call_summary *> (table)
  {}

  /* Duplicate info when an edge is cloned.  */
  void duplicate (cgraph_edge *, cgraph_edge *,
                  speculative_call_summary *old_sum,
                  speculative_call_summary *new_sum) final override;
};

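/* Per-edge summaries of speculative call targets.  Created by
   ipa_profile_generate_summary or ipa_profile_read_summary and freed at the
   end of the IPA profile pass.  */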
static ipa_profile_call_summaries *call_sums = NULL;

/* Dump all information in speculative call summary to F.  */

void
speculative_call_summary::dump (FILE *f)
{
  cgraph_node *n2;

  unsigned spec_count = speculative_call_targets.length ();
  for (unsigned i = 0; i < spec_count; i++)
    {
      speculative_call_target item = speculative_call_targets[i];
      n2 = find_func_by_profile_id (item.target_id);
      if (n2)
        fprintf (f, " The %i speculative target is %s with prob %3.2f\n", i,
                 n2->dump_name (),
                 item.target_probability / (float) REG_BR_PROB_BASE);
      else
        fprintf (f, " The %i speculative target is %u with prob %3.2f\n", i,
                 item.target_id,
                 item.target_probability / (float) REG_BR_PROB_BASE);
    }
}

/* Duplicate info when an edge is cloned.  */

void
ipa_profile_call_summaries::duplicate (cgraph_edge *, cgraph_edge *,
                                       speculative_call_summary *old_sum,
                                       speculative_call_summary *new_sum)
{
  if (!old_sum)
    return;

  unsigned old_count = old_sum->speculative_call_targets.length ();
  if (!old_count)
    return;

  new_sum->speculative_call_targets.reserve_exact (old_count);
  new_sum->speculative_call_targets.quick_grow_cleared (old_count);

  for (unsigned i = 0; i < old_count; i++)
    {
      new_sum->speculative_call_targets[i]
        = old_sum->speculative_call_targets[i];
    }
}

/* Collect histogram and speculative target summaries from CFG profiles.  */

static void
ipa_profile_generate_summary (void)
{
  struct cgraph_node *node;
  gimple_stmt_iterator gsi;
  basic_block bb;

  hash_table<histogram_hash> hashtable (10);

  gcc_checking_assert (!call_sums);
  call_sums = new ipa_profile_call_summaries (symtab);

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    if (ENTRY_BLOCK_PTR_FOR_FN
          (DECL_STRUCT_FUNCTION (node->decl))->count.ipa_p ())
      FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
        {
          int time = 0;
          int size = 0;
          for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
            {
              gimple *stmt = gsi_stmt (gsi);
              if (gimple_code (stmt) == GIMPLE_CALL
                  && !gimple_call_fndecl (stmt))
                {
                  histogram_value h;
                  h = gimple_histogram_value_of_type
                        (DECL_STRUCT_FUNCTION (node->decl),
                         stmt, HIST_TYPE_INDIR_CALL);
                  /* No need to do sanity check: gimple_ic_transform already
                     takes away bad histograms.  */
                  if (h)
                    {
                      gcov_type val, count, all;
                      struct cgraph_edge *e = node->get_edge (stmt);
                      if (e && !e->indirect_unknown_callee)
                        continue;

                      speculative_call_summary *csum
                        = call_sums->get_create (e);

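                      /* Record each target tracked by the TOPN counter
                         (skipping empty entries) together with its
                         probability scaled to REG_BR_PROB_BASE.  */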
                      for (unsigned j = 0;
                           j < GCOV_TOPN_MAXIMUM_TRACKED_VALUES; j++)
                        {
                          if (!get_nth_most_common_value (NULL, "indirect call",
                                                          h, &val, &count,
                                                          &all, j))
                            continue;

                          if (val == 0 || count == 0)
                            continue;

                          if (count > all)
                            {
                              if (dump_file)
                                fprintf (dump_file,
                                         "Probability capped to 1\n");
                              count = all;
                            }
                          speculative_call_target item (
                            val, GCOV_COMPUTE_SCALE (count, all));
                          csum->speculative_call_targets.safe_push (item);
                        }

                      gimple_remove_histogram_value
                        (DECL_STRUCT_FUNCTION (node->decl), stmt, h);
                    }
                }
              time += estimate_num_insns (stmt, &eni_time_weights);
              size += estimate_num_insns (stmt, &eni_size_weights);
            }
          if (bb->count.ipa_p () && bb->count.initialized_p ())
            account_time_size (&hashtable, histogram,
                               bb->count.ipa ().to_gcov_type (),
                               time, size);
        }
  histogram.qsort (cmp_counts);
}

/* Serialize the speculative summary info for LTO.  */

static void
ipa_profile_write_edge_summary (lto_simple_output_block *ob,
                                speculative_call_summary *csum)
{
  unsigned len = 0;

  len = csum->speculative_call_targets.length ();

  gcc_assert (len <= GCOV_TOPN_MAXIMUM_TRACKED_VALUES);

  streamer_write_hwi_stream (ob->main_stream, len);

  if (len)
    {
      unsigned spec_count = csum->speculative_call_targets.length ();
      for (unsigned i = 0; i < spec_count; i++)
        {
          speculative_call_target item = csum->speculative_call_targets[i];
          gcc_assert (item.target_id);
          streamer_write_hwi_stream (ob->main_stream, item.target_id);
          streamer_write_hwi_stream (ob->main_stream, item.target_probability);
        }
    }
}

/* Serialize the IPA info for LTO.  */

static void
ipa_profile_write_summary (void)
{
  struct lto_simple_output_block *ob
    = lto_create_simple_output_block (LTO_section_ipa_profile);
  unsigned int i;

  streamer_write_uhwi_stream (ob->main_stream, histogram.length ());
  for (i = 0; i < histogram.length (); i++)
    {
      streamer_write_gcov_count_stream (ob->main_stream, histogram[i]->count);
      streamer_write_uhwi_stream (ob->main_stream, histogram[i]->time);
      streamer_write_uhwi_stream (ob->main_stream, histogram[i]->size);
    }

  if (!call_sums)
    return;

  /* Serialize speculative targets information.  */
  unsigned int count = 0;
  lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
  lto_symtab_encoder_iterator lsei;
  cgraph_node *node;

  for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
       lsei_next_function_in_partition (&lsei))
    {
      node = lsei_cgraph_node (lsei);
      if (node->definition && node->has_gimple_body_p ()
          && node->indirect_calls)
        count++;
    }

  streamer_write_uhwi_stream (ob->main_stream, count);

  /* Process all of the functions.  */
  for (lsei = lsei_start_function_in_partition (encoder);
       !lsei_end_p (lsei) && count; lsei_next_function_in_partition (&lsei))
    {
      cgraph_node *node = lsei_cgraph_node (lsei);
      if (node->definition && node->has_gimple_body_p ()
          && node->indirect_calls)
        {
          int node_ref = lto_symtab_encoder_encode (encoder, node);
          streamer_write_uhwi_stream (ob->main_stream, node_ref);

          for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
            {
              speculative_call_summary *csum = call_sums->get_create (e);
              ipa_profile_write_edge_summary (ob, csum);
            }
        }
    }

  lto_destroy_simple_output_block (ob);
}

/* Dump all profile summary data for all cgraph nodes and edges to file F.  */

static void
ipa_profile_dump_all_summaries (FILE *f)
{
  fprintf (f,
           "\n========== IPA-profile speculative targets: ==========\n");
  cgraph_node *node;
  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      fprintf (f, "\nSummary for node %s:\n", node->dump_name ());
      for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
        {
          fprintf (f, " Summary for %s of indirect edge %d:\n",
                   e->caller->dump_name (), e->lto_stmt_uid);
          speculative_call_summary *csum = call_sums->get_create (e);
          csum->dump (f);
        }
    }
  fprintf (f, "\n\n");
}

/* Read speculative targets information about edge for LTO WPA.  */

static void
ipa_profile_read_edge_summary (class lto_input_block *ib, cgraph_edge *edge)
{
  unsigned i, len;

  len = streamer_read_hwi (ib);
  gcc_assert (len <= GCOV_TOPN_MAXIMUM_TRACKED_VALUES);
  speculative_call_summary *csum = call_sums->get_create (edge);

  for (i = 0; i < len; i++)
    {
      unsigned int target_id = streamer_read_hwi (ib);
      int target_probability = streamer_read_hwi (ib);
      speculative_call_target item (target_id, target_probability);
      csum->speculative_call_targets.safe_push (item);
    }
}

/* Read profile speculative targets section information for LTO WPA.  */

static void
ipa_profile_read_summary_section (struct lto_file_decl_data *file_data,
                                  class lto_input_block *ib)
{
  if (!ib)
    return;

  lto_symtab_encoder_t encoder = file_data->symtab_node_encoder;

  unsigned int count = streamer_read_uhwi (ib);

  unsigned int i;
  unsigned int index;
  cgraph_node *node;

  for (i = 0; i < count; i++)
    {
      index = streamer_read_uhwi (ib);
      node
        = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder, index));

      for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
        ipa_profile_read_edge_summary (ib, e);
    }
}

/* Deserialize the IPA histogram and speculative targets summary info for
   LTO.  */

static void
ipa_profile_read_summary (void)
{
  struct lto_file_decl_data **file_data_vec
    = lto_get_file_decl_data ();
  struct lto_file_decl_data *file_data;
  int j = 0;

  hash_table<histogram_hash> hashtable (10);

  gcc_checking_assert (!call_sums);
  call_sums = new ipa_profile_call_summaries (symtab);

  while ((file_data = file_data_vec[j++]))
    {
      const char *data;
      size_t len;
      class lto_input_block *ib
        = lto_create_simple_input_block (file_data,
                                         LTO_section_ipa_profile,
                                         &data, &len);
      if (ib)
        {
          unsigned int num = streamer_read_uhwi (ib);
          unsigned int n;
          for (n = 0; n < num; n++)
            {
              gcov_type count = streamer_read_gcov_count (ib);
              int time = streamer_read_uhwi (ib);
              int size = streamer_read_uhwi (ib);
              account_time_size (&hashtable, histogram,
                                 count, time, size);
            }

          ipa_profile_read_summary_section (file_data, ib);

          lto_destroy_simple_input_block (file_data,
                                          LTO_section_ipa_profile,
                                          ib, data, len);
        }
    }
  histogram.qsort (cmp_counts);
}

/* Data used by ipa_propagate_frequency.  */

struct ipa_propagate_frequency_data
{
  cgraph_node *function_symbol;
  bool maybe_unlikely_executed;
  bool maybe_executed_once;
  bool only_called_at_startup;
  bool only_called_at_exit;
};

/* Worker for ipa_propagate_frequency_1.  */

static bool
ipa_propagate_frequency_1 (struct cgraph_node *node, void *data)
{
  struct ipa_propagate_frequency_data *d;
  struct cgraph_edge *edge;

  d = (struct ipa_propagate_frequency_data *) data;
  for (edge = node->callers;
       edge && (d->maybe_unlikely_executed || d->maybe_executed_once
                || d->only_called_at_startup || d->only_called_at_exit);
       edge = edge->next_caller)
    {
      if (edge->caller != d->function_symbol)
        {
          d->only_called_at_startup &= edge->caller->only_called_at_startup;
          /* It makes sense to put main() together with the static
             constructors.  It will be executed for sure, but the rest of the
             functions called from main are definitely not at startup only.  */
          if (MAIN_NAME_P (DECL_NAME (edge->caller->decl)))
            d->only_called_at_startup = 0;
          d->only_called_at_exit &= edge->caller->only_called_at_exit;
        }

      /* When profile feedback is available, do not try to propagate too hard;
         counts are already a good guide to function frequencies and roundoff
         errors can push a function into the unlikely section even when it is
         executed by the train run.  Transfer the function only if all callers
         are unlikely executed.  */
      if (profile_info
          && !(edge->callee->count.ipa () == profile_count::zero ())
          && (edge->caller->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED
              || (edge->caller->inlined_to
                  && edge->caller->inlined_to->frequency
                     != NODE_FREQUENCY_UNLIKELY_EXECUTED)))
        d->maybe_unlikely_executed = false;
      if (edge->count.ipa ().initialized_p ()
          && !edge->count.ipa ().nonzero_p ())
        continue;
      switch (edge->caller->frequency)
        {
        case NODE_FREQUENCY_UNLIKELY_EXECUTED:
          break;
        case NODE_FREQUENCY_EXECUTED_ONCE:
          {
            if (dump_file && (dump_flags & TDF_DETAILS))
              fprintf (dump_file, " Called by %s that is executed once\n",
                       edge->caller->dump_name ());
            d->maybe_unlikely_executed = false;
            ipa_call_summary *s = ipa_call_summaries->get (edge);
            if (s != NULL && s->loop_depth)
              {
                d->maybe_executed_once = false;
                if (dump_file && (dump_flags & TDF_DETAILS))
                  fprintf (dump_file, " Called in loop\n");
              }
            break;
          }
        case NODE_FREQUENCY_HOT:
        case NODE_FREQUENCY_NORMAL:
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " Called by %s that is normal or hot\n",
                     edge->caller->dump_name ());
          d->maybe_unlikely_executed = false;
          d->maybe_executed_once = false;
          break;
        }
    }
  return edge != NULL;
}

/* Return true if NODE contains hot calls.  */

bool
contains_hot_call_p (struct cgraph_node *node)
{
  struct cgraph_edge *e;
  for (e = node->callees; e; e = e->next_callee)
    if (e->maybe_hot_p ())
      return true;
    else if (!e->inline_failed
             && contains_hot_call_p (e->callee))
      return true;
  for (e = node->indirect_calls; e; e = e->next_callee)
    if (e->maybe_hot_p ())
      return true;
  return false;
}

/* See if the frequency of NODE can be updated based on frequencies of its
   callers.  */
bool
ipa_propagate_frequency (struct cgraph_node *node)
{
  struct ipa_propagate_frequency_data d = {node, true, true, true, true};
  bool changed = false;

  /* We cannot propagate anything useful about externally visible functions
     nor about virtuals.  */
  if (!node->local
      || node->alias
      || (opt_for_fn (node->decl, flag_devirtualize)
          && DECL_VIRTUAL_P (node->decl)))
    return false;
  gcc_assert (node->analyzed);
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Processing frequency %s\n", node->dump_name ());

  node->call_for_symbol_and_aliases (ipa_propagate_frequency_1, &d,
                                     true);

  if ((d.only_called_at_startup && !d.only_called_at_exit)
      && !node->only_called_at_startup)
    {
      node->only_called_at_startup = true;
      if (dump_file)
        fprintf (dump_file, "Node %s promoted to only called at startup.\n",
                 node->dump_name ());
      changed = true;
    }
  if ((d.only_called_at_exit && !d.only_called_at_startup)
      && !node->only_called_at_exit)
    {
      node->only_called_at_exit = true;
      if (dump_file)
        fprintf (dump_file, "Node %s promoted to only called at exit.\n",
                 node->dump_name ());
      changed = true;
    }

  /* With profile we can decide on hot/normal based on count.  */
  if (node->count.ipa ().initialized_p ())
    {
      bool hot = false;
      if (!(node->count.ipa () == profile_count::zero ())
          && node->count.ipa () >= get_hot_bb_threshold ())
        hot = true;
      if (!hot)
        hot |= contains_hot_call_p (node);
      if (hot)
        {
          if (node->frequency != NODE_FREQUENCY_HOT)
            {
              if (dump_file)
                fprintf (dump_file, "Node %s promoted to hot.\n",
                         node->dump_name ());
              node->frequency = NODE_FREQUENCY_HOT;
              return true;
            }
          return false;
        }
      else if (node->frequency == NODE_FREQUENCY_HOT)
        {
          if (dump_file)
            fprintf (dump_file, "Node %s reduced to normal.\n",
                     node->dump_name ());
          node->frequency = NODE_FREQUENCY_NORMAL;
          changed = true;
        }
    }
  /* These come either from profile or user hints; never update them.  */
  if (node->frequency == NODE_FREQUENCY_HOT
      || node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
    return changed;
  if (d.maybe_unlikely_executed)
    {
      node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
      if (dump_file)
        fprintf (dump_file, "Node %s promoted to unlikely executed.\n",
                 node->dump_name ());
      changed = true;
    }
  else if (d.maybe_executed_once
           && node->frequency != NODE_FREQUENCY_EXECUTED_ONCE)
    {
      node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      if (dump_file)
        fprintf (dump_file, "Node %s promoted to executed once.\n",
                 node->dump_name ());
      changed = true;
    }
  return changed;
}

/* Check that number of arguments of N agrees with E.
   Be conservative when summaries are not present.  */

static bool
check_argument_count (struct cgraph_node *n, struct cgraph_edge *e)
{
  if (!ipa_node_params_sum || !ipa_edge_args_sum)
    return true;
  ipa_node_params *info = ipa_node_params_sum->get (n->function_symbol ());
  if (!info)
    return true;
  ipa_edge_args *e_info = ipa_edge_args_sum->get (e);
  if (!e_info)
    return true;
  if (ipa_get_param_count (info) != ipa_get_cs_argument_count (e_info)
      && (ipa_get_param_count (info) >= ipa_get_cs_argument_count (e_info)
          || !stdarg_p (TREE_TYPE (n->decl))))
    return false;
  return true;
}

/* Simple IPA profile pass propagating frequencies across the callgraph.  */

static unsigned int
ipa_profile (void)
{
  struct cgraph_node **order;
  struct cgraph_edge *e;
  int order_pos;
  bool something_changed = false;
  int i;
  gcov_type overall_time = 0, cutoff = 0, cumulated = 0, overall_size = 0;
  struct cgraph_node *n, *n2;
  int nindirect = 0, ncommon = 0, nunknown = 0, nuseless = 0, nconverted = 0;
  int nmismatch = 0, nimpossible = 0;
  bool node_map_initialized = false;
  gcov_type threshold;

  if (dump_file)
    dump_histogram (dump_file, histogram);
  for (i = 0; i < (int) histogram.length (); i++)
    {
      overall_time += histogram[i]->count * histogram[i]->time;
      overall_size += histogram[i]->size;
    }
  threshold = 0;
  if (overall_time)
    {
      gcc_assert (overall_size);

      cutoff = (overall_time * param_hot_bb_count_ws_permille + 500) / 1000;
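      /* Walk the histogram, which is sorted by decreasing count, and find
         the count at which the configured share of the overall weighted
         time is covered; that count becomes the hotness threshold.  */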
      for (i = 0; cumulated < cutoff; i++)
        {
          cumulated += histogram[i]->count * histogram[i]->time;
          threshold = histogram[i]->count;
        }
      if (!threshold)
        threshold = 1;
      if (dump_file)
        {
          gcov_type cumulated_time = 0, cumulated_size = 0;

          for (i = 0;
               i < (int) histogram.length () && histogram[i]->count >= threshold;
               i++)
            {
              cumulated_time += histogram[i]->count * histogram[i]->time;
              cumulated_size += histogram[i]->size;
            }
          fprintf (dump_file, "Determined min count: %" PRId64
                   " Time:%3.2f%% Size:%3.2f%%\n",
                   (int64_t) threshold,
                   cumulated_time * 100.0 / overall_time,
                   cumulated_size * 100.0 / overall_size);
        }

      if (in_lto_p)
        {
          if (dump_file)
            fprintf (dump_file, "Setting hotness threshold in LTO mode.\n");
          set_hot_bb_threshold (threshold);
        }
    }
  histogram.release ();
  histogram_pool.release ();

  /* Produce speculative calls: we saved the most common targets from
     profiling into the speculative call summaries.  Now, at link time, we can
     look up the corresponding function nodes and produce speculative
     calls.  */

  gcc_checking_assert (call_sums);

  if (dump_file)
    {
      if (!node_map_initialized)
        init_node_map (false);
      node_map_initialized = true;

      ipa_profile_dump_all_summaries (dump_file);
    }

  FOR_EACH_DEFINED_FUNCTION (n)
    {
      bool update = false;

      if (!opt_for_fn (n->decl, flag_ipa_profile))
        continue;

      for (e = n->indirect_calls; e; e = e->next_callee)
        {
          if (n->count.initialized_p ())
            nindirect++;

          speculative_call_summary *csum = call_sums->get_create (e);
          unsigned spec_count = csum->speculative_call_targets.length ();
          if (spec_count)
            {
              if (!node_map_initialized)
                init_node_map (false);
              node_map_initialized = true;
              ncommon++;

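              /* Try to turn the indirect call into speculative direct calls,
                 one per recorded target that passes the checks below;
                 speculative_id distinguishes the individual speculative
                 edges.  */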
              unsigned speculative_id = 0;
              profile_count orig = e->count;
              for (unsigned i = 0; i < spec_count; i++)
                {
                  speculative_call_target item
                    = csum->speculative_call_targets[i];
                  n2 = find_func_by_profile_id (item.target_id);
                  if (n2)
                    {
                      if (dump_file)
                        {
                          fprintf (dump_file,
                                   "Indirect call -> direct call from"
                                   " other module %s => %s, prob %3.2f\n",
                                   n->dump_name (),
                                   n2->dump_name (),
                                   item.target_probability
                                     / (float) REG_BR_PROB_BASE);
                        }
                      if (item.target_probability < REG_BR_PROB_BASE / 2)
                        {
                          nuseless++;
                          if (dump_file)
                            fprintf (dump_file,
                                     "Not speculating: "
                                     "probability is too low.\n");
                        }
                      else if (!e->maybe_hot_p ())
                        {
                          nuseless++;
                          if (dump_file)
                            fprintf (dump_file,
                                     "Not speculating: call is cold.\n");
                        }
                      else if (n2->get_availability () <= AVAIL_INTERPOSABLE
                               && n2->can_be_discarded_p ())
                        {
                          nuseless++;
                          if (dump_file)
                            fprintf (dump_file,
                                     "Not speculating: target is overwritable "
                                     "and can be discarded.\n");
                        }
                      else if (!check_argument_count (n2, e))
                        {
                          nmismatch++;
                          if (dump_file)
                            fprintf (dump_file,
                                     "Not speculating: "
                                     "parameter count mismatch\n");
                        }
                      else if (e->indirect_info->polymorphic
                               && !opt_for_fn (n->decl, flag_devirtualize)
                               && !possible_polymorphic_call_target_p (e, n2))
                        {
                          nimpossible++;
                          if (dump_file)
                            fprintf (dump_file,
                                     "Not speculating: "
                                     "function is not in the polymorphic "
                                     "call target list\n");
                        }
                      else
                        {
                          /* Target may be overwritable, but profile says that
                             control flow goes to this particular
                             implementation of N2.  Speculate on the local
                             alias to allow inlining.  */
                          if (!n2->can_be_discarded_p ())
                            {
                              cgraph_node *alias;
                              alias = dyn_cast<cgraph_node *>
                                        (n2->noninterposable_alias ());
                              if (alias)
                                n2 = alias;
                            }
                          nconverted++;
                          profile_probability prob
                            = profile_probability::from_reg_br_prob_base
                                (item.target_probability).adjusted ();
                          e->make_speculative (n2,
                                               orig.apply_probability (prob),
                                               speculative_id);
                          update = true;
                          speculative_id++;
                        }
                    }
                  else
                    {
                      if (dump_file)
                        fprintf (dump_file,
                                 "Function with profile-id %i not found.\n",
                                 item.target_id);
                      nunknown++;
                    }
                }
            }
        }
      if (update)
        ipa_update_overall_fn_summary (n);
    }
  if (node_map_initialized)
    del_node_map ();
  if (dump_file && nindirect)
    fprintf (dump_file,
             "%i indirect calls trained.\n"
             "%i (%3.2f%%) have a common target.\n"
             "%i (%3.2f%%) targets were not found.\n"
             "%i (%3.2f%%) targets had parameter count mismatch.\n"
             "%i (%3.2f%%) targets were not in the polymorphic call target list.\n"
             "%i (%3.2f%%) speculations seem useless.\n"
             "%i (%3.2f%%) speculations produced.\n",
             nindirect,
             ncommon, ncommon * 100.0 / nindirect,
             nunknown, nunknown * 100.0 / nindirect,
             nmismatch, nmismatch * 100.0 / nindirect,
             nimpossible, nimpossible * 100.0 / nindirect,
             nuseless, nuseless * 100.0 / nindirect,
             nconverted, nconverted * 100.0 / nindirect);

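  /* Propagate the frequency flags over the callgraph: whenever a node
     changes, mark its callees via AUX and iterate until a fixed point is
     reached.  */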
  order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
  order_pos = ipa_reverse_postorder (order);
  for (i = order_pos - 1; i >= 0; i--)
    {
      if (order[i]->local
          && opt_for_fn (order[i]->decl, flag_ipa_profile)
          && ipa_propagate_frequency (order[i]))
        {
          for (e = order[i]->callees; e; e = e->next_callee)
            if (e->callee->local && !e->callee->aux)
              {
                something_changed = true;
                e->callee->aux = (void *)1;
              }
        }
      order[i]->aux = NULL;
    }

  while (something_changed)
    {
      something_changed = false;
      for (i = order_pos - 1; i >= 0; i--)
        {
          if (order[i]->aux
              && opt_for_fn (order[i]->decl, flag_ipa_profile)
              && ipa_propagate_frequency (order[i]))
            {
              for (e = order[i]->callees; e; e = e->next_callee)
                if (e->callee->local && !e->callee->aux)
                  {
                    something_changed = true;
                    e->callee->aux = (void *)1;
                  }
            }
          order[i]->aux = NULL;
        }
    }
  free (order);

  if (dump_file && (dump_flags & TDF_DETAILS))
    symtab->dump (dump_file);

  delete call_sums;
  call_sums = NULL;

  return 0;
}

namespace {

const pass_data pass_data_ipa_profile =
{
  IPA_PASS, /* type */
  "profile_estimate", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_IPA_PROFILE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_ipa_profile : public ipa_opt_pass_d
{
public:
  pass_ipa_profile (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_profile, ctxt,
                      ipa_profile_generate_summary, /* generate_summary */
                      ipa_profile_write_summary, /* write_summary */
                      ipa_profile_read_summary, /* read_summary */
                      NULL, /* write_optimization_summary */
                      NULL, /* read_optimization_summary */
                      NULL, /* stmt_fixup */
                      0, /* function_transform_todo_flags_start */
                      NULL, /* function_transform */
                      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  bool gate (function *) final override { return flag_ipa_profile || in_lto_p; }
  unsigned int execute (function *) final override { return ipa_profile (); }

}; // class pass_ipa_profile

} // anon namespace

ipa_opt_pass_d *
make_pass_ipa_profile (gcc::context *ctxt)
{
  return new pass_ipa_profile (ctxt);
}

/* Reset all state within ipa-profile.cc so that we can rerun the compiler
   within the same process.  For use by toplev::finalize.  */

void
ipa_profile_cc_finalize (void)
{
  delete call_sums;
  call_sums = NULL;
}