1 | /* Basic IPA optimizations based on profile. |
2 | Copyright (C) 2003-2024 Free Software Foundation, Inc. |
3 | |
4 | This file is part of GCC. |
5 | |
6 | GCC is free software; you can redistribute it and/or modify it under |
7 | the terms of the GNU General Public License as published by the Free |
8 | Software Foundation; either version 3, or (at your option) any later |
9 | version. |
10 | |
11 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
12 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
13 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
14 | for more details. |
15 | |
16 | You should have received a copy of the GNU General Public License |
17 | along with GCC; see the file COPYING3. If not see |
18 | <http://www.gnu.org/licenses/>. */ |
19 | |
20 | /* ipa-profile pass implements the following analysis propagating profille |
21 | inter-procedurally. |
22 | |
23 | - Count histogram construction. This is a histogram analyzing how much |
24 | time is spent executing statements with a given execution count read |
25 | from profile feedback. This histogram is complete only with LTO, |
26 | otherwise it contains information only about the current unit. |
27 | |
28 | The information is used to set hot/cold thresholds. |
29 | - Next speculative indirect call resolution is performed: the local |
30 | profile pass assigns profile-id to each function and provide us with a |
31 | histogram specifying the most common target. We look up the callgraph |
32 | node corresponding to the target and produce a speculative call. |
33 | |
34 | This call may or may not survive through IPA optimization based on decision |
35 | of inliner. |
36 | - Finally we propagate the following flags: unlikely executed, executed |
37 | once, executed at startup and executed at exit. These flags are used to |
38 | control code size/performance threshold and code placement (by producing |
39 | .text.unlikely/.text.hot/.text.startup/.text.exit subsections). */ |
40 | #include "config.h" |
41 | #include "system.h" |
42 | #include "coretypes.h" |
43 | #include "backend.h" |
44 | #include "tree.h" |
45 | #include "gimple.h" |
46 | #include "predict.h" |
47 | #include "alloc-pool.h" |
48 | #include "tree-pass.h" |
49 | #include "cgraph.h" |
50 | #include "data-streamer.h" |
51 | #include "gimple-iterator.h" |
52 | #include "ipa-utils.h" |
53 | #include "profile.h" |
54 | #include "value-prof.h" |
55 | #include "tree-inline.h" |
56 | #include "symbol-summary.h" |
57 | #include "tree-vrp.h" |
58 | #include "sreal.h" |
59 | #include "ipa-cp.h" |
60 | #include "ipa-prop.h" |
61 | #include "ipa-fnsummary.h" |
62 | |
63 | /* Entry in the histogram. */ |
64 | |
65 | struct histogram_entry |
66 | { |
67 | gcov_type count; |
68 | int time; |
69 | int size; |
70 | }; |
71 | |
72 | /* Histogram of profile values. |
73 | The histogram is represented as an ordered vector of entries allocated via |
74 | histogram_pool. During construction a separate hashtable is kept to lookup |
75 | duplicate entries. */ |
76 | |
77 | vec<histogram_entry *> histogram; |
78 | static object_allocator<histogram_entry> histogram_pool ("IPA histogram" ); |
79 | |
80 | /* Hashtable support for storing SSA names hashed by their SSA_NAME_VAR. */ |
81 | |
82 | struct histogram_hash : nofree_ptr_hash <histogram_entry> |
83 | { |
84 | static inline hashval_t hash (const histogram_entry *); |
85 | static inline int equal (const histogram_entry *, const histogram_entry *); |
86 | }; |
87 | |
88 | inline hashval_t |
89 | histogram_hash::hash (const histogram_entry *val) |
90 | { |
91 | return val->count; |
92 | } |
93 | |
94 | inline int |
95 | histogram_hash::equal (const histogram_entry *val, const histogram_entry *val2) |
96 | { |
97 | return val->count == val2->count; |
98 | } |
99 | |
100 | /* Account TIME and SIZE executed COUNT times into HISTOGRAM. |
101 | HASHTABLE is the on-side hash kept to avoid duplicates. */ |
102 | |
103 | static void |
104 | account_time_size (hash_table<histogram_hash> *hashtable, |
105 | vec<histogram_entry *> &histogram, |
106 | gcov_type count, int time, int size) |
107 | { |
108 | histogram_entry key = {.count: count, .time: 0, .size: 0}; |
109 | histogram_entry **val = hashtable->find_slot (value: &key, insert: INSERT); |
110 | |
111 | if (!*val) |
112 | { |
113 | *val = histogram_pool.allocate (); |
114 | **val = key; |
115 | histogram.safe_push (obj: *val); |
116 | } |
117 | (*val)->time += time; |
118 | (*val)->size += size; |
119 | } |
120 | |
121 | int |
122 | cmp_counts (const void *v1, const void *v2) |
123 | { |
124 | const histogram_entry *h1 = *(const histogram_entry * const *)v1; |
125 | const histogram_entry *h2 = *(const histogram_entry * const *)v2; |
126 | if (h1->count < h2->count) |
127 | return 1; |
128 | if (h1->count > h2->count) |
129 | return -1; |
130 | return 0; |
131 | } |
132 | |
133 | /* Dump HISTOGRAM to FILE. */ |
134 | |
135 | static void |
136 | dump_histogram (FILE *file, vec<histogram_entry *> histogram) |
137 | { |
138 | unsigned int i; |
139 | gcov_type overall_time = 0, cumulated_time = 0, cumulated_size = 0, |
140 | overall_size = 0; |
141 | |
142 | fprintf (stream: dump_file, format: "Histogram:\n" ); |
143 | for (i = 0; i < histogram.length (); i++) |
144 | { |
145 | overall_time += histogram[i]->count * histogram[i]->time; |
146 | overall_size += histogram[i]->size; |
147 | } |
148 | if (!overall_time) |
149 | overall_time = 1; |
150 | if (!overall_size) |
151 | overall_size = 1; |
152 | for (i = 0; i < histogram.length (); i++) |
153 | { |
154 | cumulated_time += histogram[i]->count * histogram[i]->time; |
155 | cumulated_size += histogram[i]->size; |
156 | fprintf (stream: file, format: " %" PRId64": time:%i (%2.2f) size:%i (%2.2f)\n" , |
157 | (int64_t) histogram[i]->count, |
158 | histogram[i]->time, |
159 | cumulated_time * 100.0 / overall_time, |
160 | histogram[i]->size, |
161 | cumulated_size * 100.0 / overall_size); |
162 | } |
163 | } |
164 | |
165 | /* Structure containing speculative target information from profile. */ |
166 | |
167 | struct speculative_call_target |
168 | { |
169 | speculative_call_target (unsigned int id = 0, int prob = 0) |
170 | : target_id (id), target_probability (prob) |
171 | { |
172 | } |
173 | |
174 | /* Profile_id of target obtained from profile. */ |
175 | unsigned int target_id; |
176 | /* Probability that call will land in function with target_id. */ |
177 | unsigned int target_probability; |
178 | }; |
179 | |
180 | class speculative_call_summary |
181 | { |
182 | public: |
183 | speculative_call_summary () : speculative_call_targets () |
184 | {} |
185 | |
186 | auto_vec<speculative_call_target> speculative_call_targets; |
187 | |
188 | void dump (FILE *f); |
189 | |
190 | }; |
191 | |
192 | /* Class to manage call summaries. */ |
193 | |
194 | class ipa_profile_call_summaries |
195 | : public call_summary<speculative_call_summary *> |
196 | { |
197 | public: |
198 | ipa_profile_call_summaries (symbol_table *table) |
199 | : call_summary<speculative_call_summary *> (table) |
200 | {} |
201 | |
202 | /* Duplicate info when an edge is cloned. */ |
203 | void duplicate (cgraph_edge *, cgraph_edge *, |
204 | speculative_call_summary *old_sum, |
205 | speculative_call_summary *new_sum) final override; |
206 | }; |
207 | |
208 | static ipa_profile_call_summaries *call_sums = NULL; |
209 | |
210 | /* Dump all information in speculative call summary to F. */ |
211 | |
212 | void |
213 | speculative_call_summary::dump (FILE *f) |
214 | { |
215 | cgraph_node *n2; |
216 | |
217 | unsigned spec_count = speculative_call_targets.length (); |
218 | for (unsigned i = 0; i < spec_count; i++) |
219 | { |
220 | speculative_call_target item = speculative_call_targets[i]; |
221 | n2 = find_func_by_profile_id (func_id: item.target_id); |
222 | if (n2) |
223 | fprintf (stream: f, format: " The %i speculative target is %s with prob %3.2f\n" , i, |
224 | n2->dump_name (), |
225 | item.target_probability / (float) REG_BR_PROB_BASE); |
226 | else |
227 | fprintf (stream: f, format: " The %i speculative target is %u with prob %3.2f\n" , i, |
228 | item.target_id, |
229 | item.target_probability / (float) REG_BR_PROB_BASE); |
230 | } |
231 | } |
232 | |
233 | /* Duplicate info when an edge is cloned. */ |
234 | |
235 | void |
236 | ipa_profile_call_summaries::duplicate (cgraph_edge *, cgraph_edge *, |
237 | speculative_call_summary *old_sum, |
238 | speculative_call_summary *new_sum) |
239 | { |
240 | if (!old_sum) |
241 | return; |
242 | |
243 | unsigned old_count = old_sum->speculative_call_targets.length (); |
244 | if (!old_count) |
245 | return; |
246 | |
247 | new_sum->speculative_call_targets.reserve_exact (nelems: old_count); |
248 | new_sum->speculative_call_targets.quick_grow_cleared (len: old_count); |
249 | |
250 | for (unsigned i = 0; i < old_count; i++) |
251 | { |
252 | new_sum->speculative_call_targets[i] |
253 | = old_sum->speculative_call_targets[i]; |
254 | } |
255 | } |
256 | |
257 | /* Collect histogram and speculative target summaries from CFG profiles. */ |
258 | |
259 | static void |
260 | ipa_profile_generate_summary (void) |
261 | { |
262 | struct cgraph_node *node; |
263 | gimple_stmt_iterator gsi; |
264 | basic_block bb; |
265 | |
266 | hash_table<histogram_hash> hashtable (10); |
267 | |
268 | gcc_checking_assert (!call_sums); |
269 | call_sums = new ipa_profile_call_summaries (symtab); |
270 | |
271 | FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node) |
272 | if (ENTRY_BLOCK_PTR_FOR_FN |
273 | (DECL_STRUCT_FUNCTION (node->decl))->count.ipa_p ()) |
274 | FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl)) |
275 | { |
276 | int time = 0; |
277 | int size = 0; |
278 | for (gsi = gsi_start_bb (bb); !gsi_end_p (i: gsi); gsi_next (i: &gsi)) |
279 | { |
280 | gimple *stmt = gsi_stmt (i: gsi); |
281 | if (gimple_code (g: stmt) == GIMPLE_CALL |
282 | && !gimple_call_fndecl (gs: stmt)) |
283 | { |
284 | histogram_value h; |
285 | h = gimple_histogram_value_of_type |
286 | (DECL_STRUCT_FUNCTION (node->decl), |
287 | stmt, HIST_TYPE_INDIR_CALL); |
288 | /* No need to do sanity check: gimple_ic_transform already |
289 | takes away bad histograms. */ |
290 | if (h) |
291 | { |
292 | gcov_type val, count, all; |
293 | struct cgraph_edge *e = node->get_edge (call_stmt: stmt); |
294 | if (e && !e->indirect_unknown_callee) |
295 | continue; |
296 | |
297 | speculative_call_summary *csum |
298 | = call_sums->get_create (edge: e); |
299 | |
300 | for (unsigned j = 0; j < GCOV_TOPN_MAXIMUM_TRACKED_VALUES; |
301 | j++) |
302 | { |
303 | if (!get_nth_most_common_value (NULL, counter_type: "indirect call" , |
304 | hist: h, value: &val, count: &count, all: &all, |
305 | n: j)) |
306 | continue; |
307 | |
308 | if (val == 0 || count == 0) |
309 | continue; |
310 | |
311 | if (count > all) |
312 | { |
313 | if (dump_file) |
314 | fprintf (stream: dump_file, |
315 | format: "Probability capped to 1\n" ); |
316 | count = all; |
317 | } |
318 | speculative_call_target item ( |
319 | val, GCOV_COMPUTE_SCALE (count, all)); |
320 | csum->speculative_call_targets.safe_push (obj: item); |
321 | } |
322 | |
323 | gimple_remove_histogram_value |
324 | (DECL_STRUCT_FUNCTION (node->decl), stmt, h); |
325 | } |
326 | } |
327 | time += estimate_num_insns (stmt, &eni_time_weights); |
328 | size += estimate_num_insns (stmt, &eni_size_weights); |
329 | } |
330 | if (bb->count.ipa_p () && bb->count.initialized_p ()) |
331 | account_time_size (hashtable: &hashtable, histogram, |
332 | count: bb->count.ipa ().to_gcov_type (), |
333 | time, size); |
334 | } |
335 | histogram.qsort (cmp_counts); |
336 | } |
337 | |
338 | /* Serialize the speculative summary info for LTO. */ |
339 | |
340 | static void |
341 | ipa_profile_write_edge_summary (lto_simple_output_block *ob, |
342 | speculative_call_summary *csum) |
343 | { |
344 | unsigned len = 0; |
345 | |
346 | len = csum->speculative_call_targets.length (); |
347 | |
348 | gcc_assert (len <= GCOV_TOPN_MAXIMUM_TRACKED_VALUES); |
349 | |
350 | streamer_write_hwi_stream (ob->main_stream, len); |
351 | |
352 | if (len) |
353 | { |
354 | unsigned spec_count = csum->speculative_call_targets.length (); |
355 | for (unsigned i = 0; i < spec_count; i++) |
356 | { |
357 | speculative_call_target item = csum->speculative_call_targets[i]; |
358 | gcc_assert (item.target_id); |
359 | streamer_write_hwi_stream (ob->main_stream, item.target_id); |
360 | streamer_write_hwi_stream (ob->main_stream, item.target_probability); |
361 | } |
362 | } |
363 | } |
364 | |
365 | /* Serialize the ipa info for lto. */ |
366 | |
367 | static void |
368 | ipa_profile_write_summary (void) |
369 | { |
370 | struct lto_simple_output_block *ob |
371 | = lto_create_simple_output_block (LTO_section_ipa_profile); |
372 | unsigned int i; |
373 | |
374 | streamer_write_uhwi_stream (ob->main_stream, histogram.length ()); |
375 | for (i = 0; i < histogram.length (); i++) |
376 | { |
377 | streamer_write_gcov_count_stream (ob->main_stream, histogram[i]->count); |
378 | streamer_write_uhwi_stream (ob->main_stream, histogram[i]->time); |
379 | streamer_write_uhwi_stream (ob->main_stream, histogram[i]->size); |
380 | } |
381 | |
382 | if (!call_sums) |
383 | return; |
384 | |
385 | /* Serialize speculative targets information. */ |
386 | unsigned int count = 0; |
387 | lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder; |
388 | lto_symtab_encoder_iterator lsei; |
389 | cgraph_node *node; |
390 | |
391 | for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei); |
392 | lsei_next_function_in_partition (lsei: &lsei)) |
393 | { |
394 | node = lsei_cgraph_node (lsei); |
395 | if (node->definition && node->has_gimple_body_p () |
396 | && node->indirect_calls) |
397 | count++; |
398 | } |
399 | |
400 | streamer_write_uhwi_stream (ob->main_stream, count); |
401 | |
402 | /* Process all of the functions. */ |
403 | for (lsei = lsei_start_function_in_partition (encoder); |
404 | !lsei_end_p (lsei) && count; lsei_next_function_in_partition (lsei: &lsei)) |
405 | { |
406 | cgraph_node *node = lsei_cgraph_node (lsei); |
407 | if (node->definition && node->has_gimple_body_p () |
408 | && node->indirect_calls) |
409 | { |
410 | int node_ref = lto_symtab_encoder_encode (encoder, node); |
411 | streamer_write_uhwi_stream (ob->main_stream, node_ref); |
412 | |
413 | for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee) |
414 | { |
415 | speculative_call_summary *csum = call_sums->get_create (edge: e); |
416 | ipa_profile_write_edge_summary (ob, csum); |
417 | } |
418 | } |
419 | } |
420 | |
421 | lto_destroy_simple_output_block (ob); |
422 | } |
423 | |
424 | /* Dump all profile summary data for all cgraph nodes and edges to file F. */ |
425 | |
426 | static void |
427 | ipa_profile_dump_all_summaries (FILE *f) |
428 | { |
429 | fprintf (stream: dump_file, |
430 | format: "\n========== IPA-profile speculative targets: ==========\n" ); |
431 | cgraph_node *node; |
432 | FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node) |
433 | { |
434 | fprintf (stream: f, format: "\nSummary for node %s:\n" , node->dump_name ()); |
435 | for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee) |
436 | { |
437 | fprintf (stream: f, format: " Summary for %s of indirect edge %d:\n" , |
438 | e->caller->dump_name (), e->lto_stmt_uid); |
439 | speculative_call_summary *csum = call_sums->get_create (edge: e); |
440 | csum->dump (f); |
441 | } |
442 | } |
443 | fprintf (stream: f, format: "\n\n" ); |
444 | } |
445 | |
446 | /* Read speculative targets information about edge for LTO WPA. */ |
447 | |
448 | static void |
449 | ipa_profile_read_edge_summary (class lto_input_block *ib, cgraph_edge *edge) |
450 | { |
451 | unsigned i, len; |
452 | |
453 | len = streamer_read_hwi (ib); |
454 | gcc_assert (len <= GCOV_TOPN_MAXIMUM_TRACKED_VALUES); |
455 | speculative_call_summary *csum = call_sums->get_create (edge); |
456 | |
457 | for (i = 0; i < len; i++) |
458 | { |
459 | unsigned int target_id = streamer_read_hwi (ib); |
460 | int target_probability = streamer_read_hwi (ib); |
461 | speculative_call_target item (target_id, target_probability); |
462 | csum->speculative_call_targets.safe_push (obj: item); |
463 | } |
464 | } |
465 | |
466 | /* Read profile speculative targets section information for LTO WPA. */ |
467 | |
468 | static void |
469 | ipa_profile_read_summary_section (struct lto_file_decl_data *file_data, |
470 | class lto_input_block *ib) |
471 | { |
472 | if (!ib) |
473 | return; |
474 | |
475 | lto_symtab_encoder_t encoder = file_data->symtab_node_encoder; |
476 | |
477 | unsigned int count = streamer_read_uhwi (ib); |
478 | |
479 | unsigned int i; |
480 | unsigned int index; |
481 | cgraph_node * node; |
482 | |
483 | for (i = 0; i < count; i++) |
484 | { |
485 | index = streamer_read_uhwi (ib); |
486 | node |
487 | = dyn_cast<cgraph_node *> (p: lto_symtab_encoder_deref (encoder, ref: index)); |
488 | |
489 | for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee) |
490 | ipa_profile_read_edge_summary (ib, edge: e); |
491 | } |
492 | } |
493 | |
494 | /* Deserialize the IPA histogram and speculative targets summary info for LTO. |
495 | */ |
496 | |
497 | static void |
498 | ipa_profile_read_summary (void) |
499 | { |
500 | struct lto_file_decl_data ** file_data_vec |
501 | = lto_get_file_decl_data (); |
502 | struct lto_file_decl_data * file_data; |
503 | int j = 0; |
504 | |
505 | hash_table<histogram_hash> hashtable (10); |
506 | |
507 | gcc_checking_assert (!call_sums); |
508 | call_sums = new ipa_profile_call_summaries (symtab); |
509 | |
510 | while ((file_data = file_data_vec[j++])) |
511 | { |
512 | const char *data; |
513 | size_t len; |
514 | class lto_input_block *ib |
515 | = lto_create_simple_input_block (file_data, |
516 | LTO_section_ipa_profile, |
517 | &data, &len); |
518 | if (ib) |
519 | { |
520 | unsigned int num = streamer_read_uhwi (ib); |
521 | unsigned int n; |
522 | for (n = 0; n < num; n++) |
523 | { |
524 | gcov_type count = streamer_read_gcov_count (ib); |
525 | int time = streamer_read_uhwi (ib); |
526 | int size = streamer_read_uhwi (ib); |
527 | account_time_size (hashtable: &hashtable, histogram, |
528 | count, time, size); |
529 | } |
530 | |
531 | ipa_profile_read_summary_section (file_data, ib); |
532 | |
533 | lto_destroy_simple_input_block (file_data, |
534 | LTO_section_ipa_profile, |
535 | ib, data, len); |
536 | } |
537 | } |
538 | histogram.qsort (cmp_counts); |
539 | } |
540 | |
541 | /* Data used by ipa_propagate_frequency. */ |
542 | |
543 | struct ipa_propagate_frequency_data |
544 | { |
545 | cgraph_node *function_symbol; |
546 | bool maybe_unlikely_executed; |
547 | bool maybe_executed_once; |
548 | bool only_called_at_startup; |
549 | bool only_called_at_exit; |
550 | }; |
551 | |
552 | /* Worker for ipa_propagate_frequency_1. */ |
553 | |
554 | static bool |
555 | ipa_propagate_frequency_1 (struct cgraph_node *node, void *data) |
556 | { |
557 | struct ipa_propagate_frequency_data *d; |
558 | struct cgraph_edge *edge; |
559 | |
560 | d = (struct ipa_propagate_frequency_data *)data; |
561 | for (edge = node->callers; |
562 | edge && (d->maybe_unlikely_executed || d->maybe_executed_once |
563 | || d->only_called_at_startup || d->only_called_at_exit); |
564 | edge = edge->next_caller) |
565 | { |
566 | if (edge->caller != d->function_symbol) |
567 | { |
568 | d->only_called_at_startup &= edge->caller->only_called_at_startup; |
569 | /* It makes sense to put main() together with the static constructors. |
570 | It will be executed for sure, but rest of functions called from |
571 | main are definitely not at startup only. */ |
572 | if (MAIN_NAME_P (DECL_NAME (edge->caller->decl))) |
573 | d->only_called_at_startup = 0; |
574 | d->only_called_at_exit &= edge->caller->only_called_at_exit; |
575 | } |
576 | |
577 | /* When profile feedback is available, do not try to propagate too hard; |
578 | counts are already good guide on function frequencies and roundoff |
579 | errors can make us to push function into unlikely section even when |
580 | it is executed by the train run. Transfer the function only if all |
581 | callers are unlikely executed. */ |
582 | if (profile_info |
583 | && !(edge->callee->count.ipa () == profile_count::zero ()) |
584 | && (edge->caller->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED |
585 | || (edge->caller->inlined_to |
586 | && edge->caller->inlined_to->frequency |
587 | != NODE_FREQUENCY_UNLIKELY_EXECUTED))) |
588 | d->maybe_unlikely_executed = false; |
589 | if (edge->count.ipa ().initialized_p () |
590 | && !edge->count.ipa ().nonzero_p ()) |
591 | continue; |
592 | switch (edge->caller->frequency) |
593 | { |
594 | case NODE_FREQUENCY_UNLIKELY_EXECUTED: |
595 | break; |
596 | case NODE_FREQUENCY_EXECUTED_ONCE: |
597 | { |
598 | if (dump_file && (dump_flags & TDF_DETAILS)) |
599 | fprintf (stream: dump_file, format: " Called by %s that is executed once\n" , |
600 | edge->caller->dump_name ()); |
601 | d->maybe_unlikely_executed = false; |
602 | ipa_call_summary *s = ipa_call_summaries->get (edge); |
603 | if (s != NULL && s->loop_depth) |
604 | { |
605 | d->maybe_executed_once = false; |
606 | if (dump_file && (dump_flags & TDF_DETAILS)) |
607 | fprintf (stream: dump_file, format: " Called in loop\n" ); |
608 | } |
609 | break; |
610 | } |
611 | case NODE_FREQUENCY_HOT: |
612 | case NODE_FREQUENCY_NORMAL: |
613 | if (dump_file && (dump_flags & TDF_DETAILS)) |
614 | fprintf (stream: dump_file, format: " Called by %s that is normal or hot\n" , |
615 | edge->caller->dump_name ()); |
616 | d->maybe_unlikely_executed = false; |
617 | d->maybe_executed_once = false; |
618 | break; |
619 | } |
620 | } |
621 | return edge != NULL; |
622 | } |
623 | |
624 | /* Return ture if NODE contains hot calls. */ |
625 | |
626 | bool |
627 | contains_hot_call_p (struct cgraph_node *node) |
628 | { |
629 | struct cgraph_edge *e; |
630 | for (e = node->callees; e; e = e->next_callee) |
631 | if (e->maybe_hot_p ()) |
632 | return true; |
633 | else if (!e->inline_failed |
634 | && contains_hot_call_p (node: e->callee)) |
635 | return true; |
636 | for (e = node->indirect_calls; e; e = e->next_callee) |
637 | if (e->maybe_hot_p ()) |
638 | return true; |
639 | return false; |
640 | } |
641 | |
642 | /* See if the frequency of NODE can be updated based on frequencies of its |
643 | callers. */ |
644 | bool |
645 | ipa_propagate_frequency (struct cgraph_node *node) |
646 | { |
647 | struct ipa_propagate_frequency_data d = {.function_symbol: node, .maybe_unlikely_executed: true, .maybe_executed_once: true, .only_called_at_startup: true, .only_called_at_exit: true}; |
648 | bool changed = false; |
649 | |
650 | /* We cannot propagate anything useful about externally visible functions |
651 | nor about virtuals. */ |
652 | if (!node->local |
653 | || node->alias |
654 | || (opt_for_fn (node->decl, flag_devirtualize) |
655 | && DECL_VIRTUAL_P (node->decl))) |
656 | return false; |
657 | gcc_assert (node->analyzed); |
658 | if (dump_file && (dump_flags & TDF_DETAILS)) |
659 | fprintf (stream: dump_file, format: "Processing frequency %s\n" , node->dump_name ()); |
660 | |
661 | node->call_for_symbol_and_aliases (callback: ipa_propagate_frequency_1, data: &d, |
662 | include_overwritable: true); |
663 | |
664 | if ((d.only_called_at_startup && !d.only_called_at_exit) |
665 | && !node->only_called_at_startup) |
666 | { |
667 | node->only_called_at_startup = true; |
668 | if (dump_file) |
669 | fprintf (stream: dump_file, format: "Node %s promoted to only called at startup.\n" , |
670 | node->dump_name ()); |
671 | changed = true; |
672 | } |
673 | if ((d.only_called_at_exit && !d.only_called_at_startup) |
674 | && !node->only_called_at_exit) |
675 | { |
676 | node->only_called_at_exit = true; |
677 | if (dump_file) |
678 | fprintf (stream: dump_file, format: "Node %s promoted to only called at exit.\n" , |
679 | node->dump_name ()); |
680 | changed = true; |
681 | } |
682 | |
683 | /* With profile we can decide on hot/normal based on count. */ |
684 | if (node->count. ipa().initialized_p ()) |
685 | { |
686 | bool hot = false; |
687 | if (!(node->count. ipa() == profile_count::zero ()) |
688 | && node->count. ipa() >= get_hot_bb_threshold ()) |
689 | hot = true; |
690 | if (!hot) |
691 | hot |= contains_hot_call_p (node); |
692 | if (hot) |
693 | { |
694 | if (node->frequency != NODE_FREQUENCY_HOT) |
695 | { |
696 | if (dump_file) |
697 | fprintf (stream: dump_file, format: "Node %s promoted to hot.\n" , |
698 | node->dump_name ()); |
699 | node->frequency = NODE_FREQUENCY_HOT; |
700 | return true; |
701 | } |
702 | return false; |
703 | } |
704 | else if (node->frequency == NODE_FREQUENCY_HOT) |
705 | { |
706 | if (dump_file) |
707 | fprintf (stream: dump_file, format: "Node %s reduced to normal.\n" , |
708 | node->dump_name ()); |
709 | node->frequency = NODE_FREQUENCY_NORMAL; |
710 | changed = true; |
711 | } |
712 | } |
713 | /* These come either from profile or user hints; never update them. */ |
714 | if (node->frequency == NODE_FREQUENCY_HOT |
715 | || node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED) |
716 | return changed; |
717 | if (d.maybe_unlikely_executed) |
718 | { |
719 | node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED; |
720 | if (dump_file) |
721 | fprintf (stream: dump_file, format: "Node %s promoted to unlikely executed.\n" , |
722 | node->dump_name ()); |
723 | changed = true; |
724 | } |
725 | else if (d.maybe_executed_once && node->frequency != NODE_FREQUENCY_EXECUTED_ONCE) |
726 | { |
727 | node->frequency = NODE_FREQUENCY_EXECUTED_ONCE; |
728 | if (dump_file) |
729 | fprintf (stream: dump_file, format: "Node %s promoted to executed once.\n" , |
730 | node->dump_name ()); |
731 | changed = true; |
732 | } |
733 | return changed; |
734 | } |
735 | |
736 | /* Check that number of arguments of N agrees with E. |
737 | Be conservative when summaries are not present. */ |
738 | |
739 | static bool |
740 | check_argument_count (struct cgraph_node *n, struct cgraph_edge *e) |
741 | { |
742 | if (!ipa_node_params_sum || !ipa_edge_args_sum) |
743 | return true; |
744 | ipa_node_params *info = ipa_node_params_sum->get (node: n->function_symbol ()); |
745 | if (!info) |
746 | return true; |
747 | ipa_edge_args *e_info = ipa_edge_args_sum->get (edge: e); |
748 | if (!e_info) |
749 | return true; |
750 | if (ipa_get_param_count (info) != ipa_get_cs_argument_count (args: e_info) |
751 | && (ipa_get_param_count (info) >= ipa_get_cs_argument_count (args: e_info) |
752 | || !stdarg_p (TREE_TYPE (n->decl)))) |
753 | return false; |
754 | return true; |
755 | } |
756 | |
757 | /* Simple ipa profile pass propagating frequencies across the callgraph. */ |
758 | |
759 | static unsigned int |
760 | ipa_profile (void) |
761 | { |
762 | struct cgraph_node **order; |
763 | struct cgraph_edge *e; |
764 | int order_pos; |
765 | bool something_changed = false; |
766 | int i; |
767 | gcov_type overall_time = 0, cutoff = 0, cumulated = 0, overall_size = 0; |
768 | struct cgraph_node *n,*n2; |
769 | int nindirect = 0, ncommon = 0, nunknown = 0, nuseless = 0, nconverted = 0; |
770 | int nmismatch = 0, nimpossible = 0; |
771 | bool node_map_initialized = false; |
772 | gcov_type threshold; |
773 | |
774 | if (dump_file) |
775 | dump_histogram (file: dump_file, histogram); |
776 | for (i = 0; i < (int)histogram.length (); i++) |
777 | { |
778 | overall_time += histogram[i]->count * histogram[i]->time; |
779 | overall_size += histogram[i]->size; |
780 | } |
781 | threshold = 0; |
782 | if (overall_time) |
783 | { |
784 | gcc_assert (overall_size); |
785 | |
786 | cutoff = (overall_time * param_hot_bb_count_ws_permille + 500) / 1000; |
787 | for (i = 0; cumulated < cutoff; i++) |
788 | { |
789 | cumulated += histogram[i]->count * histogram[i]->time; |
790 | threshold = histogram[i]->count; |
791 | } |
792 | if (!threshold) |
793 | threshold = 1; |
794 | if (dump_file) |
795 | { |
796 | gcov_type cumulated_time = 0, cumulated_size = 0; |
797 | |
798 | for (i = 0; |
799 | i < (int)histogram.length () && histogram[i]->count >= threshold; |
800 | i++) |
801 | { |
802 | cumulated_time += histogram[i]->count * histogram[i]->time; |
803 | cumulated_size += histogram[i]->size; |
804 | } |
805 | fprintf (stream: dump_file, format: "Determined min count: %" PRId64 |
806 | " Time:%3.2f%% Size:%3.2f%%\n" , |
807 | (int64_t)threshold, |
808 | cumulated_time * 100.0 / overall_time, |
809 | cumulated_size * 100.0 / overall_size); |
810 | } |
811 | |
812 | if (in_lto_p) |
813 | { |
814 | if (dump_file) |
815 | fprintf (stream: dump_file, format: "Setting hotness threshold in LTO mode.\n" ); |
816 | set_hot_bb_threshold (threshold); |
817 | } |
818 | } |
819 | histogram.release (); |
820 | histogram_pool.release (); |
821 | |
822 | /* Produce speculative calls: we saved common target from profiling into |
823 | e->target_id. Now, at link time, we can look up corresponding |
824 | function node and produce speculative call. */ |
825 | |
826 | gcc_checking_assert (call_sums); |
827 | |
828 | if (dump_file) |
829 | { |
830 | if (!node_map_initialized) |
831 | init_node_map (false); |
832 | node_map_initialized = true; |
833 | |
834 | ipa_profile_dump_all_summaries (f: dump_file); |
835 | } |
836 | |
837 | FOR_EACH_DEFINED_FUNCTION (n) |
838 | { |
839 | bool update = false; |
840 | |
841 | if (!opt_for_fn (n->decl, flag_ipa_profile)) |
842 | continue; |
843 | |
844 | for (e = n->indirect_calls; e; e = e->next_callee) |
845 | { |
846 | if (n->count.initialized_p ()) |
847 | nindirect++; |
848 | |
849 | speculative_call_summary *csum = call_sums->get_create (edge: e); |
850 | unsigned spec_count = csum->speculative_call_targets.length (); |
851 | if (spec_count) |
852 | { |
853 | if (!node_map_initialized) |
854 | init_node_map (false); |
855 | node_map_initialized = true; |
856 | ncommon++; |
857 | |
858 | unsigned speculative_id = 0; |
859 | profile_count orig = e->count; |
860 | for (unsigned i = 0; i < spec_count; i++) |
861 | { |
862 | speculative_call_target item |
863 | = csum->speculative_call_targets[i]; |
864 | n2 = find_func_by_profile_id (func_id: item.target_id); |
865 | if (n2) |
866 | { |
867 | if (dump_file) |
868 | { |
869 | fprintf (stream: dump_file, |
870 | format: "Indirect call -> direct call from" |
871 | " other module %s => %s, prob %3.2f\n" , |
872 | n->dump_name (), |
873 | n2->dump_name (), |
874 | item.target_probability |
875 | / (float) REG_BR_PROB_BASE); |
876 | } |
877 | if (item.target_probability < REG_BR_PROB_BASE / 2) |
878 | { |
879 | nuseless++; |
880 | if (dump_file) |
881 | fprintf (stream: dump_file, |
882 | format: "Not speculating: " |
883 | "probability is too low.\n" ); |
884 | } |
885 | else if (!e->maybe_hot_p ()) |
886 | { |
887 | nuseless++; |
888 | if (dump_file) |
889 | fprintf (stream: dump_file, |
890 | format: "Not speculating: call is cold.\n" ); |
891 | } |
892 | else if (n2->get_availability () <= AVAIL_INTERPOSABLE |
893 | && n2->can_be_discarded_p ()) |
894 | { |
895 | nuseless++; |
896 | if (dump_file) |
897 | fprintf (stream: dump_file, |
898 | format: "Not speculating: target is overwritable " |
899 | "and can be discarded.\n" ); |
900 | } |
901 | else if (!check_argument_count (n: n2, e)) |
902 | { |
903 | nmismatch++; |
904 | if (dump_file) |
905 | fprintf (stream: dump_file, |
906 | format: "Not speculating: " |
907 | "parameter count mismatch\n" ); |
908 | } |
909 | else if (e->indirect_info->polymorphic |
910 | && !opt_for_fn (n->decl, flag_devirtualize) |
911 | && !possible_polymorphic_call_target_p (e, n: n2)) |
912 | { |
913 | nimpossible++; |
914 | if (dump_file) |
915 | fprintf (stream: dump_file, |
916 | format: "Not speculating: " |
917 | "function is not in the polymorphic " |
918 | "call target list\n" ); |
919 | } |
920 | else |
921 | { |
922 | /* Target may be overwritable, but profile says that |
923 | control flow goes to this particular implementation |
924 | of N2. Speculate on the local alias to allow |
925 | inlining. */ |
926 | if (!n2->can_be_discarded_p ()) |
927 | { |
928 | cgraph_node *alias; |
929 | alias = dyn_cast<cgraph_node *> |
930 | (p: n2->noninterposable_alias ()); |
931 | if (alias) |
932 | n2 = alias; |
933 | } |
934 | nconverted++; |
935 | profile_probability prob |
936 | = profile_probability::from_reg_br_prob_base |
937 | (v: item.target_probability).adjusted (); |
938 | e->make_speculative (n2, |
939 | direct_count: orig.apply_probability (prob), |
940 | speculative_id); |
941 | update = true; |
942 | speculative_id++; |
943 | } |
944 | } |
945 | else |
946 | { |
947 | if (dump_file) |
948 | fprintf (stream: dump_file, |
949 | format: "Function with profile-id %i not found.\n" , |
950 | item.target_id); |
951 | nunknown++; |
952 | } |
953 | } |
954 | } |
955 | } |
956 | if (update) |
957 | ipa_update_overall_fn_summary (node: n); |
958 | } |
959 | if (node_map_initialized) |
960 | del_node_map (); |
961 | if (dump_file && nindirect) |
962 | fprintf (stream: dump_file, |
963 | format: "%i indirect calls trained.\n" |
964 | "%i (%3.2f%%) have common target.\n" |
965 | "%i (%3.2f%%) targets was not found.\n" |
966 | "%i (%3.2f%%) targets had parameter count mismatch.\n" |
967 | "%i (%3.2f%%) targets was not in polymorphic call target list.\n" |
968 | "%i (%3.2f%%) speculations seems useless.\n" |
969 | "%i (%3.2f%%) speculations produced.\n" , |
970 | nindirect, |
971 | ncommon, ncommon * 100.0 / nindirect, |
972 | nunknown, nunknown * 100.0 / nindirect, |
973 | nmismatch, nmismatch * 100.0 / nindirect, |
974 | nimpossible, nimpossible * 100.0 / nindirect, |
975 | nuseless, nuseless * 100.0 / nindirect, |
976 | nconverted, nconverted * 100.0 / nindirect); |
977 | |
978 | order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count); |
979 | order_pos = ipa_reverse_postorder (order); |
980 | for (i = order_pos - 1; i >= 0; i--) |
981 | { |
982 | if (order[i]->local |
983 | && opt_for_fn (order[i]->decl, flag_ipa_profile) |
984 | && ipa_propagate_frequency (node: order[i])) |
985 | { |
986 | for (e = order[i]->callees; e; e = e->next_callee) |
987 | if (e->callee->local && !e->callee->aux) |
988 | { |
989 | something_changed = true; |
990 | e->callee->aux = (void *)1; |
991 | } |
992 | } |
993 | order[i]->aux = NULL; |
994 | } |
995 | |
996 | while (something_changed) |
997 | { |
998 | something_changed = false; |
999 | for (i = order_pos - 1; i >= 0; i--) |
1000 | { |
1001 | if (order[i]->aux |
1002 | && opt_for_fn (order[i]->decl, flag_ipa_profile) |
1003 | && ipa_propagate_frequency (node: order[i])) |
1004 | { |
1005 | for (e = order[i]->callees; e; e = e->next_callee) |
1006 | if (e->callee->local && !e->callee->aux) |
1007 | { |
1008 | something_changed = true; |
1009 | e->callee->aux = (void *)1; |
1010 | } |
1011 | } |
1012 | order[i]->aux = NULL; |
1013 | } |
1014 | } |
1015 | free (ptr: order); |
1016 | |
1017 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1018 | symtab->dump (f: dump_file); |
1019 | |
1020 | delete call_sums; |
1021 | call_sums = NULL; |
1022 | |
1023 | return 0; |
1024 | } |
1025 | |
1026 | namespace { |
1027 | |
1028 | const pass_data pass_data_ipa_profile = |
1029 | { |
1030 | .type: IPA_PASS, /* type */ |
1031 | .name: "profile_estimate" , /* name */ |
1032 | .optinfo_flags: OPTGROUP_NONE, /* optinfo_flags */ |
1033 | .tv_id: TV_IPA_PROFILE, /* tv_id */ |
1034 | .properties_required: 0, /* properties_required */ |
1035 | .properties_provided: 0, /* properties_provided */ |
1036 | .properties_destroyed: 0, /* properties_destroyed */ |
1037 | .todo_flags_start: 0, /* todo_flags_start */ |
1038 | .todo_flags_finish: 0, /* todo_flags_finish */ |
1039 | }; |
1040 | |
1041 | class pass_ipa_profile : public ipa_opt_pass_d |
1042 | { |
1043 | public: |
1044 | pass_ipa_profile (gcc::context *ctxt) |
1045 | : ipa_opt_pass_d (pass_data_ipa_profile, ctxt, |
1046 | ipa_profile_generate_summary, /* generate_summary */ |
1047 | ipa_profile_write_summary, /* write_summary */ |
1048 | ipa_profile_read_summary, /* read_summary */ |
1049 | NULL, /* write_optimization_summary */ |
1050 | NULL, /* read_optimization_summary */ |
1051 | NULL, /* stmt_fixup */ |
1052 | 0, /* function_transform_todo_flags_start */ |
1053 | NULL, /* function_transform */ |
1054 | NULL) /* variable_transform */ |
1055 | {} |
1056 | |
1057 | /* opt_pass methods: */ |
1058 | bool gate (function *) final override { return flag_ipa_profile || in_lto_p; } |
1059 | unsigned int execute (function *) final override { return ipa_profile (); } |
1060 | |
1061 | }; // class pass_ipa_profile |
1062 | |
1063 | } // anon namespace |
1064 | |
1065 | ipa_opt_pass_d * |
1066 | make_pass_ipa_profile (gcc::context *ctxt) |
1067 | { |
1068 | return new pass_ipa_profile (ctxt); |
1069 | } |
1070 | |
1071 | /* Reset all state within ipa-profile.cc so that we can rerun the compiler |
1072 | within the same process. For use by toplev::finalize. */ |
1073 | |
1074 | void |
1075 | ipa_profile_cc_finalize (void) |
1076 | { |
1077 | delete call_sums; |
1078 | call_sums = NULL; |
1079 | } |
1080 | |