1 | /* Analysis used by inlining decision heuristics. |
2 | Copyright (C) 2003-2024 Free Software Foundation, Inc. |
3 | Contributed by Jan Hubicka |
4 | |
5 | This file is part of GCC. |
6 | |
7 | GCC is free software; you can redistribute it and/or modify it under |
8 | the terms of the GNU General Public License as published by the Free |
9 | Software Foundation; either version 3, or (at your option) any later |
10 | version. |
11 | |
12 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
13 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
14 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
15 | for more details. |
16 | |
17 | You should have received a copy of the GNU General Public License |
18 | along with GCC; see the file COPYING3. If not see |
19 | <http://www.gnu.org/licenses/>. */ |
20 | |
21 | #include "config.h" |
22 | #include "system.h" |
23 | #include "coretypes.h" |
24 | #include "backend.h" |
25 | #include "tree.h" |
26 | #include "gimple.h" |
27 | #include "alloc-pool.h" |
28 | #include "tree-pass.h" |
29 | #include "ssa.h" |
30 | #include "tree-streamer.h" |
31 | #include "cgraph.h" |
32 | #include "diagnostic.h" |
33 | #include "fold-const.h" |
34 | #include "print-tree.h" |
35 | #include "tree-inline.h" |
36 | #include "gimple-pretty-print.h" |
37 | #include "cfganal.h" |
38 | #include "gimple-iterator.h" |
39 | #include "tree-cfg.h" |
40 | #include "tree-ssa-loop-niter.h" |
41 | #include "tree-ssa-loop.h" |
42 | #include "symbol-summary.h" |
43 | #include "sreal.h" |
44 | #include "ipa-cp.h" |
45 | #include "ipa-prop.h" |
46 | #include "ipa-fnsummary.h" |
47 | #include "ipa-inline.h" |
48 | #include "cfgloop.h" |
49 | #include "tree-scalar-evolution.h" |
50 | #include "ipa-utils.h" |
51 | #include "cfgexpand.h" |
52 | #include "gimplify.h" |
53 | #include "attribs.h" |
54 | |
/* Cached node/edge growths.  Indexed by call graph edge; the size and
   hints fields of each entry are stored biased by one so that a value of
   zero means "not computed yet".  */
fast_call_summary<edge_growth_cache_entry *, va_heap> *edge_growth_cache = NULL;
57 | |
/* The context cache remembers estimated time/size and hints for given
   ipa_call_context of a call.  */
class node_context_cache_entry
{
public:
  /* The call context (clauses and known argument values) for which the
     numbers below were computed.  */
  ipa_cached_call_context ctx;
  /* Estimated specialized and non-specialized execution time.  */
  sreal time, nonspec_time;
  /* Estimated code size.  */
  int size;
  /* Inline hints computed together with the size/time estimates.  */
  ipa_hints hints;

  node_context_cache_entry ()
  : ctx ()
  {
  }
  /* Release storage owned by the cached context.  */
  ~node_context_cache_entry ()
  {
    ctx.release ();
  }
};
77 | |
/* At the moment we implement primitive single entry LRU cache.  */
class node_context_summary
{
public:
  /* The single cached context entry kept for this function.  */
  node_context_cache_entry entry;

  node_context_summary ()
  : entry ()
  {
  }
  ~node_context_summary ()
  {
  }
};
92 | |
/* Summary holding the context cache, one single-entry cache per
   function.  */
static fast_function_summary <node_context_summary *, va_heap>
	*node_context_cache = NULL;
/* Statistics about the context cache effectivity: hits, misses on an
   already-initialized entry, and first-time initializations ("clear").  */
static long node_context_cache_hit, node_context_cache_miss,
	    node_context_cache_clear;
99 | |
100 | /* Give initial reasons why inlining would fail on EDGE. This gets either |
101 | nullified or usually overwritten by more precise reasons later. */ |
102 | |
103 | void |
104 | initialize_inline_failed (struct cgraph_edge *e) |
105 | { |
106 | struct cgraph_node *callee = e->callee; |
107 | |
108 | if (e->inline_failed && e->inline_failed != CIF_BODY_NOT_AVAILABLE |
109 | && cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR) |
110 | ; |
111 | else if (e->indirect_unknown_callee) |
112 | e->inline_failed = CIF_INDIRECT_UNKNOWN_CALL; |
113 | else if (!callee->definition) |
114 | e->inline_failed = CIF_BODY_NOT_AVAILABLE; |
115 | else if (callee->redefined_extern_inline) |
116 | e->inline_failed = CIF_REDEFINED_EXTERN_INLINE; |
117 | else |
118 | e->inline_failed = CIF_FUNCTION_NOT_CONSIDERED; |
119 | gcc_checking_assert (!e->call_stmt_cannot_inline_p |
120 | || cgraph_inline_failed_type (e->inline_failed) |
121 | == CIF_FINAL_ERROR); |
122 | } |
123 | |
124 | /* Allocate edge growth caches. */ |
125 | |
126 | void |
127 | initialize_growth_caches () |
128 | { |
129 | edge_growth_cache |
130 | = new fast_call_summary<edge_growth_cache_entry *, va_heap> (symtab); |
131 | node_context_cache |
132 | = new fast_function_summary<node_context_summary *, va_heap> (symtab); |
133 | edge_growth_cache->disable_duplication_hook (); |
134 | node_context_cache->disable_insertion_hook (); |
135 | node_context_cache->disable_duplication_hook (); |
136 | } |
137 | |
138 | /* Free growth caches. */ |
139 | |
140 | void |
141 | free_growth_caches (void) |
142 | { |
143 | delete edge_growth_cache; |
144 | delete node_context_cache; |
145 | edge_growth_cache = NULL; |
146 | node_context_cache = NULL; |
147 | if (dump_file) |
148 | fprintf (stream: dump_file, format: "node context cache: %li hits, %li misses," |
149 | " %li initializations\n" , |
150 | node_context_cache_hit, node_context_cache_miss, |
151 | node_context_cache_clear); |
152 | node_context_cache_hit = 0; |
153 | node_context_cache_miss = 0; |
154 | node_context_cache_clear = 0; |
155 | } |
156 | |
157 | /* Return hints derived from EDGE. */ |
158 | |
159 | int |
160 | simple_edge_hints (struct cgraph_edge *edge) |
161 | { |
162 | int hints = 0; |
163 | struct cgraph_node *to = (edge->caller->inlined_to |
164 | ? edge->caller->inlined_to : edge->caller); |
165 | struct cgraph_node *callee = edge->callee->ultimate_alias_target (); |
166 | int to_scc_no = ipa_fn_summaries->get (node: to)->scc_no; |
167 | int callee_scc_no = ipa_fn_summaries->get (node: callee)->scc_no; |
168 | |
169 | if (to_scc_no && to_scc_no == callee_scc_no && !edge->recursive_p ()) |
170 | hints |= INLINE_HINT_same_scc; |
171 | |
172 | if (cross_module_call_p (edge)) |
173 | hints |= INLINE_HINT_cross_module; |
174 | |
175 | return hints; |
176 | } |
177 | |
/* Estimate the time cost for the caller when inlining EDGE.
   Only to be called via estimate_edge_time, that handles the
   caching mechanism.

   When caching, also update the cache entry.  Compute both time and
   size, since we always need both metrics eventually.  */

sreal
do_estimate_edge_time (struct cgraph_edge *edge, sreal *ret_nonspec_time)
{
  sreal time, nonspec_time;
  int size;
  ipa_hints hints;
  struct cgraph_node *callee;
  clause_t clause, nonspec_clause;
  ipa_auto_call_arg_values avals;
  class ipa_call_summary *es = ipa_call_summaries->get (edge);
  /* NOTE(review): min_size is never reassigned anywhere in this function,
     so the "min_size >= 0" update near the end looks unreachable — confirm
     whether estimate_size_and_time was meant to compute it.  */
  int min_size = -1;

  callee = edge->callee->ultimate_alias_target ();

  gcc_checking_assert (edge->inline_failed);
  /* Work out the clauses and known argument values holding at this
     particular call site.  */
  evaluate_properties_for_edge (e: edge, inline_p: true, clause_ptr: &clause, nonspec_clause_ptr: &nonspec_clause,
			       avals: &avals, compute_contexts: true);
  ipa_call_context ctx (callee, clause, nonspec_clause, es->param, &avals);
  if (node_context_cache != NULL)
    {
      /* Single-entry per-node cache of the last evaluated context.  */
      node_context_summary *e = node_context_cache->get_create (node: callee);
      if (e->entry.ctx.equal_to (ctx))
	{
	  node_context_cache_hit++;
	  size = e->entry.size;
	  time = e->entry.time;
	  nonspec_time = e->entry.nonspec_time;
	  hints = e->entry.hints;
	  /* With checking enabled, recompute and verify that the cached
	     values did not go stale.  Skipped when IPA profile counts are
	     present, as those may legitimately shift the estimates.  */
	  if (flag_checking
	      && !opt_for_fn (callee->decl, flag_profile_partial_training)
	      && !callee->count.ipa_p ())
	    {
	      ipa_call_estimates chk_estimates;
	      ctx.estimate_size_and_time (estimates: &chk_estimates);
	      gcc_assert (chk_estimates.size == size
			  && chk_estimates.time == time
			  && chk_estimates.nonspecialized_time == nonspec_time
			  && chk_estimates.hints == hints);
	    }
	}
      else
	{
	  /* Distinguish a genuine miss (entry existed for a different
	     context) from first-time initialization, for statistics.  */
	  if (e->entry.ctx.exists_p ())
	    node_context_cache_miss++;
	  else
	    node_context_cache_clear++;
	  e->entry.ctx.release ();
	  ipa_call_estimates estimates;
	  ctx.estimate_size_and_time (estimates: &estimates);
	  size = estimates.size;
	  e->entry.size = size;
	  time = estimates.time;
	  e->entry.time = time;
	  nonspec_time = estimates.nonspecialized_time;
	  e->entry.nonspec_time = nonspec_time;
	  hints = estimates.hints;
	  e->entry.hints = hints;
	  /* Remember the context so the next query on CALLEE may hit.  */
	  e->entry.ctx.duplicate_from (ctx);
	}
    }
  else
    {
      /* No context cache (e.g. early inlining); compute directly.  */
      ipa_call_estimates estimates;
      ctx.estimate_size_and_time (estimates: &estimates);
      size = estimates.size;
      time = estimates.time;
      nonspec_time = estimates.nonspecialized_time;
      hints = estimates.hints;
    }

  /* When we have profile feedback or function attribute, we can quite safely
     identify hot edges and for those we disable size limits.  Don't do that
     when probability that caller will call the callee is low however, since it
     may hurt optimization of the caller's hot path.  */
  if ((edge->count.ipa ().initialized_p () && edge->maybe_hot_p ()
       && (edge->count.ipa () * 2
	   > (edge->caller->inlined_to
	      ? edge->caller->inlined_to->count.ipa ()
	      : edge->caller->count.ipa ())))
      || (lookup_attribute (attr_name: "hot" , DECL_ATTRIBUTES (edge->caller->decl))
	  != NULL
	  && lookup_attribute (attr_name: "hot" , DECL_ATTRIBUTES (edge->callee->decl))
	  != NULL))
    hints |= INLINE_HINT_known_hot;

  gcc_checking_assert (size >= 0);
  gcc_checking_assert (time >= 0);

  /* When caching, update the cache entry.  */
  if (edge_growth_cache != NULL)
    {
      if (min_size >= 0)
	ipa_fn_summaries->get (node: edge->callee->function_symbol ())->min_size
	  = min_size;
      edge_growth_cache_entry *entry
	= edge_growth_cache->get_create (edge);
      entry->time = time;
      entry->nonspec_time = nonspec_time;

      /* Size and hints are stored biased by one so 0 means "unset".  */
      entry->size = size + (size >= 0);
      hints |= simple_edge_hints (edge);
      entry->hints = hints + 1;
    }
  if (ret_nonspec_time)
    *ret_nonspec_time = nonspec_time;
  return time;
}
292 | |
293 | /* Reset cache for NODE. |
294 | This must be done each time NODE body is modified. */ |
295 | void |
296 | reset_node_cache (struct cgraph_node *node) |
297 | { |
298 | if (node_context_cache) |
299 | node_context_cache->remove (node); |
300 | } |
301 | |
302 | /* Remove EDGE from caches once it was inlined. */ |
303 | void |
304 | ipa_remove_from_growth_caches (struct cgraph_edge *edge) |
305 | { |
306 | if (node_context_cache) |
307 | node_context_cache->remove (node: edge->callee); |
308 | if (edge_growth_cache) |
309 | edge_growth_cache->remove (edge); |
310 | } |
311 | |
312 | /* Return estimated callee growth after inlining EDGE. |
313 | Only to be called via estimate_edge_size. */ |
314 | |
315 | int |
316 | do_estimate_edge_size (struct cgraph_edge *edge) |
317 | { |
318 | int size; |
319 | struct cgraph_node *callee; |
320 | clause_t clause, nonspec_clause; |
321 | |
322 | /* When we do caching, use do_estimate_edge_time to populate the entry. */ |
323 | |
324 | if (edge_growth_cache != NULL) |
325 | { |
326 | do_estimate_edge_time (edge); |
327 | size = edge_growth_cache->get (edge)->size; |
328 | gcc_checking_assert (size); |
329 | return size - (size > 0); |
330 | } |
331 | |
332 | callee = edge->callee->ultimate_alias_target (); |
333 | |
334 | /* Early inliner runs without caching, go ahead and do the dirty work. */ |
335 | gcc_checking_assert (edge->inline_failed); |
336 | ipa_auto_call_arg_values avals; |
337 | evaluate_properties_for_edge (e: edge, inline_p: true, clause_ptr: &clause, nonspec_clause_ptr: &nonspec_clause, |
338 | avals: &avals, compute_contexts: true); |
339 | ipa_call_context ctx (callee, clause, nonspec_clause, vNULL, &avals); |
340 | ipa_call_estimates estimates; |
341 | ctx.estimate_size_and_time (estimates: &estimates, est_times: false, est_hints: false); |
342 | return estimates.size; |
343 | } |
344 | |
345 | |
346 | /* Estimate the growth of the caller when inlining EDGE. |
347 | Only to be called via estimate_edge_size. */ |
348 | |
349 | ipa_hints |
350 | do_estimate_edge_hints (struct cgraph_edge *edge) |
351 | { |
352 | struct cgraph_node *callee; |
353 | clause_t clause, nonspec_clause; |
354 | |
355 | /* When we do caching, use do_estimate_edge_time to populate the entry. */ |
356 | |
357 | if (edge_growth_cache != NULL) |
358 | { |
359 | do_estimate_edge_time (edge); |
360 | ipa_hints hints = edge_growth_cache->get (edge)->hints; |
361 | gcc_checking_assert (hints); |
362 | return hints - 1; |
363 | } |
364 | |
365 | callee = edge->callee->ultimate_alias_target (); |
366 | |
367 | /* Early inliner runs without caching, go ahead and do the dirty work. */ |
368 | gcc_checking_assert (edge->inline_failed); |
369 | ipa_auto_call_arg_values avals; |
370 | evaluate_properties_for_edge (e: edge, inline_p: true, clause_ptr: &clause, nonspec_clause_ptr: &nonspec_clause, |
371 | avals: &avals, compute_contexts: true); |
372 | ipa_call_context ctx (callee, clause, nonspec_clause, vNULL, &avals); |
373 | ipa_call_estimates estimates; |
374 | ctx.estimate_size_and_time (estimates: &estimates, est_times: false, est_hints: true); |
375 | ipa_hints hints = estimates.hints | simple_edge_hints (edge); |
376 | return hints; |
377 | } |
378 | |
379 | /* Estimate the size of NODE after inlining EDGE which should be an |
380 | edge to either NODE or a call inlined into NODE. */ |
381 | |
382 | int |
383 | estimate_size_after_inlining (struct cgraph_node *node, |
384 | struct cgraph_edge *edge) |
385 | { |
386 | class ipa_call_summary *es = ipa_call_summaries->get (edge); |
387 | ipa_size_summary *s = ipa_size_summaries->get (node); |
388 | if (!es->predicate || *es->predicate != false) |
389 | { |
390 | int size = s->size + estimate_edge_growth (edge); |
391 | gcc_assert (size >= 0); |
392 | return size; |
393 | } |
394 | return s->size; |
395 | } |
396 | |
397 | |
/* Accumulator used by do_estimate_growth_1 while walking all callers of a
   node to estimate the growth of inlining it everywhere.  */
struct growth_data
{
  /* The node whose inlining into all callers is being evaluated.  */
  struct cgraph_node *node;
  /* Set when a recursive call to NODE is found among the callers.  */
  bool self_recursive;
  /* Set when some caller cannot inline NODE.  */
  bool uninlinable;
  /* Accumulated growth estimate across callers.  */
  int growth;
  /* Early-exit threshold on GROWTH; INT_MAX means compute exactly.  */
  int cap;
};
406 | |
407 | |
408 | /* Worker for do_estimate_growth. Collect growth for all callers. */ |
409 | |
410 | static bool |
411 | do_estimate_growth_1 (struct cgraph_node *node, void *data) |
412 | { |
413 | struct cgraph_edge *e; |
414 | struct growth_data *d = (struct growth_data *) data; |
415 | |
416 | for (e = node->callers; e; e = e->next_caller) |
417 | { |
418 | gcc_checking_assert (e->inline_failed); |
419 | |
420 | if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR |
421 | || !opt_for_fn (e->caller->decl, optimize)) |
422 | { |
423 | d->uninlinable = true; |
424 | if (d->cap < INT_MAX) |
425 | return true; |
426 | continue; |
427 | } |
428 | |
429 | if (e->recursive_p ()) |
430 | { |
431 | d->self_recursive = true; |
432 | if (d->cap < INT_MAX) |
433 | return true; |
434 | continue; |
435 | } |
436 | d->growth += estimate_edge_growth (edge: e); |
437 | if (d->growth > d->cap) |
438 | return true; |
439 | } |
440 | return false; |
441 | } |
442 | |
443 | /* Return estimated savings for eliminating offline copy of NODE by inlining |
444 | it everywhere. */ |
445 | |
446 | static int |
447 | offline_size (struct cgraph_node *node, ipa_size_summary *info) |
448 | { |
449 | if (!DECL_EXTERNAL (node->decl)) |
450 | { |
451 | if (node->will_be_removed_from_program_if_no_direct_calls_p ()) |
452 | return info->size; |
453 | /* COMDAT functions are very often not shared across multiple units |
454 | since they come from various template instantiations. |
455 | Take this into account. */ |
456 | else if (DECL_COMDAT (node->decl) |
457 | && node->can_remove_if_no_direct_calls_p ()) |
458 | { |
459 | int prob = opt_for_fn (node->decl, param_comdat_sharing_probability); |
460 | return (info->size * (100 - prob) + 50) / 100; |
461 | } |
462 | } |
463 | return 0; |
464 | } |
465 | |
466 | /* Estimate the growth caused by inlining NODE into all callers. */ |
467 | |
468 | int |
469 | estimate_growth (struct cgraph_node *node) |
470 | { |
471 | struct growth_data d = { .node: node, .self_recursive: false, .uninlinable: false, .growth: 0, INT_MAX }; |
472 | ipa_size_summary *info = ipa_size_summaries->get (node); |
473 | |
474 | if (node->call_for_symbol_and_aliases (callback: do_estimate_growth_1, data: &d, include_overwritable: true)) |
475 | return 1; |
476 | |
477 | /* For self recursive functions the growth estimation really should be |
478 | infinity. We don't want to return very large values because the growth |
479 | plays various roles in badness computation fractions. Be sure to not |
480 | return zero or negative growths. */ |
481 | if (d.self_recursive) |
482 | d.growth = d.growth < info->size ? info->size : d.growth; |
483 | else if (!d.uninlinable) |
484 | d.growth -= offline_size (node, info); |
485 | |
486 | return d.growth; |
487 | } |
488 | |
/* Worker of growth_positive_p.  Scan callers of NODE (and recursively of
   its aliases), accumulating an approximate growth into *GROWTH.  *N is a
   budget of callers lacking a cached estimate; when exhausted the scan
   gives up and returns false.  Return true as soon as it is certain that
   growth exceeds OFFLINE (the savings from removing the offline copy) or
   that the offline copy cannot be removed at all.  MIN_SIZE is the
   callee's minimal size used as a fallback estimate; KNOWN_EDGE, if
   non-NULL, is an edge already accounted for by the caller.  */

static bool
check_callers (cgraph_node *node, int *growth, int *n, int offline,
	       int min_size, struct cgraph_edge *known_edge)
{
  ipa_ref *ref;

  /* If the offline copy must stay regardless, growth is positive.  */
  if (!node->can_remove_if_no_direct_calls_and_refs_p ())
    return true;

  for (cgraph_edge *e = node->callers; e; e = e->next_caller)
    {
      edge_growth_cache_entry *entry;

      if (e == known_edge)
	continue;
      /* An uninlinable caller keeps the offline copy alive.  */
      if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR)
	return true;
      if (edge_growth_cache != NULL
	  && (entry = edge_growth_cache->get (edge: e)) != NULL
	  && entry->size != 0)
	/* Cached sizes are biased by one so 0 means "unset".  */
	*growth += entry->size - (entry->size > 0);
      else
	{
	  class ipa_call_summary *es = ipa_call_summaries->get (edge: e);
	  if (!es)
	    return true;
	  /* No cached estimate; approximate with the callee's minimal
	     size minus the call statement being replaced, and charge the
	     budget.  */
	  *growth += min_size - es->call_stmt_size;
	  if (--(*n) < 0)
	    return false;
	}
      if (*growth > offline)
	return true;
    }

  /* Callers reached through aliases of NODE count too.  */
  if (*n > 0)
    FOR_EACH_ALIAS (node, ref)
      if (check_callers (node: dyn_cast <cgraph_node *> (p: ref->referring), growth, n,
			 offline, min_size, known_edge))
	return true;

  return false;
}
533 | |
534 | |
535 | /* Decide if growth of NODE is positive. This is cheaper than calculating |
536 | actual growth. If edge growth of KNOWN_EDGE is known |
537 | it is passed by EDGE_GROWTH. */ |
538 | |
539 | bool |
540 | growth_positive_p (struct cgraph_node *node, |
541 | struct cgraph_edge * known_edge, int edge_growth) |
542 | { |
543 | struct cgraph_edge *e; |
544 | |
545 | ipa_size_summary *s = ipa_size_summaries->get (node); |
546 | |
547 | /* First quickly check if NODE is removable at all. */ |
548 | int offline = offline_size (node, info: s); |
549 | if (offline <= 0 && known_edge && edge_growth > 0) |
550 | return true; |
551 | |
552 | int min_size = ipa_fn_summaries->get (node)->min_size; |
553 | int n = 10; |
554 | |
555 | int min_growth = known_edge ? edge_growth : 0; |
556 | for (e = node->callers; e; e = e->next_caller) |
557 | { |
558 | edge_growth_cache_entry *entry; |
559 | |
560 | if (cgraph_inline_failed_type (e->inline_failed) == CIF_FINAL_ERROR) |
561 | return true; |
562 | if (e == known_edge) |
563 | continue; |
564 | if (edge_growth_cache != NULL |
565 | && (entry = edge_growth_cache->get (edge: e)) != NULL |
566 | && entry->size != 0) |
567 | min_growth += entry->size - (entry->size > 0); |
568 | else |
569 | { |
570 | class ipa_call_summary *es = ipa_call_summaries->get (edge: e); |
571 | if (!es) |
572 | return true; |
573 | min_growth += min_size - es->call_stmt_size; |
574 | if (--n <= 0) |
575 | break; |
576 | } |
577 | if (min_growth > offline) |
578 | return true; |
579 | } |
580 | |
581 | ipa_ref *ref; |
582 | if (n > 0) |
583 | FOR_EACH_ALIAS (node, ref) |
584 | if (check_callers (node: dyn_cast <cgraph_node *> (p: ref->referring), |
585 | growth: &min_growth, n: &n, offline, min_size, known_edge)) |
586 | return true; |
587 | |
588 | struct growth_data d = { .node: node, .self_recursive: false, .uninlinable: false, .growth: 0, .cap: offline }; |
589 | if (node->call_for_symbol_and_aliases (callback: do_estimate_growth_1, data: &d, include_overwritable: true)) |
590 | return true; |
591 | if (d.self_recursive || d.uninlinable) |
592 | return true; |
593 | return (d.growth > offline); |
594 | } |
595 | |