/* Callgraph transformations to handle inlining
   Copyright (C) 2003-2024 Free Software Foundation, Inc.
   Contributed by Jan Hubicka

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* The inline decisions are stored in the callgraph as an "inline plan" and
   applied later.

   To mark a given call as inlined, use the inline_call function.
   The function marks the edge inlinable and, if necessary, produces
   a virtual clone in the callgraph representing the new copy of the
   callee's function body.

   The inline plan is applied to a given function body by inline_transform.  */
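
/* A rough usage sketch (simplified; the real driver lives in ipa-inline.cc,
   and want_to_inline_p below is only a hypothetical placeholder for the
   inliner's heuristics):

     if (want_to_inline_p (e))
       inline_call (e, true, NULL, NULL, true, NULL);
     ...
     inline_transform (node);   // later, when expanding each function body

   inline_call only records and propagates the decision in the callgraph;
   no statements are copied until inline_transform runs on the caller.  */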

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "function.h"
#include "tree.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "symbol-summary.h"
#include "tree-vrp.h"
#include "sreal.h"
#include "ipa-cp.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
#include "ipa-inline.h"
#include "tree-inline.h"
#include "function.h"
#include "cfg.h"
#include "basic-block.h"
#include "ipa-utils.h"
#include "ipa-modref-tree.h"
#include "ipa-modref.h"
#include "symtab-thunks.h"
#include "symtab-clones.h"

int ncalls_inlined;
int nfunctions_inlined;

/* Scale counts of NODE edges by NUM/DEN.  */

static void
update_noncloned_counts (struct cgraph_node *node,
                         profile_count num, profile_count den)
{
  struct cgraph_edge *e;

  profile_count::adjust_for_ipa_scaling (&num, &den);

  for (e = node->callees; e; e = e->next_callee)
    {
      if (!e->inline_failed)
        update_noncloned_counts (e->callee, num, den);
      e->count = e->count.apply_scale (num, den);
    }
  for (e = node->indirect_calls; e; e = e->next_callee)
    e->count = e->count.apply_scale (num, den);
  node->count = node->count.apply_scale (num, den);
}

/* We removed or are going to remove the last call to NODE.
   Return true if we can and want to proactively remove NODE now.
   This is important to do, since we want the inliner to know when the
   offline copy of a function was removed.  */

static bool
can_remove_node_now_p_1 (struct cgraph_node *node, struct cgraph_edge *e)
{
  ipa_ref *ref;

  FOR_EACH_ALIAS (node, ref)
    {
      cgraph_node *alias = dyn_cast <cgraph_node *> (ref->referring);
      if ((alias->callers && alias->callers != e)
          || !can_remove_node_now_p_1 (alias, e))
        return false;
    }
  /* FIXME: When the address of a DECL_EXTERNAL function is taken we can
     still remove its offline copy, but we would need to keep an unanalyzed
     node in the callgraph so references can point to it.

     Also for a comdat group we can ignore references inside the group, as we
     want to prove the group as a whole to be dead.  */
  return (!node->address_taken
          && node->can_remove_if_no_direct_calls_and_refs_p ()
          /* Inlining might enable more devirtualizing, so we want to remove
             those only after all devirtualizable virtual calls are processed.
             Lacking may edges in callgraph we just preserve them post
             inlining.  */
          && (!DECL_VIRTUAL_P (node->decl)
              || !opt_for_fn (node->decl, flag_devirtualize))
          /* During early inlining some unanalyzed cgraph nodes might be in the
             callgraph and they might refer to the function in question.  */
          && !cgraph_new_nodes.exists ());
}

/* We are going to eliminate the last direct call to NODE (or an alias of it)
   via edge E.  Verify that NODE can be removed from the unit and, if it is
   contained in a comdat group, that the whole comdat group is removable.  */

static bool
can_remove_node_now_p (struct cgraph_node *node, struct cgraph_edge *e)
{
  struct cgraph_node *next;
  if (!can_remove_node_now_p_1 (node, e))
    return false;

  /* When we see the same comdat group, we need to be sure that all
     items can be removed.  */
  if (!node->same_comdat_group || !node->externally_visible)
    return true;
  for (next = dyn_cast<cgraph_node *> (node->same_comdat_group);
       next != node; next = dyn_cast<cgraph_node *> (next->same_comdat_group))
    {
      if (next->alias)
        continue;
      if ((next->callers && next->callers != e)
          || !can_remove_node_now_p_1 (next, e))
        return false;
    }
  return true;
}

/* Return true if NODE is a master clone with non-inline clones.  */

static bool
master_clone_with_noninline_clones_p (struct cgraph_node *node)
{
  if (node->clone_of)
    return false;

  for (struct cgraph_node *n = node->clones; n; n = n->next_sibling_clone)
    if (n->decl != node->decl)
      return true;

  return false;
}

/* E is expected to be an edge being inlined.  Clone the destination node of
   the edge and redirect it to the new clone.
   DUPLICATE is used for bookkeeping on whether we are actually creating new
   clones or re-using the node originally representing the out-of-line
   function call.
   By default the offline copy is removed when it appears dead after inlining.
   UPDATE_ORIGINAL prevents this transformation.
   If OVERALL_SIZE is non-NULL, the size is updated to reflect the
   transformation.  */

void
clone_inlined_nodes (struct cgraph_edge *e, bool duplicate,
                     bool update_original, int *overall_size)
{
  struct cgraph_node *inlining_into;
  struct cgraph_edge *next;

  if (e->caller->inlined_to)
    inlining_into = e->caller->inlined_to;
  else
    inlining_into = e->caller;

  if (duplicate)
    {
      /* We may eliminate the need for the out-of-line copy to be output.
         In that case just go ahead and re-use it.  This is not just a
         memory optimization.  Making the offline copy of the function
         disappear from the program will improve future decisions on
         inlining.  */
      if (!e->callee->callers->next_caller
          /* Recursive inlining never wants the master clone to
             be overwritten.  */
          && update_original
          && can_remove_node_now_p (e->callee, e)
          /* We cannot overwrite a master clone with non-inline clones
             until after these clones are materialized.  */
          && !master_clone_with_noninline_clones_p (e->callee))
        {
          /* TODO: When callee is in a comdat group, we could remove all of it,
             including all inline clones inlined into it.  That would however
             need small function inlining to register edge removal hook to
             maintain the priority queue.

             For now we keep the other functions in the group in program until
             cgraph_remove_unreachable_functions gets rid of them.  */
          gcc_assert (!e->callee->inlined_to);
          e->callee->remove_from_same_comdat_group ();
          if (e->callee->definition
              && inline_account_function_p (e->callee))
            {
              gcc_assert (!e->callee->alias);
              if (overall_size)
                *overall_size -= ipa_size_summaries->get (e->callee)->size;
              nfunctions_inlined++;
            }
          duplicate = false;
          e->callee->externally_visible = false;
          update_noncloned_counts (e->callee, e->count, e->callee->count);

          dump_callgraph_transformation (e->callee, inlining_into,
                                         "inlining to");
        }
      else
        {
          struct cgraph_node *n;

          n = e->callee->create_clone (e->callee->decl,
                                       e->count,
                                       update_original, vNULL, true,
                                       inlining_into,
                                       NULL);
          n->used_as_abstract_origin = e->callee->used_as_abstract_origin;
          e->redirect_callee (n);
        }
    }
  else
    e->callee->remove_from_same_comdat_group ();

  e->callee->inlined_to = inlining_into;
  if (e->callee->ipa_transforms_to_apply.length ())
    {
      e->callee->ipa_transforms_to_apply.release ();
      e->callee->ipa_transforms_to_apply = vNULL;
    }

  /* Recursively clone all bodies.  */
  for (e = e->callee->callees; e; e = next)
    {
      next = e->next_callee;
      if (!e->inline_failed)
        clone_inlined_nodes (e, duplicate, update_original, overall_size);
    }
}

/* Check all speculations in N and if any seem useless, resolve them.  When a
   first edge is resolved, pop all edges from NEW_EDGES and insert them to
   EDGE_SET.  Then remove each resolved edge from EDGE_SET, if it is there.  */

static bool
check_speculations_1 (cgraph_node *n, vec<cgraph_edge *> *new_edges,
                      hash_set <cgraph_edge *> *edge_set)
{
  bool speculation_removed = false;
  cgraph_edge *next;

  for (cgraph_edge *e = n->callees; e; e = next)
    {
      next = e->next_callee;
      if (e->speculative && !speculation_useful_p (e, true))
        {
          while (new_edges && !new_edges->is_empty ())
            edge_set->add (new_edges->pop ());
          edge_set->remove (e);

          cgraph_edge::resolve_speculation (e, NULL);
          speculation_removed = true;
        }
      else if (!e->inline_failed)
        speculation_removed |= check_speculations_1 (e->callee, new_edges,
                                                     edge_set);
    }
  return speculation_removed;
}

/* Push E to NEW_EDGES.  Called from hash_set traverse method, which
   unfortunately means this function has to have external linkage, otherwise
   the code will not compile with gcc 4.8.  */

bool
push_all_edges_in_set_to_vec (cgraph_edge * const &e,
                              vec<cgraph_edge *> *new_edges)
{
  new_edges->safe_push (e);
  return true;
}

/* Check all speculations in N and if any seem useless, resolve them and remove
   them from NEW_EDGES.  */

static bool
check_speculations (cgraph_node *n, vec<cgraph_edge *> *new_edges)
{
  hash_set <cgraph_edge *> edge_set;
  bool res = check_speculations_1 (n, new_edges, &edge_set);
  if (!edge_set.is_empty ())
    edge_set.traverse <vec<cgraph_edge *> *,
                       push_all_edges_in_set_to_vec> (new_edges);
  return res;
}

/* Mark all call graph edges coming out of NODE and all nodes that have been
   inlined to it as in_polymorphic_cdtor.  */

static void
mark_all_inlined_calls_cdtor (cgraph_node *node)
{
  for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
    {
      cs->in_polymorphic_cdtor = true;
      if (!cs->inline_failed)
        mark_all_inlined_calls_cdtor (cs->callee);
    }
  for (cgraph_edge *cs = node->indirect_calls; cs; cs = cs->next_callee)
    cs->in_polymorphic_cdtor = true;
}


/* Mark edge E as inlined and update the callgraph accordingly.
   UPDATE_ORIGINAL specifies whether the profile of the original function
   should be updated.  If any new indirect edges are discovered in the
   process, add them to NEW_EDGES, unless it is NULL.  If
   UPDATE_OVERALL_SUMMARY is false, do not bother to recompute the overall
   size of the caller after inlining; the caller is then required to
   eventually do it via ipa_update_overall_fn_summary.
   If CALLEE_REMOVED is non-NULL, set it to true if we removed the callee
   node.

   Return true iff any new callgraph edges were discovered as a
   result of inlining.  */

bool
inline_call (struct cgraph_edge *e, bool update_original,
             vec<cgraph_edge *> *new_edges,
             int *overall_size, bool update_overall_summary,
             bool *callee_removed)
{
  int old_size = 0, new_size = 0;
  struct cgraph_node *to = NULL;
  struct cgraph_edge *curr = e;
  bool comdat_local = e->callee->comdat_local_p ();
  struct cgraph_node *callee = e->callee->ultimate_alias_target ();
  bool new_edges_found = false;

  int estimated_growth = 0;
  if (! update_overall_summary)
    estimated_growth = estimate_edge_growth (e);
  /* This is used only for the assert below.  */
#if 0
  bool predicated = inline_edge_summary (e)->predicate != NULL;
#endif

  /* Don't inline inlined edges.  */
  gcc_assert (e->inline_failed);
  /* Don't even think of inlining inline clone.  */
  gcc_assert (!callee->inlined_to);

  to = e->caller;
  if (to->inlined_to)
    to = to->inlined_to;
  if (to->thunk)
    {
      struct cgraph_node *target = to->callees->callee;
      thunk_expansion = true;

      /* Remove all annotations, but keep thunk info.  */
      thunk_info info = *thunk_info::get (to);
      symtab->call_cgraph_removal_hooks (to);
      *thunk_info::get_create (to) = info;
      if (in_lto_p)
        to->get_untransformed_body ();
      expand_thunk (to, false, true);
      /* When thunk is instrumented we may have multiple callees.  */
      for (e = to->callees; e && e->callee != target; e = e->next_callee)
        ;
      symtab->call_cgraph_insertion_hooks (to);
      thunk_expansion = false;
      gcc_assert (e);
    }


  e->inline_failed = CIF_OK;
  DECL_POSSIBLY_INLINED (callee->decl) = true;

  if (DECL_FUNCTION_PERSONALITY (callee->decl))
    DECL_FUNCTION_PERSONALITY (to->decl)
      = DECL_FUNCTION_PERSONALITY (callee->decl);

  bool reload_optimization_node = false;
  if (!opt_for_fn (callee->decl, flag_strict_aliasing)
      && opt_for_fn (to->decl, flag_strict_aliasing))
    {
      struct gcc_options opts = global_options;
      struct gcc_options opts_set = global_options_set;

      cl_optimization_restore (&opts, &opts_set, opts_for_fn (to->decl));
      opts.x_flag_strict_aliasing = false;
      if (dump_file)
        fprintf (dump_file, "Dropping flag_strict_aliasing on %s\n",
                 to->dump_name ());
      DECL_FUNCTION_SPECIFIC_OPTIMIZATION (to->decl)
        = build_optimization_node (&opts, &opts_set);
      reload_optimization_node = true;
    }

  ipa_fn_summary *caller_info = ipa_fn_summaries->get (to);
  ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee);
  if (!caller_info->fp_expressions && callee_info->fp_expressions)
    {
      caller_info->fp_expressions = true;
      if (opt_for_fn (callee->decl, flag_rounding_math)
          != opt_for_fn (to->decl, flag_rounding_math)
          || opt_for_fn (callee->decl, flag_trapping_math)
          != opt_for_fn (to->decl, flag_trapping_math)
          || opt_for_fn (callee->decl, flag_unsafe_math_optimizations)
          != opt_for_fn (to->decl, flag_unsafe_math_optimizations)
          || opt_for_fn (callee->decl, flag_finite_math_only)
          != opt_for_fn (to->decl, flag_finite_math_only)
          || opt_for_fn (callee->decl, flag_signaling_nans)
          != opt_for_fn (to->decl, flag_signaling_nans)
          || opt_for_fn (callee->decl, flag_cx_limited_range)
          != opt_for_fn (to->decl, flag_cx_limited_range)
          || opt_for_fn (callee->decl, flag_signed_zeros)
          != opt_for_fn (to->decl, flag_signed_zeros)
          || opt_for_fn (callee->decl, flag_associative_math)
          != opt_for_fn (to->decl, flag_associative_math)
          || opt_for_fn (callee->decl, flag_reciprocal_math)
          != opt_for_fn (to->decl, flag_reciprocal_math)
          || opt_for_fn (callee->decl, flag_fp_int_builtin_inexact)
          != opt_for_fn (to->decl, flag_fp_int_builtin_inexact)
          || opt_for_fn (callee->decl, flag_errno_math)
          != opt_for_fn (to->decl, flag_errno_math))
        {
          struct gcc_options opts = global_options;
          struct gcc_options opts_set = global_options_set;

          cl_optimization_restore (&opts, &opts_set, opts_for_fn (to->decl));
          opts.x_flag_rounding_math
            = opt_for_fn (callee->decl, flag_rounding_math);
          opts.x_flag_trapping_math
            = opt_for_fn (callee->decl, flag_trapping_math);
          opts.x_flag_unsafe_math_optimizations
            = opt_for_fn (callee->decl, flag_unsafe_math_optimizations);
          opts.x_flag_finite_math_only
            = opt_for_fn (callee->decl, flag_finite_math_only);
          opts.x_flag_signaling_nans
            = opt_for_fn (callee->decl, flag_signaling_nans);
          opts.x_flag_cx_limited_range
            = opt_for_fn (callee->decl, flag_cx_limited_range);
          opts.x_flag_signed_zeros
            = opt_for_fn (callee->decl, flag_signed_zeros);
          opts.x_flag_associative_math
            = opt_for_fn (callee->decl, flag_associative_math);
          opts.x_flag_reciprocal_math
            = opt_for_fn (callee->decl, flag_reciprocal_math);
          opts.x_flag_fp_int_builtin_inexact
            = opt_for_fn (callee->decl, flag_fp_int_builtin_inexact);
          opts.x_flag_errno_math
            = opt_for_fn (callee->decl, flag_errno_math);
          if (dump_file)
            fprintf (dump_file, "Copying FP flags from %s to %s\n",
                     callee->dump_name (), to->dump_name ());
          DECL_FUNCTION_SPECIFIC_OPTIMIZATION (to->decl)
            = build_optimization_node (&opts, &opts_set);
          reload_optimization_node = true;
        }
    }

  /* Reload global optimization flags.  */
  if (reload_optimization_node && DECL_STRUCT_FUNCTION (to->decl) == cfun)
    set_cfun (cfun, true);

  /* If aliases are involved, redirect edge to the actual destination and
     possibly remove the aliases.  */
  if (e->callee != callee)
    {
      struct cgraph_node *alias = e->callee, *next_alias;
      e->redirect_callee (callee);
      while (alias && alias != callee)
        {
          if (!alias->callers
              && can_remove_node_now_p (alias,
                        !e->next_caller && !e->prev_caller ? e : NULL))
            {
              next_alias = alias->get_alias_target ();
              alias->remove ();
              if (callee_removed)
                *callee_removed = true;
              alias = next_alias;
            }
          else
            break;
        }
    }

  clone_inlined_nodes (e, true, update_original, overall_size);

  gcc_assert (curr->callee->inlined_to == to);

  old_size = ipa_size_summaries->get (to)->size;
  ipa_merge_modref_summary_after_inlining (e);
  ipa_merge_fn_summary_after_inlining (e);
  if (e->in_polymorphic_cdtor)
    mark_all_inlined_calls_cdtor (e->callee);
  if (opt_for_fn (e->caller->decl, optimize))
    new_edges_found = ipa_propagate_indirect_call_infos (curr, new_edges);
  bool removed_p = check_speculations (e->callee, new_edges);
  if (update_overall_summary)
    ipa_update_overall_fn_summary (to, new_edges_found || removed_p);
  else
    /* Update self size by the estimate so overall function growth limits
       work for further inlining into this function.  Before inlining
       the function we inlined to again we expect the caller to update
       the overall summary.  */
    ipa_size_summaries->get (to)->size += estimated_growth;
  new_size = ipa_size_summaries->get (to)->size;

  if (callee->calls_comdat_local)
    to->calls_comdat_local = true;
  else if (to->calls_comdat_local && comdat_local)
    to->calls_comdat_local = to->check_calls_comdat_local_p ();

  /* FIXME: This assert suffers from roundoff errors, disable it for GCC 5
     and revisit it after conversion to sreals in GCC 6.
     See PR 65654.  */
#if 0
  /* Verify that estimated growth matches the real growth.  Allow off-by-one
     error due to ipa_fn_summary::size_scale roundoff errors.  */
  gcc_assert (!update_overall_summary || !overall_size || new_edges_found
              || abs (estimated_growth - (new_size - old_size)) <= 1
              || speculation_removed
              /* FIXME: a hack.  Edges with false predicate are accounted
                 wrong, we should remove them from callgraph.  */
              || predicated);
#endif

  /* Account the change of overall unit size; external functions will be
     removed and are thus not accounted.  */
  if (overall_size && inline_account_function_p (to))
    *overall_size += new_size - old_size;
  ncalls_inlined++;

  /* This must happen after ipa_merge_fn_summary_after_inlining, which relies
     on the jump functions of the callee not being updated.  */
  return new_edges_found;
}

/* For each node that was made the holder of a function body by
   save_inline_function_body, this summary contains a pointer to the previous
   holder of the body.  */

function_summary <tree *> *ipa_saved_clone_sources;

/* Copy the function body of NODE and redirect all inline clones to it.
   This is done before the inline plan is applied to NODE when there are
   still some inline clones of it.

   This is necessary because inline decisions are not really transitive
   and the other inline clones may have different bodies.  */

static struct cgraph_node *
save_inline_function_body (struct cgraph_node *node)
{
  struct cgraph_node *first_clone, *n;

  if (dump_file)
    fprintf (dump_file, "\nSaving body of %s for later reuse\n",
             node->dump_name ());

  gcc_assert (node == cgraph_node::get (node->decl));

  /* first_clone will be turned into real function.  */
  first_clone = node->clones;

  /* Arrange first clone to not be thunk as those do not have bodies.  */
  if (first_clone->thunk)
    {
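      /* Splice the first non-thunk clone out of its current position in the
         sibling list and re-link it at the head, so that node->clones points
         to a clone that has a body.  */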
      while (first_clone->thunk)
        first_clone = first_clone->next_sibling_clone;
      first_clone->prev_sibling_clone->next_sibling_clone
        = first_clone->next_sibling_clone;
      if (first_clone->next_sibling_clone)
        first_clone->next_sibling_clone->prev_sibling_clone
          = first_clone->prev_sibling_clone;
      first_clone->next_sibling_clone = node->clones;
      first_clone->prev_sibling_clone = NULL;
      node->clones->prev_sibling_clone = first_clone;
      node->clones = first_clone;
    }
  first_clone->decl = copy_node (node->decl);
  first_clone->decl->decl_with_vis.symtab_node = first_clone;
  gcc_assert (first_clone == cgraph_node::get (first_clone->decl));

  /* Now reshape the clone tree, so all other clones descend from
     first_clone.  */
  if (first_clone->next_sibling_clone)
    {
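      /* Each remaining sibling becomes a direct clone of first_clone and the
         whole sibling chain is spliced onto first_clone's own clone list;
         n ends up pointing at the last sibling in the chain.  */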
      for (n = first_clone->next_sibling_clone; n->next_sibling_clone;
           n = n->next_sibling_clone)
        n->clone_of = first_clone;
      n->clone_of = first_clone;
      n->next_sibling_clone = first_clone->clones;
      if (first_clone->clones)
        first_clone->clones->prev_sibling_clone = n;
      first_clone->clones = first_clone->next_sibling_clone;
      first_clone->next_sibling_clone->prev_sibling_clone = NULL;
      first_clone->next_sibling_clone = NULL;
      gcc_assert (!first_clone->prev_sibling_clone);
    }

  tree prev_body_holder = node->decl;
  if (!ipa_saved_clone_sources)
    {
      ipa_saved_clone_sources = new function_summary <tree *> (symtab);
      ipa_saved_clone_sources->disable_insertion_hook ();
    }
  else
    {
      tree *p = ipa_saved_clone_sources->get (node);
      if (p)
        {
          prev_body_holder = *p;
          gcc_assert (prev_body_holder);
        }
    }
  *ipa_saved_clone_sources->get_create (first_clone) = prev_body_holder;
  first_clone->former_clone_of
    = node->former_clone_of ? node->former_clone_of : node->decl;
  first_clone->clone_of = NULL;

  /* Now the node in question has no clones.  */
  node->clones = NULL;

  /* Inline clones share decl with the function they are cloned
     from.  Walk the whole clone tree and redirect them all to the
     new decl.  */
  if (first_clone->clones)
    for (n = first_clone->clones; n != first_clone;)
      {
        gcc_assert (n->decl == node->decl);
        n->decl = first_clone->decl;
        if (n->clones)
          n = n->clones;
        else if (n->next_sibling_clone)
          n = n->next_sibling_clone;
        else
          {
            while (n != first_clone && !n->next_sibling_clone)
              n = n->clone_of;
            if (n != first_clone)
              n = n->next_sibling_clone;
          }
      }

  /* Copy the OLD_VERSION_NODE function tree to the new version.  */
  tree_function_versioning (node->decl, first_clone->decl,
                            NULL, NULL, true, NULL, NULL);

  /* The function will be short lived and removed after we inline all the
     clones, but make it internal so we won't confuse ourselves.  */
  DECL_EXTERNAL (first_clone->decl) = 0;
  TREE_PUBLIC (first_clone->decl) = 0;
  DECL_COMDAT (first_clone->decl) = 0;
  first_clone->ipa_transforms_to_apply.release ();

  /* When doing recursive inlining, the clone may become unnecessary.
     This is possible e.g. in the case when the recursive function is proved
     to be non-throwing and the recursion happens only in the EH landing pad.
     We cannot remove the clone until we are done with saving the body.
     Remove it now.  */
  if (!first_clone->callers)
    {
      first_clone->remove_symbol_and_inline_clones ();
      first_clone = NULL;
    }
  else if (flag_checking)
    first_clone->verify ();

  return first_clone;
}

/* Return true when the function body of NODE still needs to be kept around
   for later re-use.  */
static bool
preserve_function_body_p (struct cgraph_node *node)
{
  gcc_assert (symtab->global_info_ready);
  gcc_assert (!node->alias && !node->thunk);

  /* Look if there is any non-thunk clone around.  */
  for (node = node->clones; node; node = node->next_sibling_clone)
    if (!node->thunk)
      return true;
  return false;
}

/* tree-inline cannot recurse; materialize all function bodies we will need
   during inlining.  This includes inlined functions, but also called functions
   with param manipulation because IPA param manipulation attaches debug
   statements to PARM_DECLs of the called clone.  Materialize them if needed.

   FIXME: This is somewhat broken by design because it does not play well
   with partitioning.  */

static void
maybe_materialize_called_clones (cgraph_node *node)
{
  for (cgraph_edge *e = node->callees; e; e = e->next_callee)
    {
      clone_info *info;

      if (!e->inline_failed)
        maybe_materialize_called_clones (e->callee);

      cgraph_node *callee = cgraph_node::get (e->callee->decl);
      if (callee->clone_of
          && (info = clone_info::get (callee)) && info->param_adjustments)
        callee->get_untransformed_body ();
    }
}

/* Apply inline plan to function.  */

unsigned int
inline_transform (struct cgraph_node *node)
{
  unsigned int todo = 0;
  struct cgraph_edge *e, *next;
  bool has_inline = false;

  /* FIXME: Currently the pass manager is adding inline transform more than
     once to some clones.  This needs revisiting after WPA cleanups.  */
  if (cfun->after_inlining)
    return 0;

  cgraph_node *next_clone;
  for (cgraph_node *n = node->clones; n; n = next_clone)
    {
      next_clone = n->next_sibling_clone;
      if (n->decl != node->decl)
        n->materialize_clone ();
    }
  node->clear_stmts_in_references ();

  /* We might need the body of this function so that we can expand
     it inline somewhere else.  */
  if (preserve_function_body_p (node))
    save_inline_function_body (node);

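  /* Rescale the body's profile so that its entry count matches NODE's count;
     the two can differ when the body was inherited from a clone with a
     different execution count.  */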
  profile_count num = node->count;
  profile_count den = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
  bool scale = num.initialized_p () && !(num == den);
  if (scale)
    {
      profile_count::adjust_for_ipa_scaling (&num, &den);
      if (dump_file)
        {
          fprintf (dump_file, "Applying count scale ");
          num.dump (dump_file);
          fprintf (dump_file, "/");
          den.dump (dump_file);
          fprintf (dump_file, "\n");
        }

      basic_block bb;
      cfun->cfg->count_max = profile_count::uninitialized ();
      FOR_ALL_BB_FN (bb, cfun)
        {
          bb->count = bb->count.apply_scale (num, den);
          cfun->cfg->count_max = cfun->cfg->count_max.max (bb->count);
        }
      ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = node->count;
    }

  maybe_materialize_called_clones (node);
  for (e = node->callees; e; e = next)
    {
      if (!e->inline_failed)
        has_inline = true;
      next = e->next_callee;
      cgraph_edge::redirect_call_stmt_to_callee (e);
    }
  node->remove_all_references ();

  timevar_push (TV_INTEGRATION);
  if (node->callees && (opt_for_fn (node->decl, optimize) || has_inline))
    {
      todo = optimize_inline_calls (current_function_decl);
    }
  timevar_pop (TV_INTEGRATION);

  cfun->always_inline_functions_inlined = true;
  cfun->after_inlining = true;
  todo |= execute_fixup_cfg ();

  if (!(todo & TODO_update_ssa_any))
    /* Redirecting edges might lead to a need for vops to be recomputed.  */
    todo |= TODO_update_ssa_only_virtuals;

  return todo;
}
806 | |