/* Utilities for ipa analysis.
   Copyright (C) 2005-2024 Free Software Foundation, Inc.
   Contributed by Kenneth Zadeck <zadeck@naturalbridge.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "alloc-pool.h"
#include "cgraph.h"
#include "lto-streamer.h"
#include "dumpfile.h"
#include "splay-tree.h"
#include "ipa-utils.h"
#include "symbol-summary.h"
#include "tree-vrp.h"
#include "sreal.h"
#include "ipa-cp.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
#include "tree-eh.h"
#include "gimple-iterator.h"
#include "ipa-modref-tree.h"
#include "ipa-modref.h"
#include "tree-ssa-loop-niter.h"
#include "calls.h"
#include "cfgloop.h"
#include "cfganal.h"

/* Debugging function for postorder and inorder code.  NOTE is a string
   that is printed before the nodes are printed.  ORDER is an array of
   cgraph_nodes that has COUNT useful nodes in it.  */

void
ipa_print_order (FILE* out,
		 const char * note,
		 struct cgraph_node** order,
		 int count)
{
  int i;
  fprintf (out, "\n\n ordered call graph: %s\n", note);

  for (i = count - 1; i >= 0; i--)
    order[i]->dump (out);
  fprintf (out, "\n");
  fflush (out);
}


struct searchc_env {
  struct cgraph_node **stack;
  struct cgraph_node **result;
  int stack_size;
  int order_pos;
  splay_tree nodes_marked_new;
  bool reduce;
  int count;
};

/* This is an implementation of Tarjan's strongly connected component
   finder, as reprinted in Aho, Hopcroft and Ullman's The Design and
   Analysis of Computer Algorithms (1974), pages 192-193.  This version
   has been customized for cgraph_nodes.  This function should only be
   called from itself or from ipa_reduced_postorder.  ENV holds the
   traversal state and is passed explicitly because the function is
   recursive and C has no nested functions.  V is the node to start
   searching from.  */
static void
searchc (struct searchc_env* env, struct cgraph_node *v,
	 bool (*ignore_edge) (struct cgraph_edge *))
{
  struct cgraph_edge *edge;
  struct ipa_dfs_info *v_info = (struct ipa_dfs_info *) v->aux;

  /* Mark the node as old.  */
  v_info->new_node = false;
  splay_tree_remove (env->nodes_marked_new, v->get_uid ());

  v_info->dfn_number = env->count;
  v_info->low_link = env->count;
  env->count++;
  env->stack[(env->stack_size)++] = v;
  v_info->on_stack = true;

  for (edge = v->callees; edge; edge = edge->next_callee)
    {
      struct ipa_dfs_info * w_info;
      enum availability avail;
      struct cgraph_node *w = edge->callee->ultimate_alias_target (&avail);

      if (!w || (ignore_edge && ignore_edge (edge)))
	continue;

      if (w->aux
	  && (avail >= AVAIL_INTERPOSABLE))
	{
	  w_info = (struct ipa_dfs_info *) w->aux;
	  if (w_info->new_node)
	    {
	      searchc (env, w, ignore_edge);
	      v_info->low_link =
		(v_info->low_link < w_info->low_link) ?
		v_info->low_link : w_info->low_link;
	    }
	  else
	    if ((w_info->dfn_number < v_info->dfn_number)
		&& (w_info->on_stack))
	      v_info->low_link =
		(w_info->dfn_number < v_info->low_link) ?
		w_info->dfn_number : v_info->low_link;
	}
    }

  if (v_info->low_link == v_info->dfn_number)
    {
      struct cgraph_node *last = NULL;
      struct cgraph_node *x;
      struct ipa_dfs_info *x_info;
      do {
	x = env->stack[--(env->stack_size)];
	x_info = (struct ipa_dfs_info *) x->aux;
	x_info->on_stack = false;
	x_info->scc_no = v_info->dfn_number;

	if (env->reduce)
	  {
	    x_info->next_cycle = last;
	    last = x;
	  }
	else
	  env->result[env->order_pos++] = x;
      }
      while (v != x);
      if (env->reduce)
	env->result[env->order_pos++] = v;
    }
}
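
/* For instance (illustrative only): given mutually recursive functions
   a <-> b that both call c, searchc identifies {c} and {a, b} as the
   strongly connected components; the postorder then lists c before the
   representative of the {a, b} cycle, so callees come first.  */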

/* Topsort the call graph by the caller relation.  Put the result in ORDER.

   The REDUCE flag is true if you want the cycles reduced to single nodes.
   You can use ipa_get_nodes_in_cycle to obtain a vector containing all real
   call graph nodes in a reduced node.

   Nodes with AVAIL_INTERPOSABLE or better availability are included.
   IGNORE_EDGE, if non-NULL, is a hook that may mark some edges as
   insignificant for the topological sort.  */
int
ipa_reduced_postorder (struct cgraph_node **order,
		       bool reduce,
		       bool (*ignore_edge) (struct cgraph_edge *))
{
  struct cgraph_node *node;
  struct searchc_env env;
  splay_tree_node result;
  env.stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
  env.stack_size = 0;
  env.result = order;
  env.order_pos = 0;
  env.nodes_marked_new = splay_tree_new (splay_tree_compare_ints, 0, 0);
  env.count = 1;
  env.reduce = reduce;

  FOR_EACH_DEFINED_FUNCTION (node)
    {
      enum availability avail = node->get_availability ();

      if (avail >= AVAIL_INTERPOSABLE)
	{
	  /* Reuse the info if it is already there.  */
	  struct ipa_dfs_info *info = (struct ipa_dfs_info *) node->aux;
	  if (!info)
	    info = XCNEW (struct ipa_dfs_info);
	  info->new_node = true;
	  info->on_stack = false;
	  info->next_cycle = NULL;
	  node->aux = info;

	  splay_tree_insert (env.nodes_marked_new,
			     (splay_tree_key)node->get_uid (),
			     (splay_tree_value)node);
	}
      else
	node->aux = NULL;
    }
  result = splay_tree_min (env.nodes_marked_new);
  while (result)
    {
      node = (struct cgraph_node *)result->value;
      searchc (&env, node, ignore_edge);
      result = splay_tree_min (env.nodes_marked_new);
    }
  splay_tree_delete (env.nodes_marked_new);
  free (env.stack);

  return env.order_pos;
}
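
/* A minimal usage sketch (hypothetical IPA pass code; the variable names
   are illustrative, not part of this API):

     cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
     int n = ipa_reduced_postorder (order, true, NULL);
     for (int i = 0; i < n; i++)
       {
	 // With REDUCE set, each entry stands for a whole cycle.
	 vec<cgraph_node *> cycle = ipa_get_nodes_in_cycle (order[i]);
	 // ... propagate within the cycle ...
	 cycle.release ();
       }
     ipa_free_postorder_info ();
     free (order);
*/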

/* Deallocate all ipa_dfs_info structures pointed to by the aux pointer of
   call graph nodes.  */

void
ipa_free_postorder_info (void)
{
  struct cgraph_node *node;
  FOR_EACH_DEFINED_FUNCTION (node)
    {
      /* Get rid of the aux information.  */
      if (node->aux)
	{
	  free (node->aux);
	  node->aux = NULL;
	}
    }
}

/* Get the set of nodes for the cycle in the reduced call graph starting
   from NODE.  */

vec<cgraph_node *>
ipa_get_nodes_in_cycle (struct cgraph_node *node)
{
  vec<cgraph_node *> v = vNULL;
  struct ipa_dfs_info *node_dfs_info;
  while (node)
    {
      v.safe_push (node);
      node_dfs_info = (struct ipa_dfs_info *) node->aux;
      node = node_dfs_info->next_cycle;
    }
  return v;
}

/* Return true iff the CS is an edge within a strongly connected component as
   computed by ipa_reduced_postorder.  */

bool
ipa_edge_within_scc (struct cgraph_edge *cs)
{
  struct ipa_dfs_info *caller_dfs = (struct ipa_dfs_info *) cs->caller->aux;
  struct ipa_dfs_info *callee_dfs;
  struct cgraph_node *callee = cs->callee->function_symbol ();

  callee_dfs = (struct ipa_dfs_info *) callee->aux;
  return (caller_dfs
	  && callee_dfs
	  && caller_dfs->scc_no == callee_dfs->scc_no);
}

struct postorder_stack
{
  struct cgraph_node *node;
  struct cgraph_edge *edge;
  int ref;
};

/* Fill array ORDER with all nodes with output flag set in the reverse
   topological order.  Return the number of elements in the array.
   FIXME: While walking, consider aliases, too.  */

int
ipa_reverse_postorder (struct cgraph_node **order)
{
  struct cgraph_node *node, *node2;
  int stack_size = 0;
  int order_pos = 0;
  struct cgraph_edge *edge;
  int pass;
  struct ipa_ref *ref = NULL;

  struct postorder_stack *stack =
    XCNEWVEC (struct postorder_stack, symtab->cgraph_count);

  /* We have to deal with cycles nicely, so use a depth first traversal
     output algorithm.  Ignore the fact that some functions won't need
     to be output and put them into order as well, so we get dependencies
     right through inline functions.  */
  FOR_EACH_FUNCTION (node)
    node->aux = NULL;
  for (pass = 0; pass < 2; pass++)
    FOR_EACH_FUNCTION (node)
      if (!node->aux
	  && (pass
	      || (!node->address_taken
		  && !node->inlined_to
		  && !node->alias && !node->thunk
		  && !node->only_called_directly_p ())))
	{
	  stack_size = 0;
	  stack[stack_size].node = node;
	  stack[stack_size].edge = node->callers;
	  stack[stack_size].ref = 0;
	  node->aux = (void *)(size_t)1;
	  while (stack_size >= 0)
	    {
	      while (true)
		{
		  node2 = NULL;
		  while (stack[stack_size].edge && !node2)
		    {
		      edge = stack[stack_size].edge;
		      node2 = edge->caller;
		      stack[stack_size].edge = edge->next_caller;
		    }
		  for (; stack[stack_size].node->iterate_referring (
						    stack[stack_size].ref,
						    ref) && !node2;
		       stack[stack_size].ref++)
		    {
		      if (ref->use == IPA_REF_ALIAS)
			node2 = dyn_cast <cgraph_node *> (ref->referring);
		    }
		  if (!node2)
		    break;
		  if (!node2->aux)
		    {
		      stack[++stack_size].node = node2;
		      stack[stack_size].edge = node2->callers;
		      stack[stack_size].ref = 0;
		      node2->aux = (void *)(size_t)1;
		    }
		}
	      order[order_pos++] = stack[stack_size--].node;
	    }
	}
  free (stack);
  FOR_EACH_FUNCTION (node)
    node->aux = NULL;
  return order_pos;
}
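
/* A minimal usage sketch (hypothetical pass code; names are illustrative):

     cgraph_node **order = XCNEWVEC (cgraph_node *, symtab->cgraph_count);
     int n = ipa_reverse_postorder (order);
     for (int i = 0; i < n; i++)
       // ... process order[i]; a node's callers appear before it ...
     free (order);
*/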


/* Given a memory reference T, will return the variable at the bottom
   of the access.  Unlike get_base_address, this will recurse through
   INDIRECT_REFS.  */

tree
get_base_var (tree t)
{
  while (!SSA_VAR_P (t)
	 && (!CONSTANT_CLASS_P (t))
	 && TREE_CODE (t) != LABEL_DECL
	 && TREE_CODE (t) != FUNCTION_DECL
	 && TREE_CODE (t) != CONST_DECL
	 && TREE_CODE (t) != CONSTRUCTOR)
    {
      t = TREE_OPERAND (t, 0);
    }
  return t;
}
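
/* For example (illustrative): for the reference a.b[i], get_base_var
   returns the VAR_DECL for a; for *p_1 it returns the SSA name p_1,
   where get_base_address would have stopped at the MEM_REF.  */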

/* Scale counts of calls in NODE by the ratio NODE->count / ORIG_COUNT.  */

void
scale_ipa_profile_for_fn (struct cgraph_node *node, profile_count orig_count)
{
  profile_count to = node->count;
  profile_count::adjust_for_ipa_scaling (&to, &orig_count);
  struct cgraph_edge *e;

  for (e = node->callees; e; e = e->next_callee)
    e->count = e->count.apply_scale (to, orig_count);
  for (e = node->indirect_calls; e; e = e->next_callee)
    e->count = e->count.apply_scale (to, orig_count);
}

/* SRC and DST are going to be merged.  Take SRC's profile and merge it into
   DST so it is not going to be lost.  Possibly destroy SRC's body on the way
   unless PRESERVE_BODY is set.  */

void
ipa_merge_profiles (struct cgraph_node *dst,
		    struct cgraph_node *src,
		    bool preserve_body)
{
  tree oldsrcdecl = src->decl;
  struct function *srccfun, *dstcfun;
  bool match = true;
  bool copy_counts = false;

  if (!src->definition
      || !dst->definition)
    return;

  if (src->frequency < dst->frequency)
    src->frequency = dst->frequency;

  /* Time profiles are merged.  */
  if (dst->tp_first_run > src->tp_first_run && src->tp_first_run)
    dst->tp_first_run = src->tp_first_run;

  if (src->profile_id && !dst->profile_id)
    dst->profile_id = src->profile_id;

  /* Merging a zero profile into DST is a no-op.  */
  if (src->count.ipa () == profile_count::zero ())
    return;

  /* FIXME: when we merge in an unknown profile, we ought to mark the counts
     as unsafe.  */
  if (!src->count.initialized_p ()
      || !(src->count.ipa () == src->count))
    return;
  profile_count orig_count = dst->count;

  /* Either sum the profiles if both are IPA and not global0, or
     pick the more informative one (that is, the nonzero IPA one if the
     other is uninitialized, guessed or global0).  */

  if ((dst->count.ipa ().nonzero_p ()
       || src->count.ipa ().nonzero_p ())
      && dst->count.ipa ().initialized_p ()
      && src->count.ipa ().initialized_p ())
    dst->count = dst->count.ipa () + src->count.ipa ();
  else if (dst->count.ipa ().initialized_p ())
    ;
  else if (src->count.ipa ().initialized_p ())
    {
      copy_counts = true;
      dst->count = src->count.ipa ();
    }

  /* If no updating is needed, return early.  */
  if (dst->count == orig_count)
    return;

  if (symtab->dump_file)
    {
      fprintf (symtab->dump_file, "Merging profiles of %s count:",
	       src->dump_name ());
      src->count.dump (symtab->dump_file);
      fprintf (symtab->dump_file, " to %s count:",
	       dst->dump_name ());
      orig_count.dump (symtab->dump_file);
      fprintf (symtab->dump_file, " resulting count:");
      dst->count.dump (symtab->dump_file);
      fprintf (symtab->dump_file, "\n");
    }

  /* First handle functions with no gimple body.  */
  if (dst->thunk || dst->alias
      || src->thunk || src->alias)
    {
      scale_ipa_profile_for_fn (dst, orig_count);
      return;
    }

  /* This is ugly.  We need to get both function bodies into memory.
     If the declaration is merged, we need to duplicate it to be able
     to load the body that is being replaced.  This makes the symbol table
     temporarily inconsistent.  */
  if (src->decl == dst->decl)
    {
      struct lto_in_decl_state temp;
      struct lto_in_decl_state *state;

      /* We are going to move the decl; we want to remove its file decl data
	 and link these with the new decl.  */
      temp.fn_decl = src->decl;
      lto_in_decl_state **slot
	= src->lto_file_data->function_decl_states->find_slot (&temp,
							       NO_INSERT);
      state = *slot;
      src->lto_file_data->function_decl_states->clear_slot (slot);
      gcc_assert (state);

      /* Duplicate the decl and be sure it does not link into the body of
	 DST.  */
      src->decl = copy_node (src->decl);
      DECL_STRUCT_FUNCTION (src->decl) = NULL;
      DECL_ARGUMENTS (src->decl) = NULL;
      DECL_INITIAL (src->decl) = NULL;
      DECL_RESULT (src->decl) = NULL;

      /* Associate the decl state with the new declaration, so the LTO
	 streamer can look it up.  */
      state->fn_decl = src->decl;
      slot
	= src->lto_file_data->function_decl_states->find_slot (state, INSERT);
      gcc_assert (!*slot);
      *slot = state;
    }
  src->get_untransformed_body ();
  dst->get_untransformed_body ();
  srccfun = DECL_STRUCT_FUNCTION (src->decl);
  dstcfun = DECL_STRUCT_FUNCTION (dst->decl);
  if (n_basic_blocks_for_fn (srccfun)
      != n_basic_blocks_for_fn (dstcfun))
    {
      if (symtab->dump_file)
	fprintf (symtab->dump_file,
		 "Giving up; number of basic blocks differs.\n");
      match = false;
    }
  else if (last_basic_block_for_fn (srccfun)
	   != last_basic_block_for_fn (dstcfun))
    {
      if (symtab->dump_file)
	fprintf (symtab->dump_file,
		 "Giving up; last block mismatch.\n");
      match = false;
    }
  else
    {
      basic_block srcbb, dstbb;
      struct cgraph_edge *e, *e2;

      for (e = dst->callees, e2 = src->callees; e && e2 && match;
	   e2 = e2->next_callee, e = e->next_callee)
	{
	  if (gimple_bb (e->call_stmt)->index
	      != gimple_bb (e2->call_stmt)->index)
	    {
	      if (symtab->dump_file)
		fprintf (symtab->dump_file,
			 "Giving up; call stmt mismatch.\n");
	      match = false;
	    }
	}
      if (e || e2)
	{
	  if (symtab->dump_file)
	    fprintf (symtab->dump_file,
		     "Giving up; number of calls differs.\n");
	  match = false;
	}
      for (e = dst->indirect_calls, e2 = src->indirect_calls; e && e2 && match;
	   e2 = e2->next_callee, e = e->next_callee)
	{
	  if (gimple_bb (e->call_stmt)->index
	      != gimple_bb (e2->call_stmt)->index)
	    {
	      if (symtab->dump_file)
		fprintf (symtab->dump_file,
			 "Giving up; indirect call stmt mismatch.\n");
	      match = false;
	    }
	}
      if (e || e2)
	{
	  if (symtab->dump_file)
	    fprintf (symtab->dump_file,
		     "Giving up; number of indirect calls differs.\n");
	  match = false;
	}

      if (match)
	FOR_ALL_BB_FN (srcbb, srccfun)
	  {
	    unsigned int i;

	    dstbb = BASIC_BLOCK_FOR_FN (dstcfun, srcbb->index);
	    if (dstbb == NULL)
	      {
		if (symtab->dump_file)
		  fprintf (symtab->dump_file,
			   "No matching block for bb %i.\n",
			   srcbb->index);
		match = false;
		break;
	      }
	    if (EDGE_COUNT (srcbb->succs) != EDGE_COUNT (dstbb->succs))
	      {
		if (symtab->dump_file)
		  fprintf (symtab->dump_file,
			   "Edge count mismatch for bb %i.\n",
			   srcbb->index);
		match = false;
		break;
	      }
	    for (i = 0; i < EDGE_COUNT (srcbb->succs); i++)
	      {
		edge srce = EDGE_SUCC (srcbb, i);
		edge dste = EDGE_SUCC (dstbb, i);
		if (srce->dest->index != dste->dest->index)
		  {
		    if (symtab->dump_file)
		      fprintf (symtab->dump_file,
			       "Succ edge mismatch for bb %i.\n",
			       srce->dest->index);
		    match = false;
		    break;
		  }
	      }
	  }
    }
  if (match)
    {
      struct cgraph_edge *e, *e2;
      basic_block srcbb, dstbb;

      /* The function and global profiles may be out of sync.  First scale
	 them the same way fixup_cfg would.  */
      profile_count srcnum = src->count;
      profile_count srcden = ENTRY_BLOCK_PTR_FOR_FN (srccfun)->count;
      bool srcscale = srcnum.initialized_p () && !(srcnum == srcden);
      profile_count dstnum = orig_count;
      profile_count dstden = ENTRY_BLOCK_PTR_FOR_FN (dstcfun)->count;
      bool dstscale = !copy_counts
		      && dstnum.initialized_p () && !(dstnum == dstden);

      /* TODO: merge also statement histograms.  */
      FOR_ALL_BB_FN (srcbb, srccfun)
	{
	  unsigned int i;

	  dstbb = BASIC_BLOCK_FOR_FN (dstcfun, srcbb->index);

	  profile_count srccount = srcbb->count;
	  if (srcscale)
	    srccount = srccount.apply_scale (srcnum, srcden);
	  if (dstscale)
	    dstbb->count = dstbb->count.apply_scale (dstnum, dstden);

	  if (copy_counts)
	    {
	      dstbb->count = srccount;
	      for (i = 0; i < EDGE_COUNT (srcbb->succs); i++)
		{
		  edge srce = EDGE_SUCC (srcbb, i);
		  edge dste = EDGE_SUCC (dstbb, i);
		  if (srce->probability.initialized_p ())
		    dste->probability = srce->probability;
		}
	    }
	  else
	    {
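	      /* Merge outgoing edge probabilities as a count-weighted
		 average of the two functions' probabilities:
		   p = p_dst * c_dst / (c_dst + c_src)
		     + p_src * c_src / (c_dst + c_src)
		 where c_dst and c_src are the IPA counts of the
		 destination and source blocks.  */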
	      for (i = 0; i < EDGE_COUNT (srcbb->succs); i++)
		{
		  edge srce = EDGE_SUCC (srcbb, i);
		  edge dste = EDGE_SUCC (dstbb, i);
		  profile_count sum =
		    dstbb->count.ipa () + srccount.ipa ();
		  if (sum.nonzero_p ())
		    dste->probability =
		      dste->probability * dstbb->count.ipa ().probability_in
			(sum)
		      + srce->probability * srcbb->count.ipa ().probability_in
			(sum);
		}
	      dstbb->count = dstbb->count.ipa () + srccount.ipa ();
	    }
	}
      push_cfun (dstcfun);
      update_max_bb_count ();
      compute_function_frequency ();
      pop_cfun ();
      for (e = dst->callees; e; e = e->next_callee)
	{
	  if (e->speculative)
	    continue;
	  e->count = gimple_bb (e->call_stmt)->count;
	}
      for (e = dst->indirect_calls, e2 = src->indirect_calls; e;
	   e2 = (e2 ? e2->next_callee : NULL), e = e->next_callee)
	{
	  if (!e->speculative && !e2->speculative)
	    {
	      /* FIXME: we need to also merge ipa-profile histograms
		 because with LTO merging happens from lto-symtab before
		 these are converted to indirect edges.  */
	      e->count = gimple_bb (e->call_stmt)->count;
	      continue;
	    }

	  /* When copying, just remove all speculations on DST and then copy
	     those from SRC.  */
	  if (copy_counts)
	    {
	      while (e->speculative)
		cgraph_edge::resolve_speculation (e, NULL);
	      e->count = gimple_bb (e->call_stmt)->count;
	      if (e2->speculative)
		{
		  for (cgraph_edge *e3 = e2->first_speculative_call_target ();
		       e3;
		       e3 = e3->next_speculative_call_target ())
		    {
		      cgraph_edge *ns;
		      ns = e->make_speculative
			     (dyn_cast <cgraph_node *>
				(e3->speculative_call_target_ref ()->referred),
			      e3->count, e3->speculative_id);
		      /* The target may differ from the ref (for example it
			 may be redirected to a local alias).  */
		      ns->redirect_callee (e3->callee);
		    }
		}
	      continue;
	    }

	  /* Iterate over all speculations in SRC, see if corresponding ones
	     exist in DST, and if so, sum the counts.  Otherwise create a new
	     speculation.  */
	  int max_spec = 0;
	  for (cgraph_edge *e3 = e->first_speculative_call_target ();
	       e3;
	       e3 = e3->next_speculative_call_target ())
	    if (e3->speculative_id > max_spec)
	      max_spec = e3->speculative_id;
	  for (cgraph_edge *e3 = e2->first_speculative_call_target ();
	       e3;
	       e3 = e3->next_speculative_call_target ())
	    {
	      cgraph_edge *te
		= e->speculative_call_for_target
		    (dyn_cast <cgraph_node *>
		       (e3->speculative_call_target_ref ()->referred));
	      if (te)
		te->count = te->count + e3->count;
	      else
		{
		  e->count = e->count + e3->count;
		  cgraph_edge *ns;
		  ns = e->make_speculative
			 (dyn_cast <cgraph_node *>
			    (e3->speculative_call_target_ref ()
			       ->referred),
			  e3->count,
			  e3->speculative_id + max_spec + 1);
		  /* The target may differ from the ref (for example it
		     may be redirected to a local alias).  */
		  ns->redirect_callee (e3->callee);
		}
	    }
	}
      if (!preserve_body)
	src->release_body ();
      /* Update summary.  */
      compute_fn_summary (dst, 0);
    }
  /* We can't update the CFG profile, but we can scale the IPA profile.
     The CFG will be scaled according to dst->count after IPA passes.  */
  else
    scale_ipa_profile_for_fn (dst, orig_count);
  src->decl = oldsrcdecl;
}
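
/* A minimal usage sketch (hypothetical caller; roughly what symbol
   merging code such as IPA ICF or lto-symtab does when two functions
   are unified):

     ipa_merge_profiles (kept_node, duplicate_node, false);

   After the call, kept_node carries the combined profile and
   duplicate_node's body may have been released.  */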

/* Return true if a call to DEST is known to be a self-recursive call
   within FUNC.  */

bool
recursive_call_p (tree func, tree dest)
{
  struct cgraph_node *dest_node = cgraph_node::get_create (dest);
  struct cgraph_node *cnode = cgraph_node::get_create (func);
  ipa_ref *alias;
  enum availability avail;

  gcc_assert (!cnode->alias);
  if (cnode != dest_node->ultimate_alias_target (&avail))
    return false;
  if (avail >= AVAIL_AVAILABLE)
    return true;
  if (!dest_node->semantically_equivalent_p (cnode))
    return false;
  /* If there is only one way to call the function, or we know all of them
     are semantically equivalent, we can still consider the call recursive.  */
  FOR_EACH_ALIAS (cnode, alias)
    if (!dest_node->semantically_equivalent_p (alias->referring))
      return false;
  return true;
}
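
/* A minimal usage sketch (hypothetical, assuming a gcall *CALL seen while
   scanning the body of FNDECL):

     tree callee = gimple_call_fndecl (call);
     if (callee && recursive_call_p (fndecl, callee))
       // ... treat the call as self-recursion ...
*/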

/* Return true if STMT may terminate execution of the function.
   If ASSUME_RETURN_OR_EH is true, we can further assume that the function
   ends either by a return statement or by EH (no trapping or infinite
   loops).  */

bool
stmt_may_terminate_function_p (function *fun, gimple *stmt,
			       bool assume_return_or_eh)
{
  if (stmt_can_throw_external (fun, stmt))
    return true;
  if (assume_return_or_eh)
    return false;
  gasm *astmt = dyn_cast <gasm *> (stmt);
  if (astmt && gimple_asm_volatile_p (astmt))
    return true;
  if (gimple_could_trap_p (stmt))
    return true;
  if (gcall *call = dyn_cast <gcall *> (stmt))
    {
      int flags = gimple_call_flags (call);
      if ((flags & (ECF_PURE | ECF_CONST))
	  && !(flags & ECF_LOOPING_CONST_OR_PURE))
	return false;
      modref_summary *s = get_modref_function_summary (call, NULL);
      if (s && !s->side_effects)
	return false;
      return true;
    }
  return false;
}
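
/* For example (illustrative): with ASSUME_RETURN_OR_EH false, a volatile
   asm, a possibly-trapping division, or a call with unknown side effects
   all count as possibly terminating, while a const call that is known not
   to loop does not.  */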

/* Return a bitmap of all basic blocks whose first statements are known to
   execute on every invocation of the function.

   If ASSUME_RETURN_OR_EH is true, we can further assume that the function
   ends either by a return statement or by EH (no trapping or infinite
   loops).  This is useful when summarizing a function in passes like
   ipa-modref.

   Setting ASSUME_RETURN_OR_EH to false is used to prove that a given
   statement will be executed even if the function gets into an infinite
   loop or traps.  */
bitmap
find_always_executed_bbs (function *fun, bool assume_return_or_eh)
{
  auto_vec<basic_block, 20> stack;
  auto_vec<basic_block, 20> terminating_bbs;
  hash_set<basic_block> visited;
  hash_set<basic_block> terminating_bbs_set;
  edge e;
  edge_iterator ei;
  int flags = flags_from_decl_or_type (fun->decl);
  /* Pure and const functions always return.  */
  assume_return_or_eh |= ((flags & (ECF_CONST | ECF_PURE))
			  && !(flags & ECF_LOOPING_CONST_OR_PURE));
  if (!assume_return_or_eh)
    mark_dfs_back_edges (fun);

  /* First walk all BBs reachable from the entry, stopping on statements
     that may terminate execution.  Everything past such a statement is not
     going to be executed on each invocation.  */
  stack.safe_push (ENTRY_BLOCK_PTR_FOR_FN (fun));
  while (!stack.is_empty ())
    {
      basic_block bb = stack.pop ();
      bool found = false, found_exit = false;
      if (bb->index == EXIT_BLOCK)
	continue;
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  if (e->dest == EXIT_BLOCK_PTR_FOR_FN (fun))
	    {
	      found_exit = true;
	      break;
	    }
	  /* Watch for infinite loops.  */
	  if (!found
	      && !assume_return_or_eh && (e->flags & EDGE_DFS_BACK))
	    {
	      if (!dom_info_available_p (CDI_DOMINATORS))
		calculate_dominance_info (CDI_DOMINATORS);
	      /* If this is not a loop latch edge it is an irreducible region.
		 Assume that it is infinite.
		 TODO: with C++ forced progression we can still walk the
		 irreducible region and see if it contains any side effects.
		 Similarly for loops.  -ffinite-loops does not really imply
		 this since we allow inlining across the -ffinite-loops
		 boundary and thus it can be used only as a loop flag.  */
	      if (e->dest->loop_father->header != e->dest
		  || !dominated_by_p (CDI_DOMINATORS, bb, e->dest))
		found = true;
	      else if (!finite_loop_p (e->dest->loop_father))
		found = true;
	    }
	}
      if (!assume_return_or_eh
	  && (EDGE_COUNT (bb->succs) == 0 || (bb->flags & BB_IRREDUCIBLE_LOOP)))
	found = true;
      for (gimple_stmt_iterator si = gsi_start_nondebug_after_labels_bb (bb);
	   !gsi_end_p (si) && !found; gsi_next_nondebug (&si))
	if (stmt_may_terminate_function_p (fun, gsi_stmt (si),
					   assume_return_or_eh))
	  {
	    found = true;
	    break;
	  }
      if (found)
	{
	  visited.add (EXIT_BLOCK_PTR_FOR_FN (fun));
	  if (!found_exit)
	    {
	      terminating_bbs.safe_push (bb);
	      terminating_bbs_set.add (bb);
	    }
	}
      else
	FOR_EACH_EDGE (e, ei, bb->succs)
	  if (!visited.add (e->dest))
	    stack.safe_push (e->dest);
    }

  /* Next walk from the exit block and find all articulation points in the
     CFG.  Add all terminating basic blocks as "fake" predecessors of the
     exit block.  */

  bitmap ret = BITMAP_ALLOC (NULL);
  /* A degenerate case where there is no path to the exit.  */
  if (!visited.contains (EXIT_BLOCK_PTR_FOR_FN (fun)))
    {
      bitmap_set_bit (ret,
		      single_succ_edge
			(ENTRY_BLOCK_PTR_FOR_FN (fun))->dest->index);
      return ret;
    }

  struct astate
  {
    unsigned int dfs_preorder;
    unsigned int dfs_postorder;

    unsigned int low, high;
  };

  struct worklist
  {
    basic_block bb;
    astate *cstate;
  };

  struct obstack state_obstack;
  gcc_obstack_init (&state_obstack);
  hash_map<basic_block, astate *> state;
  auto_vec<worklist, 32> worklist_vec;
  unsigned int next_dfs_num = 1;

  /* Always executed blocks are blocks that are on every path from the entry
     to the exit.  We proceed in two steps.  First we do a backward DFS walk
     (so we know that the entry is always reached) and record preorder and
     postorder visiting times.

     In the second step we proceed in postorder and for every block A we
     compute the minimal preorder (A.low) and maximal postorder (A.high) of
     blocks reachable from the BBs in the DFS subtree of A.  If A is always
     executed, there are no edges out of this subtree.  This can be tested
     by checking that A.low == A.preorder and A.high == A.postorder.

     This is the first step.  Do the backward DFS walk and record preorder,
     postorder and predecessor info.  Initialize the stack in postorder.  */
  worklist we = {EXIT_BLOCK_PTR_FOR_FN (fun), NULL};
  worklist_vec.safe_push (we);
  while (!worklist_vec.is_empty ())
    {
      worklist &w = worklist_vec.last ();
      basic_block bb = w.bb;
      astate *cstate = w.cstate;

      if (!cstate)
	{
	  astate **slot = &state.get_or_insert (bb);

	  cstate = *slot;
	  /* Already processed by DFS?  */
	  if (cstate)
	    {
	      worklist_vec.pop ();
	      continue;
	    }
	  /* DFS is visiting BB for the first time.  */
	  *slot = cstate = XOBNEW (&state_obstack, struct astate);
	  cstate->low = cstate->high = cstate->dfs_preorder = next_dfs_num++;
	  w.cstate = cstate;
	  /* The exit block is special; process all fake edges we
	     identified.  */
	  if (bb == EXIT_BLOCK_PTR_FOR_FN (fun))
	    for (basic_block bb2 : terminating_bbs)
	      {
		worklist we = {bb2, NULL};
		worklist_vec.safe_push (we);
	      }
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (visited.contains (e->src))
	      {
		worklist we = {e->src, NULL};
		worklist_vec.safe_push (we);
	      }
	  /* Keep BB on the worklist so we process it one more time.  */
	  continue;
	}
      /* We are finished with processing reachable BBs; see if we have an
	 articulation.  */
      worklist_vec.pop ();
      cstate->high = cstate->dfs_postorder = next_dfs_num++;
      stack.safe_push (bb);
    }
  /* This is the final postorder walk.  Determine the low and high values
     and mark always executed blocks.  */
  for (basic_block bb : stack)
    {
      astate *cstate = *state.get (bb);
      FOR_EACH_EDGE (e, ei, bb->preds)
	{
	  astate **cstate2 = state.get (e->src);
	  /* We skip walking the part of the CFG reached only after the first
	     edge to the exit.  No BB reachable from the skipped part is
	     always executed.  */
	  if (!cstate2)
	    {
	      if (e->src != ENTRY_BLOCK_PTR_FOR_FN (fun))
		cstate->low = 0;
	      continue;
	    }
	  cstate->low = MIN (cstate->low, (*cstate2)->low);
	  cstate->high = MAX (cstate->high, (*cstate2)->high);
	}
      if (dump_file && (dump_flags & TDF_DETAILS)
	  && bb != EXIT_BLOCK_PTR_FOR_FN (fun))
	fprintf (dump_file, "BB %i %s preorder %i postorder %i low %i high %i\n",
		 bb->index,
		 terminating_bbs_set.contains (bb) ? "(terminating)" : "",
		 cstate->dfs_preorder, cstate->dfs_postorder,
		 cstate->low, cstate->high);
      if (cstate->low == cstate->dfs_preorder
	  && cstate->high == cstate->dfs_postorder
	  && bb != EXIT_BLOCK_PTR_FOR_FN (fun))
	bitmap_set_bit (ret, bb->index);
      if (terminating_bbs_set.contains (bb))
	cstate->low = 0;
      else
	FOR_EACH_EDGE (e, ei, bb->succs)
	  {
	    astate **cstate2 = state.get (e->dest);
	    if (!cstate2)
	      continue;
	    cstate->low = MIN (cstate->low, (*cstate2)->low);
	    cstate->high = MAX (cstate->high, (*cstate2)->high);
	  }
    }
  obstack_free (&state_obstack, NULL);
  if (dump_file)
    {
      fprintf (dump_file, "Always executed bbs %s: ",
	       assume_return_or_eh ? "(assuming return or EH)" : "");
      bitmap_print (dump_file, ret, "", "\n");
    }

  return ret;
}
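
/* A minimal usage sketch (hypothetical analysis code; FUN and the loop
   body are illustrative):

     bitmap always = find_always_executed_bbs (fun, true);
     basic_block bb;
     FOR_EACH_BB_FN (bb, fun)
       if (bitmap_bit_p (always, bb->index))
	 // ... statements here run on every invocation of fun ...
     BITMAP_FREE (always);
*/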