1/* Function summary pass.
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 3, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
20
21/* Analysis of function bodies used by inter-procedural passes
22
23 We estimate for each function
24 - function body size and size after specializing into given context
25 - average function execution time in a given context
26 - function frame size
27 For each call
28 - call statement size, time and how often the parameters change
29
30 ipa_fn_summary data structures store above information locally (i.e.
31 parameters of the function itself) and globally (i.e. parameters of
32 the function created by applying all the inline decisions already
33 present in the callgraph).
34
35 We provide access to the ipa_fn_summary data structure and
36 basic logic updating the parameters when inlining is performed.
37
38 The summaries are context sensitive. Context means
39 1) partial assignment of known constant values of operands
40 2) whether function is inlined into the call or not.
41 It is easy to add more variants. To represent function size and time
42 that depends on context (i.e. it is known to be optimized away when
43 context is known either by inlining or from IP-CP and cloning),
44 we use predicates.
45
46 estimate_edge_size_and_time can be used to query
47 function size/time in the given context. ipa_merge_fn_summary_after_inlining merges
48 properties of caller and callee after inlining.
49
50 Finally pass_inline_parameters is exported. This is used to drive
51 computation of function parameters used by the early inliner. IPA
52 inlined performs analysis via its analyze_function method. */
53
54#include "config.h"
55#include "system.h"
56#include "coretypes.h"
57#include "backend.h"
58#include "tree.h"
59#include "gimple.h"
60#include "alloc-pool.h"
61#include "tree-pass.h"
62#include "ssa.h"
63#include "tree-streamer.h"
64#include "cgraph.h"
65#include "diagnostic.h"
66#include "fold-const.h"
67#include "print-tree.h"
68#include "tree-inline.h"
69#include "gimple-pretty-print.h"
70#include "params.h"
71#include "cfganal.h"
72#include "gimple-iterator.h"
73#include "tree-cfg.h"
74#include "tree-ssa-loop-niter.h"
75#include "tree-ssa-loop.h"
76#include "symbol-summary.h"
77#include "ipa-prop.h"
78#include "ipa-fnsummary.h"
79#include "cfgloop.h"
80#include "tree-scalar-evolution.h"
81#include "ipa-utils.h"
82#include "cfgexpand.h"
83#include "gimplify.h"
84#include "stringpool.h"
85#include "attribs.h"
86
87/* Summaries. */
88function_summary <ipa_fn_summary *> *ipa_fn_summaries;
89call_summary <ipa_call_summary *> *ipa_call_summaries;
90
91/* Edge predicates goes here. */
92static object_allocator<predicate> edge_predicate_pool ("edge predicates");
93
94
95/* Dump IPA hints. */
96void
97ipa_dump_hints (FILE *f, ipa_hints hints)
98{
99 if (!hints)
100 return;
101 fprintf (f, "IPA hints:");
102 if (hints & INLINE_HINT_indirect_call)
103 {
104 hints &= ~INLINE_HINT_indirect_call;
105 fprintf (f, " indirect_call");
106 }
107 if (hints & INLINE_HINT_loop_iterations)
108 {
109 hints &= ~INLINE_HINT_loop_iterations;
110 fprintf (f, " loop_iterations");
111 }
112 if (hints & INLINE_HINT_loop_stride)
113 {
114 hints &= ~INLINE_HINT_loop_stride;
115 fprintf (f, " loop_stride");
116 }
117 if (hints & INLINE_HINT_same_scc)
118 {
119 hints &= ~INLINE_HINT_same_scc;
120 fprintf (f, " same_scc");
121 }
122 if (hints & INLINE_HINT_in_scc)
123 {
124 hints &= ~INLINE_HINT_in_scc;
125 fprintf (f, " in_scc");
126 }
127 if (hints & INLINE_HINT_cross_module)
128 {
129 hints &= ~INLINE_HINT_cross_module;
130 fprintf (f, " cross_module");
131 }
132 if (hints & INLINE_HINT_declared_inline)
133 {
134 hints &= ~INLINE_HINT_declared_inline;
135 fprintf (f, " declared_inline");
136 }
137 if (hints & INLINE_HINT_array_index)
138 {
139 hints &= ~INLINE_HINT_array_index;
140 fprintf (f, " array_index");
141 }
142 if (hints & INLINE_HINT_known_hot)
143 {
144 hints &= ~INLINE_HINT_known_hot;
145 fprintf (f, " known_hot");
146 }
147 gcc_assert (!hints);
148}
149
150
151/* Record SIZE and TIME to SUMMARY.
152 The accounted code will be executed when EXEC_PRED is true.
153 When NONCONST_PRED is false the code will evaulate to constant and
154 will get optimized out in specialized clones of the function. */
155
156void
157ipa_fn_summary::account_size_time (int size, sreal time,
158 const predicate &exec_pred,
159 const predicate &nonconst_pred_in)
160{
161 size_time_entry *e;
162 bool found = false;
163 int i;
164 predicate nonconst_pred;
165
166 if (exec_pred == false)
167 return;
168
169 nonconst_pred = nonconst_pred_in & exec_pred;
170
171 if (nonconst_pred == false)
172 return;
173
174 /* We need to create initial empty unconitional clause, but otherwie
175 we don't need to account empty times and sizes. */
176 if (!size && time == 0 && size_time_table)
177 return;
178
179 gcc_assert (time >= 0);
180
181 for (i = 0; vec_safe_iterate (size_time_table, i, &e); i++)
182 if (e->exec_predicate == exec_pred
183 && e->nonconst_predicate == nonconst_pred)
184 {
185 found = true;
186 break;
187 }
188 if (i == 256)
189 {
190 i = 0;
191 found = true;
192 e = &(*size_time_table)[0];
193 if (dump_file && (dump_flags & TDF_DETAILS))
194 fprintf (dump_file,
195 "\t\tReached limit on number of entries, "
196 "ignoring the predicate.");
197 }
198 if (dump_file && (dump_flags & TDF_DETAILS) && (time != 0 || size))
199 {
200 fprintf (dump_file,
201 "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
202 ((double) size) / ipa_fn_summary::size_scale,
203 (time.to_double ()), found ? "" : "new ");
204 exec_pred.dump (dump_file, conds, 0);
205 if (exec_pred != nonconst_pred)
206 {
207 fprintf (dump_file, " nonconst:");
208 nonconst_pred.dump (dump_file, conds);
209 }
210 else
211 fprintf (dump_file, "\n");
212 }
213 if (!found)
214 {
215 struct size_time_entry new_entry;
216 new_entry.size = size;
217 new_entry.time = time;
218 new_entry.exec_predicate = exec_pred;
219 new_entry.nonconst_predicate = nonconst_pred;
220 vec_safe_push (size_time_table, new_entry);
221 }
222 else
223 {
224 e->size += size;
225 e->time += time;
226 }
227}
228
229/* We proved E to be unreachable, redirect it to __bultin_unreachable. */
230
231static struct cgraph_edge *
232redirect_to_unreachable (struct cgraph_edge *e)
233{
234 struct cgraph_node *callee = !e->inline_failed ? e->callee : NULL;
235 struct cgraph_node *target = cgraph_node::get_create
236 (builtin_decl_implicit (BUILT_IN_UNREACHABLE));
237
238 if (e->speculative)
239 e = e->resolve_speculation (target->decl);
240 else if (!e->callee)
241 e->make_direct (target);
242 else
243 e->redirect_callee (target);
244 struct ipa_call_summary *es = ipa_call_summaries->get (e);
245 e->inline_failed = CIF_UNREACHABLE;
246 e->count = profile_count::zero ();
247 es->call_stmt_size = 0;
248 es->call_stmt_time = 0;
249 if (callee)
250 callee->remove_symbol_and_inline_clones ();
251 return e;
252}
253
254/* Set predicate for edge E. */
255
256static void
257edge_set_predicate (struct cgraph_edge *e, predicate *predicate)
258{
259 /* If the edge is determined to be never executed, redirect it
260 to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will
261 be optimized out. */
262 if (predicate && *predicate == false
263 /* When handling speculative edges, we need to do the redirection
264 just once. Do it always on the direct edge, so we do not
265 attempt to resolve speculation while duplicating the edge. */
266 && (!e->speculative || e->callee))
267 e = redirect_to_unreachable (e);
268
269 struct ipa_call_summary *es = ipa_call_summaries->get (e);
270 if (predicate && *predicate != true)
271 {
272 if (!es->predicate)
273 es->predicate = edge_predicate_pool.allocate ();
274 *es->predicate = *predicate;
275 }
276 else
277 {
278 if (es->predicate)
279 edge_predicate_pool.remove (es->predicate);
280 es->predicate = NULL;
281 }
282}
283
284/* Set predicate for hint *P. */
285
286static void
287set_hint_predicate (predicate **p, predicate new_predicate)
288{
289 if (new_predicate == false || new_predicate == true)
290 {
291 if (*p)
292 edge_predicate_pool.remove (*p);
293 *p = NULL;
294 }
295 else
296 {
297 if (!*p)
298 *p = edge_predicate_pool.allocate ();
299 **p = new_predicate;
300 }
301}
302
303
304/* Compute what conditions may or may not hold given invormation about
305 parameters. RET_CLAUSE returns truths that may hold in a specialized copy,
306 whie RET_NONSPEC_CLAUSE returns truths that may hold in an nonspecialized
307 copy when called in a given context. It is a bitmask of conditions. Bit
308 0 means that condition is known to be false, while bit 1 means that condition
309 may or may not be true. These differs - for example NOT_INLINED condition
310 is always false in the second and also builtin_constant_p tests can not use
311 the fact that parameter is indeed a constant.
312
313 KNOWN_VALS is partial mapping of parameters of NODE to constant values.
314 KNOWN_AGGS is a vector of aggreggate jump functions for each parameter.
315 Return clause of possible truths. When INLINE_P is true, assume that we are
316 inlining.
317
318 ERROR_MARK means compile time invariant. */
319
320static void
321evaluate_conditions_for_known_args (struct cgraph_node *node,
322 bool inline_p,
323 vec<tree> known_vals,
324 vec<ipa_agg_jump_function_p>
325 known_aggs,
326 clause_t *ret_clause,
327 clause_t *ret_nonspec_clause)
328{
329 clause_t clause = inline_p ? 0 : 1 << predicate::not_inlined_condition;
330 clause_t nonspec_clause = 1 << predicate::not_inlined_condition;
331 struct ipa_fn_summary *info = ipa_fn_summaries->get (node);
332 int i;
333 struct condition *c;
334
335 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
336 {
337 tree val;
338 tree res;
339
340 /* We allow call stmt to have fewer arguments than the callee function
341 (especially for K&R style programs). So bound check here (we assume
342 known_aggs vector, if non-NULL, has the same length as
343 known_vals). */
344 gcc_checking_assert (!known_aggs.exists ()
345 || (known_vals.length () == known_aggs.length ()));
346 if (c->operand_num >= (int) known_vals.length ())
347 {
348 clause |= 1 << (i + predicate::first_dynamic_condition);
349 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
350 continue;
351 }
352
353 if (c->agg_contents)
354 {
355 struct ipa_agg_jump_function *agg;
356
357 if (c->code == predicate::changed
358 && !c->by_ref
359 && (known_vals[c->operand_num] == error_mark_node))
360 continue;
361
362 if (known_aggs.exists ())
363 {
364 agg = known_aggs[c->operand_num];
365 val = ipa_find_agg_cst_for_param (agg, known_vals[c->operand_num],
366 c->offset, c->by_ref);
367 }
368 else
369 val = NULL_TREE;
370 }
371 else
372 {
373 val = known_vals[c->operand_num];
374 if (val == error_mark_node && c->code != predicate::changed)
375 val = NULL_TREE;
376 }
377
378 if (!val)
379 {
380 clause |= 1 << (i + predicate::first_dynamic_condition);
381 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
382 continue;
383 }
384 if (c->code == predicate::changed)
385 {
386 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
387 continue;
388 }
389
390 if (tree_to_shwi (TYPE_SIZE (TREE_TYPE (val))) != c->size)
391 {
392 clause |= 1 << (i + predicate::first_dynamic_condition);
393 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
394 continue;
395 }
396 if (c->code == predicate::is_not_constant)
397 {
398 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
399 continue;
400 }
401
402 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (c->val), val);
403 res = val
404 ? fold_binary_to_constant (c->code, boolean_type_node, val, c->val)
405 : NULL;
406
407 if (res && integer_zerop (res))
408 continue;
409
410 clause |= 1 << (i + predicate::first_dynamic_condition);
411 nonspec_clause |= 1 << (i + predicate::first_dynamic_condition);
412 }
413 *ret_clause = clause;
414 if (ret_nonspec_clause)
415 *ret_nonspec_clause = nonspec_clause;
416}
417
418
419/* Work out what conditions might be true at invocation of E. */
420
421void
422evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p,
423 clause_t *clause_ptr,
424 clause_t *nonspec_clause_ptr,
425 vec<tree> *known_vals_ptr,
426 vec<ipa_polymorphic_call_context>
427 *known_contexts_ptr,
428 vec<ipa_agg_jump_function_p> *known_aggs_ptr)
429{
430 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
431 struct ipa_fn_summary *info = ipa_fn_summaries->get (callee);
432 vec<tree> known_vals = vNULL;
433 vec<ipa_agg_jump_function_p> known_aggs = vNULL;
434
435 if (clause_ptr)
436 *clause_ptr = inline_p ? 0 : 1 << predicate::not_inlined_condition;
437 if (known_vals_ptr)
438 known_vals_ptr->create (0);
439 if (known_contexts_ptr)
440 known_contexts_ptr->create (0);
441
442 if (ipa_node_params_sum
443 && !e->call_stmt_cannot_inline_p
444 && ((clause_ptr && info->conds) || known_vals_ptr || known_contexts_ptr))
445 {
446 struct ipa_node_params *caller_parms_info, *callee_pi;
447 struct ipa_edge_args *args = IPA_EDGE_REF (e);
448 struct ipa_call_summary *es = ipa_call_summaries->get (e);
449 int i, count = ipa_get_cs_argument_count (args);
450
451 if (e->caller->global.inlined_to)
452 caller_parms_info = IPA_NODE_REF (e->caller->global.inlined_to);
453 else
454 caller_parms_info = IPA_NODE_REF (e->caller);
455 callee_pi = IPA_NODE_REF (e->callee);
456
457 if (count && (info->conds || known_vals_ptr))
458 known_vals.safe_grow_cleared (count);
459 if (count && (info->conds || known_aggs_ptr))
460 known_aggs.safe_grow_cleared (count);
461 if (count && known_contexts_ptr)
462 known_contexts_ptr->safe_grow_cleared (count);
463
464 for (i = 0; i < count; i++)
465 {
466 struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i);
467 tree cst = ipa_value_from_jfunc (caller_parms_info, jf,
468 ipa_get_type (callee_pi, i));
469
470 if (!cst && e->call_stmt
471 && i < (int)gimple_call_num_args (e->call_stmt))
472 {
473 cst = gimple_call_arg (e->call_stmt, i);
474 if (!is_gimple_min_invariant (cst))
475 cst = NULL;
476 }
477 if (cst)
478 {
479 gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO);
480 if (known_vals.exists ())
481 known_vals[i] = cst;
482 }
483 else if (inline_p && !es->param[i].change_prob)
484 known_vals[i] = error_mark_node;
485
486 if (known_contexts_ptr)
487 (*known_contexts_ptr)[i]
488 = ipa_context_from_jfunc (caller_parms_info, e, i, jf);
489 /* TODO: When IPA-CP starts propagating and merging aggregate jump
490 functions, use its knowledge of the caller too, just like the
491 scalar case above. */
492 known_aggs[i] = &jf->agg;
493 }
494 }
495 else if (e->call_stmt && !e->call_stmt_cannot_inline_p
496 && ((clause_ptr && info->conds) || known_vals_ptr))
497 {
498 int i, count = (int)gimple_call_num_args (e->call_stmt);
499
500 if (count && (info->conds || known_vals_ptr))
501 known_vals.safe_grow_cleared (count);
502 for (i = 0; i < count; i++)
503 {
504 tree cst = gimple_call_arg (e->call_stmt, i);
505 if (!is_gimple_min_invariant (cst))
506 cst = NULL;
507 if (cst)
508 known_vals[i] = cst;
509 }
510 }
511
512 evaluate_conditions_for_known_args (callee, inline_p,
513 known_vals, known_aggs, clause_ptr,
514 nonspec_clause_ptr);
515
516 if (known_vals_ptr)
517 *known_vals_ptr = known_vals;
518 else
519 known_vals.release ();
520
521 if (known_aggs_ptr)
522 *known_aggs_ptr = known_aggs;
523 else
524 known_aggs.release ();
525}
526
527
528/* Allocate the function summary. */
529
530static void
531ipa_fn_summary_alloc (void)
532{
533 gcc_checking_assert (!ipa_fn_summaries);
534 ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab);
535 ipa_call_summaries = new ipa_call_summary_t (symtab, false);
536}
537
538/* We are called multiple time for given function; clear
539 data from previous run so they are not cumulated. */
540
541void
542ipa_call_summary::reset ()
543{
544 call_stmt_size = call_stmt_time = 0;
545 is_return_callee_uncaptured = false;
546 if (predicate)
547 edge_predicate_pool.remove (predicate);
548 predicate = NULL;
549 param.release ();
550}
551
552/* We are called multiple time for given function; clear
553 data from previous run so they are not cumulated. */
554
555void
556ipa_fn_summary::reset (struct cgraph_node *node)
557{
558 struct cgraph_edge *e;
559
560 self_size = 0;
561 estimated_stack_size = 0;
562 estimated_self_stack_size = 0;
563 stack_frame_offset = 0;
564 size = 0;
565 time = 0;
566 growth = 0;
567 scc_no = 0;
568 if (loop_iterations)
569 {
570 edge_predicate_pool.remove (loop_iterations);
571 loop_iterations = NULL;
572 }
573 if (loop_stride)
574 {
575 edge_predicate_pool.remove (loop_stride);
576 loop_stride = NULL;
577 }
578 if (array_index)
579 {
580 edge_predicate_pool.remove (array_index);
581 array_index = NULL;
582 }
583 vec_free (conds);
584 vec_free (size_time_table);
585 for (e = node->callees; e; e = e->next_callee)
586 ipa_call_summaries->get (e)->reset ();
587 for (e = node->indirect_calls; e; e = e->next_callee)
588 ipa_call_summaries->get (e)->reset ();
589 fp_expressions = false;
590}
591
592/* Hook that is called by cgraph.c when a node is removed. */
593
594void
595ipa_fn_summary_t::remove (cgraph_node *node, ipa_fn_summary *info)
596{
597 info->reset (node);
598}
599
600/* Same as remap_predicate_after_duplication but handle hint predicate *P.
601 Additionally care about allocating new memory slot for updated predicate
602 and set it to NULL when it becomes true or false (and thus uninteresting).
603 */
604
605static void
606remap_hint_predicate_after_duplication (predicate **p,
607 clause_t possible_truths)
608{
609 predicate new_predicate;
610
611 if (!*p)
612 return;
613
614 new_predicate = (*p)->remap_after_duplication (possible_truths);
615 /* We do not want to free previous predicate; it is used by node origin. */
616 *p = NULL;
617 set_hint_predicate (p, new_predicate);
618}
619
620
621/* Hook that is called by cgraph.c when a node is duplicated. */
622void
623ipa_fn_summary_t::duplicate (cgraph_node *src,
624 cgraph_node *dst,
625 ipa_fn_summary *,
626 ipa_fn_summary *info)
627{
628 memcpy (info, ipa_fn_summaries->get (src), sizeof (ipa_fn_summary));
629 /* TODO: as an optimization, we may avoid copying conditions
630 that are known to be false or true. */
631 info->conds = vec_safe_copy (info->conds);
632
633 /* When there are any replacements in the function body, see if we can figure
634 out that something was optimized out. */
635 if (ipa_node_params_sum && dst->clone.tree_map)
636 {
637 vec<size_time_entry, va_gc> *entry = info->size_time_table;
638 /* Use SRC parm info since it may not be copied yet. */
639 struct ipa_node_params *parms_info = IPA_NODE_REF (src);
640 vec<tree> known_vals = vNULL;
641 int count = ipa_get_param_count (parms_info);
642 int i, j;
643 clause_t possible_truths;
644 predicate true_pred = true;
645 size_time_entry *e;
646 int optimized_out_size = 0;
647 bool inlined_to_p = false;
648 struct cgraph_edge *edge, *next;
649
650 info->size_time_table = 0;
651 known_vals.safe_grow_cleared (count);
652 for (i = 0; i < count; i++)
653 {
654 struct ipa_replace_map *r;
655
656 for (j = 0; vec_safe_iterate (dst->clone.tree_map, j, &r); j++)
657 {
658 if (((!r->old_tree && r->parm_num == i)
659 || (r->old_tree && r->old_tree == ipa_get_param (parms_info, i)))
660 && r->replace_p && !r->ref_p)
661 {
662 known_vals[i] = r->new_tree;
663 break;
664 }
665 }
666 }
667 evaluate_conditions_for_known_args (dst, false,
668 known_vals,
669 vNULL,
670 &possible_truths,
671 /* We are going to specialize,
672 so ignore nonspec truths. */
673 NULL);
674 known_vals.release ();
675
676 info->account_size_time (0, 0, true_pred, true_pred);
677
678 /* Remap size_time vectors.
679 Simplify the predicate by prunning out alternatives that are known
680 to be false.
681 TODO: as on optimization, we can also eliminate conditions known
682 to be true. */
683 for (i = 0; vec_safe_iterate (entry, i, &e); i++)
684 {
685 predicate new_exec_pred;
686 predicate new_nonconst_pred;
687 new_exec_pred = e->exec_predicate.remap_after_duplication
688 (possible_truths);
689 new_nonconst_pred = e->nonconst_predicate.remap_after_duplication
690 (possible_truths);
691 if (new_exec_pred == false || new_nonconst_pred == false)
692 optimized_out_size += e->size;
693 else
694 info->account_size_time (e->size, e->time, new_exec_pred,
695 new_nonconst_pred);
696 }
697
698 /* Remap edge predicates with the same simplification as above.
699 Also copy constantness arrays. */
700 for (edge = dst->callees; edge; edge = next)
701 {
702 predicate new_predicate;
703 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
704 next = edge->next_callee;
705
706 if (!edge->inline_failed)
707 inlined_to_p = true;
708 if (!es->predicate)
709 continue;
710 new_predicate = es->predicate->remap_after_duplication
711 (possible_truths);
712 if (new_predicate == false && *es->predicate != false)
713 optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
714 edge_set_predicate (edge, &new_predicate);
715 }
716
717 /* Remap indirect edge predicates with the same simplificaiton as above.
718 Also copy constantness arrays. */
719 for (edge = dst->indirect_calls; edge; edge = next)
720 {
721 predicate new_predicate;
722 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
723 next = edge->next_callee;
724
725 gcc_checking_assert (edge->inline_failed);
726 if (!es->predicate)
727 continue;
728 new_predicate = es->predicate->remap_after_duplication
729 (possible_truths);
730 if (new_predicate == false && *es->predicate != false)
731 optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
732 edge_set_predicate (edge, &new_predicate);
733 }
734 remap_hint_predicate_after_duplication (&info->loop_iterations,
735 possible_truths);
736 remap_hint_predicate_after_duplication (&info->loop_stride,
737 possible_truths);
738 remap_hint_predicate_after_duplication (&info->array_index,
739 possible_truths);
740
741 /* If inliner or someone after inliner will ever start producing
742 non-trivial clones, we will get trouble with lack of information
743 about updating self sizes, because size vectors already contains
744 sizes of the calees. */
745 gcc_assert (!inlined_to_p || !optimized_out_size);
746 }
747 else
748 {
749 info->size_time_table = vec_safe_copy (info->size_time_table);
750 if (info->loop_iterations)
751 {
752 predicate p = *info->loop_iterations;
753 info->loop_iterations = NULL;
754 set_hint_predicate (&info->loop_iterations, p);
755 }
756 if (info->loop_stride)
757 {
758 predicate p = *info->loop_stride;
759 info->loop_stride = NULL;
760 set_hint_predicate (&info->loop_stride, p);
761 }
762 if (info->array_index)
763 {
764 predicate p = *info->array_index;
765 info->array_index = NULL;
766 set_hint_predicate (&info->array_index, p);
767 }
768 }
769 if (!dst->global.inlined_to)
770 ipa_update_overall_fn_summary (dst);
771}
772
773
774/* Hook that is called by cgraph.c when a node is duplicated. */
775
776void
777ipa_call_summary_t::duplicate (struct cgraph_edge *src,
778 struct cgraph_edge *dst,
779 struct ipa_call_summary *srcinfo,
780 struct ipa_call_summary *info)
781{
782 *info = *srcinfo;
783 info->predicate = NULL;
784 edge_set_predicate (dst, srcinfo->predicate);
785 info->param = srcinfo->param.copy ();
786 if (!dst->indirect_unknown_callee && src->indirect_unknown_callee)
787 {
788 info->call_stmt_size -= (eni_size_weights.indirect_call_cost
789 - eni_size_weights.call_cost);
790 info->call_stmt_time -= (eni_time_weights.indirect_call_cost
791 - eni_time_weights.call_cost);
792 }
793}
794
795
796/* Keep edge cache consistent across edge removal. */
797
798void
799ipa_call_summary_t::remove (struct cgraph_edge *,
800 struct ipa_call_summary *sum)
801{
802 sum->reset ();
803}
804
805
806/* Dump edge summaries associated to NODE and recursively to all clones.
807 Indent by INDENT. */
808
809static void
810dump_ipa_call_summary (FILE *f, int indent, struct cgraph_node *node,
811 struct ipa_fn_summary *info)
812{
813 struct cgraph_edge *edge;
814 for (edge = node->callees; edge; edge = edge->next_callee)
815 {
816 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
817 struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
818 int i;
819
820 fprintf (f,
821 "%*s%s/%i %s\n%*s loop depth:%2i freq:%4.2f size:%2i"
822 " time: %2i callee size:%2i stack:%2i",
823 indent, "", callee->name (), callee->order,
824 !edge->inline_failed
825 ? "inlined" : cgraph_inline_failed_string (edge-> inline_failed),
826 indent, "", es->loop_depth, edge->sreal_frequency ().to_double (),
827 es->call_stmt_size, es->call_stmt_time,
828 (int) ipa_fn_summaries->get (callee)->size / ipa_fn_summary::size_scale,
829 (int) ipa_fn_summaries->get (callee)->estimated_stack_size);
830
831 if (es->predicate)
832 {
833 fprintf (f, " predicate: ");
834 es->predicate->dump (f, info->conds);
835 }
836 else
837 fprintf (f, "\n");
838 if (es->param.exists ())
839 for (i = 0; i < (int) es->param.length (); i++)
840 {
841 int prob = es->param[i].change_prob;
842
843 if (!prob)
844 fprintf (f, "%*s op%i is compile time invariant\n",
845 indent + 2, "", i);
846 else if (prob != REG_BR_PROB_BASE)
847 fprintf (f, "%*s op%i change %f%% of time\n", indent + 2, "", i,
848 prob * 100.0 / REG_BR_PROB_BASE);
849 }
850 if (!edge->inline_failed)
851 {
852 fprintf (f, "%*sStack frame offset %i, callee self size %i,"
853 " callee size %i\n",
854 indent + 2, "",
855 (int) ipa_fn_summaries->get (callee)->stack_frame_offset,
856 (int) ipa_fn_summaries->get (callee)->estimated_self_stack_size,
857 (int) ipa_fn_summaries->get (callee)->estimated_stack_size);
858 dump_ipa_call_summary (f, indent + 2, callee, info);
859 }
860 }
861 for (edge = node->indirect_calls; edge; edge = edge->next_callee)
862 {
863 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
864 fprintf (f, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i"
865 " time: %2i",
866 indent, "",
867 es->loop_depth,
868 edge->sreal_frequency ().to_double (), es->call_stmt_size,
869 es->call_stmt_time);
870 if (es->predicate)
871 {
872 fprintf (f, "predicate: ");
873 es->predicate->dump (f, info->conds);
874 }
875 else
876 fprintf (f, "\n");
877 }
878}
879
880
881void
882ipa_dump_fn_summary (FILE *f, struct cgraph_node *node)
883{
884 if (node->definition)
885 {
886 struct ipa_fn_summary *s = ipa_fn_summaries->get (node);
887 size_time_entry *e;
888 int i;
889 fprintf (f, "IPA function summary for %s/%i", node->name (),
890 node->order);
891 if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
892 fprintf (f, " always_inline");
893 if (s->inlinable)
894 fprintf (f, " inlinable");
895 if (s->fp_expressions)
896 fprintf (f, " fp_expression");
897 fprintf (f, "\n global time: %f\n", s->time.to_double ());
898 fprintf (f, " self size: %i\n", s->self_size);
899 fprintf (f, " global size: %i\n", s->size);
900 fprintf (f, " min size: %i\n", s->min_size);
901 fprintf (f, " self stack: %i\n",
902 (int) s->estimated_self_stack_size);
903 fprintf (f, " global stack: %i\n", (int) s->estimated_stack_size);
904 if (s->growth)
905 fprintf (f, " estimated growth:%i\n", (int) s->growth);
906 if (s->scc_no)
907 fprintf (f, " In SCC: %i\n", (int) s->scc_no);
908 for (i = 0; vec_safe_iterate (s->size_time_table, i, &e); i++)
909 {
910 fprintf (f, " size:%f, time:%f",
911 (double) e->size / ipa_fn_summary::size_scale,
912 e->time.to_double ());
913 if (e->exec_predicate != true)
914 {
915 fprintf (f, ", executed if:");
916 e->exec_predicate.dump (f, s->conds, 0);
917 }
918 if (e->exec_predicate != e->nonconst_predicate)
919 {
920 fprintf (f, ", nonconst if:");
921 e->nonconst_predicate.dump (f, s->conds, 0);
922 }
923 fprintf (f, "\n");
924 }
925 if (s->loop_iterations)
926 {
927 fprintf (f, " loop iterations:");
928 s->loop_iterations->dump (f, s->conds);
929 }
930 if (s->loop_stride)
931 {
932 fprintf (f, " loop stride:");
933 s->loop_stride->dump (f, s->conds);
934 }
935 if (s->array_index)
936 {
937 fprintf (f, " array index:");
938 s->array_index->dump (f, s->conds);
939 }
940 fprintf (f, " calls:\n");
941 dump_ipa_call_summary (f, 4, node, s);
942 fprintf (f, "\n");
943 }
944}
945
946DEBUG_FUNCTION void
947ipa_debug_fn_summary (struct cgraph_node *node)
948{
949 ipa_dump_fn_summary (stderr, node);
950}
951
952void
953ipa_dump_fn_summaries (FILE *f)
954{
955 struct cgraph_node *node;
956
957 FOR_EACH_DEFINED_FUNCTION (node)
958 if (!node->global.inlined_to)
959 ipa_dump_fn_summary (f, node);
960}
961
962/* Callback of walk_aliased_vdefs. Flags that it has been invoked to the
963 boolean variable pointed to by DATA. */
964
965static bool
966mark_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef ATTRIBUTE_UNUSED,
967 void *data)
968{
969 bool *b = (bool *) data;
970 *b = true;
971 return true;
972}
973
974/* If OP refers to value of function parameter, return the corresponding
975 parameter. If non-NULL, the size of the memory load (or the SSA_NAME of the
976 PARM_DECL) will be stored to *SIZE_P in that case too. */
977
978static tree
979unmodified_parm_1 (gimple *stmt, tree op, HOST_WIDE_INT *size_p)
980{
981 /* SSA_NAME referring to parm default def? */
982 if (TREE_CODE (op) == SSA_NAME
983 && SSA_NAME_IS_DEFAULT_DEF (op)
984 && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL)
985 {
986 if (size_p)
987 *size_p = tree_to_shwi (TYPE_SIZE (TREE_TYPE (op)));
988 return SSA_NAME_VAR (op);
989 }
990 /* Non-SSA parm reference? */
991 if (TREE_CODE (op) == PARM_DECL)
992 {
993 bool modified = false;
994
995 ao_ref refd;
996 ao_ref_init (&refd, op);
997 walk_aliased_vdefs (&refd, gimple_vuse (stmt), mark_modified, &modified,
998 NULL);
999 if (!modified)
1000 {
1001 if (size_p)
1002 *size_p = tree_to_shwi (TYPE_SIZE (TREE_TYPE (op)));
1003 return op;
1004 }
1005 }
1006 return NULL_TREE;
1007}
1008
1009/* If OP refers to value of function parameter, return the corresponding
1010 parameter. Also traverse chains of SSA register assignments. If non-NULL,
1011 the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
1012 stored to *SIZE_P in that case too. */
1013
1014static tree
1015unmodified_parm (gimple *stmt, tree op, HOST_WIDE_INT *size_p)
1016{
1017 tree res = unmodified_parm_1 (stmt, op, size_p);
1018 if (res)
1019 return res;
1020
1021 if (TREE_CODE (op) == SSA_NAME
1022 && !SSA_NAME_IS_DEFAULT_DEF (op)
1023 && gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
1024 return unmodified_parm (SSA_NAME_DEF_STMT (op),
1025 gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op)),
1026 size_p);
1027 return NULL_TREE;
1028}
1029
1030/* If OP refers to a value of a function parameter or value loaded from an
1031 aggregate passed to a parameter (either by value or reference), return TRUE
1032 and store the number of the parameter to *INDEX_P, the access size into
1033 *SIZE_P, and information whether and how it has been loaded from an
1034 aggregate into *AGGPOS. INFO describes the function parameters, STMT is the
1035 statement in which OP is used or loaded. */
1036
1037static bool
1038unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi,
1039 gimple *stmt, tree op, int *index_p,
1040 HOST_WIDE_INT *size_p,
1041 struct agg_position_info *aggpos)
1042{
1043 tree res = unmodified_parm_1 (stmt, op, size_p);
1044
1045 gcc_checking_assert (aggpos);
1046 if (res)
1047 {
1048 *index_p = ipa_get_param_decl_index (fbi->info, res);
1049 if (*index_p < 0)
1050 return false;
1051 aggpos->agg_contents = false;
1052 aggpos->by_ref = false;
1053 return true;
1054 }
1055
1056 if (TREE_CODE (op) == SSA_NAME)
1057 {
1058 if (SSA_NAME_IS_DEFAULT_DEF (op)
1059 || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
1060 return false;
1061 stmt = SSA_NAME_DEF_STMT (op);
1062 op = gimple_assign_rhs1 (stmt);
1063 if (!REFERENCE_CLASS_P (op))
1064 return unmodified_parm_or_parm_agg_item (fbi, stmt, op, index_p, size_p,
1065 aggpos);
1066 }
1067
1068 aggpos->agg_contents = true;
1069 return ipa_load_from_parm_agg (fbi, fbi->info->descriptors,
1070 stmt, op, index_p, &aggpos->offset,
1071 size_p, &aggpos->by_ref);
1072}
1073
1074/* See if statement might disappear after inlining.
1075 0 - means not eliminated
1076 1 - half of statements goes away
1077 2 - for sure it is eliminated.
1078 We are not terribly sophisticated, basically looking for simple abstraction
1079 penalty wrappers. */
1080
1081static int
1082eliminated_by_inlining_prob (gimple *stmt)
1083{
1084 enum gimple_code code = gimple_code (stmt);
1085 enum tree_code rhs_code;
1086
1087 if (!optimize)
1088 return 0;
1089
1090 switch (code)
1091 {
1092 case GIMPLE_RETURN:
1093 return 2;
1094 case GIMPLE_ASSIGN:
1095 if (gimple_num_ops (stmt) != 2)
1096 return 0;
1097
1098 rhs_code = gimple_assign_rhs_code (stmt);
1099
1100 /* Casts of parameters, loads from parameters passed by reference
1101 and stores to return value or parameters are often free after
1102 inlining dua to SRA and further combining.
1103 Assume that half of statements goes away. */
1104 if (CONVERT_EXPR_CODE_P (rhs_code)
1105 || rhs_code == VIEW_CONVERT_EXPR
1106 || rhs_code == ADDR_EXPR
1107 || gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS)
1108 {
1109 tree rhs = gimple_assign_rhs1 (stmt);
1110 tree lhs = gimple_assign_lhs (stmt);
1111 tree inner_rhs = get_base_address (rhs);
1112 tree inner_lhs = get_base_address (lhs);
1113 bool rhs_free = false;
1114 bool lhs_free = false;
1115
1116 if (!inner_rhs)
1117 inner_rhs = rhs;
1118 if (!inner_lhs)
1119 inner_lhs = lhs;
1120
1121 /* Reads of parameter are expected to be free. */
1122 if (unmodified_parm (stmt, inner_rhs, NULL))
1123 rhs_free = true;
1124 /* Match expressions of form &this->field. Those will most likely
1125 combine with something upstream after inlining. */
1126 else if (TREE_CODE (inner_rhs) == ADDR_EXPR)
1127 {
1128 tree op = get_base_address (TREE_OPERAND (inner_rhs, 0));
1129 if (TREE_CODE (op) == PARM_DECL)
1130 rhs_free = true;
1131 else if (TREE_CODE (op) == MEM_REF
1132 && unmodified_parm (stmt, TREE_OPERAND (op, 0), NULL))
1133 rhs_free = true;
1134 }
1135
1136 /* When parameter is not SSA register because its address is taken
1137 and it is just copied into one, the statement will be completely
1138 free after inlining (we will copy propagate backward). */
1139 if (rhs_free && is_gimple_reg (lhs))
1140 return 2;
1141
1142 /* Reads of parameters passed by reference
1143 expected to be free (i.e. optimized out after inlining). */
1144 if (TREE_CODE (inner_rhs) == MEM_REF
1145 && unmodified_parm (stmt, TREE_OPERAND (inner_rhs, 0), NULL))
1146 rhs_free = true;
1147
1148 /* Copying parameter passed by reference into gimple register is
1149 probably also going to copy propagate, but we can't be quite
1150 sure. */
1151 if (rhs_free && is_gimple_reg (lhs))
1152 lhs_free = true;
1153
1154 /* Writes to parameters, parameters passed by value and return value
1155 (either dirrectly or passed via invisible reference) are free.
1156
1157 TODO: We ought to handle testcase like
1158 struct a {int a,b;};
1159 struct a
1160 retrurnsturct (void)
1161 {
1162 struct a a ={1,2};
1163 return a;
1164 }
1165
1166 This translate into:
1167
1168 retrurnsturct ()
1169 {
1170 int a$b;
1171 int a$a;
1172 struct a a;
1173 struct a D.2739;
1174
1175 <bb 2>:
1176 D.2739.a = 1;
1177 D.2739.b = 2;
1178 return D.2739;
1179
1180 }
1181 For that we either need to copy ipa-split logic detecting writes
1182 to return value. */
1183 if (TREE_CODE (inner_lhs) == PARM_DECL
1184 || TREE_CODE (inner_lhs) == RESULT_DECL
1185 || (TREE_CODE (inner_lhs) == MEM_REF
1186 && (unmodified_parm (stmt, TREE_OPERAND (inner_lhs, 0), NULL)
1187 || (TREE_CODE (TREE_OPERAND (inner_lhs, 0)) == SSA_NAME
1188 && SSA_NAME_VAR (TREE_OPERAND (inner_lhs, 0))
1189 && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
1190 (inner_lhs,
1191 0))) == RESULT_DECL))))
1192 lhs_free = true;
1193 if (lhs_free
1194 && (is_gimple_reg (rhs) || is_gimple_min_invariant (rhs)))
1195 rhs_free = true;
1196 if (lhs_free && rhs_free)
1197 return 1;
1198 }
1199 return 0;
1200 default:
1201 return 0;
1202 }
1203}
1204
1205
1206/* If BB ends by a conditional we can turn into predicates, attach corresponding
1207 predicates to the CFG edges. */
1208
1209static void
1210set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi,
1211 struct ipa_fn_summary *summary,
1212 basic_block bb)
1213{
1214 gimple *last;
1215 tree op;
1216 int index;
1217 HOST_WIDE_INT size;
1218 struct agg_position_info aggpos;
1219 enum tree_code code, inverted_code;
1220 edge e;
1221 edge_iterator ei;
1222 gimple *set_stmt;
1223 tree op2;
1224
1225 last = last_stmt (bb);
1226 if (!last || gimple_code (last) != GIMPLE_COND)
1227 return;
1228 if (!is_gimple_ip_invariant (gimple_cond_rhs (last)))
1229 return;
1230 op = gimple_cond_lhs (last);
1231 /* TODO: handle conditionals like
1232 var = op0 < 4;
1233 if (var != 0). */
1234 if (unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &size, &aggpos))
1235 {
1236 code = gimple_cond_code (last);
1237 inverted_code = invert_tree_comparison (code, HONOR_NANS (op));
1238
1239 FOR_EACH_EDGE (e, ei, bb->succs)
1240 {
1241 enum tree_code this_code = (e->flags & EDGE_TRUE_VALUE
1242 ? code : inverted_code);
1243 /* invert_tree_comparison will return ERROR_MARK on FP
1244 comparsions that are not EQ/NE instead of returning proper
1245 unordered one. Be sure it is not confused with NON_CONSTANT. */
1246 if (this_code != ERROR_MARK)
1247 {
1248 predicate p
1249 = add_condition (summary, index, size, &aggpos, this_code,
1250 unshare_expr_without_location
1251 (gimple_cond_rhs (last)));
1252 e->aux = edge_predicate_pool.allocate ();
1253 *(predicate *) e->aux = p;
1254 }
1255 }
1256 }
1257
1258 if (TREE_CODE (op) != SSA_NAME)
1259 return;
1260 /* Special case
1261 if (builtin_constant_p (op))
1262 constant_code
1263 else
1264 nonconstant_code.
1265 Here we can predicate nonconstant_code. We can't
1266 really handle constant_code since we have no predicate
1267 for this and also the constant code is not known to be
1268 optimized away when inliner doen't see operand is constant.
1269 Other optimizers might think otherwise. */
1270 if (gimple_cond_code (last) != NE_EXPR
1271 || !integer_zerop (gimple_cond_rhs (last)))
1272 return;
1273 set_stmt = SSA_NAME_DEF_STMT (op);
1274 if (!gimple_call_builtin_p (set_stmt, BUILT_IN_CONSTANT_P)
1275 || gimple_call_num_args (set_stmt) != 1)
1276 return;
1277 op2 = gimple_call_arg (set_stmt, 0);
1278 if (!unmodified_parm_or_parm_agg_item (fbi, set_stmt, op2, &index, &size,
1279 &aggpos))
1280 return;
1281 FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE)
1282 {
1283 predicate p = add_condition (summary, index, size, &aggpos,
1284 predicate::is_not_constant, NULL_TREE);
1285 e->aux = edge_predicate_pool.allocate ();
1286 *(predicate *) e->aux = p;
1287 }
1288}
1289
1290
1291/* If BB ends by a switch we can turn into predicates, attach corresponding
1292 predicates to the CFG edges. */
1293
1294static void
1295set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
1296 struct ipa_fn_summary *summary,
1297 basic_block bb)
1298{
1299 gimple *lastg;
1300 tree op;
1301 int index;
1302 HOST_WIDE_INT size;
1303 struct agg_position_info aggpos;
1304 edge e;
1305 edge_iterator ei;
1306 size_t n;
1307 size_t case_idx;
1308
1309 lastg = last_stmt (bb);
1310 if (!lastg || gimple_code (lastg) != GIMPLE_SWITCH)
1311 return;
1312 gswitch *last = as_a <gswitch *> (lastg);
1313 op = gimple_switch_index (last);
1314 if (!unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &size, &aggpos))
1315 return;
1316
1317 FOR_EACH_EDGE (e, ei, bb->succs)
1318 {
1319 e->aux = edge_predicate_pool.allocate ();
1320 *(predicate *) e->aux = false;
1321 }
1322 n = gimple_switch_num_labels (last);
1323 for (case_idx = 0; case_idx < n; ++case_idx)
1324 {
1325 tree cl = gimple_switch_label (last, case_idx);
1326 tree min, max;
1327 predicate p;
1328
1329 e = find_edge (bb, label_to_block (CASE_LABEL (cl)));
1330 min = CASE_LOW (cl);
1331 max = CASE_HIGH (cl);
1332
1333 /* For default we might want to construct predicate that none
1334 of cases is met, but it is bit hard to do not having negations
1335 of conditionals handy. */
1336 if (!min && !max)
1337 p = true;
1338 else if (!max)
1339 p = add_condition (summary, index, size, &aggpos, EQ_EXPR,
1340 unshare_expr_without_location (min));
1341 else
1342 {
1343 predicate p1, p2;
1344 p1 = add_condition (summary, index, size, &aggpos, GE_EXPR,
1345 unshare_expr_without_location (min));
1346 p2 = add_condition (summary, index, size, &aggpos, LE_EXPR,
1347 unshare_expr_without_location (max));
1348 p = p1 & p2;
1349 }
1350 *(struct predicate *) e->aux
1351 = p.or_with (summary->conds, *(struct predicate *) e->aux);
1352 }
1353}
1354
1355
1356/* For each BB in NODE attach to its AUX pointer predicate under
1357 which it is executable. */
1358
1359static void
1360compute_bb_predicates (struct ipa_func_body_info *fbi,
1361 struct cgraph_node *node,
1362 struct ipa_fn_summary *summary)
1363{
1364 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
1365 bool done = false;
1366 basic_block bb;
1367
1368 FOR_EACH_BB_FN (bb, my_function)
1369 {
1370 set_cond_stmt_execution_predicate (fbi, summary, bb);
1371 set_switch_stmt_execution_predicate (fbi, summary, bb);
1372 }
1373
1374 /* Entry block is always executable. */
1375 ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux
1376 = edge_predicate_pool.allocate ();
1377 *(predicate *) ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux = true;
1378
1379 /* A simple dataflow propagation of predicates forward in the CFG.
1380 TODO: work in reverse postorder. */
1381 while (!done)
1382 {
1383 done = true;
1384 FOR_EACH_BB_FN (bb, my_function)
1385 {
1386 predicate p = false;
1387 edge e;
1388 edge_iterator ei;
1389 FOR_EACH_EDGE (e, ei, bb->preds)
1390 {
1391 if (e->src->aux)
1392 {
1393 predicate this_bb_predicate
1394 = *(predicate *) e->src->aux;
1395 if (e->aux)
1396 this_bb_predicate &= (*(struct predicate *) e->aux);
1397 p = p.or_with (summary->conds, this_bb_predicate);
1398 if (p == true)
1399 break;
1400 }
1401 }
1402 if (p == false)
1403 gcc_checking_assert (!bb->aux);
1404 else
1405 {
1406 if (!bb->aux)
1407 {
1408 done = false;
1409 bb->aux = edge_predicate_pool.allocate ();
1410 *((predicate *) bb->aux) = p;
1411 }
1412 else if (p != *(predicate *) bb->aux)
1413 {
1414 /* This OR operation is needed to ensure monotonous data flow
1415 in the case we hit the limit on number of clauses and the
1416 and/or operations above give approximate answers. */
1417 p = p.or_with (summary->conds, *(predicate *)bb->aux);
1418 if (p != *(predicate *) bb->aux)
1419 {
1420 done = false;
1421 *((predicate *) bb->aux) = p;
1422 }
1423 }
1424 }
1425 }
1426 }
1427}
1428
1429
1430/* Return predicate specifying when the STMT might have result that is not
1431 a compile time constant. */
1432
1433static predicate
1434will_be_nonconstant_expr_predicate (struct ipa_node_params *info,
1435 struct ipa_fn_summary *summary,
1436 tree expr,
1437 vec<predicate> nonconstant_names)
1438{
1439 tree parm;
1440 int index;
1441 HOST_WIDE_INT size;
1442
1443 while (UNARY_CLASS_P (expr))
1444 expr = TREE_OPERAND (expr, 0);
1445
1446 parm = unmodified_parm (NULL, expr, &size);
1447 if (parm && (index = ipa_get_param_decl_index (info, parm)) >= 0)
1448 return add_condition (summary, index, size, NULL, predicate::changed,
1449 NULL_TREE);
1450 if (is_gimple_min_invariant (expr))
1451 return false;
1452 if (TREE_CODE (expr) == SSA_NAME)
1453 return nonconstant_names[SSA_NAME_VERSION (expr)];
1454 if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))
1455 {
1456 predicate p1 = will_be_nonconstant_expr_predicate
1457 (info, summary, TREE_OPERAND (expr, 0),
1458 nonconstant_names);
1459 if (p1 == true)
1460 return p1;
1461
1462 predicate p2;
1463 p2 = will_be_nonconstant_expr_predicate (info, summary,
1464 TREE_OPERAND (expr, 1),
1465 nonconstant_names);
1466 return p1.or_with (summary->conds, p2);
1467 }
1468 else if (TREE_CODE (expr) == COND_EXPR)
1469 {
1470 predicate p1 = will_be_nonconstant_expr_predicate
1471 (info, summary, TREE_OPERAND (expr, 0),
1472 nonconstant_names);
1473 if (p1 == true)
1474 return p1;
1475
1476 predicate p2;
1477 p2 = will_be_nonconstant_expr_predicate (info, summary,
1478 TREE_OPERAND (expr, 1),
1479 nonconstant_names);
1480 if (p2 == true)
1481 return p2;
1482 p1 = p1.or_with (summary->conds, p2);
1483 p2 = will_be_nonconstant_expr_predicate (info, summary,
1484 TREE_OPERAND (expr, 2),
1485 nonconstant_names);
1486 return p2.or_with (summary->conds, p1);
1487 }
1488 else
1489 {
1490 debug_tree (expr);
1491 gcc_unreachable ();
1492 }
1493 return false;
1494}
1495
1496
1497/* Return predicate specifying when the STMT might have result that is not
1498 a compile time constant. */
1499
1500static predicate
1501will_be_nonconstant_predicate (struct ipa_func_body_info *fbi,
1502 struct ipa_fn_summary *summary,
1503 gimple *stmt,
1504 vec<predicate> nonconstant_names)
1505{
1506 predicate p = true;
1507 ssa_op_iter iter;
1508 tree use;
1509 predicate op_non_const;
1510 bool is_load;
1511 int base_index;
1512 HOST_WIDE_INT size;
1513 struct agg_position_info aggpos;
1514
1515 /* What statments might be optimized away
1516 when their arguments are constant. */
1517 if (gimple_code (stmt) != GIMPLE_ASSIGN
1518 && gimple_code (stmt) != GIMPLE_COND
1519 && gimple_code (stmt) != GIMPLE_SWITCH
1520 && (gimple_code (stmt) != GIMPLE_CALL
1521 || !(gimple_call_flags (stmt) & ECF_CONST)))
1522 return p;
1523
1524 /* Stores will stay anyway. */
1525 if (gimple_store_p (stmt))
1526 return p;
1527
1528 is_load = gimple_assign_load_p (stmt);
1529
1530 /* Loads can be optimized when the value is known. */
1531 if (is_load)
1532 {
1533 tree op;
1534 gcc_assert (gimple_assign_single_p (stmt));
1535 op = gimple_assign_rhs1 (stmt);
1536 if (!unmodified_parm_or_parm_agg_item (fbi, stmt, op, &base_index, &size,
1537 &aggpos))
1538 return p;
1539 }
1540 else
1541 base_index = -1;
1542
1543 /* See if we understand all operands before we start
1544 adding conditionals. */
1545 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
1546 {
1547 tree parm = unmodified_parm (stmt, use, NULL);
1548 /* For arguments we can build a condition. */
1549 if (parm && ipa_get_param_decl_index (fbi->info, parm) >= 0)
1550 continue;
1551 if (TREE_CODE (use) != SSA_NAME)
1552 return p;
1553 /* If we know when operand is constant,
1554 we still can say something useful. */
1555 if (nonconstant_names[SSA_NAME_VERSION (use)] != true)
1556 continue;
1557 return p;
1558 }
1559
1560 if (is_load)
1561 op_non_const =
1562 add_condition (summary, base_index, size, &aggpos, predicate::changed,
1563 NULL);
1564 else
1565 op_non_const = false;
1566 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
1567 {
1568 HOST_WIDE_INT size;
1569 tree parm = unmodified_parm (stmt, use, &size);
1570 int index;
1571
1572 if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
1573 {
1574 if (index != base_index)
1575 p = add_condition (summary, index, size, NULL, predicate::changed,
1576 NULL_TREE);
1577 else
1578 continue;
1579 }
1580 else
1581 p = nonconstant_names[SSA_NAME_VERSION (use)];
1582 op_non_const = p.or_with (summary->conds, op_non_const);
1583 }
1584 if ((gimple_code (stmt) == GIMPLE_ASSIGN || gimple_code (stmt) == GIMPLE_CALL)
1585 && gimple_op (stmt, 0)
1586 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1587 nonconstant_names[SSA_NAME_VERSION (gimple_op (stmt, 0))]
1588 = op_non_const;
1589 return op_non_const;
1590}
1591
1592struct record_modified_bb_info
1593{
1594 bitmap bb_set;
1595 gimple *stmt;
1596};
1597
1598/* Value is initialized in INIT_BB and used in USE_BB. We want to copute
1599 probability how often it changes between USE_BB.
1600 INIT_BB->frequency/USE_BB->frequency is an estimate, but if INIT_BB
1601 is in different loop nest, we can do better.
1602 This is all just estimate. In theory we look for minimal cut separating
1603 INIT_BB and USE_BB, but we only want to anticipate loop invariant motion
1604 anyway. */
1605
1606static basic_block
1607get_minimal_bb (basic_block init_bb, basic_block use_bb)
1608{
1609 struct loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father);
1610 if (l && l->header->count < init_bb->count)
1611 return l->header;
1612 return init_bb;
1613}
1614
1615/* Callback of walk_aliased_vdefs. Records basic blocks where the value may be
1616 set except for info->stmt. */
1617
1618static bool
1619record_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef, void *data)
1620{
1621 struct record_modified_bb_info *info =
1622 (struct record_modified_bb_info *) data;
1623 if (SSA_NAME_DEF_STMT (vdef) == info->stmt)
1624 return false;
1625 bitmap_set_bit (info->bb_set,
1626 SSA_NAME_IS_DEFAULT_DEF (vdef)
1627 ? ENTRY_BLOCK_PTR_FOR_FN (cfun)->index
1628 : get_minimal_bb
1629 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
1630 gimple_bb (info->stmt))->index);
1631 return false;
1632}
1633
1634/* Return probability (based on REG_BR_PROB_BASE) that I-th parameter of STMT
1635 will change since last invocation of STMT.
1636
1637 Value 0 is reserved for compile time invariants.
1638 For common parameters it is REG_BR_PROB_BASE. For loop invariants it
1639 ought to be REG_BR_PROB_BASE / estimated_iters. */
1640
1641static int
1642param_change_prob (gimple *stmt, int i)
1643{
1644 tree op = gimple_call_arg (stmt, i);
1645 basic_block bb = gimple_bb (stmt);
1646
1647 if (TREE_CODE (op) == WITH_SIZE_EXPR)
1648 op = TREE_OPERAND (op, 0);
1649
1650 tree base = get_base_address (op);
1651
1652 /* Global invariants never change. */
1653 if (is_gimple_min_invariant (base))
1654 return 0;
1655
1656 /* We would have to do non-trivial analysis to really work out what
1657 is the probability of value to change (i.e. when init statement
1658 is in a sibling loop of the call).
1659
1660 We do an conservative estimate: when call is executed N times more often
1661 than the statement defining value, we take the frequency 1/N. */
1662 if (TREE_CODE (base) == SSA_NAME)
1663 {
1664 int init_freq;
1665
1666 if (!bb->count.to_frequency (cfun))
1667 return REG_BR_PROB_BASE;
1668
1669 if (SSA_NAME_IS_DEFAULT_DEF (base))
1670 init_freq = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun);
1671 else
1672 init_freq = get_minimal_bb
1673 (gimple_bb (SSA_NAME_DEF_STMT (base)),
1674 gimple_bb (stmt))->count.to_frequency (cfun);
1675
1676 if (!init_freq)
1677 init_freq = 1;
1678 if (init_freq < bb->count.to_frequency (cfun))
1679 return MAX (GCOV_COMPUTE_SCALE (init_freq,
1680 bb->count.to_frequency (cfun)), 1);
1681 else
1682 return REG_BR_PROB_BASE;
1683 }
1684 else
1685 {
1686 ao_ref refd;
1687 int max;
1688 struct record_modified_bb_info info;
1689 bitmap_iterator bi;
1690 unsigned index;
1691 tree init = ctor_for_folding (base);
1692
1693 if (init != error_mark_node)
1694 return 0;
1695 if (!bb->count.to_frequency (cfun))
1696 return REG_BR_PROB_BASE;
1697 ao_ref_init (&refd, op);
1698 info.stmt = stmt;
1699 info.bb_set = BITMAP_ALLOC (NULL);
1700 walk_aliased_vdefs (&refd, gimple_vuse (stmt), record_modified, &info,
1701 NULL);
1702 if (bitmap_bit_p (info.bb_set, bb->index))
1703 {
1704 BITMAP_FREE (info.bb_set);
1705 return REG_BR_PROB_BASE;
1706 }
1707
1708 /* Assume that every memory is initialized at entry.
1709 TODO: Can we easilly determine if value is always defined
1710 and thus we may skip entry block? */
1711 if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun))
1712 max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.to_frequency (cfun);
1713 else
1714 max = 1;
1715
1716 EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi)
1717 max = MIN (max, BASIC_BLOCK_FOR_FN (cfun, index)->count.to_frequency (cfun));
1718
1719 BITMAP_FREE (info.bb_set);
1720 if (max < bb->count.to_frequency (cfun))
1721 return MAX (GCOV_COMPUTE_SCALE (max, bb->count.to_frequency (cfun)), 1);
1722 else
1723 return REG_BR_PROB_BASE;
1724 }
1725}
1726
1727/* Find whether a basic block BB is the final block of a (half) diamond CFG
1728 sub-graph and if the predicate the condition depends on is known. If so,
1729 return true and store the pointer the predicate in *P. */
1730
1731static bool
1732phi_result_unknown_predicate (struct ipa_node_params *info,
1733 ipa_fn_summary *summary, basic_block bb,
1734 predicate *p,
1735 vec<predicate> nonconstant_names)
1736{
1737 edge e;
1738 edge_iterator ei;
1739 basic_block first_bb = NULL;
1740 gimple *stmt;
1741
1742 if (single_pred_p (bb))
1743 {
1744 *p = false;
1745 return true;
1746 }
1747
1748 FOR_EACH_EDGE (e, ei, bb->preds)
1749 {
1750 if (single_succ_p (e->src))
1751 {
1752 if (!single_pred_p (e->src))
1753 return false;
1754 if (!first_bb)
1755 first_bb = single_pred (e->src);
1756 else if (single_pred (e->src) != first_bb)
1757 return false;
1758 }
1759 else
1760 {
1761 if (!first_bb)
1762 first_bb = e->src;
1763 else if (e->src != first_bb)
1764 return false;
1765 }
1766 }
1767
1768 if (!first_bb)
1769 return false;
1770
1771 stmt = last_stmt (first_bb);
1772 if (!stmt
1773 || gimple_code (stmt) != GIMPLE_COND
1774 || !is_gimple_ip_invariant (gimple_cond_rhs (stmt)))
1775 return false;
1776
1777 *p = will_be_nonconstant_expr_predicate (info, summary,
1778 gimple_cond_lhs (stmt),
1779 nonconstant_names);
1780 if (*p == true)
1781 return false;
1782 else
1783 return true;
1784}
1785
1786/* Given a PHI statement in a function described by inline properties SUMMARY
1787 and *P being the predicate describing whether the selected PHI argument is
1788 known, store a predicate for the result of the PHI statement into
1789 NONCONSTANT_NAMES, if possible. */
1790
1791static void
1792predicate_for_phi_result (struct ipa_fn_summary *summary, gphi *phi,
1793 predicate *p,
1794 vec<predicate> nonconstant_names)
1795{
1796 unsigned i;
1797
1798 for (i = 0; i < gimple_phi_num_args (phi); i++)
1799 {
1800 tree arg = gimple_phi_arg (phi, i)->def;
1801 if (!is_gimple_min_invariant (arg))
1802 {
1803 gcc_assert (TREE_CODE (arg) == SSA_NAME);
1804 *p = p->or_with (summary->conds,
1805 nonconstant_names[SSA_NAME_VERSION (arg)]);
1806 if (*p == true)
1807 return;
1808 }
1809 }
1810
1811 if (dump_file && (dump_flags & TDF_DETAILS))
1812 {
1813 fprintf (dump_file, "\t\tphi predicate: ");
1814 p->dump (dump_file, summary->conds);
1815 }
1816 nonconstant_names[SSA_NAME_VERSION (gimple_phi_result (phi))] = *p;
1817}
1818
1819/* Return predicate specifying when array index in access OP becomes non-constant. */
1820
1821static predicate
1822array_index_predicate (ipa_fn_summary *info,
1823 vec< predicate> nonconstant_names, tree op)
1824{
1825 predicate p = false;
1826 while (handled_component_p (op))
1827 {
1828 if (TREE_CODE (op) == ARRAY_REF || TREE_CODE (op) == ARRAY_RANGE_REF)
1829 {
1830 if (TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME)
1831 p = p.or_with (info->conds,
1832 nonconstant_names[SSA_NAME_VERSION
1833 (TREE_OPERAND (op, 1))]);
1834 }
1835 op = TREE_OPERAND (op, 0);
1836 }
1837 return p;
1838}
1839
1840/* For a typical usage of __builtin_expect (a<b, 1), we
1841 may introduce an extra relation stmt:
1842 With the builtin, we have
1843 t1 = a <= b;
1844 t2 = (long int) t1;
1845 t3 = __builtin_expect (t2, 1);
1846 if (t3 != 0)
1847 goto ...
1848 Without the builtin, we have
1849 if (a<=b)
1850 goto...
1851 This affects the size/time estimation and may have
1852 an impact on the earlier inlining.
1853 Here find this pattern and fix it up later. */
1854
1855static gimple *
1856find_foldable_builtin_expect (basic_block bb)
1857{
1858 gimple_stmt_iterator bsi;
1859
1860 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1861 {
1862 gimple *stmt = gsi_stmt (bsi);
1863 if (gimple_call_builtin_p (stmt, BUILT_IN_EXPECT)
1864 || gimple_call_internal_p (stmt, IFN_BUILTIN_EXPECT))
1865 {
1866 tree var = gimple_call_lhs (stmt);
1867 tree arg = gimple_call_arg (stmt, 0);
1868 use_operand_p use_p;
1869 gimple *use_stmt;
1870 bool match = false;
1871 bool done = false;
1872
1873 if (!var || !arg)
1874 continue;
1875 gcc_assert (TREE_CODE (var) == SSA_NAME);
1876
1877 while (TREE_CODE (arg) == SSA_NAME)
1878 {
1879 gimple *stmt_tmp = SSA_NAME_DEF_STMT (arg);
1880 if (!is_gimple_assign (stmt_tmp))
1881 break;
1882 switch (gimple_assign_rhs_code (stmt_tmp))
1883 {
1884 case LT_EXPR:
1885 case LE_EXPR:
1886 case GT_EXPR:
1887 case GE_EXPR:
1888 case EQ_EXPR:
1889 case NE_EXPR:
1890 match = true;
1891 done = true;
1892 break;
1893 CASE_CONVERT:
1894 break;
1895 default:
1896 done = true;
1897 break;
1898 }
1899 if (done)
1900 break;
1901 arg = gimple_assign_rhs1 (stmt_tmp);
1902 }
1903
1904 if (match && single_imm_use (var, &use_p, &use_stmt)
1905 && gimple_code (use_stmt) == GIMPLE_COND)
1906 return use_stmt;
1907 }
1908 }
1909 return NULL;
1910}
1911
1912/* Return true when the basic blocks contains only clobbers followed by RESX.
1913 Such BBs are kept around to make removal of dead stores possible with
1914 presence of EH and will be optimized out by optimize_clobbers later in the
1915 game.
1916
1917 NEED_EH is used to recurse in case the clobber has non-EH predecestors
1918 that can be clobber only, too.. When it is false, the RESX is not necessary
1919 on the end of basic block. */
1920
1921static bool
1922clobber_only_eh_bb_p (basic_block bb, bool need_eh = true)
1923{
1924 gimple_stmt_iterator gsi = gsi_last_bb (bb);
1925 edge_iterator ei;
1926 edge e;
1927
1928 if (need_eh)
1929 {
1930 if (gsi_end_p (gsi))
1931 return false;
1932 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_RESX)
1933 return false;
1934 gsi_prev (&gsi);
1935 }
1936 else if (!single_succ_p (bb))
1937 return false;
1938
1939 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
1940 {
1941 gimple *stmt = gsi_stmt (gsi);
1942 if (is_gimple_debug (stmt))
1943 continue;
1944 if (gimple_clobber_p (stmt))
1945 continue;
1946 if (gimple_code (stmt) == GIMPLE_LABEL)
1947 break;
1948 return false;
1949 }
1950
1951 /* See if all predecestors are either throws or clobber only BBs. */
1952 FOR_EACH_EDGE (e, ei, bb->preds)
1953 if (!(e->flags & EDGE_EH)
1954 && !clobber_only_eh_bb_p (e->src, false))
1955 return false;
1956
1957 return true;
1958}
1959
1960/* Return true if STMT compute a floating point expression that may be affected
1961 by -ffast-math and similar flags. */
1962
1963static bool
1964fp_expression_p (gimple *stmt)
1965{
1966 ssa_op_iter i;
1967 tree op;
1968
1969 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF|SSA_OP_USE)
1970 if (FLOAT_TYPE_P (TREE_TYPE (op)))
1971 return true;
1972 return false;
1973}
1974
1975/* Analyze function body for NODE.
1976 EARLY indicates run from early optimization pipeline. */
1977
1978static void
1979analyze_function_body (struct cgraph_node *node, bool early)
1980{
1981 sreal time = 0;
1982 /* Estimate static overhead for function prologue/epilogue and alignment. */
1983 int size = 2;
1984 /* Benefits are scaled by probability of elimination that is in range
1985 <0,2>. */
1986 basic_block bb;
1987 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
1988 sreal freq;
1989 struct ipa_fn_summary *info = ipa_fn_summaries->get (node);
1990 predicate bb_predicate;
1991 struct ipa_func_body_info fbi;
1992 vec<predicate> nonconstant_names = vNULL;
1993 int nblocks, n;
1994 int *order;
1995 predicate array_index = true;
1996 gimple *fix_builtin_expect_stmt;
1997
1998 gcc_assert (my_function && my_function->cfg);
1999 gcc_assert (cfun == my_function);
2000
2001 memset(&fbi, 0, sizeof(fbi));
2002 info->conds = NULL;
2003 info->size_time_table = NULL;
2004
2005 /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
2006 so we can produce proper inline hints.
2007
2008 When optimizing and analyzing for early inliner, initialize node params
2009 so we can produce correct BB predicates. */
2010
2011 if (opt_for_fn (node->decl, optimize))
2012 {
2013 calculate_dominance_info (CDI_DOMINATORS);
2014 if (!early)
2015 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
2016 else
2017 {
2018 ipa_check_create_node_params ();
2019 ipa_initialize_node_params (node);
2020 }
2021
2022 if (ipa_node_params_sum)
2023 {
2024 fbi.node = node;
2025 fbi.info = IPA_NODE_REF (node);
2026 fbi.bb_infos = vNULL;
2027 fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun));
2028 fbi.param_count = count_formal_params(node->decl);
2029 nonconstant_names.safe_grow_cleared
2030 (SSANAMES (my_function)->length ());
2031 }
2032 }
2033
2034 if (dump_file)
2035 fprintf (dump_file, "\nAnalyzing function body size: %s\n",
2036 node->name ());
2037
2038 /* When we run into maximal number of entries, we assign everything to the
2039 constant truth case. Be sure to have it in list. */
2040 bb_predicate = true;
2041 info->account_size_time (0, 0, bb_predicate, bb_predicate);
2042
2043 bb_predicate = predicate::not_inlined ();
2044 info->account_size_time (2 * ipa_fn_summary::size_scale, 0, bb_predicate,
2045 bb_predicate);
2046
2047 if (fbi.info)
2048 compute_bb_predicates (&fbi, node, info);
2049 order = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
2050 nblocks = pre_and_rev_post_order_compute (NULL, order, false);
2051 for (n = 0; n < nblocks; n++)
2052 {
2053 bb = BASIC_BLOCK_FOR_FN (cfun, order[n]);
2054 freq = bb->count.to_sreal_scale (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count);
2055 if (clobber_only_eh_bb_p (bb))
2056 {
2057 if (dump_file && (dump_flags & TDF_DETAILS))
2058 fprintf (dump_file, "\n Ignoring BB %i;"
2059 " it will be optimized away by cleanup_clobbers\n",
2060 bb->index);
2061 continue;
2062 }
2063
2064 /* TODO: Obviously predicates can be propagated down across CFG. */
2065 if (fbi.info)
2066 {
2067 if (bb->aux)
2068 bb_predicate = *(predicate *) bb->aux;
2069 else
2070 bb_predicate = false;
2071 }
2072 else
2073 bb_predicate = true;
2074
2075 if (dump_file && (dump_flags & TDF_DETAILS))
2076 {
2077 fprintf (dump_file, "\n BB %i predicate:", bb->index);
2078 bb_predicate.dump (dump_file, info->conds);
2079 }
2080
2081 if (fbi.info && nonconstant_names.exists ())
2082 {
2083 predicate phi_predicate;
2084 bool first_phi = true;
2085
2086 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
2087 gsi_next (&bsi))
2088 {
2089 if (first_phi
2090 && !phi_result_unknown_predicate (fbi.info, info, bb,
2091 &phi_predicate,
2092 nonconstant_names))
2093 break;
2094 first_phi = false;
2095 if (dump_file && (dump_flags & TDF_DETAILS))
2096 {
2097 fprintf (dump_file, " ");
2098 print_gimple_stmt (dump_file, gsi_stmt (bsi), 0);
2099 }
2100 predicate_for_phi_result (info, bsi.phi (), &phi_predicate,
2101 nonconstant_names);
2102 }
2103 }
2104
2105 fix_builtin_expect_stmt = find_foldable_builtin_expect (bb);
2106
2107 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
2108 gsi_next (&bsi))
2109 {
2110 gimple *stmt = gsi_stmt (bsi);
2111 int this_size = estimate_num_insns (stmt, &eni_size_weights);
2112 int this_time = estimate_num_insns (stmt, &eni_time_weights);
2113 int prob;
2114 predicate will_be_nonconstant;
2115
2116 /* This relation stmt should be folded after we remove
2117 buildin_expect call. Adjust the cost here. */
2118 if (stmt == fix_builtin_expect_stmt)
2119 {
2120 this_size--;
2121 this_time--;
2122 }
2123
2124 if (dump_file && (dump_flags & TDF_DETAILS))
2125 {
2126 fprintf (dump_file, " ");
2127 print_gimple_stmt (dump_file, stmt, 0);
2128 fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n",
2129 freq.to_double (), this_size,
2130 this_time);
2131 }
2132
2133 if (gimple_assign_load_p (stmt) && nonconstant_names.exists ())
2134 {
2135 predicate this_array_index;
2136 this_array_index =
2137 array_index_predicate (info, nonconstant_names,
2138 gimple_assign_rhs1 (stmt));
2139 if (this_array_index != false)
2140 array_index &= this_array_index;
2141 }
2142 if (gimple_store_p (stmt) && nonconstant_names.exists ())
2143 {
2144 predicate this_array_index;
2145 this_array_index =
2146 array_index_predicate (info, nonconstant_names,
2147 gimple_get_lhs (stmt));
2148 if (this_array_index != false)
2149 array_index &= this_array_index;
2150 }
2151
2152
2153 if (is_gimple_call (stmt)
2154 && !gimple_call_internal_p (stmt))
2155 {
2156 struct cgraph_edge *edge = node->get_edge (stmt);
2157 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
2158
2159 /* Special case: results of BUILT_IN_CONSTANT_P will be always
2160 resolved as constant. We however don't want to optimize
2161 out the cgraph edges. */
2162 if (nonconstant_names.exists ()
2163 && gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P)
2164 && gimple_call_lhs (stmt)
2165 && TREE_CODE (gimple_call_lhs (stmt)) == SSA_NAME)
2166 {
2167 predicate false_p = false;
2168 nonconstant_names[SSA_NAME_VERSION (gimple_call_lhs (stmt))]
2169 = false_p;
2170 }
2171 if (ipa_node_params_sum)
2172 {
2173 int count = gimple_call_num_args (stmt);
2174 int i;
2175
2176 if (count)
2177 es->param.safe_grow_cleared (count);
2178 for (i = 0; i < count; i++)
2179 {
2180 int prob = param_change_prob (stmt, i);
2181 gcc_assert (prob >= 0 && prob <= REG_BR_PROB_BASE);
2182 es->param[i].change_prob = prob;
2183 }
2184 }
2185
2186 es->call_stmt_size = this_size;
2187 es->call_stmt_time = this_time;
2188 es->loop_depth = bb_loop_depth (bb);
2189 edge_set_predicate (edge, &bb_predicate);
2190 }
2191
2192 /* TODO: When conditional jump or swithc is known to be constant, but
2193 we did not translate it into the predicates, we really can account
2194 just maximum of the possible paths. */
2195 if (fbi.info)
2196 will_be_nonconstant
2197 = will_be_nonconstant_predicate (&fbi, info,
2198 stmt, nonconstant_names);
2199 else
2200 will_be_nonconstant = true;
2201 if (this_time || this_size)
2202 {
2203 sreal final_time = (sreal)this_time * freq;
2204
2205 prob = eliminated_by_inlining_prob (stmt);
2206 if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS))
2207 fprintf (dump_file,
2208 "\t\t50%% will be eliminated by inlining\n");
2209 if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS))
2210 fprintf (dump_file, "\t\tWill be eliminated by inlining\n");
2211
2212 struct predicate p = bb_predicate & will_be_nonconstant;
2213
2214 /* We can ignore statement when we proved it is never going
2215 to happen, but we can not do that for call statements
2216 because edges are accounted specially. */
2217
2218 if (*(is_gimple_call (stmt) ? &bb_predicate : &p) != false)
2219 {
2220 time += final_time;
2221 size += this_size;
2222 }
2223
2224 /* We account everything but the calls. Calls have their own
2225 size/time info attached to cgraph edges. This is necessary
2226 in order to make the cost disappear after inlining. */
2227 if (!is_gimple_call (stmt))
2228 {
2229 if (prob)
2230 {
2231 predicate ip = bb_predicate & predicate::not_inlined ();
2232 info->account_size_time (this_size * prob,
2233 (this_time * prob) / 2, ip,
2234 p);
2235 }
2236 if (prob != 2)
2237 info->account_size_time (this_size * (2 - prob),
2238 (this_time * (2 - prob) / 2),
2239 bb_predicate,
2240 p);
2241 }
2242
2243 if (!info->fp_expressions && fp_expression_p (stmt))
2244 {
2245 info->fp_expressions = true;
2246 if (dump_file)
2247 fprintf (dump_file, " fp_expression set\n");
2248 }
2249
2250 gcc_assert (time >= 0);
2251 gcc_assert (size >= 0);
2252 }
2253 }
2254 }
2255 set_hint_predicate (&ipa_fn_summaries->get (node)->array_index, array_index);
2256 free (order);
2257
2258 if (nonconstant_names.exists () && !early)
2259 {
2260 struct loop *loop;
2261 predicate loop_iterations = true;
2262 predicate loop_stride = true;
2263
2264 if (dump_file && (dump_flags & TDF_DETAILS))
2265 flow_loops_dump (dump_file, NULL, 0);
2266 scev_initialize ();
2267 FOR_EACH_LOOP (loop, 0)
2268 {
2269 vec<edge> exits;
2270 edge ex;
2271 unsigned int j;
2272 struct tree_niter_desc niter_desc;
2273 bb_predicate = *(predicate *) loop->header->aux;
2274
2275 exits = get_loop_exit_edges (loop);
2276 FOR_EACH_VEC_ELT (exits, j, ex)
2277 if (number_of_iterations_exit (loop, ex, &niter_desc, false)
2278 && !is_gimple_min_invariant (niter_desc.niter))
2279 {
2280 predicate will_be_nonconstant
2281 = will_be_nonconstant_expr_predicate (fbi.info, info,
2282 niter_desc.niter,
2283 nonconstant_names);
2284 if (will_be_nonconstant != true)
2285 will_be_nonconstant = bb_predicate & will_be_nonconstant;
2286 if (will_be_nonconstant != true
2287 && will_be_nonconstant != false)
2288 /* This is slightly inprecise. We may want to represent each
2289 loop with independent predicate. */
2290 loop_iterations &= will_be_nonconstant;
2291 }
2292 exits.release ();
2293 }
2294
2295 /* To avoid quadratic behavior we analyze stride predicates only
2296 with respect to the containing loop. Thus we simply iterate
2297 over all defs in the outermost loop body. */
2298 for (loop = loops_for_fn (cfun)->tree_root->inner;
2299 loop != NULL; loop = loop->next)
2300 {
2301 basic_block *body = get_loop_body (loop);
2302 for (unsigned i = 0; i < loop->num_nodes; i++)
2303 {
2304 gimple_stmt_iterator gsi;
2305 bb_predicate = *(predicate *) body[i]->aux;
2306 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);
2307 gsi_next (&gsi))
2308 {
2309 gimple *stmt = gsi_stmt (gsi);
2310
2311 if (!is_gimple_assign (stmt))
2312 continue;
2313
2314 tree def = gimple_assign_lhs (stmt);
2315 if (TREE_CODE (def) != SSA_NAME)
2316 continue;
2317
2318 affine_iv iv;
2319 if (!simple_iv (loop_containing_stmt (stmt),
2320 loop_containing_stmt (stmt),
2321 def, &iv, true)
2322 || is_gimple_min_invariant (iv.step))
2323 continue;
2324
2325 predicate will_be_nonconstant
2326 = will_be_nonconstant_expr_predicate (fbi.info, info,
2327 iv.step,
2328 nonconstant_names);
2329 if (will_be_nonconstant != true)
2330 will_be_nonconstant = bb_predicate & will_be_nonconstant;
2331 if (will_be_nonconstant != true
2332 && will_be_nonconstant != false)
2333 /* This is slightly inprecise. We may want to represent
2334 each loop with independent predicate. */
2335 loop_stride = loop_stride & will_be_nonconstant;
2336 }
2337 }
2338 free (body);
2339 }
2340 set_hint_predicate (&ipa_fn_summaries->get (node)->loop_iterations,
2341 loop_iterations);
2342 set_hint_predicate (&ipa_fn_summaries->get (node)->loop_stride,
2343 loop_stride);
2344 scev_finalize ();
2345 }
2346 FOR_ALL_BB_FN (bb, my_function)
2347 {
2348 edge e;
2349 edge_iterator ei;
2350
2351 if (bb->aux)
2352 edge_predicate_pool.remove ((predicate *)bb->aux);
2353 bb->aux = NULL;
2354 FOR_EACH_EDGE (e, ei, bb->succs)
2355 {
2356 if (e->aux)
2357 edge_predicate_pool.remove ((predicate *) e->aux);
2358 e->aux = NULL;
2359 }
2360 }
2361 ipa_fn_summaries->get (node)->time = time;
2362 ipa_fn_summaries->get (node)->self_size = size;
2363 nonconstant_names.release ();
2364 ipa_release_body_info (&fbi);
2365 if (opt_for_fn (node->decl, optimize))
2366 {
2367 if (!early)
2368 loop_optimizer_finalize ();
2369 else if (!ipa_edge_args_sum)
2370 ipa_free_all_node_params ();
2371 free_dominance_info (CDI_DOMINATORS);
2372 }
2373 if (dump_file)
2374 {
2375 fprintf (dump_file, "\n");
2376 ipa_dump_fn_summary (dump_file, node);
2377 }
2378}
2379
2380
2381/* Compute function summary.
2382 EARLY is true when we compute parameters during early opts. */
2383
2384void
2385compute_fn_summary (struct cgraph_node *node, bool early)
2386{
2387 HOST_WIDE_INT self_stack_size;
2388 struct cgraph_edge *e;
2389 struct ipa_fn_summary *info;
2390
2391 gcc_assert (!node->global.inlined_to);
2392
2393 if (!ipa_fn_summaries)
2394 ipa_fn_summary_alloc ();
2395
2396 info = ipa_fn_summaries->get (node);
2397 info->reset (node);
2398
2399 /* Estimate the stack size for the function if we're optimizing. */
2400 self_stack_size = optimize && !node->thunk.thunk_p
2401 ? estimated_stack_frame_size (node) : 0;
2402 info->estimated_self_stack_size = self_stack_size;
2403 info->estimated_stack_size = self_stack_size;
2404 info->stack_frame_offset = 0;
2405
2406 if (node->thunk.thunk_p)
2407 {
2408 struct ipa_call_summary *es = ipa_call_summaries->get (node->callees);
2409 predicate t = true;
2410
2411 node->local.can_change_signature = false;
2412 es->call_stmt_size = eni_size_weights.call_cost;
2413 es->call_stmt_time = eni_time_weights.call_cost;
2414 info->account_size_time (ipa_fn_summary::size_scale * 2, 2, t, t);
2415 t = predicate::not_inlined ();
2416 info->account_size_time (2 * ipa_fn_summary::size_scale, 0, t, t);
2417 ipa_update_overall_fn_summary (node);
2418 info->self_size = info->size;
2419 /* We can not inline instrumentation clones. */
2420 if (node->thunk.add_pointer_bounds_args)
2421 {
2422 info->inlinable = false;
2423 node->callees->inline_failed = CIF_CHKP;
2424 }
2425 else
2426 info->inlinable = true;
2427 }
2428 else
2429 {
2430 /* Even is_gimple_min_invariant rely on current_function_decl. */
2431 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2432
2433 /* Can this function be inlined at all? */
2434 if (!opt_for_fn (node->decl, optimize)
2435 && !lookup_attribute ("always_inline",
2436 DECL_ATTRIBUTES (node->decl)))
2437 info->inlinable = false;
2438 else
2439 info->inlinable = tree_inlinable_function_p (node->decl);
2440
2441 /* Type attributes can use parameter indices to describe them. */
2442 if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl)))
2443 node->local.can_change_signature = false;
2444 else
2445 {
2446 /* Otherwise, inlinable functions always can change signature. */
2447 if (info->inlinable)
2448 node->local.can_change_signature = true;
2449 else
2450 {
2451 /* Functions calling builtin_apply can not change signature. */
2452 for (e = node->callees; e; e = e->next_callee)
2453 {
2454 tree cdecl = e->callee->decl;
2455 if (DECL_BUILT_IN (cdecl)
2456 && DECL_BUILT_IN_CLASS (cdecl) == BUILT_IN_NORMAL
2457 && (DECL_FUNCTION_CODE (cdecl) == BUILT_IN_APPLY_ARGS
2458 || DECL_FUNCTION_CODE (cdecl) == BUILT_IN_VA_START))
2459 break;
2460 }
2461 node->local.can_change_signature = !e;
2462 }
2463 }
2464 /* Functions called by instrumentation thunk can't change signature
2465 because instrumentation thunk modification is not supported. */
2466 if (node->local.can_change_signature)
2467 for (e = node->callers; e; e = e->next_caller)
2468 if (e->caller->thunk.thunk_p
2469 && e->caller->thunk.add_pointer_bounds_args)
2470 {
2471 node->local.can_change_signature = false;
2472 break;
2473 }
2474 analyze_function_body (node, early);
2475 pop_cfun ();
2476 }
2477 for (e = node->callees; e; e = e->next_callee)
2478 if (e->callee->comdat_local_p ())
2479 break;
2480 node->calls_comdat_local = (e != NULL);
2481
2482 /* Inlining characteristics are maintained by the cgraph_mark_inline. */
2483 info->size = info->self_size;
2484 info->stack_frame_offset = 0;
2485 info->estimated_stack_size = info->estimated_self_stack_size;
2486
2487 /* Code above should compute exactly the same result as
2488 ipa_update_overall_fn_summary but because computation happens in
2489 different order the roundoff errors result in slight changes. */
2490 ipa_update_overall_fn_summary (node);
2491 gcc_assert (info->size == info->self_size);
2492}
2493
2494
2495/* Compute parameters of functions used by inliner using
2496 current_function_decl. */
2497
2498static unsigned int
2499compute_fn_summary_for_current (void)
2500{
2501 compute_fn_summary (cgraph_node::get (current_function_decl), true);
2502 return 0;
2503}
2504
2505/* Estimate benefit devirtualizing indirect edge IE, provided KNOWN_VALS,
2506 KNOWN_CONTEXTS and KNOWN_AGGS. */
2507
2508static bool
2509estimate_edge_devirt_benefit (struct cgraph_edge *ie,
2510 int *size, int *time,
2511 vec<tree> known_vals,
2512 vec<ipa_polymorphic_call_context> known_contexts,
2513 vec<ipa_agg_jump_function_p> known_aggs)
2514{
2515 tree target;
2516 struct cgraph_node *callee;
2517 struct ipa_fn_summary *isummary;
2518 enum availability avail;
2519 bool speculative;
2520
2521 if (!known_vals.exists () && !known_contexts.exists ())
2522 return false;
2523 if (!opt_for_fn (ie->caller->decl, flag_indirect_inlining))
2524 return false;
2525
2526 target = ipa_get_indirect_edge_target (ie, known_vals, known_contexts,
2527 known_aggs, &speculative);
2528 if (!target || speculative)
2529 return false;
2530
2531 /* Account for difference in cost between indirect and direct calls. */
2532 *size -= (eni_size_weights.indirect_call_cost - eni_size_weights.call_cost);
2533 *time -= (eni_time_weights.indirect_call_cost - eni_time_weights.call_cost);
2534 gcc_checking_assert (*time >= 0);
2535 gcc_checking_assert (*size >= 0);
2536
2537 callee = cgraph_node::get (target);
2538 if (!callee || !callee->definition)
2539 return false;
2540 callee = callee->function_symbol (&avail);
2541 if (avail < AVAIL_AVAILABLE)
2542 return false;
2543 isummary = ipa_fn_summaries->get (callee);
2544 return isummary->inlinable;
2545}
2546
2547/* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
2548 handle edge E with probability PROB.
2549 Set HINTS if edge may be devirtualized.
2550 KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS describe context of the call
2551 site. */
2552
2553static inline void
2554estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size,
2555 sreal *time,
2556 int prob,
2557 vec<tree> known_vals,
2558 vec<ipa_polymorphic_call_context> known_contexts,
2559 vec<ipa_agg_jump_function_p> known_aggs,
2560 ipa_hints *hints)
2561{
2562 struct ipa_call_summary *es = ipa_call_summaries->get (e);
2563 int call_size = es->call_stmt_size;
2564 int call_time = es->call_stmt_time;
2565 int cur_size;
2566 if (!e->callee
2567 && estimate_edge_devirt_benefit (e, &call_size, &call_time,
2568 known_vals, known_contexts, known_aggs)
2569 && hints && e->maybe_hot_p ())
2570 *hints |= INLINE_HINT_indirect_call;
2571 cur_size = call_size * ipa_fn_summary::size_scale;
2572 *size += cur_size;
2573 if (min_size)
2574 *min_size += cur_size;
2575 if (prob == REG_BR_PROB_BASE)
2576 *time += ((sreal)call_time) * e->sreal_frequency ();
2577 else
2578 *time += ((sreal)call_time * prob) * e->sreal_frequency ();
2579}
2580
2581
2582
2583/* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
2584 calls in NODE. POSSIBLE_TRUTHS, KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
2585 describe context of the call site. */
2586
2587static void
2588estimate_calls_size_and_time (struct cgraph_node *node, int *size,
2589 int *min_size, sreal *time,
2590 ipa_hints *hints,
2591 clause_t possible_truths,
2592 vec<tree> known_vals,
2593 vec<ipa_polymorphic_call_context> known_contexts,
2594 vec<ipa_agg_jump_function_p> known_aggs)
2595{
2596 struct cgraph_edge *e;
2597 for (e = node->callees; e; e = e->next_callee)
2598 {
2599 struct ipa_call_summary *es = ipa_call_summaries->get (e);
2600
2601 /* Do not care about zero sized builtins. */
2602 if (e->inline_failed && !es->call_stmt_size)
2603 {
2604 gcc_checking_assert (!es->call_stmt_time);
2605 continue;
2606 }
2607 if (!es->predicate
2608 || es->predicate->evaluate (possible_truths))
2609 {
2610 if (e->inline_failed)
2611 {
2612 /* Predicates of calls shall not use NOT_CHANGED codes,
2613 sowe do not need to compute probabilities. */
2614 estimate_edge_size_and_time (e, size,
2615 es->predicate ? NULL : min_size,
2616 time, REG_BR_PROB_BASE,
2617 known_vals, known_contexts,
2618 known_aggs, hints);
2619 }
2620 else
2621 estimate_calls_size_and_time (e->callee, size, min_size, time,
2622 hints,
2623 possible_truths,
2624 known_vals, known_contexts,
2625 known_aggs);
2626 }
2627 }
2628 for (e = node->indirect_calls; e; e = e->next_callee)
2629 {
2630 struct ipa_call_summary *es = ipa_call_summaries->get (e);
2631 if (!es->predicate
2632 || es->predicate->evaluate (possible_truths))
2633 estimate_edge_size_and_time (e, size,
2634 es->predicate ? NULL : min_size,
2635 time, REG_BR_PROB_BASE,
2636 known_vals, known_contexts, known_aggs,
2637 hints);
2638 }
2639}
2640
2641
2642/* Estimate size and time needed to execute NODE assuming
2643 POSSIBLE_TRUTHS clause, and KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS
2644 information about NODE's arguments. If non-NULL use also probability
2645 information present in INLINE_PARAM_SUMMARY vector.
2646 Additionally detemine hints determined by the context. Finally compute
2647 minimal size needed for the call that is independent on the call context and
2648 can be used for fast estimates. Return the values in RET_SIZE,
2649 RET_MIN_SIZE, RET_TIME and RET_HINTS. */
2650
2651void
2652estimate_node_size_and_time (struct cgraph_node *node,
2653 clause_t possible_truths,
2654 clause_t nonspec_possible_truths,
2655 vec<tree> known_vals,
2656 vec<ipa_polymorphic_call_context> known_contexts,
2657 vec<ipa_agg_jump_function_p> known_aggs,
2658 int *ret_size, int *ret_min_size,
2659 sreal *ret_time,
2660 sreal *ret_nonspecialized_time,
2661 ipa_hints *ret_hints,
2662 vec<inline_param_summary>
2663 inline_param_summary)
2664{
2665 struct ipa_fn_summary *info = ipa_fn_summaries->get (node);
2666 size_time_entry *e;
2667 int size = 0;
2668 sreal time = 0;
2669 int min_size = 0;
2670 ipa_hints hints = 0;
2671 int i;
2672
2673 if (dump_file && (dump_flags & TDF_DETAILS))
2674 {
2675 bool found = false;
2676 fprintf (dump_file, " Estimating body: %s/%i\n"
2677 " Known to be false: ", node->name (),
2678 node->order);
2679
2680 for (i = predicate::not_inlined_condition;
2681 i < (predicate::first_dynamic_condition
2682 + (int) vec_safe_length (info->conds)); i++)
2683 if (!(possible_truths & (1 << i)))
2684 {
2685 if (found)
2686 fprintf (dump_file, ", ");
2687 found = true;
2688 dump_condition (dump_file, info->conds, i);
2689 }
2690 }
2691
2692 estimate_calls_size_and_time (node, &size, &min_size, &time, &hints, possible_truths,
2693 known_vals, known_contexts, known_aggs);
2694 sreal nonspecialized_time = time;
2695
2696 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
2697 {
2698 bool exec = e->exec_predicate.evaluate (nonspec_possible_truths);
2699
2700 /* Because predicates are conservative, it can happen that nonconst is 1
2701 but exec is 0. */
2702 if (exec)
2703 {
2704 bool nonconst = e->nonconst_predicate.evaluate (possible_truths);
2705
2706 gcc_checking_assert (e->time >= 0);
2707 gcc_checking_assert (time >= 0);
2708
2709 /* We compute specialized size only because size of nonspecialized
2710 copy is context independent.
2711
2712 The difference between nonspecialized execution and specialized is
2713 that nonspecialized is not going to have optimized out computations
2714 known to be constant in a specialized setting. */
2715 if (nonconst)
2716 size += e->size;
2717 nonspecialized_time += e->time;
2718 if (!nonconst)
2719 ;
2720 else if (!inline_param_summary.exists ())
2721 {
2722 if (nonconst)
2723 time += e->time;
2724 }
2725 else
2726 {
2727 int prob = e->nonconst_predicate.probability
2728 (info->conds, possible_truths,
2729 inline_param_summary);
2730 gcc_checking_assert (prob >= 0);
2731 gcc_checking_assert (prob <= REG_BR_PROB_BASE);
2732 time += e->time * prob / REG_BR_PROB_BASE;
2733 }
2734 gcc_checking_assert (time >= 0);
2735 }
2736 }
2737 gcc_checking_assert ((*info->size_time_table)[0].exec_predicate == true);
2738 gcc_checking_assert ((*info->size_time_table)[0].nonconst_predicate == true);
2739 min_size = (*info->size_time_table)[0].size;
2740 gcc_checking_assert (size >= 0);
2741 gcc_checking_assert (time >= 0);
2742 /* nonspecialized_time should be always bigger than specialized time.
2743 Roundoff issues however may get into the way. */
2744 gcc_checking_assert ((nonspecialized_time - time * 0.99) >= -1);
2745
2746 /* Roundoff issues may make specialized time bigger than nonspecialized
2747 time. We do not really want that to happen because some heurstics
2748 may get confused by seeing negative speedups. */
2749 if (time > nonspecialized_time)
2750 time = nonspecialized_time;
2751
2752 if (info->loop_iterations
2753 && !info->loop_iterations->evaluate (possible_truths))
2754 hints |= INLINE_HINT_loop_iterations;
2755 if (info->loop_stride
2756 && !info->loop_stride->evaluate (possible_truths))
2757 hints |= INLINE_HINT_loop_stride;
2758 if (info->array_index
2759 && !info->array_index->evaluate (possible_truths))
2760 hints |= INLINE_HINT_array_index;
2761 if (info->scc_no)
2762 hints |= INLINE_HINT_in_scc;
2763 if (DECL_DECLARED_INLINE_P (node->decl))
2764 hints |= INLINE_HINT_declared_inline;
2765
2766 size = RDIV (size, ipa_fn_summary::size_scale);
2767 min_size = RDIV (min_size, ipa_fn_summary::size_scale);
2768
2769 if (dump_file && (dump_flags & TDF_DETAILS))
2770 fprintf (dump_file, "\n size:%i time:%f nonspec time:%f\n", (int) size,
2771 time.to_double (), nonspecialized_time.to_double ());
2772 if (ret_time)
2773 *ret_time = time;
2774 if (ret_nonspecialized_time)
2775 *ret_nonspecialized_time = nonspecialized_time;
2776 if (ret_size)
2777 *ret_size = size;
2778 if (ret_min_size)
2779 *ret_min_size = min_size;
2780 if (ret_hints)
2781 *ret_hints = hints;
2782 return;
2783}
2784
2785
2786/* Estimate size and time needed to execute callee of EDGE assuming that
2787 parameters known to be constant at caller of EDGE are propagated.
2788 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
2789 and types for parameters. */
2790
2791void
2792estimate_ipcp_clone_size_and_time (struct cgraph_node *node,
2793 vec<tree> known_vals,
2794 vec<ipa_polymorphic_call_context>
2795 known_contexts,
2796 vec<ipa_agg_jump_function_p> known_aggs,
2797 int *ret_size, sreal *ret_time,
2798 sreal *ret_nonspec_time,
2799 ipa_hints *hints)
2800{
2801 clause_t clause, nonspec_clause;
2802
2803 evaluate_conditions_for_known_args (node, false, known_vals, known_aggs,
2804 &clause, &nonspec_clause);
2805 estimate_node_size_and_time (node, clause, nonspec_clause,
2806 known_vals, known_contexts,
2807 known_aggs, ret_size, NULL, ret_time,
2808 ret_nonspec_time, hints, vNULL);
2809}
2810
2811
2812/* Update summary information of inline clones after inlining.
2813 Compute peak stack usage. */
2814
2815static void
2816inline_update_callee_summaries (struct cgraph_node *node, int depth)
2817{
2818 struct cgraph_edge *e;
2819 struct ipa_fn_summary *callee_info = ipa_fn_summaries->get (node);
2820 struct ipa_fn_summary *caller_info = ipa_fn_summaries->get (node->callers->caller);
2821 HOST_WIDE_INT peak;
2822
2823 callee_info->stack_frame_offset
2824 = caller_info->stack_frame_offset
2825 + caller_info->estimated_self_stack_size;
2826 peak = callee_info->stack_frame_offset
2827 + callee_info->estimated_self_stack_size;
2828 if (ipa_fn_summaries->get (node->global.inlined_to)->estimated_stack_size < peak)
2829 ipa_fn_summaries->get (node->global.inlined_to)->estimated_stack_size = peak;
2830 ipa_propagate_frequency (node);
2831 for (e = node->callees; e; e = e->next_callee)
2832 {
2833 if (!e->inline_failed)
2834 inline_update_callee_summaries (e->callee, depth);
2835 ipa_call_summaries->get (e)->loop_depth += depth;
2836 }
2837 for (e = node->indirect_calls; e; e = e->next_callee)
2838 ipa_call_summaries->get (e)->loop_depth += depth;
2839}
2840
2841/* Update change_prob of EDGE after INLINED_EDGE has been inlined.
2842 When functoin A is inlined in B and A calls C with parameter that
2843 changes with probability PROB1 and C is known to be passthroug
2844 of argument if B that change with probability PROB2, the probability
2845 of change is now PROB1*PROB2. */
2846
2847static void
2848remap_edge_change_prob (struct cgraph_edge *inlined_edge,
2849 struct cgraph_edge *edge)
2850{
2851 if (ipa_node_params_sum)
2852 {
2853 int i;
2854 struct ipa_edge_args *args = IPA_EDGE_REF (edge);
2855 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
2856 struct ipa_call_summary *inlined_es
2857 = ipa_call_summaries->get (inlined_edge);
2858
2859 for (i = 0; i < ipa_get_cs_argument_count (args); i++)
2860 {
2861 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
2862 if (jfunc->type == IPA_JF_PASS_THROUGH
2863 || jfunc->type == IPA_JF_ANCESTOR)
2864 {
2865 int id = jfunc->type == IPA_JF_PASS_THROUGH
2866 ? ipa_get_jf_pass_through_formal_id (jfunc)
2867 : ipa_get_jf_ancestor_formal_id (jfunc);
2868 if (id < (int) inlined_es->param.length ())
2869 {
2870 int prob1 = es->param[i].change_prob;
2871 int prob2 = inlined_es->param[id].change_prob;
2872 int prob = combine_probabilities (prob1, prob2);
2873
2874 if (prob1 && prob2 && !prob)
2875 prob = 1;
2876
2877 es->param[i].change_prob = prob;
2878 }
2879 }
2880 }
2881 }
2882}
2883
2884/* Update edge summaries of NODE after INLINED_EDGE has been inlined.
2885
2886 Remap predicates of callees of NODE. Rest of arguments match
2887 remap_predicate.
2888
2889 Also update change probabilities. */
2890
2891static void
2892remap_edge_summaries (struct cgraph_edge *inlined_edge,
2893 struct cgraph_node *node,
2894 struct ipa_fn_summary *info,
2895 struct ipa_fn_summary *callee_info,
2896 vec<int> operand_map,
2897 vec<int> offset_map,
2898 clause_t possible_truths,
2899 predicate *toplev_predicate)
2900{
2901 struct cgraph_edge *e, *next;
2902 for (e = node->callees; e; e = next)
2903 {
2904 struct ipa_call_summary *es = ipa_call_summaries->get (e);
2905 predicate p;
2906 next = e->next_callee;
2907
2908 if (e->inline_failed)
2909 {
2910 remap_edge_change_prob (inlined_edge, e);
2911
2912 if (es->predicate)
2913 {
2914 p = es->predicate->remap_after_inlining
2915 (info, callee_info, operand_map,
2916 offset_map, possible_truths,
2917 *toplev_predicate);
2918 edge_set_predicate (e, &p);
2919 }
2920 else
2921 edge_set_predicate (e, toplev_predicate);
2922 }
2923 else
2924 remap_edge_summaries (inlined_edge, e->callee, info, callee_info,
2925 operand_map, offset_map, possible_truths,
2926 toplev_predicate);
2927 }
2928 for (e = node->indirect_calls; e; e = next)
2929 {
2930 struct ipa_call_summary *es = ipa_call_summaries->get (e);
2931 predicate p;
2932 next = e->next_callee;
2933
2934 remap_edge_change_prob (inlined_edge, e);
2935 if (es->predicate)
2936 {
2937 p = es->predicate->remap_after_inlining
2938 (info, callee_info, operand_map, offset_map,
2939 possible_truths, *toplev_predicate);
2940 edge_set_predicate (e, &p);
2941 }
2942 else
2943 edge_set_predicate (e, toplev_predicate);
2944 }
2945}
2946
2947/* Same as remap_predicate, but set result into hint *HINT. */
2948
2949static void
2950remap_hint_predicate (struct ipa_fn_summary *info,
2951 struct ipa_fn_summary *callee_info,
2952 predicate **hint,
2953 vec<int> operand_map,
2954 vec<int> offset_map,
2955 clause_t possible_truths,
2956 predicate *toplev_predicate)
2957{
2958 predicate p;
2959
2960 if (!*hint)
2961 return;
2962 p = (*hint)->remap_after_inlining
2963 (info, callee_info,
2964 operand_map, offset_map,
2965 possible_truths, *toplev_predicate);
2966 if (p != false && p != true)
2967 {
2968 if (!*hint)
2969 set_hint_predicate (hint, p);
2970 else
2971 **hint &= p;
2972 }
2973}
2974
2975/* We inlined EDGE. Update summary of the function we inlined into. */
2976
2977void
2978ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge)
2979{
2980 struct ipa_fn_summary *callee_info = ipa_fn_summaries->get (edge->callee);
2981 struct cgraph_node *to = (edge->caller->global.inlined_to
2982 ? edge->caller->global.inlined_to : edge->caller);
2983 struct ipa_fn_summary *info = ipa_fn_summaries->get (to);
2984 clause_t clause = 0; /* not_inline is known to be false. */
2985 size_time_entry *e;
2986 vec<int> operand_map = vNULL;
2987 vec<int> offset_map = vNULL;
2988 int i;
2989 predicate toplev_predicate;
2990 predicate true_p = true;
2991 struct ipa_call_summary *es = ipa_call_summaries->get (edge);
2992
2993 if (es->predicate)
2994 toplev_predicate = *es->predicate;
2995 else
2996 toplev_predicate = true;
2997
2998 info->fp_expressions |= callee_info->fp_expressions;
2999
3000 if (callee_info->conds)
3001 evaluate_properties_for_edge (edge, true, &clause, NULL, NULL, NULL, NULL);
3002 if (ipa_node_params_sum && callee_info->conds)
3003 {
3004 struct ipa_edge_args *args = IPA_EDGE_REF (edge);
3005 int count = ipa_get_cs_argument_count (args);
3006 int i;
3007
3008 if (count)
3009 {
3010 operand_map.safe_grow_cleared (count);
3011 offset_map.safe_grow_cleared (count);
3012 }
3013 for (i = 0; i < count; i++)
3014 {
3015 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
3016 int map = -1;
3017
3018 /* TODO: handle non-NOPs when merging. */
3019 if (jfunc->type == IPA_JF_PASS_THROUGH)
3020 {
3021 if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
3022 map = ipa_get_jf_pass_through_formal_id (jfunc);
3023 if (!ipa_get_jf_pass_through_agg_preserved (jfunc))
3024 offset_map[i] = -1;
3025 }
3026 else if (jfunc->type == IPA_JF_ANCESTOR)
3027 {
3028 HOST_WIDE_INT offset = ipa_get_jf_ancestor_offset (jfunc);
3029 if (offset >= 0 && offset < INT_MAX)
3030 {
3031 map = ipa_get_jf_ancestor_formal_id (jfunc);
3032 if (!ipa_get_jf_ancestor_agg_preserved (jfunc))
3033 offset = -1;
3034 offset_map[i] = offset;
3035 }
3036 }
3037 operand_map[i] = map;
3038 gcc_assert (map < ipa_get_param_count (IPA_NODE_REF (to)));
3039 }
3040 }
3041 for (i = 0; vec_safe_iterate (callee_info->size_time_table, i, &e); i++)
3042 {
3043 predicate p;
3044 p = e->exec_predicate.remap_after_inlining
3045 (info, callee_info, operand_map,
3046 offset_map, clause,
3047 toplev_predicate);
3048 predicate nonconstp;
3049 nonconstp = e->nonconst_predicate.remap_after_inlining
3050 (info, callee_info, operand_map,
3051 offset_map, clause,
3052 toplev_predicate);
3053 if (p != false && nonconstp != false)
3054 {
3055 sreal add_time = ((sreal)e->time * edge->sreal_frequency ());
3056 int prob = e->nonconst_predicate.probability (callee_info->conds,
3057 clause, es->param);
3058 add_time = add_time * prob / REG_BR_PROB_BASE;
3059 if (prob != REG_BR_PROB_BASE
3060 && dump_file && (dump_flags & TDF_DETAILS))
3061 {
3062 fprintf (dump_file, "\t\tScaling time by probability:%f\n",
3063 (double) prob / REG_BR_PROB_BASE);
3064 }
3065 info->account_size_time (e->size, add_time, p, nonconstp);
3066 }
3067 }
3068 remap_edge_summaries (edge, edge->callee, info, callee_info, operand_map,
3069 offset_map, clause, &toplev_predicate);
3070 remap_hint_predicate (info, callee_info,
3071 &callee_info->loop_iterations,
3072 operand_map, offset_map, clause, &toplev_predicate);
3073 remap_hint_predicate (info, callee_info,
3074 &callee_info->loop_stride,
3075 operand_map, offset_map, clause, &toplev_predicate);
3076 remap_hint_predicate (info, callee_info,
3077 &callee_info->array_index,
3078 operand_map, offset_map, clause, &toplev_predicate);
3079
3080 inline_update_callee_summaries (edge->callee,
3081 ipa_call_summaries->get (edge)->loop_depth);
3082
3083 /* We do not maintain predicates of inlined edges, free it. */
3084 edge_set_predicate (edge, &true_p);
3085 /* Similarly remove param summaries. */
3086 es->param.release ();
3087 operand_map.release ();
3088 offset_map.release ();
3089}
3090
3091/* For performance reasons ipa_merge_fn_summary_after_inlining is not updating overall size
3092 and time. Recompute it. */
3093
3094void
3095ipa_update_overall_fn_summary (struct cgraph_node *node)
3096{
3097 struct ipa_fn_summary *info = ipa_fn_summaries->get (node);
3098 size_time_entry *e;
3099 int i;
3100
3101 info->size = 0;
3102 info->time = 0;
3103 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
3104 {
3105 info->size += e->size;
3106 info->time += e->time;
3107 }
3108 estimate_calls_size_and_time (node, &info->size, &info->min_size,
3109 &info->time, NULL,
3110 ~(clause_t) (1 << predicate::false_condition),
3111 vNULL, vNULL, vNULL);
3112 info->size = (info->size + ipa_fn_summary::size_scale / 2) / ipa_fn_summary::size_scale;
3113}
3114
3115
3116/* This function performs intraprocedural analysis in NODE that is required to
3117 inline indirect calls. */
3118
3119static void
3120inline_indirect_intraprocedural_analysis (struct cgraph_node *node)
3121{
3122 ipa_analyze_node (node);
3123 if (dump_file && (dump_flags & TDF_DETAILS))
3124 {
3125 ipa_print_node_params (dump_file, node);
3126 ipa_print_node_jump_functions (dump_file, node);
3127 }
3128}
3129
3130
3131/* Note function body size. */
3132
3133void
3134inline_analyze_function (struct cgraph_node *node)
3135{
3136 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
3137
3138 if (dump_file)
3139 fprintf (dump_file, "\nAnalyzing function: %s/%u\n",
3140 node->name (), node->order);
3141 if (opt_for_fn (node->decl, optimize) && !node->thunk.thunk_p)
3142 inline_indirect_intraprocedural_analysis (node);
3143 compute_fn_summary (node, false);
3144 if (!optimize)
3145 {
3146 struct cgraph_edge *e;
3147 for (e = node->callees; e; e = e->next_callee)
3148 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
3149 for (e = node->indirect_calls; e; e = e->next_callee)
3150 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
3151 }
3152
3153 pop_cfun ();
3154}
3155
3156
3157/* Called when new function is inserted to callgraph late. */
3158
3159void
3160ipa_fn_summary_t::insert (struct cgraph_node *node, ipa_fn_summary *)
3161{
3162 inline_analyze_function (node);
3163}
3164
3165/* Note function body size. */
3166
3167static void
3168ipa_fn_summary_generate (void)
3169{
3170 struct cgraph_node *node;
3171
3172 FOR_EACH_DEFINED_FUNCTION (node)
3173 if (DECL_STRUCT_FUNCTION (node->decl))
3174 node->local.versionable = tree_versionable_function_p (node->decl);
3175
3176 ipa_fn_summary_alloc ();
3177
3178 ipa_fn_summaries->enable_insertion_hook ();
3179
3180 ipa_register_cgraph_hooks ();
3181
3182 FOR_EACH_DEFINED_FUNCTION (node)
3183 if (!node->alias
3184 && (flag_generate_lto || flag_generate_offload|| flag_wpa
3185 || opt_for_fn (node->decl, optimize)))
3186 inline_analyze_function (node);
3187}
3188
3189
3190/* Write inline summary for edge E to OB. */
3191
3192static void
3193read_ipa_call_summary (struct lto_input_block *ib, struct cgraph_edge *e)
3194{
3195 struct ipa_call_summary *es = ipa_call_summaries->get (e);
3196 predicate p;
3197 int length, i;
3198
3199 es->call_stmt_size = streamer_read_uhwi (ib);
3200 es->call_stmt_time = streamer_read_uhwi (ib);
3201 es->loop_depth = streamer_read_uhwi (ib);
3202
3203 bitpack_d bp = streamer_read_bitpack (ib);
3204 es->is_return_callee_uncaptured = bp_unpack_value (&bp, 1);
3205
3206 p.stream_in (ib);
3207 edge_set_predicate (e, &p);
3208 length = streamer_read_uhwi (ib);
3209 if (length)
3210 {
3211 es->param.safe_grow_cleared (length);
3212 for (i = 0; i < length; i++)
3213 es->param[i].change_prob = streamer_read_uhwi (ib);
3214 }
3215}
3216
3217
3218/* Stream in inline summaries from the section. */
3219
3220static void
3221inline_read_section (struct lto_file_decl_data *file_data, const char *data,
3222 size_t len)
3223{
3224 const struct lto_function_header *header =
3225 (const struct lto_function_header *) data;
3226 const int cfg_offset = sizeof (struct lto_function_header);
3227 const int main_offset = cfg_offset + header->cfg_size;
3228 const int string_offset = main_offset + header->main_size;
3229 struct data_in *data_in;
3230 unsigned int i, count2, j;
3231 unsigned int f_count;
3232
3233 lto_input_block ib ((const char *) data + main_offset, header->main_size,
3234 file_data->mode_table);
3235
3236 data_in =
3237 lto_data_in_create (file_data, (const char *) data + string_offset,
3238 header->string_size, vNULL);
3239 f_count = streamer_read_uhwi (&ib);
3240 for (i = 0; i < f_count; i++)
3241 {
3242 unsigned int index;
3243 struct cgraph_node *node;
3244 struct ipa_fn_summary *info;
3245 lto_symtab_encoder_t encoder;
3246 struct bitpack_d bp;
3247 struct cgraph_edge *e;
3248 predicate p;
3249
3250 index = streamer_read_uhwi (&ib);
3251 encoder = file_data->symtab_node_encoder;
3252 node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
3253 index));
3254 info = ipa_fn_summaries->get (node);
3255
3256 info->estimated_stack_size
3257 = info->estimated_self_stack_size = streamer_read_uhwi (&ib);
3258 info->size = info->self_size = streamer_read_uhwi (&ib);
3259 info->time = sreal::stream_in (&ib);
3260
3261 bp = streamer_read_bitpack (&ib);
3262 info->inlinable = bp_unpack_value (&bp, 1);
3263 info->fp_expressions = bp_unpack_value (&bp, 1);
3264
3265 count2 = streamer_read_uhwi (&ib);
3266 gcc_assert (!info->conds);
3267 for (j = 0; j < count2; j++)
3268 {
3269 struct condition c;
3270 c.operand_num = streamer_read_uhwi (&ib);
3271 c.size = streamer_read_uhwi (&ib);
3272 c.code = (enum tree_code) streamer_read_uhwi (&ib);
3273 c.val = stream_read_tree (&ib, data_in);
3274 bp = streamer_read_bitpack (&ib);
3275 c.agg_contents = bp_unpack_value (&bp, 1);
3276 c.by_ref = bp_unpack_value (&bp, 1);
3277 if (c.agg_contents)
3278 c.offset = streamer_read_uhwi (&ib);
3279 vec_safe_push (info->conds, c);
3280 }
3281 count2 = streamer_read_uhwi (&ib);
3282 gcc_assert (!info->size_time_table);
3283 for (j = 0; j < count2; j++)
3284 {
3285 struct size_time_entry e;
3286
3287 e.size = streamer_read_uhwi (&ib);
3288 e.time = sreal::stream_in (&ib);
3289 e.exec_predicate.stream_in (&ib);
3290 e.nonconst_predicate.stream_in (&ib);
3291
3292 vec_safe_push (info->size_time_table, e);
3293 }
3294
3295 p.stream_in (&ib);
3296 set_hint_predicate (&info->loop_iterations, p);
3297 p.stream_in (&ib);
3298 set_hint_predicate (&info->loop_stride, p);
3299 p.stream_in (&ib);
3300 set_hint_predicate (&info->array_index, p);
3301 for (e = node->callees; e; e = e->next_callee)
3302 read_ipa_call_summary (&ib, e);
3303 for (e = node->indirect_calls; e; e = e->next_callee)
3304 read_ipa_call_summary (&ib, e);
3305 }
3306
3307 lto_free_section_data (file_data, LTO_section_ipa_fn_summary, NULL, data,
3308 len);
3309 lto_data_in_delete (data_in);
3310}
3311
3312
3313/* Read inline summary. Jump functions are shared among ipa-cp
3314 and inliner, so when ipa-cp is active, we don't need to write them
3315 twice. */
3316
3317static void
3318ipa_fn_summary_read (void)
3319{
3320 struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
3321 struct lto_file_decl_data *file_data;
3322 unsigned int j = 0;
3323
3324 ipa_fn_summary_alloc ();
3325
3326 while ((file_data = file_data_vec[j++]))
3327 {
3328 size_t len;
3329 const char *data = lto_get_section_data (file_data,
3330 LTO_section_ipa_fn_summary,
3331 NULL, &len);
3332 if (data)
3333 inline_read_section (file_data, data, len);
3334 else
3335 /* Fatal error here. We do not want to support compiling ltrans units
3336 with different version of compiler or different flags than the WPA
3337 unit, so this should never happen. */
3338 fatal_error (input_location,
3339 "ipa inline summary is missing in input file");
3340 }
3341 ipa_register_cgraph_hooks ();
3342 if (!flag_ipa_cp)
3343 ipa_prop_read_jump_functions ();
3344
3345 gcc_assert (ipa_fn_summaries);
3346 ipa_fn_summaries->enable_insertion_hook ();
3347}
3348
3349
3350/* Write inline summary for edge E to OB. */
3351
3352static void
3353write_ipa_call_summary (struct output_block *ob, struct cgraph_edge *e)
3354{
3355 struct ipa_call_summary *es = ipa_call_summaries->get (e);
3356 int i;
3357
3358 streamer_write_uhwi (ob, es->call_stmt_size);
3359 streamer_write_uhwi (ob, es->call_stmt_time);
3360 streamer_write_uhwi (ob, es->loop_depth);
3361
3362 bitpack_d bp = bitpack_create (ob->main_stream);
3363 bp_pack_value (&bp, es->is_return_callee_uncaptured, 1);
3364 streamer_write_bitpack (&bp);
3365
3366 if (es->predicate)
3367 es->predicate->stream_out (ob);
3368 else
3369 streamer_write_uhwi (ob, 0);
3370 streamer_write_uhwi (ob, es->param.length ());
3371 for (i = 0; i < (int) es->param.length (); i++)
3372 streamer_write_uhwi (ob, es->param[i].change_prob);
3373}
3374
3375
3376/* Write inline summary for node in SET.
3377 Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
3378 active, we don't need to write them twice. */
3379
3380static void
3381ipa_fn_summary_write (void)
3382{
3383 struct output_block *ob = create_output_block (LTO_section_ipa_fn_summary);
3384 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
3385 unsigned int count = 0;
3386 int i;
3387
3388 for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
3389 {
3390 symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
3391 cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
3392 if (cnode && cnode->definition && !cnode->alias)
3393 count++;
3394 }
3395 streamer_write_uhwi (ob, count);
3396
3397 for (i = 0; i < lto_symtab_encoder_size (encoder); i++)
3398 {
3399 symtab_node *snode = lto_symtab_encoder_deref (encoder, i);
3400 cgraph_node *cnode = dyn_cast <cgraph_node *> (snode);
3401 if (cnode && cnode->definition && !cnode->alias)
3402 {
3403 struct ipa_fn_summary *info = ipa_fn_summaries->get (cnode);
3404 struct bitpack_d bp;
3405 struct cgraph_edge *edge;
3406 int i;
3407 size_time_entry *e;
3408 struct condition *c;
3409
3410 streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode));
3411 streamer_write_hwi (ob, info->estimated_self_stack_size);
3412 streamer_write_hwi (ob, info->self_size);
3413 info->time.stream_out (ob);
3414 bp = bitpack_create (ob->main_stream);
3415 bp_pack_value (&bp, info->inlinable, 1);
3416 bp_pack_value (&bp, false, 1);
3417 bp_pack_value (&bp, info->fp_expressions, 1);
3418 streamer_write_bitpack (&bp);
3419 streamer_write_uhwi (ob, vec_safe_length (info->conds));
3420 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
3421 {
3422 streamer_write_uhwi (ob, c->operand_num);
3423 streamer_write_uhwi (ob, c->size);
3424 streamer_write_uhwi (ob, c->code);
3425 stream_write_tree (ob, c->val, true);
3426 bp = bitpack_create (ob->main_stream);
3427 bp_pack_value (&bp, c->agg_contents, 1);
3428 bp_pack_value (&bp, c->by_ref, 1);
3429 streamer_write_bitpack (&bp);
3430 if (c->agg_contents)
3431 streamer_write_uhwi (ob, c->offset);
3432 }
3433 streamer_write_uhwi (ob, vec_safe_length (info->size_time_table));
3434 for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++)
3435 {
3436 streamer_write_uhwi (ob, e->size);
3437 e->time.stream_out (ob);
3438 e->exec_predicate.stream_out (ob);
3439 e->nonconst_predicate.stream_out (ob);
3440 }
3441 if (info->loop_iterations)
3442 info->loop_iterations->stream_out (ob);
3443 else
3444 streamer_write_uhwi (ob, 0);
3445 if (info->loop_stride)
3446 info->loop_stride->stream_out (ob);
3447 else
3448 streamer_write_uhwi (ob, 0);
3449 if (info->array_index)
3450 info->array_index->stream_out (ob);
3451 else
3452 streamer_write_uhwi (ob, 0);
3453 for (edge = cnode->callees; edge; edge = edge->next_callee)
3454 write_ipa_call_summary (ob, edge);
3455 for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
3456 write_ipa_call_summary (ob, edge);
3457 }
3458 }
3459 streamer_write_char_stream (ob->main_stream, 0);
3460 produce_asm (ob, NULL);
3461 destroy_output_block (ob);
3462
3463 if (!flag_ipa_cp)
3464 ipa_prop_write_jump_functions ();
3465}
3466
3467
3468/* Release inline summary. */
3469
3470void
3471ipa_free_fn_summary (void)
3472{
3473 struct cgraph_node *node;
3474 if (!ipa_call_summaries)
3475 return;
3476 FOR_EACH_DEFINED_FUNCTION (node)
3477 if (!node->alias)
3478 ipa_fn_summaries->get (node)->reset (node);
3479 ipa_fn_summaries->release ();
3480 ipa_fn_summaries = NULL;
3481 ipa_call_summaries->release ();
3482 delete ipa_call_summaries;
3483 ipa_call_summaries = NULL;
3484 edge_predicate_pool.release ();
3485}
3486
3487namespace {
3488
3489const pass_data pass_data_local_fn_summary =
3490{
3491 GIMPLE_PASS, /* type */
3492 "local-fnsummary", /* name */
3493 OPTGROUP_INLINE, /* optinfo_flags */
3494 TV_INLINE_PARAMETERS, /* tv_id */
3495 0, /* properties_required */
3496 0, /* properties_provided */
3497 0, /* properties_destroyed */
3498 0, /* todo_flags_start */
3499 0, /* todo_flags_finish */
3500};
3501
3502class pass_local_fn_summary : public gimple_opt_pass
3503{
3504public:
3505 pass_local_fn_summary (gcc::context *ctxt)
3506 : gimple_opt_pass (pass_data_local_fn_summary, ctxt)
3507 {}
3508
3509 /* opt_pass methods: */
3510 opt_pass * clone () { return new pass_local_fn_summary (m_ctxt); }
3511 virtual unsigned int execute (function *)
3512 {
3513 return compute_fn_summary_for_current ();
3514 }
3515
3516}; // class pass_local_fn_summary
3517
3518} // anon namespace
3519
3520gimple_opt_pass *
3521make_pass_local_fn_summary (gcc::context *ctxt)
3522{
3523 return new pass_local_fn_summary (ctxt);
3524}
3525
3526
3527/* Free inline summary. */
3528
3529namespace {
3530
3531const pass_data pass_data_ipa_free_fn_summary =
3532{
3533 SIMPLE_IPA_PASS, /* type */
3534 "free-fnsummary", /* name */
3535 OPTGROUP_NONE, /* optinfo_flags */
3536 TV_IPA_FREE_INLINE_SUMMARY, /* tv_id */
3537 0, /* properties_required */
3538 0, /* properties_provided */
3539 0, /* properties_destroyed */
3540 0, /* todo_flags_start */
3541 /* Early optimizations may make function unreachable. We can not
3542 remove unreachable functions as part of the ealry opts pass because
3543 TODOs are run before subpasses. Do it here. */
3544 ( TODO_remove_functions | TODO_dump_symtab ), /* todo_flags_finish */
3545};
3546
3547class pass_ipa_free_fn_summary : public simple_ipa_opt_pass
3548{
3549public:
3550 pass_ipa_free_fn_summary (gcc::context *ctxt)
3551 : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary, ctxt)
3552 {}
3553
3554 /* opt_pass methods: */
3555 virtual unsigned int execute (function *)
3556 {
3557 ipa_free_fn_summary ();
3558 return 0;
3559 }
3560
3561}; // class pass_ipa_free_fn_summary
3562
3563} // anon namespace
3564
3565simple_ipa_opt_pass *
3566make_pass_ipa_free_fn_summary (gcc::context *ctxt)
3567{
3568 return new pass_ipa_free_fn_summary (ctxt);
3569}
3570
3571namespace {
3572
3573const pass_data pass_data_ipa_fn_summary =
3574{
3575 IPA_PASS, /* type */
3576 "fnsummary", /* name */
3577 OPTGROUP_INLINE, /* optinfo_flags */
3578 TV_IPA_FNSUMMARY, /* tv_id */
3579 0, /* properties_required */
3580 0, /* properties_provided */
3581 0, /* properties_destroyed */
3582 0, /* todo_flags_start */
3583 ( TODO_dump_symtab ), /* todo_flags_finish */
3584};
3585
3586class pass_ipa_fn_summary : public ipa_opt_pass_d
3587{
3588public:
3589 pass_ipa_fn_summary (gcc::context *ctxt)
3590 : ipa_opt_pass_d (pass_data_ipa_fn_summary, ctxt,
3591 ipa_fn_summary_generate, /* generate_summary */
3592 ipa_fn_summary_write, /* write_summary */
3593 ipa_fn_summary_read, /* read_summary */
3594 NULL, /* write_optimization_summary */
3595 NULL, /* read_optimization_summary */
3596 NULL, /* stmt_fixup */
3597 0, /* function_transform_todo_flags_start */
3598 NULL, /* function_transform */
3599 NULL) /* variable_transform */
3600 {}
3601
3602 /* opt_pass methods: */
3603 virtual unsigned int execute (function *) { return 0; }
3604
3605}; // class pass_ipa_fn_summary
3606
3607} // anon namespace
3608
3609ipa_opt_pass_d *
3610make_pass_ipa_fn_summary (gcc::context *ctxt)
3611{
3612 return new pass_ipa_fn_summary (ctxt);
3613}
3614
3615/* Reset all state within ipa-fnsummary.c so that we can rerun the compiler
3616 within the same process. For use by toplev::finalize. */
3617
3618void
3619ipa_fnsummary_c_finalize (void)
3620{
3621 ipa_free_fn_summary ();
3622}
3623