1 | /* Calculate branch probabilities, and basic block execution counts. |
2 | Copyright (C) 1990-2024 Free Software Foundation, Inc. |
3 | Contributed by James E. Wilson, UC Berkeley/Cygnus Support; |
4 | based on some ideas from Dain Samples of UC Berkeley. |
5 | Further mangling by Bob Manson, Cygnus Support. |
6 | Converted to use trees by Dale Johannesen, Apple Computer. |
7 | |
8 | This file is part of GCC. |
9 | |
10 | GCC is free software; you can redistribute it and/or modify it under |
11 | the terms of the GNU General Public License as published by the Free |
12 | Software Foundation; either version 3, or (at your option) any later |
13 | version. |
14 | |
15 | GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
16 | WARRANTY; without even the implied warranty of MERCHANTABILITY or |
17 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
18 | for more details. |
19 | |
20 | You should have received a copy of the GNU General Public License |
21 | along with GCC; see the file COPYING3. If not see |
22 | <http://www.gnu.org/licenses/>. */ |
23 | |
24 | /* Generate basic block profile instrumentation and auxiliary files. |
25 | Tree-based version. See profile.cc for overview. */ |
26 | |
27 | #include "config.h" |
28 | #include "system.h" |
29 | #include "coretypes.h" |
30 | #include "memmodel.h" |
31 | #include "backend.h" |
32 | #include "target.h" |
33 | #include "tree.h" |
34 | #include "gimple.h" |
35 | #include "cfghooks.h" |
36 | #include "tree-pass.h" |
37 | #include "ssa.h" |
38 | #include "cgraph.h" |
39 | #include "coverage.h" |
40 | #include "diagnostic-core.h" |
41 | #include "fold-const.h" |
42 | #include "varasm.h" |
43 | #include "tree-nested.h" |
44 | #include "gimplify.h" |
45 | #include "gimple-iterator.h" |
46 | #include "gimplify-me.h" |
47 | #include "tree-cfg.h" |
48 | #include "tree-into-ssa.h" |
49 | #include "value-prof.h" |
50 | #include "profile.h" |
51 | #include "tree-cfgcleanup.h" |
52 | #include "stringpool.h" |
53 | #include "attribs.h" |
54 | #include "tree-pretty-print.h" |
55 | #include "langhooks.h" |
56 | #include "stor-layout.h" |
57 | #include "xregex.h" |
58 | #include "alloc-pool.h" |
59 | #include "symbol-summary.h" |
60 | #include "symtab-thunks.h" |
61 | #include "cfganal.h" |
62 | |
63 | static GTY(()) tree gcov_type_node; |
64 | static GTY(()) tree tree_interval_profiler_fn; |
65 | static GTY(()) tree tree_pow2_profiler_fn; |
66 | static GTY(()) tree tree_topn_values_profiler_fn; |
67 | static GTY(()) tree tree_indirect_call_profiler_fn; |
68 | static GTY(()) tree tree_average_profiler_fn; |
69 | static GTY(()) tree tree_ior_profiler_fn; |
70 | static GTY(()) tree tree_time_profiler_counter; |
71 | |
72 | |
73 | static GTY(()) tree ic_tuple_var; |
74 | static GTY(()) tree ic_tuple_counters_field; |
75 | static GTY(()) tree ic_tuple_callee_field; |
76 | |
77 | /* Types of counter update methods. |
78 | |
79 | By default, the counter updates are done for a single threaded system |
80 | (COUNTER_UPDATE_SINGLE_THREAD). |
81 | |
82 | If the user selected atomic profile counter updates |
83 | (-fprofile-update=atomic), then the counter updates will be done atomically |
84 | on a best-effort basis. One of three methods to do the counter updates is |
85 | selected according to the target capabilities. |
86 | |
87 | Ideally, the counter updates are done through atomic operations in hardware |
88 | (COUNTER_UPDATE_ATOMIC_BUILTIN). |
89 | |
90 | If the target supports only 32-bit atomic increments and gcov_type_node is a |
91 | 64-bit integer type, then for the profile edge counters the increment is |
92 | performed through two separate 32-bit atomic increments |
93 | (COUNTER_UPDATE_ATOMIC_SPLIT or COUNTER_UPDATE_ATOMIC_PARTIAL). If the |
94 | target supports libatomic (targetm.have_libatomic), then other counter |
95 | updates are carried out by libatomic calls (COUNTER_UPDATE_ATOMIC_SPLIT). |
96 | If the target does not support libatomic, then the other counter updates are |
97 | not done atomically (COUNTER_UPDATE_ATOMIC_PARTIAL) and a warning is |
98 | issued. |
99 | |
If the target does not support atomic operations in hardware but does
support libatomic, then all counter updates are carried out by libatomic
calls (COUNTER_UPDATE_ATOMIC_BUILTIN). */
103 | enum counter_update_method { |
104 | COUNTER_UPDATE_SINGLE_THREAD, |
105 | COUNTER_UPDATE_ATOMIC_BUILTIN, |
106 | COUNTER_UPDATE_ATOMIC_SPLIT, |
107 | COUNTER_UPDATE_ATOMIC_PARTIAL |
108 | }; |
109 | |
110 | static counter_update_method counter_update = COUNTER_UPDATE_SINGLE_THREAD; |
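
/* As an illustrative sketch (not the emitted GIMPLE), the
   COUNTER_UPDATE_ATOMIC_SPLIT increment of a 64-bit counter on a target with
   only 32-bit atomic increments corresponds roughly to this C, assuming a
   little-endian word order (gen_counter_update below handles
   WORDS_BIG_ENDIAN):

     uint32_t *low = (uint32_t *) &counter;
     uint32_t *high = low + 1;
     uint32_t v = __atomic_add_fetch (low, 1, __ATOMIC_RELAXED);
     __atomic_add_fetch (high, v == 0 ? 1 : 0, __ATOMIC_RELAXED);  */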
111 | |
/* These functions support measuring modified condition/decision coverage
113 | (MC/DC). MC/DC requires all of the below during testing: |
114 | |
115 | - Each entry and exit point is invoked |
116 | - Each decision takes every possible outcome |
117 | - Each condition in a decision takes every possible outcome |
118 | - Each condition in a decision is shown to independently affect the outcome |
119 | of the decision |
120 | |
121 | Independence of a condition is shown by recording it being evaluated to a |
122 | value (true/false) and not being made irrelevant ("masked") by a later term. |
123 | This feature adds some instrumentation code, a few bitwise operators, that |
124 | records the branches taken in conditions and applies a filter for the |
125 | masking effect. Masking is essentially short-circuiting in reverse: a |
126 | condition does not contribute to the outcome if it would short circuit the |
127 | (sub) expression if it was evaluated right-to-left, (_ && false) and (_ || |
128 | true). |
129 | |
130 | The program is essentially rewritten this way: |
131 | |
132 | - if (a || b) { fn () } |
133 | + if (a) { _t |= 0x1; goto _then; } |
134 | + else { _f |= 0x1; |
135 | + if (b) { _t |= 0x2; _mask |= 0x1; goto _then; } |
136 | + else { _f |= 0x2; goto _else; } |
137 | + _then: |
+ _gcov_t |= (_t & ~_mask);
+ _gcov_f |= (_f & ~_mask);
+ fn (); goto _end;
+ _else:
+ _gcov_t |= (_t & ~_mask);
+ _gcov_f |= (_f & ~_mask);
145 | + _end: |
146 | |
It is assumed the front end will provide discriminators so that conditional
148 | basic blocks (basic block with a conditional jump and outgoing true/false |
149 | edges) that belong to the same Boolean expression have the same |
150 | discriminator. Masking is determined by analyzing these expressions as a |
151 | reduced order binary decision diagram. */ |
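
/* A worked example of the rewrite above, with bit 0 for a and bit 1 for b
   (an illustration; the real instrumentation works on the CFG):

     run a=1:      _t = 0x1, _f = 0x0, _mask = 0x0;
                   flushes _gcov_t |= 0x1 (a seen true).
     run a=0, b=1: _f = 0x1, then _t = 0x2 and _mask |= 0x1;
                   flushes _gcov_t |= 0x2 and _gcov_f |= (0x1 & ~0x1) = 0,
                   i.e. the a=false observation is masked because with b
                   true the value of a could not affect the outcome
                   ((_ || true)).
     run a=0, b=0: _t = 0x0, _f = 0x3, _mask = 0x0;
                   flushes _gcov_f |= 0x3 (both seen false).  */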
152 | namespace |
153 | { |
154 | /* Some context and reused instances between function calls. Large embedded |
155 | buffers are used to up-front request enough memory for most programs and |
156 | merge them into a single allocation at the cost of using more memory in the |
157 | average case. Some numbers from linux v5.13 which is assumed to be a |
158 | reasonably diverse code base: 75% of the functions in linux have less than |
159 | 16 nodes in the CFG and approx 2.5% have more than 64 nodes. The functions |
160 | that go beyond a few dozen nodes tend to be very large (>100) and so 64 |
161 | seems like a good balance. |
162 | |
163 | This is really just a performance balance of the cost of allocation and |
164 | wasted memory. */ |
165 | struct conds_ctx |
166 | { |
/* This is both a reusable shared allocation and the buffer used to return
single expressions, which means that for most code it should only hold a
couple of elements. */
170 | auto_vec<basic_block, 64> blocks; |
171 | |
172 | /* Index for the topological order indexed by basic_block->index to an |
173 | ordering so that expression (a || b && c) => top_index[a] < top_index[b] |
174 | < top_index[c]. */ |
175 | auto_vec<int, 256> top_index; |
176 | |
177 | /* Pre-allocate bitmaps and vectors for per-function book keeping. This is |
178 | pure instance reuse and the bitmaps carry no data between function |
179 | calls. */ |
180 | auto_vec<basic_block, 64> B1; |
181 | auto_vec<basic_block, 64> B2; |
182 | auto_sbitmap G1; |
183 | auto_sbitmap G2; |
184 | auto_sbitmap G3; |
185 | |
186 | explicit conds_ctx (unsigned size) noexcept (true) : G1 (size), G2 (size), |
187 | G3 (size) |
188 | { |
189 | } |
190 | }; |
191 | |
/* Only instrument expressions with fewer terms than the number of bits in a
(wide) gcov integer, which is probably 64. The algorithm itself does not
impose this limitation, but it makes for a simpler implementation.

* Allocating the output data structure (coverage_counter_alloc ()) can
assume pairs of gcov_type_unsigned and not use a separate length field.
* A pair of gcov_type_unsigned can be used as accumulators.
* Updating accumulators can use the bitwise operations |=, &= rather than
custom operators that work on arbitrary-sized bit-sets.
201 | |
202 | Most real-world code should be unaffected by this, but it is possible |
203 | (especially for generated code) to exceed this limit. */ |
204 | #define CONDITIONS_MAX_TERMS (TYPE_PRECISION (gcov_type_node)) |
205 | #define EDGE_CONDITION (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE) |
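
/* For example, with a 64-bit gcov type an expression with more than 64 terms
   is not instrumented; find_conditions below warns
   (-Wcoverage-too-many-conditions) and gives up coverage for that
   expression. */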
206 | |
207 | /* Compare two basic blocks by their order in the expression i.e. for (a || b) |
208 | then topological_cmp (a, b, ...) < 0. The result is undefined if LHS, RHS |
209 | belong to different expressions. The TOP_INDEX argument should be the |
210 | top_index vector from ctx. */ |
211 | int |
212 | topological_cmp (const void *lhs, const void *rhs, void *top_index) |
213 | { |
214 | const_basic_block l = *(const basic_block*) lhs; |
215 | const_basic_block r = *(const basic_block*) rhs; |
216 | const vec<int>* im = (const vec<int>*) top_index; |
217 | return (*im)[l->index] - (*im)[r->index]; |
218 | } |
219 | |
220 | /* Find the index of NEEDLE in BLOCKS; return -1 if not found. This has two |
221 | uses, sometimes for the index and sometimes for set member checks. Sets are |
222 | typically very small (number of conditions, >8 is uncommon) so linear search |
223 | should be very fast. */ |
224 | int |
225 | index_of (const basic_block needle, array_slice<basic_block> blocks) |
226 | { |
227 | for (size_t i = 0; i < blocks.size (); i++) |
228 | if (blocks[i] == needle) |
229 | return int (i); |
230 | return -1; |
231 | } |
232 | |
233 | /* Special cases of the single_*_p and single_*_edge functions in basic-block.h |
234 | that don't consider exception handling or other complex edges. This helps |
235 | create a view of the CFG with only normal edges - if a basic block has both |
an outgoing fallthrough and an exceptional edge, it should be considered a
single-successor block. */
238 | bool |
239 | single_p (const vec<edge, va_gc> *edges) |
240 | { |
241 | int n = EDGE_COUNT (edges); |
242 | if (n == 0) |
243 | return false; |
244 | |
245 | for (edge e : edges) |
246 | if (e->flags & EDGE_COMPLEX) |
247 | n -= 1; |
248 | |
249 | return n == 1; |
250 | } |
251 | |
/* Get the single, non-complex edge. Behavior is undefined if EDGES has more
than one non-complex edge. */
254 | edge |
255 | single_edge (const vec<edge, va_gc> *edges) |
256 | { |
257 | gcc_checking_assert (single_p (edges)); |
258 | for (edge e : edges) |
259 | { |
260 | if (e->flags & EDGE_COMPLEX) |
261 | continue; |
262 | return e; |
263 | } |
264 | return NULL; |
265 | } |
266 | |
267 | /* Sometimes, for example with function calls, goto labels, and C++ |
268 | destructors, the CFG gets extra nodes that are essentially single-entry |
269 | single-exit in the middle of boolean expressions. For example: |
270 | |
271 | x || can_throw (y) |
272 | |
273 | A |
274 | /| |
275 | / | |
276 | B | |
277 | | | |
278 | C | |
279 | / \ | |
280 | / \| |
281 | F T |
282 | |
283 | Without the extra node inserted by the function + exception it becomes a |
284 | proper 2-term graph, not 2 single-term graphs. |
285 | |
286 | A |
287 | /| |
288 | C | |
289 | / \| |
290 | F T |
291 | |
292 | This function finds the source edge of these paths. This is often the |
293 | identity function. */ |
294 | edge |
295 | contract_edge_up (edge e) |
296 | { |
297 | while (true) |
298 | { |
299 | basic_block src = e->src; |
if (!single_p (src->preds))
return e;
if (!single_p (src->succs))
return e;
e = single_edge (src->preds);
305 | } |
306 | } |
307 | |
308 | /* A simple struct for storing/returning outcome block pairs. Either both |
309 | blocks are set or both are NULL. */ |
310 | struct outcomes |
311 | { |
312 | basic_block t = NULL; |
313 | basic_block f = NULL; |
314 | |
315 | operator bool () const noexcept (true) |
316 | { |
317 | return t && f; |
318 | } |
319 | }; |
320 | |
321 | /* Get the true/false successors of a basic block. If b is not a conditional |
322 | block both edges are NULL. */ |
323 | outcomes |
324 | conditional_succs (const basic_block b) |
325 | { |
326 | outcomes c; |
327 | for (edge e : b->succs) |
328 | { |
329 | if (e->flags & EDGE_TRUE_VALUE) |
330 | c.t = e->dest; |
331 | if (e->flags & EDGE_FALSE_VALUE) |
332 | c.f = e->dest; |
333 | } |
334 | |
335 | gcc_assert ((c.t && c.f) || (!c.t && !c.f)); |
336 | return c; |
337 | } |
338 | |
339 | /* Get the index or offset of a conditional flag, 0 for true and 1 for false. |
340 | These indices carry no semantics but must be consistent as they are used to |
341 | index into data structures in code generation and gcov. */ |
342 | unsigned |
343 | condition_index (unsigned flag) |
344 | { |
345 | return (flag & EDGE_CONDITION) == EDGE_TRUE_VALUE ? 0 : 1; |
346 | } |
347 | |
348 | /* Returns the condition identifier for the basic block if set, otherwise 0. |
349 | This is only meaningful in GIMPLE and is used for condition coverage. |
350 | |
351 | There may be conditions created that did not get an uid, such as those |
352 | implicitly created by destructors. We could include them in the condition |
353 | coverage for completeness (i.e. condition coverage implies (implicit) branch |
354 | coverage), but they have no natural buckets and should all be single-term. |
355 | For now these are ignored and given uid = 0, and branch coverage is left to |
356 | -fprofile-arcs. |
357 | |
358 | Under optimization, COND_EXPRs may be folded, replaced with switches, |
359 | min-max, etc., which leaves ghost identifiers in basic blocks that do not |
360 | end with a conditional jump. They are not really meaningful for condition |
361 | coverage anymore, but since coverage is unreliable under optimization anyway |
362 | this is not a big problem. |
363 | |
364 | The cond_uids map in FN cannot be expected to exist. It will only be |
365 | created if it is needed, and a function may have gconds even though there |
366 | are none in source. This can be seen in PR gcov-profile/114601, when |
367 | -finstrument-functions-once is used and the function has no conditions. */ |
368 | unsigned |
369 | condition_uid (struct function *fn, basic_block b) |
370 | { |
gimple *stmt = gsi_stmt (gsi_last_bb (b));
if (!safe_is_a <gcond*> (stmt) || !fn->cond_uids)
return 0;

unsigned *v = fn->cond_uids->get (as_a <gcond*> (stmt));
return v ? *v : 0;
377 | } |
378 | |
379 | /* Compute the masking table. |
380 | |
381 | Masking and short circuiting are deeply connected - masking occurs when |
382 | control flow reaches a state that is also reachable with short circuiting. |
383 | In fact, masking corresponds to short circuiting for the reversed |
expression. This means we can find the limits, the last term in preceding
385 | subexpressions, by following the edges that short circuit to the same |
386 | outcome. The algorithm treats the CFG as a reduced order binary decision |
387 | diagram (see Randall E. Bryant's Graph Based Algorithms for Boolean |
388 | Function Manipulation (1987)). |
389 | |
390 | In the simplest case a || b: |
391 | |
392 | a |
393 | |\ |
394 | | b |
395 | |/ \ |
396 | T F |
397 | |
398 | T has multiple incoming edges and is the outcome of a short circuit, |
399 | with top = a, bot = b. The top node (a) is masked when the edge (b, T) is |
400 | taken. |
401 | |
402 | The names "top" and "bot" refer to a pair of nodes with a shared |
403 | successor. The top is always the node corresponding to the left-most |
404 | operand of the two, and it holds that top < bot in a topological ordering. |
405 | |
406 | Now consider (a && b) || (c && d) and its masking table: |
407 | |
408 | a |
409 | |\ |
410 | b \ |
411 | |\| |
412 | | c |
413 | | |\ |
414 | | d \ |
415 | |/ \| |
416 | T F |
417 | |
a[0] = {}
a[1] = {}
b[0] = {}
b[1] = {a}
c[0] = {}
c[1] = {}
d[0] = {a,b}
d[1] = {c}
426 | |
427 | Note that 0 and 1 are indices and not boolean values - a[0] is the index in |
428 | the masking vector when a takes the true edge. |
429 | |
b[1] and d[1] follow the pattern of the a || b example (with && it is the
false edge, (_ && false), that masks the left operand), and d[0] is the bot
in the triangle [b, d] -> T. b is the top node in the [b, d] relationship
and the last term in (a && b), so when d takes the true edge the whole left
operand (a && b) is masked. To find the other terms masked we use the fact that
433 | all paths in an expression go through either of the outcomes, found by |
434 | collecting all non-complex edges that go out of the expression (the |
neighborhood). In some cases the outgoing edges go through intermediate (or
bypass) nodes, and we collect these paths too (see contract_edge_up).
437 | |
We find the terms by marking the outcomes (in this case c, T) and walking
the predecessors starting at top (in this case b), masking nodes when both
successors are marked.
441 | |
442 | The masking table is represented as two bitfields per term in the expression |
443 | with the index corresponding to the term in the Boolean expression. |
444 | a || b && c becomes the term vector [a b c] and the masking table [a[0] |
445 | a[1] b[0] ...]. The kth bit of a masking vector is set if the kth term |
446 | is masked by taking the edge. |
447 | |
The output masks are stored in uint64_t (the practical maximum for
gcov_type_node on any target) as it has to be big enough to store the
target-size gcov types independent of the host. */
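
/* For the (a && b) || (c && d) example above, with term indices a=0 b=1 c=2
   d=3 and entries indexed as masks[2*term + condition_index], the non-zero
   masks would be:

     masks[2*1 + 1] = 0x1   (b taking the false edge masks {a})
     masks[2*3 + 0] = 0x3   (d taking the true edge masks {a, b})
     masks[2*3 + 1] = 0x4   (d taking the false edge masks {c})  */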
451 | void |
452 | masking_vectors (conds_ctx& ctx, array_slice<basic_block> blocks, |
453 | array_slice<sbitmap> maps, array_slice<uint64_t> masks) |
454 | { |
455 | gcc_assert (blocks.is_valid ()); |
456 | gcc_assert (!blocks.empty ()); |
457 | gcc_assert (maps.is_valid ()); |
458 | gcc_assert (masks.is_valid ()); |
459 | gcc_assert (sizeof (masks[0]) * BITS_PER_UNIT >= CONDITIONS_MAX_TERMS); |
460 | |
461 | if (bitmap_count_bits (maps[0]) == 1) |
462 | return; |
463 | |
464 | sbitmap marks = ctx.G1; |
465 | const sbitmap core = maps[0]; |
466 | const sbitmap allg = maps[1]; |
467 | vec<basic_block>& queue = ctx.B1; |
468 | vec<basic_block>& body = ctx.B2; |
469 | const vec<int>& top_index = ctx.top_index; |
470 | |
471 | /* Set up for the iteration - include the outcome nodes in the traversal. |
472 | The algorithm compares pairs of nodes and is not really sensitive to |
traversal order, but needs to maintain topological order because the
474 | index of masking nodes maps to the index in the accumulators. We must |
475 | also check the incoming-to-outcome pairs. These edges may in turn be |
476 | split (this happens with labels on top of then/else blocks) so we must |
477 | follow any single-in single-out path. The non-condition blocks do not |
478 | have to be in order as they are non-condition blocks and will not be |
479 | considered for the set-bit index. */ |
body.truncate (0);
body.reserve (blocks.size () + 2);
for (const basic_block b : blocks)
if (bitmap_bit_p (core, b->index))
body.quick_push (b);
485 | |
486 | for (basic_block b : blocks) |
487 | { |
if (!bitmap_bit_p (core, b->index))
489 | continue; |
490 | |
491 | for (edge e : b->succs) |
492 | { |
493 | if (e->flags & EDGE_COMPLEX) |
494 | continue; |
if (bitmap_bit_p (allg, e->dest->index))
496 | continue; |
body.safe_push (e->dest);
498 | |
499 | /* There may be multiple nodes between the condition edge and the |
500 | actual outcome, and we need to know when these paths join to |
501 | determine if there is short circuit/masking. This is |
502 | effectively creating a virtual edge from the condition node to |
503 | the real outcome. */ |
while (!(e->flags & EDGE_DFS_BACK) && single_p (e->dest->succs))
505 | { |
e = single_edge (e->dest->succs);
body.safe_push (e->dest);
508 | } |
509 | } |
510 | } |
511 | |
512 | /* Find the masking. The leftmost element cannot mask anything, so |
513 | start at 1. */ |
514 | for (size_t i = 1; i != body.length (); i++) |
515 | { |
516 | const basic_block b = body[i]; |
517 | for (edge e1 : b->preds) |
518 | for (edge e2 : b->preds) |
519 | { |
520 | if (e1 == e2) |
521 | continue; |
522 | if ((e1->flags | e2->flags) & EDGE_COMPLEX) |
523 | continue; |
524 | |
edge etop = contract_edge_up (e1);
edge ebot = contract_edge_up (e2);
527 | gcc_assert (etop != ebot); |
528 | |
529 | const basic_block top = etop->src; |
530 | const basic_block bot = ebot->src; |
531 | const unsigned cond = etop->flags & ebot->flags & EDGE_CONDITION; |
532 | if (!cond) |
533 | continue; |
534 | if (top_index[top->index] > top_index[bot->index]) |
535 | continue; |
if (!bitmap_bit_p (core, top->index))
continue;
if (!bitmap_bit_p (core, bot->index))
539 | continue; |
540 | |
outcomes out = conditional_succs (top);
542 | gcc_assert (out); |
543 | bitmap_clear (marks); |
bitmap_set_bit (marks, out.t->index);
bitmap_set_bit (marks, out.f->index);
queue.truncate (0);
queue.safe_push (top);
548 | |
549 | // The edge bot -> outcome triggers the masking |
const int m = 2*index_of (bot, body) + condition_index (cond);
551 | gcc_assert (m >= 0); |
552 | while (!queue.is_empty ()) |
553 | { |
554 | basic_block q = queue.pop (); |
555 | /* q may have been processed & completed by being added to the |
556 | queue multiple times, so check that there is still work to |
557 | do before continuing. */ |
if (bitmap_bit_p (marks, q->index))
559 | continue; |
560 | |
outcomes succs = conditional_succs (q);
if (!bitmap_bit_p (marks, succs.t->index))
continue;
if (!bitmap_bit_p (marks, succs.f->index))
565 | continue; |
566 | |
const int index = index_of (q, body);
568 | gcc_assert (index != -1); |
569 | masks[m] |= uint64_t (1) << index; |
bitmap_set_bit (marks, q->index);
571 | |
572 | for (edge e : q->preds) |
573 | { |
574 | e = contract_edge_up (e); |
575 | if (e->flags & EDGE_DFS_BACK) |
576 | continue; |
if (bitmap_bit_p (marks, e->src->index))
578 | continue; |
if (!bitmap_bit_p (core, e->src->index))
580 | continue; |
queue.safe_push (e->src);
582 | } |
583 | } |
584 | } |
585 | } |
586 | } |
587 | |
/* Emit LHS = RHS on edge E. This is just a shorthand that builds the
assignment and immediately puts it on the edge, which otherwise becomes
noisy. */
591 | tree |
592 | emit_assign (edge e, tree lhs, tree rhs) |
593 | { |
594 | gassign *w = gimple_build_assign (lhs, rhs); |
595 | gsi_insert_on_edge (e, w); |
596 | return lhs; |
597 | } |
598 | |
/* Emit lhs = RHS on edge E. The lhs is created. */
600 | tree |
601 | emit_assign (edge e, tree rhs) |
602 | { |
return emit_assign (e, make_ssa_name (gcov_type_node), rhs);
604 | } |
605 | |
/* Emit LHS = OP1 <OP> OP2 on edge E. */
607 | tree |
608 | emit_bitwise_op (edge e, tree op1, tree_code op, tree op2 = NULL_TREE) |
609 | { |
tree lhs = make_ssa_name (gcov_type_node);
611 | gassign *w = gimple_build_assign (lhs, op, op1, op2); |
612 | gsi_insert_on_edge (e, w); |
613 | return lhs; |
614 | } |
615 | |
616 | /* Visitor for make_top_index. */ |
617 | void |
618 | make_top_index_visit (basic_block b, vec<basic_block>& L, vec<int>& marks) |
619 | { |
620 | if (marks[b->index]) |
621 | return; |
622 | |
623 | /* Follow the false edge first, if it exists, so that true paths are given |
624 | the lower index in the ordering. Any iteration order |
625 | would yield a valid and useful topological ordering, but making sure the |
626 | true branch has the lower index first makes reporting work better for |
627 | expressions with ternaries. Walk the false branch first because the |
628 | array will be reversed to finalize the topological order. |
629 | |
630 | With the wrong ordering (a ? b : c) && d could become [a c b d], but the |
631 | (expected) order is really [a b c d]. */ |
632 | |
633 | const unsigned false_fwd = EDGE_DFS_BACK | EDGE_FALSE_VALUE; |
634 | for (edge e : b->succs) |
635 | if ((e->flags & false_fwd) == EDGE_FALSE_VALUE) |
make_top_index_visit (e->dest, L, marks);
637 | |
638 | for (edge e : b->succs) |
639 | if (!(e->flags & false_fwd)) |
make_top_index_visit (e->dest, L, marks);
641 | |
642 | marks[b->index] = 1; |
L.quick_push (b);
644 | } |
645 | |
646 | /* Find a topological sorting of the blocks in a function so that left operands |
647 | are before right operands including subexpressions. Sorting on block index |
648 | does not guarantee this property and the syntactical order of terms is very |
649 | important to the condition coverage. The sorting algorithm is from Cormen |
650 | et al (2001) but with back-edges ignored and thus there is no need for |
651 | temporary marks (for cycle detection). The L argument is a buffer/working |
652 | memory, and the output will be written to TOP_INDEX. |
653 | |
654 | For the expression (a || (b && c) || d) the blocks should be [a b c d]. */ |
655 | void |
656 | make_top_index (array_slice<basic_block> blocks, vec<basic_block>& L, |
657 | vec<int>& top_index) |
658 | { |
L.truncate (0);
L.reserve (blocks.size ());
661 | |
662 | /* Use of the output map as a temporary for tracking visited status. */ |
top_index.truncate (0);
top_index.safe_grow_cleared (blocks.size ());
665 | for (const basic_block b : blocks) |
make_top_index_visit (b, L, top_index);
667 | |
668 | /* Insert canaries - if there are unreachable nodes (for example infinite |
669 | loops) then the unreachable nodes should never be needed for comparison, |
670 | and L.length () < max_index. An index mapping should also never be |
671 | recorded twice. */ |
672 | for (unsigned i = 0; i != top_index.length (); i++) |
673 | top_index[i] = -1; |
674 | |
675 | gcc_assert (blocks.size () == L.length ()); |
676 | L.reverse (); |
677 | const unsigned nblocks = L.length (); |
678 | for (unsigned i = 0; i != nblocks; i++) |
679 | { |
680 | gcc_assert (L[i]->index != -1); |
681 | top_index[L[i]->index] = int (i); |
682 | } |
683 | } |
684 | |
685 | /* Find all nodes including non-conditions in a Boolean expression. We need to |
686 | know the paths through the expression so that the masking and |
687 | instrumentation phases can limit searches and know what subgraphs must be |
688 | threaded through, but not counted, such as the (b || c) in |
689 | a && fn (b || c) && d. |
690 | |
691 | It is essentially the intersection of downwards paths from the expression |
692 | nodes EXPR to the post-dominator and upwards from the post-dominator. |
693 | Finding the dominator is slightly more involved than picking the first/last, |
694 | particularly under optimization, because both incoming and outgoing paths |
695 | may have multiple entries/exits. |
696 | |
697 | It is assumed GRAPH is an array_slice of the basic blocks of this function |
698 | sorted by the basic block index. */ |
699 | vec<basic_block>& |
700 | paths_between (conds_ctx &ctx, array_slice<basic_block> graph, |
701 | const vec<basic_block>& expr) |
702 | { |
703 | if (expr.length () == 1) |
704 | { |
ctx.blocks.truncate (0);
ctx.blocks.safe_push (expr[0]);
707 | return ctx.blocks; |
708 | } |
709 | |
710 | basic_block dom; |
711 | sbitmap up = ctx.G1; |
712 | sbitmap down = ctx.G2; |
713 | sbitmap paths = ctx.G3; |
714 | vec<basic_block>& queue = ctx.B1; |
715 | |
queue.truncate (0);
717 | bitmap_clear (down); |
718 | dom = get_immediate_dominator (CDI_POST_DOMINATORS, expr[0]); |
719 | for (basic_block b : expr) |
720 | if (dom != b) |
721 | dom = nearest_common_dominator (CDI_POST_DOMINATORS, dom, b); |
queue.safe_splice (expr);
723 | while (!queue.is_empty ()) |
724 | { |
725 | basic_block b = queue.pop (); |
if (!bitmap_set_bit (down, b->index))
727 | continue; |
728 | if (b == dom) |
729 | continue; |
730 | for (edge e : b->succs) |
731 | if (!(e->flags & (EDGE_COMPLEX | EDGE_DFS_BACK))) |
queue.safe_push (e->dest);
733 | } |
734 | |
queue.truncate (0);
736 | bitmap_clear (up); |
737 | dom = expr[0]; |
738 | for (basic_block b : expr) |
739 | if (dom != b) |
740 | dom = nearest_common_dominator (CDI_DOMINATORS, dom, b); |
queue.safe_splice (expr);
742 | while (!queue.is_empty ()) |
743 | { |
744 | basic_block b = queue.pop (); |
if (!bitmap_set_bit (up, b->index))
746 | continue; |
747 | if (b == dom) |
748 | continue; |
749 | for (edge e : b->preds) |
750 | if (!(e->flags & (EDGE_COMPLEX | EDGE_DFS_BACK))) |
queue.safe_push (e->src);
752 | } |
753 | |
754 | bitmap_and (paths, up, down); |
755 | vec<basic_block>& blocks = ctx.blocks; |
blocks.truncate (0);
blocks.reserve (graph.size ());
758 | sbitmap_iterator itr; |
759 | unsigned index; |
760 | EXECUTE_IF_SET_IN_BITMAP (paths, 0, index, itr) |
blocks.quick_push (graph[index]);
762 | return blocks; |
763 | } |
764 | |
765 | } |
766 | |
767 | /* Context object for the condition coverage. This stores conds_ctx (the |
768 | buffers reused when analyzing the cfg) and the output arrays. This is |
769 | designed to be heap allocated and aggressively preallocates large buffers to |
770 | avoid having to reallocate for most programs. */ |
771 | struct condcov |
772 | { |
773 | explicit condcov (unsigned nblocks) noexcept (true) : ctx (nblocks), |
774 | m_maps (sbitmap_vector_alloc (2 * nblocks, nblocks)) |
775 | { |
776 | bitmap_vector_clear (m_maps, 2 * nblocks); |
777 | } |
778 | auto_vec<size_t, 128> m_index; |
779 | auto_vec<basic_block, 256> m_blocks; |
780 | auto_vec<uint64_t, 512> m_masks; |
781 | conds_ctx ctx; |
782 | sbitmap *m_maps; |
783 | }; |
784 | |
/* Get the length, that is the number of Boolean expressions found. cov_length
786 | is the one-past index for cov_{blocks,masks,maps}. */ |
787 | size_t |
788 | cov_length (const struct condcov* cov) |
789 | { |
790 | if (cov->m_index.is_empty ()) |
791 | return 0; |
792 | return cov->m_index.length () - 1; |
793 | } |
794 | |
/* The subgraph, including intermediates, for the nth Boolean expression. */
796 | array_slice<basic_block> |
797 | cov_blocks (struct condcov* cov, size_t n) |
798 | { |
799 | if (n >= cov->m_index.length ()) |
800 | return array_slice<basic_block>::invalid (); |
801 | |
802 | basic_block *begin = cov->m_blocks.begin () + cov->m_index[n]; |
803 | basic_block *end = cov->m_blocks.begin () + cov->m_index[n + 1]; |
804 | return array_slice<basic_block> (begin, end - begin); |
805 | } |
806 | |
807 | /* The masks for the nth Boolean expression. */ |
808 | array_slice<uint64_t> |
809 | cov_masks (struct condcov* cov, size_t n) |
810 | { |
811 | if (n >= cov->m_index.length ()) |
812 | return array_slice<uint64_t>::invalid (); |
813 | |
814 | uint64_t *begin = cov->m_masks.begin () + 2*cov->m_index[n]; |
815 | uint64_t *end = cov->m_masks.begin () + 2*cov->m_index[n + 1]; |
816 | return array_slice<uint64_t> (begin, end - begin); |
817 | } |
818 | |
819 | /* The maps for the nth Boolean expression. */ |
820 | array_slice<sbitmap> |
821 | cov_maps (struct condcov* cov, size_t n) |
822 | { |
823 | if (n >= cov->m_index.length ()) |
824 | return array_slice<sbitmap>::invalid (); |
825 | |
826 | sbitmap *begin = cov->m_maps + 2*n; |
827 | sbitmap *end = begin + 2; |
828 | return array_slice<sbitmap> (begin, end - begin); |
829 | } |
830 | |
831 | /* Deleter for condcov. */ |
832 | void |
833 | cov_free (struct condcov* cov) |
834 | { |
sbitmap_vector_free (cov->m_maps);
836 | delete cov; |
837 | } |
838 | |
839 | /* Condition coverage (MC/DC) |
840 | |
841 | Whalen, Heimdahl, De Silva in "Efficient Test Coverage Measurement for |
842 | MC/DC" describe an algorithm for modified condition/decision coverage based |
on AST analysis. This implementation instead analyzes the control flow graph
844 | (interpreted as a binary decision diagram) to determine the masking vectors. |
845 | The individual phases are described in more detail closer to the |
846 | implementation. |
847 | |
848 | The coverage only considers the positions, not the symbols, in a |
849 | conditional, e.g. !A || (!B && A) is a 3-term conditional even though A |
850 | appears twice. Subexpressions have no effect on term ordering: |
851 | (a && (b || (c && d)) || e) comes out as [a b c d e]. Functions whose |
852 | arguments are Boolean expressions are treated as separate expressions, that |
853 | is, a && fn (b || c) && d is treated as [a _fn d] and [b c], not [a b c d]. |
854 | |
855 | The output for gcov is a vector of pairs of unsigned integers, interpreted |
856 | as bit-sets, where the bit index corresponds to the index of the condition |
857 | in the expression. |
858 | |
859 | The returned condcov should be released by the caller with cov_free. */ |
860 | struct condcov* |
861 | find_conditions (struct function *fn) |
862 | { |
863 | mark_dfs_back_edges (fn); |
864 | const bool have_dom = dom_info_available_p (fn, CDI_DOMINATORS); |
865 | const bool have_post_dom = dom_info_available_p (fn, CDI_POST_DOMINATORS); |
866 | if (!have_dom) |
867 | calculate_dominance_info (CDI_DOMINATORS); |
868 | if (!have_post_dom) |
869 | calculate_dominance_info (CDI_POST_DOMINATORS); |
870 | |
871 | const unsigned nblocks = n_basic_blocks_for_fn (fn); |
872 | basic_block *fnblocksp = basic_block_info_for_fn (fn)->address (); |
873 | condcov *cov = new condcov (nblocks); |
874 | conds_ctx& ctx = cov->ctx; |
875 | array_slice<basic_block> fnblocks (fnblocksp, nblocks); |
make_top_index (fnblocks, ctx.B1, ctx.top_index);
877 | |
878 | /* Bin the Boolean expressions so that exprs[id] -> [x1, x2, ...]. */ |
879 | hash_map<int_hash<unsigned, 0>, vec<basic_block>> exprs; |
880 | for (basic_block b : fnblocks) |
881 | { |
882 | const unsigned uid = condition_uid (fn, b); |
883 | if (uid == 0) |
884 | continue; |
exprs.get_or_insert (uid).safe_push (b);
886 | } |
887 | |
888 | /* Visit all reachable nodes and collect conditions. Topological order is |
889 | important so the first node of a boolean expression is visited first |
890 | (it will mark subsequent terms). */ |
cov->m_index.safe_push (0);
892 | for (auto expr : exprs) |
893 | { |
894 | vec<basic_block>& conds = expr.second; |
895 | if (conds.length () > CONDITIONS_MAX_TERMS) |
896 | { |
location_t loc = gimple_location (gsi_stmt (gsi_last_bb (conds[0])));
warning_at (loc, OPT_Wcoverage_too_many_conditions,
"Too many conditions (found %u); giving up coverage",
conds.length ());
901 | continue; |
902 | } |
conds.sort (topological_cmp, &ctx.top_index);
vec<basic_block>& subgraph = paths_between (ctx, fnblocks, conds);
subgraph.sort (topological_cmp, &ctx.top_index);
const unsigned index = cov->m_index.length () - 1;
sbitmap condm = cov->m_maps[0 + 2*index];
sbitmap subgm = cov->m_maps[1 + 2*index];
for (basic_block b : conds)
bitmap_set_bit (condm, b->index);
for (basic_block b : subgraph)
bitmap_set_bit (subgm, b->index);
cov->m_blocks.safe_splice (subgraph);
cov->m_index.safe_push (cov->m_blocks.length ());
915 | } |
916 | |
917 | if (!have_dom) |
918 | free_dominance_info (fn, CDI_DOMINATORS); |
919 | if (!have_post_dom) |
920 | free_dominance_info (fn, CDI_POST_DOMINATORS); |
921 | |
cov->m_masks.safe_grow_cleared (2 * cov->m_index.last ());
const size_t length = cov_length (cov);
for (size_t i = 0; i != length; i++)
masking_vectors (ctx, cov_blocks (cov, i), cov_maps (cov, i),
cov_masks (cov, i));
927 | |
928 | return cov; |
929 | } |
930 | |
931 | namespace |
932 | { |
933 | |
/* Stores the incoming edge and previous counters (in SSA form) on that edge
for the node e->dest. The counters record the seen-true (0), seen-false (1),
and current-mask (2). They are stored in
937 | an array rather than proper members for access-by-index as the code paths |
938 | tend to be identical for the different counters. */ |
939 | struct counters |
940 | { |
941 | edge e; |
942 | tree counter[3]; |
943 | tree& operator [] (size_t i) { return counter[i]; } |
944 | }; |
945 | |
946 | /* Find the counters for the incoming edge e, or NULL if the edge has not been |
947 | recorded (could be for complex incoming edges). */ |
948 | counters* |
949 | find_counters (vec<counters>& candidates, edge e) |
950 | { |
951 | for (counters& candidate : candidates) |
952 | if (candidate.e == e) |
953 | return &candidate; |
954 | return NULL; |
955 | } |
956 | |
957 | /* Resolve the SSA for a specific counter KIND. If it is not modified by any |
958 | incoming edges, simply forward it, otherwise create a phi node of all the |
959 | candidate counters and return it. */ |
960 | tree |
961 | resolve_counter (vec<counters>& cands, size_t kind) |
962 | { |
963 | gcc_assert (!cands.is_empty ()); |
964 | gcc_assert (kind < 3); |
965 | |
966 | counters& fst = cands[0]; |
967 | |
968 | if (!fst.e || fst.e->dest->preds->length () == 1) |
969 | { |
970 | gcc_assert (cands.length () == 1); |
971 | return fst[kind]; |
972 | } |
973 | |
974 | tree zero0 = build_int_cst (gcov_type_node, 0); |
tree ssa = make_ssa_name (gcov_type_node);
976 | gphi *phi = create_phi_node (ssa, fst.e->dest); |
977 | for (edge e : fst.e->dest->preds) |
978 | { |
counters *prev = find_counters (cands, e);
980 | if (prev) |
981 | add_phi_arg (phi, (*prev)[kind], e, UNKNOWN_LOCATION); |
982 | else |
983 | { |
tree zero = make_ssa_name (gcov_type_node);
gimple_stmt_iterator gsi = gsi_after_labels (e->src);
986 | gassign *set = gimple_build_assign (zero, zero0); |
987 | gsi_insert_before (&gsi, set, GSI_NEW_STMT); |
988 | add_phi_arg (phi, zero, e, UNKNOWN_LOCATION); |
989 | } |
990 | } |
991 | return ssa; |
992 | } |
993 | |
994 | /* Resolve all the counters for a node. Note that the edge is undefined, as |
995 | the counters are intended to form the base to push to the successors, and |
because the edge is only meaningful for nodes with a single predecessor. */
997 | counters |
998 | resolve_counters (vec<counters>& cands) |
999 | { |
1000 | counters next; |
next[0] = resolve_counter (cands, 0);
next[1] = resolve_counter (cands, 1);
next[2] = resolve_counter (cands, 2);
1004 | return next; |
1005 | } |
1006 | |
1007 | } |
1008 | |
1009 | /* Add instrumentation to a decision subgraph. EXPR should be the |
1010 | (topologically sorted) block of nodes returned by cov_blocks, MAPS the |
1011 | bitmaps returned by cov_maps, and MASKS the block of bitsets returned by |
1012 | cov_masks. CONDNO should be the index of this condition in the function, |
1013 | i.e. the same argument given to cov_{masks,graphs}. EXPR may contain nodes |
1014 | in-between the conditions, e.g. when an operand contains a function call, |
1015 | or there is a setjmp and the cfg is filled with complex edges. |
1016 | |
1017 | Every node is annotated with three counters; the true, false, and mask |
value. First, walk the graph and determine whether there are multiple
1019 | possible values for either accumulator depending on the path taken, in which |
1020 | case a phi node is created and registered as the accumulator. Then, those |
1021 | values are pushed as accumulators to the immediate successors. For some |
1022 | very particular programs there may be multiple paths into the expression |
1023 | (e.g. when prior terms are determined by a surrounding conditional) in which |
1024 | case the default zero-counter is pushed, otherwise all predecessors will |
1025 | have been considered before the successor because of topologically ordered |
1026 | traversal. Finally, expr is traversed again to look for edges to the |
1027 | outcomes, that is, edges with a destination outside of expr, and the local |
1028 | accumulators are flushed to the global gcov counters on these edges. In |
1029 | some cases there are edge splits that cause 3+ edges to the two outcome |
1030 | nodes. |
1031 | |
If a complex edge is taken (e.g. on a longjmp) we attempt to poison the
accumulators so that there is no change to the global counters,
1034 | but this has proven unreliable in the presence of undefined behavior, see |
1035 | the setjmp003 test. |
1036 | |
1037 | It is important that the flushes happen on the basic condition outgoing |
1038 | edge, otherwise flushes could be lost to exception handling or other |
1039 | abnormal control flow. */ |
1040 | size_t |
1041 | instrument_decisions (array_slice<basic_block> expr, size_t condno, |
1042 | array_slice<sbitmap> maps, array_slice<uint64_t> masks) |
1043 | { |
1044 | tree zero = build_int_cst (gcov_type_node, 0); |
1045 | tree poison = build_int_cst (gcov_type_node, ~0ULL); |
1046 | const sbitmap core = maps[0]; |
1047 | const sbitmap allg = maps[1]; |
1048 | |
1049 | hash_map<basic_block, vec<counters>> table; |
1050 | counters zerocounter; |
1051 | zerocounter.e = NULL; |
1052 | zerocounter[0] = zero; |
1053 | zerocounter[1] = zero; |
1054 | zerocounter[2] = zero; |
1055 | |
1056 | unsigned xi = 0; |
1057 | bool increment = false; |
1058 | tree rhs = build_int_cst (gcov_type_node, 1ULL << xi); |
1059 | for (basic_block current : expr) |
1060 | { |
vec<counters>& candidates = table.get_or_insert (current);
if (candidates.is_empty ())
candidates.safe_push (zerocounter);
counters prev = resolve_counters (candidates);
1065 | |
1066 | if (increment) |
1067 | { |
1068 | xi += 1; |
1069 | gcc_checking_assert (xi < sizeof (uint64_t) * BITS_PER_UNIT); |
1070 | rhs = build_int_cst (gcov_type_node, 1ULL << xi); |
1071 | increment = false; |
1072 | } |
1073 | |
1074 | for (edge e : current->succs) |
1075 | { |
1076 | counters next = prev; |
1077 | next.e = e; |
1078 | |
if (bitmap_bit_p (core, e->src->index) && (e->flags & EDGE_CONDITION))
{
const int k = condition_index (e->flags);
next[k] = emit_bitwise_op (e, prev[k], BIT_IOR_EXPR, rhs);
if (masks[2*xi + k])
{
tree m = build_int_cst (gcov_type_node, masks[2*xi + k]);
next[2] = emit_bitwise_op (e, prev[2], BIT_IOR_EXPR, m);
1087 | } |
1088 | increment = true; |
1089 | } |
1090 | else if (e->flags & EDGE_COMPLEX) |
1091 | { |
1092 | /* A complex edge has been taken - wipe the accumulators and |
1093 | poison the mask so that this path does not contribute to |
1094 | coverage. */ |
1095 | next[0] = poison; |
1096 | next[1] = poison; |
1097 | next[2] = poison; |
1098 | } |
table.get_or_insert (e->dest).safe_push (next);
1100 | } |
1101 | } |
1102 | |
1103 | /* Since this is also the return value, the number of conditions, make sure |
1104 | to include the increment of the last basic block. */ |
1105 | if (increment) |
1106 | xi += 1; |
1107 | |
1108 | gcc_assert (xi == bitmap_count_bits (core)); |
1109 | |
1110 | const tree relaxed = build_int_cst (integer_type_node, MEMMODEL_RELAXED); |
1111 | const bool atomic = flag_profile_update == PROFILE_UPDATE_ATOMIC; |
1112 | const tree atomic_ior = builtin_decl_explicit |
1113 | (TYPE_PRECISION (gcov_type_node) > 32 |
1114 | ? BUILT_IN_ATOMIC_FETCH_OR_8 |
1115 | : BUILT_IN_ATOMIC_FETCH_OR_4); |
1116 | |
1117 | /* Flush to the gcov accumulators. */ |
1118 | for (const basic_block b : expr) |
1119 | { |
if (!bitmap_bit_p (core, b->index))
1121 | continue; |
1122 | |
1123 | for (edge e : b->succs) |
1124 | { |
1125 | /* Flush the accumulators on leaving the Boolean function. The |
1126 | destination may be inside the function only when it returns to |
1127 | the loop header, such as do { ... } while (x); */ |
if (bitmap_bit_p (allg, e->dest->index))
{
if (!(e->flags & EDGE_DFS_BACK))
continue;
if (e->dest != expr[0])
continue;
}
1134 | |
vec<counters> *cands = table.get (e->dest);
gcc_assert (cands);
counters *prevp = find_counters (*cands, e);
1138 | gcc_assert (prevp); |
1139 | counters prev = *prevp; |
1140 | |
1141 | /* _true &= ~mask, _false &= ~mask */ |
1142 | counters next; |
next[2] = emit_bitwise_op (e, prev[2], BIT_NOT_EXPR);
next[0] = emit_bitwise_op (e, prev[0], BIT_AND_EXPR, next[2]);
next[1] = emit_bitwise_op (e, prev[1], BIT_AND_EXPR, next[2]);
1146 | |
1147 | /* _global_true |= _true, _global_false |= _false */ |
1148 | for (size_t k = 0; k != 2; ++k) |
1149 | { |
1150 | tree ref = tree_coverage_counter_ref (GCOV_COUNTER_CONDS, |
1151 | 2*condno + k); |
1152 | if (atomic) |
1153 | { |
1154 | ref = unshare_expr (ref); |
1155 | gcall *flush = gimple_build_call (atomic_ior, 3, |
1156 | build_addr (ref), |
1157 | next[k], relaxed); |
1158 | gsi_insert_on_edge (e, flush); |
1159 | } |
1160 | else |
1161 | { |
tree get = emit_assign (e, ref);
tree put = emit_bitwise_op (e, next[k], BIT_IOR_EXPR, get);
emit_assign (e, unshare_expr (ref), put);
1165 | } |
1166 | } |
1167 | } |
1168 | } |
1169 | |
1170 | return xi; |
1171 | } |
1172 | |
1173 | #undef CONDITIONS_MAX_TERMS |
1174 | #undef EDGE_CONDITION |
1175 | |
1176 | /* Do initialization work for the edge profiler. */ |
1177 | |
1178 | /* Add code: |
1179 | __thread gcov* __gcov_indirect_call.counters; // pointer to actual counter |
1180 | __thread void* __gcov_indirect_call.callee; // actual callee address |
1181 | __thread int __gcov_function_counter; // time profiler function counter |
1182 | */ |
1183 | static void |
1184 | init_ic_make_global_vars (void) |
1185 | { |
1186 | tree gcov_type_ptr; |
1187 | |
1188 | gcov_type_ptr = build_pointer_type (get_gcov_type ()); |
1189 | |
1190 | tree tuple_type = lang_hooks.types.make_type (RECORD_TYPE); |
1191 | |
1192 | /* callee */ |
1193 | ic_tuple_callee_field = build_decl (BUILTINS_LOCATION, FIELD_DECL, NULL_TREE, |
1194 | ptr_type_node); |
1195 | |
1196 | /* counters */ |
1197 | ic_tuple_counters_field = build_decl (BUILTINS_LOCATION, FIELD_DECL, |
1198 | NULL_TREE, gcov_type_ptr); |
1199 | DECL_CHAIN (ic_tuple_counters_field) = ic_tuple_callee_field; |
1200 | |
finish_builtin_struct (tuple_type, "indirect_call_tuple",
1202 | ic_tuple_counters_field, NULL_TREE); |
1203 | |
1204 | ic_tuple_var |
1205 | = build_decl (UNKNOWN_LOCATION, VAR_DECL, |
1206 | get_identifier ("__gcov_indirect_call" ), tuple_type); |
1207 | TREE_PUBLIC (ic_tuple_var) = 1; |
1208 | DECL_ARTIFICIAL (ic_tuple_var) = 1; |
1209 | DECL_INITIAL (ic_tuple_var) = NULL; |
1210 | DECL_EXTERNAL (ic_tuple_var) = 1; |
1211 | if (targetm.have_tls) |
1212 | set_decl_tls_model (ic_tuple_var, decl_default_tls_model (ic_tuple_var)); |
1213 | } |
1214 | |
1215 | /* Create the type and function decls for the interface with gcov. */ |
1216 | |
1217 | void |
1218 | gimple_init_gcov_profiler (void) |
1219 | { |
1220 | tree interval_profiler_fn_type; |
1221 | tree pow2_profiler_fn_type; |
1222 | tree topn_values_profiler_fn_type; |
1223 | tree gcov_type_ptr; |
1224 | tree ic_profiler_fn_type; |
1225 | tree average_profiler_fn_type; |
1226 | const char *fn_name; |
1227 | |
1228 | if (!gcov_type_node) |
1229 | { |
1230 | const char *fn_suffix |
= flag_profile_update == PROFILE_UPDATE_ATOMIC ? "_atomic" : "";
1232 | |
1233 | gcov_type_node = get_gcov_type (); |
1234 | gcov_type_ptr = build_pointer_type (gcov_type_node); |
1235 | |
1236 | /* void (*) (gcov_type *, gcov_type, int, unsigned) */ |
1237 | interval_profiler_fn_type |
1238 | = build_function_type_list (void_type_node, |
1239 | gcov_type_ptr, gcov_type_node, |
1240 | integer_type_node, |
1241 | unsigned_type_node, NULL_TREE); |
1242 | fn_name = concat ("__gcov_interval_profiler" , fn_suffix, NULL); |
1243 | tree_interval_profiler_fn = build_fn_decl (fn_name, |
1244 | interval_profiler_fn_type); |
1245 | free (CONST_CAST (char *, fn_name)); |
1246 | TREE_NOTHROW (tree_interval_profiler_fn) = 1; |
1247 | DECL_ATTRIBUTES (tree_interval_profiler_fn) |
= tree_cons (get_identifier ("leaf"), NULL,
1249 | DECL_ATTRIBUTES (tree_interval_profiler_fn)); |
1250 | |
1251 | /* void (*) (gcov_type *, gcov_type) */ |
1252 | pow2_profiler_fn_type |
1253 | = build_function_type_list (void_type_node, |
1254 | gcov_type_ptr, gcov_type_node, |
1255 | NULL_TREE); |
1256 | fn_name = concat ("__gcov_pow2_profiler" , fn_suffix, NULL); |
1257 | tree_pow2_profiler_fn = build_fn_decl (fn_name, pow2_profiler_fn_type); |
1258 | free (CONST_CAST (char *, fn_name)); |
1259 | TREE_NOTHROW (tree_pow2_profiler_fn) = 1; |
1260 | DECL_ATTRIBUTES (tree_pow2_profiler_fn) |
= tree_cons (get_identifier ("leaf"), NULL,
1262 | DECL_ATTRIBUTES (tree_pow2_profiler_fn)); |
1263 | |
1264 | /* void (*) (gcov_type *, gcov_type) */ |
1265 | topn_values_profiler_fn_type |
1266 | = build_function_type_list (void_type_node, |
1267 | gcov_type_ptr, gcov_type_node, |
1268 | NULL_TREE); |
1269 | fn_name = concat ("__gcov_topn_values_profiler" , fn_suffix, NULL); |
1270 | tree_topn_values_profiler_fn |
1271 | = build_fn_decl (fn_name, topn_values_profiler_fn_type); |
1272 | free (CONST_CAST (char *, fn_name)); |
1273 | |
1274 | TREE_NOTHROW (tree_topn_values_profiler_fn) = 1; |
1275 | DECL_ATTRIBUTES (tree_topn_values_profiler_fn) |
= tree_cons (get_identifier ("leaf"), NULL,
1277 | DECL_ATTRIBUTES (tree_topn_values_profiler_fn)); |
1278 | |
1279 | init_ic_make_global_vars (); |
1280 | |
1281 | /* void (*) (gcov_type, void *) */ |
1282 | ic_profiler_fn_type |
1283 | = build_function_type_list (void_type_node, |
1284 | gcov_type_node, |
1285 | ptr_type_node, |
1286 | NULL_TREE); |
1287 | fn_name = concat ("__gcov_indirect_call_profiler_v4" , fn_suffix, NULL); |
1288 | tree_indirect_call_profiler_fn |
1289 | = build_fn_decl (fn_name, ic_profiler_fn_type); |
1290 | free (CONST_CAST (char *, fn_name)); |
1291 | |
1292 | TREE_NOTHROW (tree_indirect_call_profiler_fn) = 1; |
1293 | DECL_ATTRIBUTES (tree_indirect_call_profiler_fn) |
= tree_cons (get_identifier ("leaf"), NULL,
1295 | DECL_ATTRIBUTES (tree_indirect_call_profiler_fn)); |
1296 | |
1297 | tree_time_profiler_counter |
1298 | = build_decl (UNKNOWN_LOCATION, VAR_DECL, |
1299 | get_identifier ("__gcov_time_profiler_counter" ), |
1300 | get_gcov_type ()); |
1301 | TREE_PUBLIC (tree_time_profiler_counter) = 1; |
1302 | DECL_EXTERNAL (tree_time_profiler_counter) = 1; |
1303 | TREE_STATIC (tree_time_profiler_counter) = 1; |
1304 | DECL_ARTIFICIAL (tree_time_profiler_counter) = 1; |
1305 | DECL_INITIAL (tree_time_profiler_counter) = NULL; |
1306 | |
1307 | /* void (*) (gcov_type *, gcov_type) */ |
1308 | average_profiler_fn_type |
1309 | = build_function_type_list (void_type_node, |
1310 | gcov_type_ptr, gcov_type_node, NULL_TREE); |
1311 | fn_name = concat ("__gcov_average_profiler" , fn_suffix, NULL); |
1312 | tree_average_profiler_fn = build_fn_decl (fn_name, |
1313 | average_profiler_fn_type); |
1314 | free (CONST_CAST (char *, fn_name)); |
1315 | TREE_NOTHROW (tree_average_profiler_fn) = 1; |
1316 | DECL_ATTRIBUTES (tree_average_profiler_fn) |
= tree_cons (get_identifier ("leaf"), NULL,
1318 | DECL_ATTRIBUTES (tree_average_profiler_fn)); |
1319 | fn_name = concat ("__gcov_ior_profiler" , fn_suffix, NULL); |
1320 | tree_ior_profiler_fn = build_fn_decl (fn_name, average_profiler_fn_type); |
1321 | free (CONST_CAST (char *, fn_name)); |
1322 | TREE_NOTHROW (tree_ior_profiler_fn) = 1; |
1323 | DECL_ATTRIBUTES (tree_ior_profiler_fn) |
= tree_cons (get_identifier ("leaf"), NULL,
1325 | DECL_ATTRIBUTES (tree_ior_profiler_fn)); |
1326 | |
1327 | /* LTO streamer needs assembler names. Because we create these decls |
1328 | late, we need to initialize them by hand. */ |
1329 | DECL_ASSEMBLER_NAME (tree_interval_profiler_fn); |
1330 | DECL_ASSEMBLER_NAME (tree_pow2_profiler_fn); |
1331 | DECL_ASSEMBLER_NAME (tree_topn_values_profiler_fn); |
1332 | DECL_ASSEMBLER_NAME (tree_indirect_call_profiler_fn); |
1333 | DECL_ASSEMBLER_NAME (tree_average_profiler_fn); |
1334 | DECL_ASSEMBLER_NAME (tree_ior_profiler_fn); |
1335 | } |
1336 | } |
1337 | |
1338 | /* If RESULT is not null, then output instructions as GIMPLE trees to assign |
1339 | the updated counter from CALL of FUNC to RESULT. Insert the CALL and the |
1340 | optional assignment instructions to GSI. Use NAME for temporary values. */ |
1341 | |
1342 | static inline void |
1343 | gen_assign_counter_update (gimple_stmt_iterator *gsi, gcall *call, tree func, |
1344 | tree result, const char *name) |
1345 | { |
1346 | if (result) |
1347 | { |
1348 | tree result_type = TREE_TYPE (TREE_TYPE (func)); |
tree tmp1 = make_temp_ssa_name (result_type, NULL, name);
1350 | gimple_set_lhs (call, tmp1); |
1351 | gsi_insert_after (gsi, call, GSI_NEW_STMT); |
1352 | tree tmp2 = make_temp_ssa_name (TREE_TYPE (result), NULL, name); |
1353 | gassign *assign = gimple_build_assign (tmp2, NOP_EXPR, tmp1); |
1354 | gsi_insert_after (gsi, assign, GSI_NEW_STMT); |
1355 | assign = gimple_build_assign (result, tmp2); |
1356 | gsi_insert_after (gsi, assign, GSI_NEW_STMT); |
1357 | } |
1358 | else |
1359 | gsi_insert_after (gsi, call, GSI_NEW_STMT); |
1360 | } |
1361 | |
1362 | /* Output instructions as GIMPLE trees to increment the COUNTER. If RESULT is |
1363 | not null, then assign the updated counter value to RESULT. Insert the |
1364 | instructions to GSI. Use NAME for temporary values. */ |
1365 | |
1366 | static inline void |
1367 | gen_counter_update (gimple_stmt_iterator *gsi, tree counter, tree result, |
1368 | const char *name) |
1369 | { |
1370 | tree type = gcov_type_node; |
1371 | tree addr = build_fold_addr_expr (counter); |
1372 | tree one = build_int_cst (type, 1); |
1373 | tree relaxed = build_int_cst (integer_type_node, MEMMODEL_RELAXED); |
1374 | |
1375 | if (counter_update == COUNTER_UPDATE_ATOMIC_BUILTIN |
1376 | || (result && counter_update == COUNTER_UPDATE_ATOMIC_SPLIT)) |
1377 | { |
/* __atomic_add_fetch (&counter, 1, MEMMODEL_RELAXED); */
1379 | tree f = builtin_decl_explicit (TYPE_PRECISION (type) > 32 |
1380 | ? BUILT_IN_ATOMIC_ADD_FETCH_8 |
1381 | : BUILT_IN_ATOMIC_ADD_FETCH_4); |
1382 | gcall *call = gimple_build_call (f, 3, addr, one, relaxed); |
gen_assign_counter_update (gsi, call, f, result, name);
1384 | } |
1385 | else if (!result && (counter_update == COUNTER_UPDATE_ATOMIC_SPLIT |
1386 | || counter_update == COUNTER_UPDATE_ATOMIC_PARTIAL)) |
1387 | { |
1388 | /* low = __atomic_add_fetch_4 (addr, 1, MEMMODEL_RELAXED); |
1389 | high_inc = low == 0 ? 1 : 0; |
1390 | __atomic_add_fetch_4 (addr_high, high_inc, MEMMODEL_RELAXED); */ |
1391 | tree zero32 = build_zero_cst (uint32_type_node); |
1392 | tree one32 = build_one_cst (uint32_type_node); |
1393 | tree addr_high = make_temp_ssa_name (TREE_TYPE (addr), NULL, name); |
1394 | tree four = build_int_cst (size_type_node, 4); |
1395 | gassign *assign1 = gimple_build_assign (addr_high, POINTER_PLUS_EXPR, |
1396 | addr, four); |
1397 | gsi_insert_after (gsi, assign1, GSI_NEW_STMT); |
1398 | if (WORDS_BIG_ENDIAN) |
std::swap (addr, addr_high);
tree f = builtin_decl_explicit (BUILT_IN_ATOMIC_ADD_FETCH_4);
gcall *call1 = gimple_build_call (f, 3, addr, one, relaxed);
tree low = make_temp_ssa_name (uint32_type_node, NULL, name);
gimple_call_set_lhs (call1, low);
1404 | gsi_insert_after (gsi, call1, GSI_NEW_STMT); |
1405 | tree is_zero = make_temp_ssa_name (boolean_type_node, NULL, name); |
1406 | gassign *assign2 = gimple_build_assign (is_zero, EQ_EXPR, low, |
1407 | zero32); |
1408 | gsi_insert_after (gsi, assign2, GSI_NEW_STMT); |
1409 | tree high_inc = make_temp_ssa_name (uint32_type_node, NULL, name); |
1410 | gassign *assign3 = gimple_build_assign (high_inc, COND_EXPR, |
1411 | is_zero, one32, zero32); |
1412 | gsi_insert_after (gsi, assign3, GSI_NEW_STMT); |
1413 | gcall *call2 = gimple_build_call (f, 3, addr_high, high_inc, |
1414 | relaxed); |
1415 | gsi_insert_after (gsi, call2, GSI_NEW_STMT); |
1416 | } |
1417 | else |
1418 | { |
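      /* Non-atomic fallback, emitted as an explicit load, add and store,
	 roughly:

	   tmp1 = counter;
	   tmp2 = tmp1 + 1;
	   counter = tmp2;
	   result = tmp2;   (only when RESULT is non-null)  */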
1419 | tree tmp1 = make_temp_ssa_name (type, NULL, name); |
1420 | gassign *assign1 = gimple_build_assign (tmp1, counter); |
1421 | gsi_insert_after (gsi, assign1, GSI_NEW_STMT); |
1422 | tree tmp2 = make_temp_ssa_name (type, NULL, name); |
1423 | gassign *assign2 = gimple_build_assign (tmp2, PLUS_EXPR, tmp1, one); |
1424 | gsi_insert_after (gsi, assign2, GSI_NEW_STMT); |
1425 | gassign *assign3 = gimple_build_assign (unshare_expr (counter), tmp2); |
1426 | gsi_insert_after (gsi, assign3, GSI_NEW_STMT); |
1427 | if (result) |
1428 | { |
1429 | gassign *assign4 = gimple_build_assign (result, tmp2); |
1430 | gsi_insert_after (gsi, assign4, GSI_NEW_STMT); |
1431 | } |
1432 | } |
1433 | } |
1434 | |
1435 | /* Output instructions as GIMPLE trees to increment the edge |
1436 | execution count, and insert them on E. */ |
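/* With the default single-threaded counter update the emitted sequence is
   roughly the following; the counter table and temporary names are
   illustrative only:

     PROF_edge_counter_1 = __gcov0.fn[edgeno];
     PROF_edge_counter_2 = PROF_edge_counter_1 + 1;
     __gcov0.fn[edgeno] = PROF_edge_counter_2;  */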
1437 | |
1438 | void |
1439 | gimple_gen_edge_profiler (int edgeno, edge e) |
1440 | { |
1441 | gimple_stmt_iterator gsi = gsi_last (PENDING_STMT (e)); |
1442 | tree counter = tree_coverage_counter_ref (GCOV_COUNTER_ARCS, edgeno); |
  gen_counter_update (&gsi, counter, NULL_TREE, "PROF_edge_counter");
1444 | } |
1445 | |
/* Emit code at GSI to compute the value to instrument for VALUE, and
   return the SSA name containing that value.  */
1448 | |
1449 | static tree |
1450 | prepare_instrumented_value (gimple_stmt_iterator *gsi, histogram_value value) |
1451 | { |
1452 | tree val = value->hvalue.value; |
1453 | if (POINTER_TYPE_P (TREE_TYPE (val))) |
1454 | val = fold_convert (build_nonstandard_integer_type |
1455 | (TYPE_PRECISION (TREE_TYPE (val)), 1), val); |
1456 | return force_gimple_operand_gsi (gsi, fold_convert (gcov_type_node, val), |
1457 | true, NULL_TREE, true, GSI_SAME_STMT); |
1458 | } |
1459 | |
/* Output instructions as GIMPLE trees to increment the interval histogram
   counter.  VALUE is the expression whose value is profiled.  TAG is the
   tag of the section for counters.  */
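/* The result is roughly a single call into the libgcov runtime (or its
   _atomic variant, depending on -fprofile-update); the counter reference
   is illustrative only:

     __gcov_interval_profiler (&counters[0], val, int_start, steps);  */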
1463 | |
1464 | void |
1465 | gimple_gen_interval_profiler (histogram_value value, unsigned tag) |
1466 | { |
1467 | gimple *stmt = value->hvalue.stmt; |
1468 | gimple_stmt_iterator gsi = gsi_for_stmt (stmt); |
1469 | tree ref = tree_coverage_counter_ref (tag, 0), ref_ptr; |
1470 | gcall *call; |
1471 | tree val; |
1472 | tree start = build_int_cst_type (integer_type_node, |
1473 | value->hdata.intvl.int_start); |
1474 | tree steps = build_int_cst_type (unsigned_type_node, |
1475 | value->hdata.intvl.steps); |
1476 | |
1477 | ref_ptr = force_gimple_operand_gsi (&gsi, |
1478 | build_addr (ref), |
1479 | true, NULL_TREE, true, GSI_SAME_STMT); |
  val = prepare_instrumented_value (&gsi, value);
1481 | call = gimple_build_call (tree_interval_profiler_fn, 4, |
1482 | ref_ptr, val, start, steps); |
1483 | gsi_insert_before (&gsi, call, GSI_NEW_STMT); |
1484 | } |
1485 | |
1486 | /* Output instructions as GIMPLE trees to increment the power of two histogram |
1487 | counter. VALUE is the expression whose value is profiled. TAG is the tag |
1488 | of the section for counters. */ |
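/* The result is roughly a single call into the libgcov runtime; the counter
   reference is illustrative only:

     __gcov_pow2_profiler (&counters[0], val);  */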
1489 | |
1490 | void |
1491 | gimple_gen_pow2_profiler (histogram_value value, unsigned tag) |
1492 | { |
1493 | gimple *stmt = value->hvalue.stmt; |
1494 | gimple_stmt_iterator gsi = gsi_for_stmt (stmt); |
1495 | tree ref_ptr = tree_coverage_counter_addr (tag, 0); |
1496 | gcall *call; |
1497 | tree val; |
1498 | |
1499 | ref_ptr = force_gimple_operand_gsi (&gsi, ref_ptr, |
1500 | true, NULL_TREE, true, GSI_SAME_STMT); |
  val = prepare_instrumented_value (&gsi, value);
1502 | call = gimple_build_call (tree_pow2_profiler_fn, 2, ref_ptr, val); |
1503 | gsi_insert_before (&gsi, call, GSI_NEW_STMT); |
1504 | } |
1505 | |
/* Output instructions as GIMPLE trees for code to find the N most common
   values.  VALUE is the expression whose value is profiled.  TAG is the tag
   of the section for counters.  */
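/* The result is roughly a single call into the libgcov runtime; the counter
   reference is illustrative only:

     __gcov_topn_values_profiler (&counters[0], val);  */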
1509 | |
1510 | void |
1511 | gimple_gen_topn_values_profiler (histogram_value value, unsigned tag) |
1512 | { |
1513 | gimple *stmt = value->hvalue.stmt; |
1514 | gimple_stmt_iterator gsi = gsi_for_stmt (stmt); |
1515 | tree ref_ptr = tree_coverage_counter_addr (tag, 0); |
1516 | gcall *call; |
1517 | tree val; |
1518 | |
1519 | ref_ptr = force_gimple_operand_gsi (&gsi, ref_ptr, |
1520 | true, NULL_TREE, true, GSI_SAME_STMT); |
  val = prepare_instrumented_value (&gsi, value);
1522 | call = gimple_build_call (tree_topn_values_profiler_fn, 2, ref_ptr, val); |
1523 | gsi_insert_before (&gsi, call, GSI_NEW_STMT); |
1524 | } |
1525 | |
1526 | |
/* Output instructions as GIMPLE trees for code to find the most
   common called function in an indirect call.
   VALUE is the call expression whose indirect callee is profiled.
   TAG is the tag of the section for counters.  */
1531 | |
1532 | void |
1533 | gimple_gen_ic_profiler (histogram_value value, unsigned tag) |
1534 | { |
1535 | tree tmp1; |
1536 | gassign *stmt1, *stmt2, *stmt3; |
1537 | gimple *stmt = value->hvalue.stmt; |
1538 | gimple_stmt_iterator gsi = gsi_for_stmt (stmt); |
1539 | tree ref_ptr = tree_coverage_counter_addr (tag, 0); |
1540 | |
1541 | ref_ptr = force_gimple_operand_gsi (&gsi, ref_ptr, |
1542 | true, NULL_TREE, true, GSI_SAME_STMT); |
1543 | |
1544 | /* Insert code: |
1545 | |
1546 | stmt1: __gcov_indirect_call.counters = get_relevant_counter_ptr (); |
1547 | stmt2: tmp1 = (void *) (indirect call argument value) |
1548 | stmt3: __gcov_indirect_call.callee = tmp1; |
1549 | |
1550 | Example: |
1551 | f_1 = foo; |
1552 | __gcov_indirect_call.counters = &__gcov4.main[0]; |
1553 | PROF_fn_9 = f_1; |
1554 | __gcov_indirect_call.callee = PROF_fn_9; |
1555 | _4 = f_1 (); |
1556 | */ |
1557 | |
1558 | tree gcov_type_ptr = build_pointer_type (get_gcov_type ()); |
1559 | |
1560 | tree counter_ref = build3 (COMPONENT_REF, gcov_type_ptr, |
1561 | ic_tuple_var, ic_tuple_counters_field, NULL_TREE); |
1562 | |
1563 | stmt1 = gimple_build_assign (counter_ref, ref_ptr); |
  tmp1 = make_temp_ssa_name (ptr_type_node, NULL, "PROF_fn");
1565 | stmt2 = gimple_build_assign (tmp1, unshare_expr (value->hvalue.value)); |
1566 | tree callee_ref = build3 (COMPONENT_REF, ptr_type_node, |
1567 | ic_tuple_var, ic_tuple_callee_field, NULL_TREE); |
1568 | stmt3 = gimple_build_assign (callee_ref, tmp1); |
1569 | |
1570 | gsi_insert_before (&gsi, stmt1, GSI_SAME_STMT); |
1571 | gsi_insert_before (&gsi, stmt2, GSI_SAME_STMT); |
1572 | gsi_insert_before (&gsi, stmt3, GSI_SAME_STMT); |
1573 | } |
1574 | |
1575 | |
/* Output instructions as GIMPLE trees for code to find the most
   common called function in an indirect call.  Insert the instructions
   at the beginning of every function that can be called indirectly.  */
1580 | |
1581 | void |
1582 | gimple_gen_ic_func_profiler (void) |
1583 | { |
  struct cgraph_node * c_node = cgraph_node::get (current_function_decl);
1585 | gcall *stmt1; |
1586 | tree tree_uid, cur_func, void0; |
1587 | |
1588 | /* Disable indirect call profiling for an IFUNC resolver and its |
1589 | callees since it requires TLS which hasn't been set up yet when |
1590 | the dynamic linker is resolving IFUNC symbols. See |
1591 | https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114115 |
1592 | */ |
1593 | if (c_node->only_called_directly_p () |
1594 | || c_node->called_by_ifunc_resolver) |
1595 | return; |
1596 | |
1597 | gimple_init_gcov_profiler (); |
1598 | |
1599 | basic_block entry = ENTRY_BLOCK_PTR_FOR_FN (cfun); |
  basic_block cond_bb = split_edge (single_succ_edge (entry));
  basic_block update_bb = split_edge (single_succ_edge (cond_bb));
1602 | |
  /* We need an extra split so that we do not create an input for a
     possible PHI node.  */
  split_edge (single_succ_edge (update_bb));
1606 | |
  edge true_edge = single_succ_edge (cond_bb);
1608 | true_edge->flags = EDGE_TRUE_VALUE; |
1609 | |
1610 | profile_probability probability; |
1611 | if (DECL_VIRTUAL_P (current_function_decl)) |
1612 | probability = profile_probability::very_likely (); |
1613 | else |
1614 | probability = profile_probability::unlikely (); |
1615 | |
1616 | true_edge->probability = probability; |
  edge e = make_edge (cond_bb, single_succ_edge (update_bb)->dest,
		      EDGE_FALSE_VALUE);
1619 | e->probability = true_edge->probability.invert (); |
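
  /* The resulting CFG is roughly:

	 entry -> cond_bb -> update_bb -> join_bb -> ...

     with an additional false edge going directly from cond_bb to join_bb;
     join_bb is the block created by the extra split above.  */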
1620 | |
1621 | /* Insert code: |
1622 | |
1623 | if (__gcov_indirect_call.callee != NULL) |
       __gcov_indirect_call_profiler_v3 (profile_id, &current_function_decl);
1625 | |
1626 | The function __gcov_indirect_call_profiler_v3 is responsible for |
1627 | resetting __gcov_indirect_call.callee to NULL. */ |
1628 | |
  gimple_stmt_iterator gsi = gsi_start_bb (cond_bb);
1630 | void0 = build_int_cst (ptr_type_node, 0); |
1631 | |
1632 | tree callee_ref = build3 (COMPONENT_REF, ptr_type_node, |
1633 | ic_tuple_var, ic_tuple_callee_field, NULL_TREE); |
1634 | |
1635 | tree ref = force_gimple_operand_gsi (&gsi, callee_ref, true, NULL_TREE, |
1636 | true, GSI_SAME_STMT); |
1637 | |
1638 | gcond *cond = gimple_build_cond (NE_EXPR, ref, |
1639 | void0, NULL, NULL); |
1640 | gsi_insert_before (&gsi, cond, GSI_NEW_STMT); |
1641 | |
  gsi = gsi_after_labels (update_bb);
1643 | |
1644 | cur_func = force_gimple_operand_gsi (&gsi, |
1645 | build_addr (current_function_decl), |
1646 | true, NULL_TREE, |
1647 | true, GSI_SAME_STMT); |
1648 | tree_uid = build_int_cst |
1649 | (gcov_type_node, |
     cgraph_node::get (current_function_decl)->profile_id);
1651 | stmt1 = gimple_build_call (tree_indirect_call_profiler_fn, 2, |
1652 | tree_uid, cur_func); |
1653 | gsi_insert_before (&gsi, stmt1, GSI_SAME_STMT); |
1654 | } |
1655 | |
/* Output instructions as GIMPLE trees at the beginning of each function to
   record the order in which the function is first executed.  TAG is the tag
   of the section for counters.  */
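/* The emitted code is roughly:

     if (counters[0] == 0)
       counters[0] = ++__gcov_time_profiler_counter;  */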
1659 | |
1660 | void |
1661 | gimple_gen_time_profiler (unsigned tag) |
1662 | { |
1663 | tree type = get_gcov_type (); |
1664 | basic_block entry = ENTRY_BLOCK_PTR_FOR_FN (cfun); |
  basic_block cond_bb = split_edge (single_succ_edge (entry));
  basic_block update_bb = split_edge (single_succ_edge (cond_bb));
1667 | |
  /* We need an extra split so that we do not create an input for a
     possible PHI node.  */
  split_edge (single_succ_edge (update_bb));
1671 | |
  edge true_edge = single_succ_edge (cond_bb);
1673 | true_edge->flags = EDGE_TRUE_VALUE; |
1674 | true_edge->probability = profile_probability::unlikely (); |
  edge e
    = make_edge (cond_bb, single_succ_edge (update_bb)->dest, EDGE_FALSE_VALUE);
1677 | e->probability = true_edge->probability.invert (); |
1678 | |
  gimple_stmt_iterator gsi = gsi_start_bb (cond_bb);
1680 | tree original_ref = tree_coverage_counter_ref (tag, 0); |
1681 | tree ref = force_gimple_operand_gsi (&gsi, original_ref, true, NULL_TREE, |
1682 | true, GSI_SAME_STMT); |
1683 | |
  /* Emit: if (counters[0] == 0), with the true edge leading to update_bb.  */
1685 | gcond *cond = gimple_build_cond (EQ_EXPR, ref, build_int_cst (type, 0), |
1686 | NULL, NULL); |
1687 | gsi_insert_before (&gsi, cond, GSI_NEW_STMT); |
1688 | |
1689 | /* Emit: counters[0] = ++__gcov_time_profiler_counter. */ |
  gsi = gsi_start_bb (update_bb);
  gen_counter_update (&gsi, tree_time_profiler_counter, original_ref,
		      "PROF_time_profile");
1693 | } |
1694 | |
/* Output instructions as GIMPLE trees to increment the average histogram
   counter.  VALUE is the expression whose value is profiled.  TAG is the
   tag of the section for counters.  */
1698 | |
1699 | void |
1700 | gimple_gen_average_profiler (histogram_value value, unsigned tag) |
1701 | { |
1702 | gimple *stmt = value->hvalue.stmt; |
1703 | gimple_stmt_iterator gsi = gsi_for_stmt (stmt); |
1704 | tree ref_ptr = tree_coverage_counter_addr (tag, 0); |
1705 | gcall *call; |
1706 | tree val; |
1707 | |
1708 | ref_ptr = force_gimple_operand_gsi (&gsi, ref_ptr, |
1709 | true, NULL_TREE, |
1710 | true, GSI_SAME_STMT); |
  val = prepare_instrumented_value (&gsi, value);
1712 | call = gimple_build_call (tree_average_profiler_fn, 2, ref_ptr, val); |
1713 | gsi_insert_before (&gsi, call, GSI_NEW_STMT); |
1714 | } |
1715 | |
/* Output instructions as GIMPLE trees to increment the ior histogram
   counter.  VALUE is the expression whose value is profiled.  TAG is the
   tag of the section for counters.  */
1719 | |
1720 | void |
1721 | gimple_gen_ior_profiler (histogram_value value, unsigned tag) |
1722 | { |
1723 | gimple *stmt = value->hvalue.stmt; |
1724 | gimple_stmt_iterator gsi = gsi_for_stmt (stmt); |
1725 | tree ref_ptr = tree_coverage_counter_addr (tag, 0); |
1726 | gcall *call; |
1727 | tree val; |
1728 | |
1729 | ref_ptr = force_gimple_operand_gsi (&gsi, ref_ptr, |
1730 | true, NULL_TREE, true, GSI_SAME_STMT); |
  val = prepare_instrumented_value (&gsi, value);
1732 | call = gimple_build_call (tree_ior_profiler_fn, 2, ref_ptr, val); |
1733 | gsi_insert_before (&gsi, call, GSI_NEW_STMT); |
1734 | } |
1735 | |
1736 | static vec<regex_t> profile_filter_files; |
1737 | static vec<regex_t> profile_exclude_files; |
1738 | |
/* Parse the list of regular expressions REGEX (separated by semicolons),
   compile each into a regex_t, and save them into vector V.  If a regular
   expression fails to compile, an error is reported for option FLAG_NAME.  */
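/* For example (the patterns are illustrative only):

     -fprofile-filter-files=main\.c;module.*\.c

   is compiled into two regex_t objects, one matching "main.c" and one
   matching file names that contain "module" followed by ".c".  */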
1743 | |
1744 | static void |
1745 | parse_profile_filter (const char *regex, vec<regex_t> *v, |
1746 | const char *flag_name) |
1747 | { |
  v->create (4);
  if (regex != NULL)
    {
      char *str = xstrdup (regex);
      for (char *p = strtok (str, ";"); p != NULL; p = strtok (NULL, ";"))
	{
	  regex_t r;
	  if (regcomp (&r, p, REG_EXTENDED | REG_NOSUB) != 0)
	    {
	      error ("invalid regular expression %qs in %qs",
		     p, flag_name);
	      return;
	    }

	  v->safe_push (r);
1763 | } |
1764 | } |
1765 | } |
1766 | |
1767 | /* Parse values of -fprofile-filter-files and -fprofile-exclude-files |
1768 | options. */ |
1769 | |
1770 | static void |
1771 | parse_profile_file_filtering () |
1772 | { |
  parse_profile_filter (flag_profile_filter_files, &profile_filter_files,
			"-fprofile-filter-files");
  parse_profile_filter (flag_profile_exclude_files, &profile_exclude_files,
			"-fprofile-exclude-files");
1777 | } |
1778 | |
/* Release the vectors of compiled regular expressions.  */
1780 | |
1781 | static void |
1782 | release_profile_file_filtering () |
1783 | { |
1784 | profile_filter_files.release (); |
1785 | profile_exclude_files.release (); |
1786 | } |
1787 | |
1788 | /* Return true when FILENAME should be instrumented based on |
1789 | -fprofile-filter-files and -fprofile-exclude-files options. */ |
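/* Exclusion takes precedence: a file matching both an exclude and a filter
   regular expression is not instrumented.  With an empty filter list, every
   file that is not excluded is instrumented.  */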
1790 | |
1791 | static bool |
1792 | include_source_file_for_profile (const char *filename) |
1793 | { |
  /* First check whether the file matches one of the
     flag_profile_exclude_files regular expressions.  */
  for (unsigned i = 0; i < profile_exclude_files.length (); i++)
    if (regexec (&profile_exclude_files[i],
		 filename, 0, NULL, 0) == REG_NOERROR)
      return false;
1799 | |
1800 | /* For non-empty flag_profile_filter_files include only files matching a |
1801 | regex in the flag. */ |
1802 | if (profile_filter_files.is_empty ()) |
1803 | return true; |
1804 | |
1805 | for (unsigned i = 0; i < profile_filter_files.length (); i++) |
    if (regexec (&profile_filter_files[i], filename, 0, NULL, 0) == REG_NOERROR)
1807 | return true; |
1808 | |
1809 | return false; |
1810 | } |
1811 | |
1812 | #ifndef HAVE_sync_compare_and_swapsi |
1813 | #define HAVE_sync_compare_and_swapsi 0 |
1814 | #endif |
1815 | #ifndef HAVE_atomic_compare_and_swapsi |
1816 | #define HAVE_atomic_compare_and_swapsi 0 |
1817 | #endif |
1818 | |
1819 | #ifndef HAVE_sync_compare_and_swapdi |
1820 | #define HAVE_sync_compare_and_swapdi 0 |
1821 | #endif |
1822 | #ifndef HAVE_atomic_compare_and_swapdi |
1823 | #define HAVE_atomic_compare_and_swapdi 0 |
1824 | #endif |
1825 | |
1826 | /* Profile all functions in the callgraph. */ |
1827 | |
1828 | static unsigned int |
1829 | tree_profiling (void) |
1830 | { |
1831 | struct cgraph_node *node; |
1832 | |
1833 | /* Verify whether we can utilize atomic update operations. */ |
1834 | bool can_support_atomic = targetm.have_libatomic; |
1835 | unsigned HOST_WIDE_INT gcov_type_size |
1836 | = tree_to_uhwi (TYPE_SIZE_UNIT (get_gcov_type ())); |
1837 | bool have_atomic_4 |
1838 | = HAVE_sync_compare_and_swapsi || HAVE_atomic_compare_and_swapsi; |
1839 | bool have_atomic_8 |
1840 | = HAVE_sync_compare_and_swapdi || HAVE_atomic_compare_and_swapdi; |
1841 | bool needs_split = gcov_type_size == 8 && !have_atomic_8 && have_atomic_4; |
1842 | if (!can_support_atomic) |
1843 | { |
1844 | if (gcov_type_size == 4) |
1845 | can_support_atomic = have_atomic_4; |
1846 | else if (gcov_type_size == 8) |
1847 | can_support_atomic = have_atomic_8; |
1848 | } |
1849 | |
1850 | if (flag_profile_update != PROFILE_UPDATE_SINGLE && needs_split) |
1851 | counter_update = COUNTER_UPDATE_ATOMIC_PARTIAL; |
1852 | |
1853 | if (flag_profile_update == PROFILE_UPDATE_ATOMIC |
1854 | && !can_support_atomic) |
1855 | { |
1856 | warning (0, "target does not support atomic profile update, " |
1857 | "single mode is selected" ); |
1858 | flag_profile_update = PROFILE_UPDATE_SINGLE; |
1859 | } |
1860 | else if (flag_profile_update == PROFILE_UPDATE_PREFER_ATOMIC) |
1861 | flag_profile_update |
1862 | = can_support_atomic ? PROFILE_UPDATE_ATOMIC : PROFILE_UPDATE_SINGLE; |
1863 | |
1864 | if (flag_profile_update == PROFILE_UPDATE_ATOMIC) |
1865 | { |
1866 | if (needs_split) |
1867 | counter_update = COUNTER_UPDATE_ATOMIC_SPLIT; |
1868 | else |
1869 | counter_update = COUNTER_UPDATE_ATOMIC_BUILTIN; |
1870 | } |
1871 | |
1872 | /* This is a small-ipa pass that gets called only once, from |
1873 | cgraphunit.cc:ipa_passes(). */ |
1874 | gcc_assert (symtab->state == IPA_SSA); |
1875 | |
1876 | init_node_map (true); |
1877 | parse_profile_file_filtering (); |
1878 | |
1879 | FOR_EACH_DEFINED_FUNCTION (node) |
1880 | { |
1881 | bool thunk = false; |
1882 | if (!gimple_has_body_p (node->decl) && !node->thunk) |
1883 | continue; |
1884 | |
1885 | /* Don't profile functions produced for builtin stuff. */ |
1886 | if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION) |
1887 | continue; |
1888 | |
      if (lookup_attribute ("no_profile_instrument_function",
			    DECL_ATTRIBUTES (node->decl)))
1891 | continue; |
      /* Do not instrument extern inline functions when testing coverage.
	 While this is not perfectly consistent (early inlined extern inlines
	 will still get accounted), the testsuite expects that.  */
1895 | if (DECL_EXTERNAL (node->decl) |
1896 | && flag_test_coverage) |
1897 | continue; |
1898 | |
1899 | const char *file = LOCATION_FILE (DECL_SOURCE_LOCATION (node->decl)); |
      if (!include_source_file_for_profile (file))
1901 | continue; |
1902 | |
1903 | if (node->thunk) |
1904 | { |
	  /* We cannot expand variadic thunks to GIMPLE.  */
	  if (stdarg_p (TREE_TYPE (node->decl)))
	    continue;
	  thunk = true;
	  /* When generating a profile, expand the thunk to GIMPLE so it can
	     be instrumented the same way as other functions.  */
1911 | if (profile_arc_flag || condition_coverage_flag) |
1912 | expand_thunk (node, false, true); |
1913 | /* Read cgraph profile but keep function as thunk at profile-use |
1914 | time. */ |
1915 | else |
1916 | { |
1917 | read_thunk_profile (node); |
1918 | continue; |
1919 | } |
1920 | } |
1921 | |
1922 | push_cfun (DECL_STRUCT_FUNCTION (node->decl)); |
1923 | |
1924 | if (dump_file) |
1925 | dump_function_header (dump_file, cfun->decl, dump_flags); |
1926 | |
1927 | /* Local pure-const may imply need to fixup the cfg. */ |
1928 | if (gimple_has_body_p (node->decl) |
1929 | && (execute_fixup_cfg () & TODO_cleanup_cfg)) |
1930 | cleanup_tree_cfg (); |
1931 | |
1932 | branch_prob (thunk); |
1933 | |
1934 | if (! flag_branch_probabilities |
1935 | && flag_profile_values) |
1936 | gimple_gen_ic_func_profiler (); |
1937 | |
1938 | if (flag_branch_probabilities |
1939 | && !thunk |
1940 | && flag_profile_values |
1941 | && flag_value_profile_transformations |
1942 | && profile_status_for_fn (cfun) == PROFILE_READ) |
1943 | gimple_value_profile_transformations (); |
1944 | |
1945 | /* The above could hose dominator info. Currently there is |
1946 | none coming in, this is a safety valve. It should be |
1947 | easy to adjust it, if and when there is some. */ |
1948 | free_dominance_info (CDI_DOMINATORS); |
1949 | free_dominance_info (CDI_POST_DOMINATORS); |
1950 | pop_cfun (); |
1951 | } |
1952 | |
1953 | release_profile_file_filtering (); |
1954 | |
1955 | /* Drop pure/const flags from instrumented functions. */ |
1956 | if (profile_arc_flag || condition_coverage_flag || flag_test_coverage) |
1957 | FOR_EACH_DEFINED_FUNCTION (node) |
1958 | { |
1959 | if (!gimple_has_body_p (node->decl) |
1960 | || !(!node->clone_of |
1961 | || node->decl != node->clone_of->decl)) |
1962 | continue; |
1963 | |
1964 | /* Don't profile functions produced for builtin stuff. */ |
1965 | if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION) |
1966 | continue; |
1967 | |
	node->set_const_flag (false, false);
	node->set_pure_flag (false, false);
1970 | } |
1971 | |
1972 | /* Update call statements and rebuild the cgraph. */ |
1973 | FOR_EACH_DEFINED_FUNCTION (node) |
1974 | { |
1975 | basic_block bb; |
1976 | |
1977 | if (!gimple_has_body_p (node->decl) |
1978 | || !(!node->clone_of |
1979 | || node->decl != node->clone_of->decl)) |
1980 | continue; |
1981 | |
1982 | /* Don't profile functions produced for builtin stuff. */ |
1983 | if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION) |
1984 | continue; |
1985 | |
1986 | push_cfun (DECL_STRUCT_FUNCTION (node->decl)); |
1987 | |
1988 | if (profile_arc_flag || condition_coverage_flag || flag_test_coverage) |
1989 | FOR_EACH_BB_FN (bb, cfun) |
1990 | { |
1991 | gimple_stmt_iterator gsi; |
	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gcall *call = dyn_cast <gcall *> (gsi_stmt (gsi));
	      if (!call || gimple_call_internal_p (call))
		continue;

	      /* We do not clear pure/const on decls without a body.  */
	      tree fndecl = gimple_call_fndecl (call);
	      cgraph_node *callee;
	      if (fndecl
		  && (callee = cgraph_node::get (fndecl))
		  && callee->get_availability (node) == AVAIL_NOT_AVAILABLE)
		continue;
2005 | |
2006 | /* Drop the const attribute from the call type (the pure |
2007 | attribute is not available on types). */ |
2008 | tree fntype = gimple_call_fntype (gs: call); |
2009 | if (fntype && TYPE_READONLY (fntype)) |
2010 | { |
2011 | int quals = TYPE_QUALS (fntype) & ~TYPE_QUAL_CONST; |
2012 | fntype = build_qualified_type (fntype, quals); |
		  gimple_call_set_fntype (call, fntype);
2014 | } |
2015 | |
2016 | /* Update virtual operands of calls to no longer const/pure |
2017 | functions. */ |
	      update_stmt (call);
2019 | } |
2020 | } |
2021 | |
      /* Re-merge the split blocks.  */
2023 | cleanup_tree_cfg (); |
2024 | update_ssa (TODO_update_ssa); |
2025 | |
2026 | cgraph_edge::rebuild_edges (); |
2027 | |
2028 | pop_cfun (); |
2029 | } |
2030 | |
2031 | handle_missing_profiles (); |
2032 | |
2033 | del_node_map (); |
2034 | return 0; |
2035 | } |
2036 | |
2037 | namespace { |
2038 | |
2039 | const pass_data pass_data_ipa_tree_profile = |
2040 | { |
  SIMPLE_IPA_PASS, /* type */
  "profile", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_IPA_PROFILE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
2049 | TODO_dump_symtab, /* todo_flags_finish */ |
2050 | }; |
2051 | |
2052 | class pass_ipa_tree_profile : public simple_ipa_opt_pass |
2053 | { |
2054 | public: |
2055 | pass_ipa_tree_profile (gcc::context *ctxt) |
2056 | : simple_ipa_opt_pass (pass_data_ipa_tree_profile, ctxt) |
2057 | {} |
2058 | |
2059 | /* opt_pass methods: */ |
2060 | bool gate (function *) final override; |
2061 | unsigned int execute (function *) final override { return tree_profiling (); } |
2062 | |
2063 | }; // class pass_ipa_tree_profile |
2064 | |
2065 | bool |
2066 | pass_ipa_tree_profile::gate (function *) |
2067 | { |
  /* Run when profile instrumentation, profile use or test coverage shall be
     performed.  For AutoFDO there is no instrumentation, thus this pass is
     disabled.  */
2071 | return (!in_lto_p && !flag_auto_profile |
2072 | && (flag_branch_probabilities || flag_test_coverage |
2073 | || profile_arc_flag || condition_coverage_flag)); |
2074 | } |
2075 | |
2076 | } // anon namespace |
2077 | |
2078 | simple_ipa_opt_pass * |
2079 | make_pass_ipa_tree_profile (gcc::context *ctxt) |
2080 | { |
2081 | return new pass_ipa_tree_profile (ctxt); |
2082 | } |
2083 | |
2084 | #include "gt-tree-profile.h" |
2085 | |