1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Copyright (C) 2009-2011, Frederic Weisbecker <fweisbec@gmail.com> |
4 | * |
5 | * Handle the callchains from the stream in an ad-hoc radix tree and then |
6 | * sort them in an rbtree. |
7 | * |
8 | * Using a radix for code path provides a fast retrieval and factorizes |
9 | * memory use. Also that lets us use the paths in a hierarchical graph view. |
10 | * |
11 | */ |
12 | |
13 | #include <inttypes.h> |
14 | #include <stdlib.h> |
15 | #include <stdio.h> |
16 | #include <stdbool.h> |
17 | #include <errno.h> |
18 | #include <math.h> |
19 | #include <linux/string.h> |
20 | #include <linux/zalloc.h> |
21 | |
22 | #include "asm/bug.h" |
23 | |
24 | #include "debug.h" |
25 | #include "dso.h" |
26 | #include "event.h" |
27 | #include "hist.h" |
28 | #include "sort.h" |
29 | #include "machine.h" |
30 | #include "map.h" |
31 | #include "callchain.h" |
32 | #include "branch.h" |
33 | #include "symbol.h" |
34 | #include "util.h" |
35 | #include "../perf.h" |
36 | |
37 | #define CALLCHAIN_PARAM_DEFAULT \ |
38 | .mode = CHAIN_GRAPH_ABS, \ |
39 | .min_percent = 0.5, \ |
40 | .order = ORDER_CALLEE, \ |
41 | .key = CCKEY_FUNCTION, \ |
42 | .value = CCVAL_PERCENT, \ |
43 | |
44 | struct callchain_param callchain_param = { |
45 | CALLCHAIN_PARAM_DEFAULT |
46 | }; |
47 | |
48 | /* |
49 | * Are there any events usind DWARF callchains? |
50 | * |
51 | * I.e. |
52 | * |
53 | * -e cycles/call-graph=dwarf/ |
54 | */ |
55 | bool dwarf_callchain_users; |
56 | |
57 | struct callchain_param callchain_param_default = { |
58 | CALLCHAIN_PARAM_DEFAULT |
59 | }; |
60 | |
61 | /* Used for thread-local struct callchain_cursor. */ |
62 | static pthread_key_t callchain_cursor; |
63 | |
64 | int parse_callchain_record_opt(const char *arg, struct callchain_param *param) |
65 | { |
66 | return parse_callchain_record(arg, param); |
67 | } |
68 | |
69 | static int parse_callchain_mode(const char *value) |
70 | { |
71 | if (!strncmp(value, "graph" , strlen(value))) { |
72 | callchain_param.mode = CHAIN_GRAPH_ABS; |
73 | return 0; |
74 | } |
75 | if (!strncmp(value, "flat" , strlen(value))) { |
76 | callchain_param.mode = CHAIN_FLAT; |
77 | return 0; |
78 | } |
79 | if (!strncmp(value, "fractal" , strlen(value))) { |
80 | callchain_param.mode = CHAIN_GRAPH_REL; |
81 | return 0; |
82 | } |
83 | if (!strncmp(value, "folded" , strlen(value))) { |
84 | callchain_param.mode = CHAIN_FOLDED; |
85 | return 0; |
86 | } |
87 | return -1; |
88 | } |
89 | |
90 | static int parse_callchain_order(const char *value) |
91 | { |
92 | if (!strncmp(value, "caller" , strlen(value))) { |
93 | callchain_param.order = ORDER_CALLER; |
94 | callchain_param.order_set = true; |
95 | return 0; |
96 | } |
97 | if (!strncmp(value, "callee" , strlen(value))) { |
98 | callchain_param.order = ORDER_CALLEE; |
99 | callchain_param.order_set = true; |
100 | return 0; |
101 | } |
102 | return -1; |
103 | } |
104 | |
105 | static int parse_callchain_sort_key(const char *value) |
106 | { |
107 | if (!strncmp(value, "function" , strlen(value))) { |
108 | callchain_param.key = CCKEY_FUNCTION; |
109 | return 0; |
110 | } |
111 | if (!strncmp(value, "address" , strlen(value))) { |
112 | callchain_param.key = CCKEY_ADDRESS; |
113 | return 0; |
114 | } |
115 | if (!strncmp(value, "srcline" , strlen(value))) { |
116 | callchain_param.key = CCKEY_SRCLINE; |
117 | return 0; |
118 | } |
119 | if (!strncmp(value, "branch" , strlen(value))) { |
120 | callchain_param.branch_callstack = 1; |
121 | return 0; |
122 | } |
123 | return -1; |
124 | } |
125 | |
126 | static int parse_callchain_value(const char *value) |
127 | { |
128 | if (!strncmp(value, "percent" , strlen(value))) { |
129 | callchain_param.value = CCVAL_PERCENT; |
130 | return 0; |
131 | } |
132 | if (!strncmp(value, "period" , strlen(value))) { |
133 | callchain_param.value = CCVAL_PERIOD; |
134 | return 0; |
135 | } |
136 | if (!strncmp(value, "count" , strlen(value))) { |
137 | callchain_param.value = CCVAL_COUNT; |
138 | return 0; |
139 | } |
140 | return -1; |
141 | } |
142 | |
143 | static int get_stack_size(const char *str, unsigned long *_size) |
144 | { |
145 | char *endptr; |
146 | unsigned long size; |
147 | unsigned long max_size = round_down(USHRT_MAX, sizeof(u64)); |
148 | |
149 | size = strtoul(str, &endptr, 0); |
150 | |
151 | do { |
152 | if (*endptr) |
153 | break; |
154 | |
155 | size = round_up(size, sizeof(u64)); |
156 | if (!size || size > max_size) |
157 | break; |
158 | |
159 | *_size = size; |
160 | return 0; |
161 | |
162 | } while (0); |
163 | |
164 | pr_err("callchain: Incorrect stack dump size (max %ld): %s\n" , |
165 | max_size, str); |
166 | return -1; |
167 | } |
168 | |
169 | static int |
170 | __parse_callchain_report_opt(const char *arg, bool allow_record_opt) |
171 | { |
172 | char *tok; |
173 | char *endptr, *saveptr = NULL; |
174 | bool minpcnt_set = false; |
175 | bool record_opt_set = false; |
176 | bool try_stack_size = false; |
177 | |
178 | callchain_param.enabled = true; |
179 | symbol_conf.use_callchain = true; |
180 | |
181 | if (!arg) |
182 | return 0; |
183 | |
184 | while ((tok = strtok_r((char *)arg, "," , &saveptr)) != NULL) { |
185 | if (!strncmp(tok, "none" , strlen(tok))) { |
186 | callchain_param.mode = CHAIN_NONE; |
187 | callchain_param.enabled = false; |
188 | symbol_conf.use_callchain = false; |
189 | return 0; |
190 | } |
191 | |
192 | if (!parse_callchain_mode(value: tok) || |
193 | !parse_callchain_order(value: tok) || |
194 | !parse_callchain_sort_key(value: tok) || |
195 | !parse_callchain_value(value: tok)) { |
196 | /* parsing ok - move on to the next */ |
197 | try_stack_size = false; |
198 | goto next; |
199 | } else if (allow_record_opt && !record_opt_set) { |
200 | if (parse_callchain_record(arg: tok, param: &callchain_param)) |
201 | goto try_numbers; |
202 | |
203 | /* assume that number followed by 'dwarf' is stack size */ |
204 | if (callchain_param.record_mode == CALLCHAIN_DWARF) |
205 | try_stack_size = true; |
206 | |
207 | record_opt_set = true; |
208 | goto next; |
209 | } |
210 | |
211 | try_numbers: |
212 | if (try_stack_size) { |
213 | unsigned long size = 0; |
214 | |
215 | if (get_stack_size(str: tok, size: &size) < 0) |
216 | return -1; |
217 | callchain_param.dump_size = size; |
218 | try_stack_size = false; |
219 | } else if (!minpcnt_set) { |
220 | /* try to get the min percent */ |
221 | callchain_param.min_percent = strtod(tok, &endptr); |
222 | if (tok == endptr) |
223 | return -1; |
224 | minpcnt_set = true; |
225 | } else { |
226 | /* try print limit at last */ |
227 | callchain_param.print_limit = strtoul(tok, &endptr, 0); |
228 | if (tok == endptr) |
229 | return -1; |
230 | } |
231 | next: |
232 | arg = NULL; |
233 | } |
234 | |
235 | if (callchain_register_param(param: &callchain_param) < 0) { |
236 | pr_err("Can't register callchain params\n" ); |
237 | return -1; |
238 | } |
239 | return 0; |
240 | } |
241 | |
242 | int parse_callchain_report_opt(const char *arg) |
243 | { |
244 | return __parse_callchain_report_opt(arg, allow_record_opt: false); |
245 | } |
246 | |
247 | int parse_callchain_top_opt(const char *arg) |
248 | { |
249 | return __parse_callchain_report_opt(arg, allow_record_opt: true); |
250 | } |
251 | |
252 | int parse_callchain_record(const char *arg, struct callchain_param *param) |
253 | { |
254 | char *tok, *name, *saveptr = NULL; |
255 | char *buf; |
256 | int ret = -1; |
257 | |
258 | /* We need buffer that we know we can write to. */ |
259 | buf = malloc(strlen(arg) + 1); |
260 | if (!buf) |
261 | return -ENOMEM; |
262 | |
263 | strcpy(p: buf, q: arg); |
264 | |
265 | tok = strtok_r((char *)buf, "," , &saveptr); |
266 | name = tok ? : (char *)buf; |
267 | |
268 | do { |
269 | /* Framepointer style */ |
270 | if (!strncmp(name, "fp" , sizeof("fp" ))) { |
271 | ret = 0; |
272 | param->record_mode = CALLCHAIN_FP; |
273 | |
274 | tok = strtok_r(NULL, "," , &saveptr); |
275 | if (tok) { |
276 | unsigned long size; |
277 | |
278 | size = strtoul(tok, &name, 0); |
279 | if (size < (unsigned) sysctl__max_stack()) |
280 | param->max_stack = size; |
281 | } |
282 | break; |
283 | |
284 | /* Dwarf style */ |
285 | } else if (!strncmp(name, "dwarf" , sizeof("dwarf" ))) { |
286 | const unsigned long default_stack_dump_size = 8192; |
287 | |
288 | ret = 0; |
289 | param->record_mode = CALLCHAIN_DWARF; |
290 | param->dump_size = default_stack_dump_size; |
291 | dwarf_callchain_users = true; |
292 | |
293 | tok = strtok_r(NULL, "," , &saveptr); |
294 | if (tok) { |
295 | unsigned long size = 0; |
296 | |
297 | ret = get_stack_size(str: tok, size: &size); |
298 | param->dump_size = size; |
299 | } |
300 | } else if (!strncmp(name, "lbr" , sizeof("lbr" ))) { |
301 | if (!strtok_r(NULL, "," , &saveptr)) { |
302 | param->record_mode = CALLCHAIN_LBR; |
303 | ret = 0; |
304 | } else |
305 | pr_err("callchain: No more arguments " |
306 | "needed for --call-graph lbr\n" ); |
307 | break; |
308 | } else { |
309 | pr_err("callchain: Unknown --call-graph option " |
310 | "value: %s\n" , arg); |
311 | break; |
312 | } |
313 | |
314 | } while (0); |
315 | |
316 | free(buf); |
317 | return ret; |
318 | } |
319 | |
320 | int perf_callchain_config(const char *var, const char *value) |
321 | { |
322 | char *endptr; |
323 | |
324 | if (!strstarts(str: var, prefix: "call-graph." )) |
325 | return 0; |
326 | var += sizeof("call-graph." ) - 1; |
327 | |
328 | if (!strcmp(var, "record-mode" )) |
329 | return parse_callchain_record_opt(arg: value, param: &callchain_param); |
330 | if (!strcmp(var, "dump-size" )) { |
331 | unsigned long size = 0; |
332 | int ret; |
333 | |
334 | ret = get_stack_size(str: value, size: &size); |
335 | callchain_param.dump_size = size; |
336 | |
337 | return ret; |
338 | } |
339 | if (!strcmp(var, "print-type" )){ |
340 | int ret; |
341 | ret = parse_callchain_mode(value); |
342 | if (ret == -1) |
343 | pr_err("Invalid callchain mode: %s\n" , value); |
344 | return ret; |
345 | } |
346 | if (!strcmp(var, "order" )){ |
347 | int ret; |
348 | ret = parse_callchain_order(value); |
349 | if (ret == -1) |
350 | pr_err("Invalid callchain order: %s\n" , value); |
351 | return ret; |
352 | } |
353 | if (!strcmp(var, "sort-key" )){ |
354 | int ret; |
355 | ret = parse_callchain_sort_key(value); |
356 | if (ret == -1) |
357 | pr_err("Invalid callchain sort key: %s\n" , value); |
358 | return ret; |
359 | } |
360 | if (!strcmp(var, "threshold" )) { |
361 | callchain_param.min_percent = strtod(value, &endptr); |
362 | if (value == endptr) { |
363 | pr_err("Invalid callchain threshold: %s\n" , value); |
364 | return -1; |
365 | } |
366 | } |
367 | if (!strcmp(var, "print-limit" )) { |
368 | callchain_param.print_limit = strtod(value, &endptr); |
369 | if (value == endptr) { |
370 | pr_err("Invalid callchain print limit: %s\n" , value); |
371 | return -1; |
372 | } |
373 | } |
374 | |
375 | return 0; |
376 | } |
377 | |
378 | static void |
379 | rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, |
380 | enum chain_mode mode) |
381 | { |
382 | struct rb_node **p = &root->rb_node; |
383 | struct rb_node *parent = NULL; |
384 | struct callchain_node *rnode; |
385 | u64 chain_cumul = callchain_cumul_hits(node: chain); |
386 | |
387 | while (*p) { |
388 | u64 rnode_cumul; |
389 | |
390 | parent = *p; |
391 | rnode = rb_entry(parent, struct callchain_node, rb_node); |
392 | rnode_cumul = callchain_cumul_hits(node: rnode); |
393 | |
394 | switch (mode) { |
395 | case CHAIN_FLAT: |
396 | case CHAIN_FOLDED: |
397 | if (rnode->hit < chain->hit) |
398 | p = &(*p)->rb_left; |
399 | else |
400 | p = &(*p)->rb_right; |
401 | break; |
402 | case CHAIN_GRAPH_ABS: /* Falldown */ |
403 | case CHAIN_GRAPH_REL: |
404 | if (rnode_cumul < chain_cumul) |
405 | p = &(*p)->rb_left; |
406 | else |
407 | p = &(*p)->rb_right; |
408 | break; |
409 | case CHAIN_NONE: |
410 | default: |
411 | break; |
412 | } |
413 | } |
414 | |
415 | rb_link_node(node: &chain->rb_node, parent, rb_link: p); |
416 | rb_insert_color(&chain->rb_node, root); |
417 | } |
418 | |
419 | static void |
420 | __sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node, |
421 | u64 min_hit) |
422 | { |
423 | struct rb_node *n; |
424 | struct callchain_node *child; |
425 | |
426 | n = rb_first(&node->rb_root_in); |
427 | while (n) { |
428 | child = rb_entry(n, struct callchain_node, rb_node_in); |
429 | n = rb_next(n); |
430 | |
431 | __sort_chain_flat(rb_root, node: child, min_hit); |
432 | } |
433 | |
434 | if (node->hit && node->hit >= min_hit) |
435 | rb_insert_callchain(root: rb_root, chain: node, mode: CHAIN_FLAT); |
436 | } |
437 | |
438 | /* |
439 | * Once we get every callchains from the stream, we can now |
440 | * sort them by hit |
441 | */ |
442 | static void |
443 | sort_chain_flat(struct rb_root *rb_root, struct callchain_root *root, |
444 | u64 min_hit, struct callchain_param *param __maybe_unused) |
445 | { |
446 | *rb_root = RB_ROOT; |
447 | __sort_chain_flat(rb_root, node: &root->node, min_hit); |
448 | } |
449 | |
450 | static void __sort_chain_graph_abs(struct callchain_node *node, |
451 | u64 min_hit) |
452 | { |
453 | struct rb_node *n; |
454 | struct callchain_node *child; |
455 | |
456 | node->rb_root = RB_ROOT; |
457 | n = rb_first(&node->rb_root_in); |
458 | |
459 | while (n) { |
460 | child = rb_entry(n, struct callchain_node, rb_node_in); |
461 | n = rb_next(n); |
462 | |
463 | __sort_chain_graph_abs(node: child, min_hit); |
464 | if (callchain_cumul_hits(node: child) >= min_hit) |
465 | rb_insert_callchain(root: &node->rb_root, chain: child, |
466 | mode: CHAIN_GRAPH_ABS); |
467 | } |
468 | } |
469 | |
470 | static void |
471 | sort_chain_graph_abs(struct rb_root *rb_root, struct callchain_root *chain_root, |
472 | u64 min_hit, struct callchain_param *param __maybe_unused) |
473 | { |
474 | __sort_chain_graph_abs(node: &chain_root->node, min_hit); |
475 | rb_root->rb_node = chain_root->node.rb_root.rb_node; |
476 | } |
477 | |
478 | static void __sort_chain_graph_rel(struct callchain_node *node, |
479 | double min_percent) |
480 | { |
481 | struct rb_node *n; |
482 | struct callchain_node *child; |
483 | u64 min_hit; |
484 | |
485 | node->rb_root = RB_ROOT; |
486 | min_hit = ceil(node->children_hit * min_percent); |
487 | |
488 | n = rb_first(&node->rb_root_in); |
489 | while (n) { |
490 | child = rb_entry(n, struct callchain_node, rb_node_in); |
491 | n = rb_next(n); |
492 | |
493 | __sort_chain_graph_rel(node: child, min_percent); |
494 | if (callchain_cumul_hits(node: child) >= min_hit) |
495 | rb_insert_callchain(root: &node->rb_root, chain: child, |
496 | mode: CHAIN_GRAPH_REL); |
497 | } |
498 | } |
499 | |
500 | static void |
501 | sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_root *chain_root, |
502 | u64 min_hit __maybe_unused, struct callchain_param *param) |
503 | { |
504 | __sort_chain_graph_rel(node: &chain_root->node, min_percent: param->min_percent / 100.0); |
505 | rb_root->rb_node = chain_root->node.rb_root.rb_node; |
506 | } |
507 | |
508 | int callchain_register_param(struct callchain_param *param) |
509 | { |
510 | switch (param->mode) { |
511 | case CHAIN_GRAPH_ABS: |
512 | param->sort = sort_chain_graph_abs; |
513 | break; |
514 | case CHAIN_GRAPH_REL: |
515 | param->sort = sort_chain_graph_rel; |
516 | break; |
517 | case CHAIN_FLAT: |
518 | case CHAIN_FOLDED: |
519 | param->sort = sort_chain_flat; |
520 | break; |
521 | case CHAIN_NONE: |
522 | default: |
523 | return -1; |
524 | } |
525 | return 0; |
526 | } |
527 | |
528 | /* |
529 | * Create a child for a parent. If inherit_children, then the new child |
530 | * will become the new parent of it's parent children |
531 | */ |
532 | static struct callchain_node * |
533 | create_child(struct callchain_node *parent, bool inherit_children) |
534 | { |
535 | struct callchain_node *new; |
536 | |
537 | new = zalloc(sizeof(*new)); |
538 | if (!new) { |
539 | perror("not enough memory to create child for code path tree" ); |
540 | return NULL; |
541 | } |
542 | new->parent = parent; |
543 | INIT_LIST_HEAD(list: &new->val); |
544 | INIT_LIST_HEAD(list: &new->parent_val); |
545 | |
546 | if (inherit_children) { |
547 | struct rb_node *n; |
548 | struct callchain_node *child; |
549 | |
550 | new->rb_root_in = parent->rb_root_in; |
551 | parent->rb_root_in = RB_ROOT; |
552 | |
553 | n = rb_first(&new->rb_root_in); |
554 | while (n) { |
555 | child = rb_entry(n, struct callchain_node, rb_node_in); |
556 | child->parent = new; |
557 | n = rb_next(n); |
558 | } |
559 | |
560 | /* make it the first child */ |
561 | rb_link_node(node: &new->rb_node_in, NULL, rb_link: &parent->rb_root_in.rb_node); |
562 | rb_insert_color(&new->rb_node_in, &parent->rb_root_in); |
563 | } |
564 | |
565 | return new; |
566 | } |
567 | |
568 | |
569 | /* |
570 | * Fill the node with callchain values |
571 | */ |
572 | static int |
573 | fill_node(struct callchain_node *node, struct callchain_cursor *cursor) |
574 | { |
575 | struct callchain_cursor_node *cursor_node; |
576 | |
577 | node->val_nr = cursor->nr - cursor->pos; |
578 | if (!node->val_nr) |
579 | pr_warning("Warning: empty node in callchain tree\n" ); |
580 | |
581 | cursor_node = callchain_cursor_current(cursor); |
582 | |
583 | while (cursor_node) { |
584 | struct callchain_list *call; |
585 | |
586 | call = zalloc(sizeof(*call)); |
587 | if (!call) { |
588 | perror("not enough memory for the code path tree" ); |
589 | return -ENOMEM; |
590 | } |
591 | call->ip = cursor_node->ip; |
592 | call->ms = cursor_node->ms; |
593 | call->ms.map = map__get(map: call->ms.map); |
594 | call->ms.maps = maps__get(maps: call->ms.maps); |
595 | call->srcline = cursor_node->srcline; |
596 | |
597 | if (cursor_node->branch) { |
598 | call->branch_count = 1; |
599 | |
600 | if (cursor_node->branch_from) { |
601 | /* |
602 | * branch_from is set with value somewhere else |
603 | * to imply it's "to" of a branch. |
604 | */ |
605 | if (!call->brtype_stat) { |
606 | call->brtype_stat = zalloc(sizeof(*call->brtype_stat)); |
607 | if (!call->brtype_stat) { |
608 | perror("not enough memory for the code path branch statistics" ); |
609 | free(call->brtype_stat); |
610 | return -ENOMEM; |
611 | } |
612 | } |
613 | call->brtype_stat->branch_to = true; |
614 | |
615 | if (cursor_node->branch_flags.predicted) |
616 | call->predicted_count = 1; |
617 | |
618 | if (cursor_node->branch_flags.abort) |
619 | call->abort_count = 1; |
620 | |
621 | branch_type_count(st: call->brtype_stat, |
622 | flags: &cursor_node->branch_flags, |
623 | from: cursor_node->branch_from, |
624 | to: cursor_node->ip); |
625 | } else { |
626 | /* |
627 | * It's "from" of a branch |
628 | */ |
629 | if (call->brtype_stat && call->brtype_stat->branch_to) |
630 | call->brtype_stat->branch_to = false; |
631 | call->cycles_count = |
632 | cursor_node->branch_flags.cycles; |
633 | call->iter_count = cursor_node->nr_loop_iter; |
634 | call->iter_cycles = cursor_node->iter_cycles; |
635 | } |
636 | } |
637 | |
638 | list_add_tail(new: &call->list, head: &node->val); |
639 | |
640 | callchain_cursor_advance(cursor); |
641 | cursor_node = callchain_cursor_current(cursor); |
642 | } |
643 | return 0; |
644 | } |
645 | |
646 | static struct callchain_node * |
647 | add_child(struct callchain_node *parent, |
648 | struct callchain_cursor *cursor, |
649 | u64 period) |
650 | { |
651 | struct callchain_node *new; |
652 | |
653 | new = create_child(parent, inherit_children: false); |
654 | if (new == NULL) |
655 | return NULL; |
656 | |
657 | if (fill_node(node: new, cursor) < 0) { |
658 | struct callchain_list *call, *tmp; |
659 | |
660 | list_for_each_entry_safe(call, tmp, &new->val, list) { |
661 | list_del_init(entry: &call->list); |
662 | map_symbol__exit(ms: &call->ms); |
663 | zfree(&call->brtype_stat); |
664 | free(call); |
665 | } |
666 | free(new); |
667 | return NULL; |
668 | } |
669 | |
670 | new->children_hit = 0; |
671 | new->hit = period; |
672 | new->children_count = 0; |
673 | new->count = 1; |
674 | return new; |
675 | } |
676 | |
677 | enum match_result { |
678 | MATCH_ERROR = -1, |
679 | MATCH_EQ, |
680 | MATCH_LT, |
681 | MATCH_GT, |
682 | }; |
683 | |
684 | static enum match_result match_chain_strings(const char *left, |
685 | const char *right) |
686 | { |
687 | enum match_result ret = MATCH_EQ; |
688 | int cmp; |
689 | |
690 | if (left && right) |
691 | cmp = strcmp(left, right); |
692 | else if (!left && right) |
693 | cmp = 1; |
694 | else if (left && !right) |
695 | cmp = -1; |
696 | else |
697 | return MATCH_ERROR; |
698 | |
699 | if (cmp != 0) |
700 | ret = cmp < 0 ? MATCH_LT : MATCH_GT; |
701 | |
702 | return ret; |
703 | } |
704 | |
705 | /* |
706 | * We need to always use relative addresses because we're aggregating |
707 | * callchains from multiple threads, i.e. different address spaces, so |
708 | * comparing absolute addresses make no sense as a symbol in a DSO may end up |
709 | * in a different address when used in a different binary or even the same |
710 | * binary but with some sort of address randomization technique, thus we need |
711 | * to compare just relative addresses. -acme |
712 | */ |
713 | static enum match_result match_chain_dso_addresses(struct map *left_map, u64 left_ip, |
714 | struct map *right_map, u64 right_ip) |
715 | { |
716 | struct dso *left_dso = left_map ? map__dso(map: left_map) : NULL; |
717 | struct dso *right_dso = right_map ? map__dso(map: right_map) : NULL; |
718 | |
719 | if (left_dso != right_dso) |
720 | return left_dso < right_dso ? MATCH_LT : MATCH_GT; |
721 | |
722 | if (left_ip != right_ip) |
723 | return left_ip < right_ip ? MATCH_LT : MATCH_GT; |
724 | |
725 | return MATCH_EQ; |
726 | } |
727 | |
728 | static enum match_result match_chain(struct callchain_cursor_node *node, |
729 | struct callchain_list *cnode) |
730 | { |
731 | enum match_result match = MATCH_ERROR; |
732 | |
733 | switch (callchain_param.key) { |
734 | case CCKEY_SRCLINE: |
735 | match = match_chain_strings(left: cnode->srcline, right: node->srcline); |
736 | if (match != MATCH_ERROR) |
737 | break; |
738 | /* otherwise fall-back to symbol-based comparison below */ |
739 | fallthrough; |
740 | case CCKEY_FUNCTION: |
741 | if (node->ms.sym && cnode->ms.sym) { |
742 | /* |
743 | * Compare inlined frames based on their symbol name |
744 | * because different inlined frames will have the same |
745 | * symbol start. Otherwise do a faster comparison based |
746 | * on the symbol start address. |
747 | */ |
748 | if (cnode->ms.sym->inlined || node->ms.sym->inlined) { |
749 | match = match_chain_strings(left: cnode->ms.sym->name, |
750 | right: node->ms.sym->name); |
751 | if (match != MATCH_ERROR) |
752 | break; |
753 | } else { |
754 | match = match_chain_dso_addresses(left_map: cnode->ms.map, left_ip: cnode->ms.sym->start, |
755 | right_map: node->ms.map, right_ip: node->ms.sym->start); |
756 | break; |
757 | } |
758 | } |
759 | /* otherwise fall-back to IP-based comparison below */ |
760 | fallthrough; |
761 | case CCKEY_ADDRESS: |
762 | default: |
763 | match = match_chain_dso_addresses(left_map: cnode->ms.map, left_ip: cnode->ip, right_map: node->ms.map, right_ip: node->ip); |
764 | break; |
765 | } |
766 | |
767 | if (match == MATCH_EQ && node->branch) { |
768 | cnode->branch_count++; |
769 | |
770 | if (node->branch_from) { |
771 | /* |
772 | * It's "to" of a branch |
773 | */ |
774 | if (!cnode->brtype_stat) { |
775 | cnode->brtype_stat = zalloc(sizeof(*cnode->brtype_stat)); |
776 | if (!cnode->brtype_stat) { |
777 | perror("not enough memory for the code path branch statistics" ); |
778 | return MATCH_ERROR; |
779 | } |
780 | } |
781 | cnode->brtype_stat->branch_to = true; |
782 | |
783 | if (node->branch_flags.predicted) |
784 | cnode->predicted_count++; |
785 | |
786 | if (node->branch_flags.abort) |
787 | cnode->abort_count++; |
788 | |
789 | branch_type_count(st: cnode->brtype_stat, |
790 | flags: &node->branch_flags, |
791 | from: node->branch_from, |
792 | to: node->ip); |
793 | } else { |
794 | /* |
795 | * It's "from" of a branch |
796 | */ |
797 | if (cnode->brtype_stat && cnode->brtype_stat->branch_to) |
798 | cnode->brtype_stat->branch_to = false; |
799 | cnode->cycles_count += node->branch_flags.cycles; |
800 | cnode->iter_count += node->nr_loop_iter; |
801 | cnode->iter_cycles += node->iter_cycles; |
802 | cnode->from_count++; |
803 | } |
804 | } |
805 | |
806 | return match; |
807 | } |
808 | |
809 | /* |
810 | * Split the parent in two parts (a new child is created) and |
811 | * give a part of its callchain to the created child. |
812 | * Then create another child to host the given callchain of new branch |
813 | */ |
814 | static int |
815 | split_add_child(struct callchain_node *parent, |
816 | struct callchain_cursor *cursor, |
817 | struct callchain_list *to_split, |
818 | u64 idx_parents, u64 idx_local, u64 period) |
819 | { |
820 | struct callchain_node *new; |
821 | struct list_head *old_tail; |
822 | unsigned int idx_total = idx_parents + idx_local; |
823 | |
824 | /* split */ |
825 | new = create_child(parent, inherit_children: true); |
826 | if (new == NULL) |
827 | return -1; |
828 | |
829 | /* split the callchain and move a part to the new child */ |
830 | old_tail = parent->val.prev; |
831 | list_del_range(&to_split->list, old_tail); |
832 | new->val.next = &to_split->list; |
833 | new->val.prev = old_tail; |
834 | to_split->list.prev = &new->val; |
835 | old_tail->next = &new->val; |
836 | |
837 | /* split the hits */ |
838 | new->hit = parent->hit; |
839 | new->children_hit = parent->children_hit; |
840 | parent->children_hit = callchain_cumul_hits(node: new); |
841 | new->val_nr = parent->val_nr - idx_local; |
842 | parent->val_nr = idx_local; |
843 | new->count = parent->count; |
844 | new->children_count = parent->children_count; |
845 | parent->children_count = callchain_cumul_counts(node: new); |
846 | |
847 | /* create a new child for the new branch if any */ |
848 | if (idx_total < cursor->nr) { |
849 | struct callchain_node *first; |
850 | struct callchain_list *cnode; |
851 | struct callchain_cursor_node *node; |
852 | struct rb_node *p, **pp; |
853 | |
854 | parent->hit = 0; |
855 | parent->children_hit += period; |
856 | parent->count = 0; |
857 | parent->children_count += 1; |
858 | |
859 | node = callchain_cursor_current(cursor); |
860 | new = add_child(parent, cursor, period); |
861 | if (new == NULL) |
862 | return -1; |
863 | |
864 | /* |
865 | * This is second child since we moved parent's children |
866 | * to new (first) child above. |
867 | */ |
868 | p = parent->rb_root_in.rb_node; |
869 | first = rb_entry(p, struct callchain_node, rb_node_in); |
870 | cnode = list_first_entry(&first->val, struct callchain_list, |
871 | list); |
872 | |
873 | if (match_chain(node, cnode) == MATCH_LT) |
874 | pp = &p->rb_left; |
875 | else |
876 | pp = &p->rb_right; |
877 | |
878 | rb_link_node(node: &new->rb_node_in, parent: p, rb_link: pp); |
879 | rb_insert_color(&new->rb_node_in, &parent->rb_root_in); |
880 | } else { |
881 | parent->hit = period; |
882 | parent->count = 1; |
883 | } |
884 | return 0; |
885 | } |
886 | |
887 | static enum match_result |
888 | append_chain(struct callchain_node *root, |
889 | struct callchain_cursor *cursor, |
890 | u64 period); |
891 | |
892 | static int |
893 | append_chain_children(struct callchain_node *root, |
894 | struct callchain_cursor *cursor, |
895 | u64 period) |
896 | { |
897 | struct callchain_node *rnode; |
898 | struct callchain_cursor_node *node; |
899 | struct rb_node **p = &root->rb_root_in.rb_node; |
900 | struct rb_node *parent = NULL; |
901 | |
902 | node = callchain_cursor_current(cursor); |
903 | if (!node) |
904 | return -1; |
905 | |
906 | /* lookup in children */ |
907 | while (*p) { |
908 | enum match_result ret; |
909 | |
910 | parent = *p; |
911 | rnode = rb_entry(parent, struct callchain_node, rb_node_in); |
912 | |
913 | /* If at least first entry matches, rely to children */ |
914 | ret = append_chain(root: rnode, cursor, period); |
915 | if (ret == MATCH_EQ) |
916 | goto inc_children_hit; |
917 | if (ret == MATCH_ERROR) |
918 | return -1; |
919 | |
920 | if (ret == MATCH_LT) |
921 | p = &parent->rb_left; |
922 | else |
923 | p = &parent->rb_right; |
924 | } |
925 | /* nothing in children, add to the current node */ |
926 | rnode = add_child(parent: root, cursor, period); |
927 | if (rnode == NULL) |
928 | return -1; |
929 | |
930 | rb_link_node(node: &rnode->rb_node_in, parent, rb_link: p); |
931 | rb_insert_color(&rnode->rb_node_in, &root->rb_root_in); |
932 | |
933 | inc_children_hit: |
934 | root->children_hit += period; |
935 | root->children_count++; |
936 | return 0; |
937 | } |
938 | |
939 | static enum match_result |
940 | append_chain(struct callchain_node *root, |
941 | struct callchain_cursor *cursor, |
942 | u64 period) |
943 | { |
944 | struct callchain_list *cnode; |
945 | u64 start = cursor->pos; |
946 | bool found = false; |
947 | u64 matches; |
948 | enum match_result cmp = MATCH_ERROR; |
949 | |
950 | /* |
951 | * Lookup in the current node |
952 | * If we have a symbol, then compare the start to match |
953 | * anywhere inside a function, unless function |
954 | * mode is disabled. |
955 | */ |
956 | list_for_each_entry(cnode, &root->val, list) { |
957 | struct callchain_cursor_node *node; |
958 | |
959 | node = callchain_cursor_current(cursor); |
960 | if (!node) |
961 | break; |
962 | |
963 | cmp = match_chain(node, cnode); |
964 | if (cmp != MATCH_EQ) |
965 | break; |
966 | |
967 | found = true; |
968 | |
969 | callchain_cursor_advance(cursor); |
970 | } |
971 | |
972 | /* matches not, relay no the parent */ |
973 | if (!found) { |
974 | WARN_ONCE(cmp == MATCH_ERROR, "Chain comparison error\n" ); |
975 | return cmp; |
976 | } |
977 | |
978 | matches = cursor->pos - start; |
979 | |
980 | /* we match only a part of the node. Split it and add the new chain */ |
981 | if (matches < root->val_nr) { |
982 | if (split_add_child(parent: root, cursor, to_split: cnode, idx_parents: start, idx_local: matches, |
983 | period) < 0) |
984 | return MATCH_ERROR; |
985 | |
986 | return MATCH_EQ; |
987 | } |
988 | |
989 | /* we match 100% of the path, increment the hit */ |
990 | if (matches == root->val_nr && cursor->pos == cursor->nr) { |
991 | root->hit += period; |
992 | root->count++; |
993 | return MATCH_EQ; |
994 | } |
995 | |
996 | /* We match the node and still have a part remaining */ |
997 | if (append_chain_children(root, cursor, period) < 0) |
998 | return MATCH_ERROR; |
999 | |
1000 | return MATCH_EQ; |
1001 | } |
1002 | |
1003 | int callchain_append(struct callchain_root *root, |
1004 | struct callchain_cursor *cursor, |
1005 | u64 period) |
1006 | { |
1007 | if (cursor == NULL) |
1008 | return -1; |
1009 | |
1010 | if (!cursor->nr) |
1011 | return 0; |
1012 | |
1013 | callchain_cursor_commit(cursor); |
1014 | |
1015 | if (append_chain_children(root: &root->node, cursor, period) < 0) |
1016 | return -1; |
1017 | |
1018 | if (cursor->nr > root->max_depth) |
1019 | root->max_depth = cursor->nr; |
1020 | |
1021 | return 0; |
1022 | } |
1023 | |
1024 | static int |
1025 | merge_chain_branch(struct callchain_cursor *cursor, |
1026 | struct callchain_node *dst, struct callchain_node *src) |
1027 | { |
1028 | struct callchain_cursor_node **old_last = cursor->last; |
1029 | struct callchain_node *child; |
1030 | struct callchain_list *list, *next_list; |
1031 | struct rb_node *n; |
1032 | int old_pos = cursor->nr; |
1033 | int err = 0; |
1034 | |
1035 | list_for_each_entry_safe(list, next_list, &src->val, list) { |
1036 | struct map_symbol ms = { |
1037 | .maps = maps__get(maps: list->ms.maps), |
1038 | .map = map__get(map: list->ms.map), |
1039 | }; |
1040 | callchain_cursor_append(cursor, ip: list->ip, ms: &ms, branch: false, NULL, nr_loop_iter: 0, iter_cycles: 0, branch_from: 0, srcline: list->srcline); |
1041 | list_del_init(entry: &list->list); |
1042 | map_symbol__exit(ms: &ms); |
1043 | map_symbol__exit(ms: &list->ms); |
1044 | zfree(&list->brtype_stat); |
1045 | free(list); |
1046 | } |
1047 | |
1048 | if (src->hit) { |
1049 | callchain_cursor_commit(cursor); |
1050 | if (append_chain_children(root: dst, cursor, period: src->hit) < 0) |
1051 | return -1; |
1052 | } |
1053 | |
1054 | n = rb_first(&src->rb_root_in); |
1055 | while (n) { |
1056 | child = container_of(n, struct callchain_node, rb_node_in); |
1057 | n = rb_next(n); |
1058 | rb_erase(&child->rb_node_in, &src->rb_root_in); |
1059 | |
1060 | err = merge_chain_branch(cursor, dst, src: child); |
1061 | if (err) |
1062 | break; |
1063 | |
1064 | free(child); |
1065 | } |
1066 | |
1067 | cursor->nr = old_pos; |
1068 | cursor->last = old_last; |
1069 | |
1070 | return err; |
1071 | } |
1072 | |
1073 | int callchain_merge(struct callchain_cursor *cursor, |
1074 | struct callchain_root *dst, struct callchain_root *src) |
1075 | { |
1076 | return merge_chain_branch(cursor, dst: &dst->node, src: &src->node); |
1077 | } |
1078 | |
1079 | int callchain_cursor_append(struct callchain_cursor *cursor, |
1080 | u64 ip, struct map_symbol *ms, |
1081 | bool branch, struct branch_flags *flags, |
1082 | int nr_loop_iter, u64 iter_cycles, u64 branch_from, |
1083 | const char *srcline) |
1084 | { |
1085 | struct callchain_cursor_node *node = *cursor->last; |
1086 | |
1087 | if (!node) { |
1088 | node = calloc(1, sizeof(*node)); |
1089 | if (!node) |
1090 | return -ENOMEM; |
1091 | |
1092 | *cursor->last = node; |
1093 | } |
1094 | |
1095 | node->ip = ip; |
1096 | map_symbol__exit(ms: &node->ms); |
1097 | node->ms = *ms; |
1098 | node->ms.maps = maps__get(maps: ms->maps); |
1099 | node->ms.map = map__get(map: ms->map); |
1100 | node->branch = branch; |
1101 | node->nr_loop_iter = nr_loop_iter; |
1102 | node->iter_cycles = iter_cycles; |
1103 | node->srcline = srcline; |
1104 | |
1105 | if (flags) |
1106 | memcpy(&node->branch_flags, flags, |
1107 | sizeof(struct branch_flags)); |
1108 | |
1109 | node->branch_from = branch_from; |
1110 | cursor->nr++; |
1111 | |
1112 | cursor->last = &node->next; |
1113 | |
1114 | return 0; |
1115 | } |
1116 | |
1117 | int sample__resolve_callchain(struct perf_sample *sample, |
1118 | struct callchain_cursor *cursor, struct symbol **parent, |
1119 | struct evsel *evsel, struct addr_location *al, |
1120 | int max_stack) |
1121 | { |
1122 | if (sample->callchain == NULL && !symbol_conf.show_branchflag_count) |
1123 | return 0; |
1124 | |
1125 | if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain || |
1126 | perf_hpp_list.parent || symbol_conf.show_branchflag_count) { |
1127 | return thread__resolve_callchain(thread: al->thread, cursor, evsel, sample, |
1128 | parent, root_al: al, max_stack); |
1129 | } |
1130 | return 0; |
1131 | } |
1132 | |
1133 | int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *sample) |
1134 | { |
1135 | if ((!symbol_conf.use_callchain || sample->callchain == NULL) && |
1136 | !symbol_conf.show_branchflag_count) |
1137 | return 0; |
1138 | return callchain_append(root: he->callchain, cursor: get_tls_callchain_cursor(), period: sample->period); |
1139 | } |
1140 | |
1141 | int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node, |
1142 | bool hide_unresolved) |
1143 | { |
1144 | struct machine *machine = maps__machine(maps: node->ms.maps); |
1145 | |
1146 | maps__put(maps: al->maps); |
1147 | al->maps = maps__get(maps: node->ms.maps); |
1148 | map__put(map: al->map); |
1149 | al->map = map__get(map: node->ms.map); |
1150 | al->sym = node->ms.sym; |
1151 | al->srcline = node->srcline; |
1152 | al->addr = node->ip; |
1153 | |
1154 | if (al->sym == NULL) { |
1155 | if (hide_unresolved) |
1156 | return 0; |
1157 | if (al->map == NULL) |
1158 | goto out; |
1159 | } |
1160 | if (maps__equal(a: al->maps, b: machine__kernel_maps(machine))) { |
1161 | if (machine__is_host(machine)) { |
1162 | al->cpumode = PERF_RECORD_MISC_KERNEL; |
1163 | al->level = 'k'; |
1164 | } else { |
1165 | al->cpumode = PERF_RECORD_MISC_GUEST_KERNEL; |
1166 | al->level = 'g'; |
1167 | } |
1168 | } else { |
1169 | if (machine__is_host(machine)) { |
1170 | al->cpumode = PERF_RECORD_MISC_USER; |
1171 | al->level = '.'; |
1172 | } else if (perf_guest) { |
1173 | al->cpumode = PERF_RECORD_MISC_GUEST_USER; |
1174 | al->level = 'u'; |
1175 | } else { |
1176 | al->cpumode = PERF_RECORD_MISC_HYPERVISOR; |
1177 | al->level = 'H'; |
1178 | } |
1179 | } |
1180 | |
1181 | out: |
1182 | return 1; |
1183 | } |
1184 | |
1185 | char *callchain_list__sym_name(struct callchain_list *cl, |
1186 | char *bf, size_t bfsize, bool show_dso) |
1187 | { |
1188 | bool show_addr = callchain_param.key == CCKEY_ADDRESS; |
1189 | bool show_srcline = show_addr || callchain_param.key == CCKEY_SRCLINE; |
1190 | int printed; |
1191 | |
1192 | if (cl->ms.sym) { |
1193 | const char *inlined = cl->ms.sym->inlined ? " (inlined)" : "" ; |
1194 | |
1195 | if (show_srcline && cl->srcline) |
1196 | printed = scnprintf(buf: bf, size: bfsize, fmt: "%s %s%s" , |
1197 | cl->ms.sym->name, cl->srcline, |
1198 | inlined); |
1199 | else |
1200 | printed = scnprintf(buf: bf, size: bfsize, fmt: "%s%s" , |
1201 | cl->ms.sym->name, inlined); |
1202 | } else |
1203 | printed = scnprintf(bf, bfsize, "%#" PRIx64, cl->ip); |
1204 | |
1205 | if (show_dso) |
1206 | scnprintf(buf: bf + printed, size: bfsize - printed, fmt: " %s" , |
1207 | cl->ms.map ? |
1208 | map__dso(map: cl->ms.map)->short_name : |
1209 | "unknown" ); |
1210 | |
1211 | return bf; |
1212 | } |
1213 | |
1214 | char *callchain_node__scnprintf_value(struct callchain_node *node, |
1215 | char *bf, size_t bfsize, u64 total) |
1216 | { |
1217 | double percent = 0.0; |
1218 | u64 period = callchain_cumul_hits(node); |
1219 | unsigned count = callchain_cumul_counts(node); |
1220 | |
1221 | if (callchain_param.mode == CHAIN_FOLDED) { |
1222 | period = node->hit; |
1223 | count = node->count; |
1224 | } |
1225 | |
1226 | switch (callchain_param.value) { |
1227 | case CCVAL_PERIOD: |
1228 | scnprintf(bf, bfsize, "%" PRIu64, period); |
1229 | break; |
1230 | case CCVAL_COUNT: |
1231 | scnprintf(buf: bf, size: bfsize, fmt: "%u" , count); |
1232 | break; |
1233 | case CCVAL_PERCENT: |
1234 | default: |
1235 | if (total) |
1236 | percent = period * 100.0 / total; |
1237 | scnprintf(buf: bf, size: bfsize, fmt: "%.2f%%" , percent); |
1238 | break; |
1239 | } |
1240 | return bf; |
1241 | } |
1242 | |
1243 | int callchain_node__fprintf_value(struct callchain_node *node, |
1244 | FILE *fp, u64 total) |
1245 | { |
1246 | double percent = 0.0; |
1247 | u64 period = callchain_cumul_hits(node); |
1248 | unsigned count = callchain_cumul_counts(node); |
1249 | |
1250 | if (callchain_param.mode == CHAIN_FOLDED) { |
1251 | period = node->hit; |
1252 | count = node->count; |
1253 | } |
1254 | |
1255 | switch (callchain_param.value) { |
1256 | case CCVAL_PERIOD: |
1257 | return fprintf(fp, "%" PRIu64, period); |
1258 | case CCVAL_COUNT: |
1259 | return fprintf(fp, "%u" , count); |
1260 | case CCVAL_PERCENT: |
1261 | default: |
1262 | if (total) |
1263 | percent = period * 100.0 / total; |
1264 | return percent_color_fprintf(fp, "%.2f%%" , percent); |
1265 | } |
1266 | return 0; |
1267 | } |
1268 | |
1269 | static void callchain_counts_value(struct callchain_node *node, |
1270 | u64 *branch_count, u64 *predicted_count, |
1271 | u64 *abort_count, u64 *cycles_count) |
1272 | { |
1273 | struct callchain_list *clist; |
1274 | |
1275 | list_for_each_entry(clist, &node->val, list) { |
1276 | if (branch_count) |
1277 | *branch_count += clist->branch_count; |
1278 | |
1279 | if (predicted_count) |
1280 | *predicted_count += clist->predicted_count; |
1281 | |
1282 | if (abort_count) |
1283 | *abort_count += clist->abort_count; |
1284 | |
1285 | if (cycles_count) |
1286 | *cycles_count += clist->cycles_count; |
1287 | } |
1288 | } |
1289 | |
1290 | static int callchain_node_branch_counts_cumul(struct callchain_node *node, |
1291 | u64 *branch_count, |
1292 | u64 *predicted_count, |
1293 | u64 *abort_count, |
1294 | u64 *cycles_count) |
1295 | { |
1296 | struct callchain_node *child; |
1297 | struct rb_node *n; |
1298 | |
1299 | n = rb_first(&node->rb_root_in); |
1300 | while (n) { |
1301 | child = rb_entry(n, struct callchain_node, rb_node_in); |
1302 | n = rb_next(n); |
1303 | |
1304 | callchain_node_branch_counts_cumul(node: child, branch_count, |
1305 | predicted_count, |
1306 | abort_count, |
1307 | cycles_count); |
1308 | |
1309 | callchain_counts_value(node: child, branch_count, |
1310 | predicted_count, abort_count, |
1311 | cycles_count); |
1312 | } |
1313 | |
1314 | return 0; |
1315 | } |
1316 | |
1317 | int callchain_branch_counts(struct callchain_root *root, |
1318 | u64 *branch_count, u64 *predicted_count, |
1319 | u64 *abort_count, u64 *cycles_count) |
1320 | { |
1321 | if (branch_count) |
1322 | *branch_count = 0; |
1323 | |
1324 | if (predicted_count) |
1325 | *predicted_count = 0; |
1326 | |
1327 | if (abort_count) |
1328 | *abort_count = 0; |
1329 | |
1330 | if (cycles_count) |
1331 | *cycles_count = 0; |
1332 | |
1333 | return callchain_node_branch_counts_cumul(node: &root->node, |
1334 | branch_count, |
1335 | predicted_count, |
1336 | abort_count, |
1337 | cycles_count); |
1338 | } |
1339 | |
1340 | static int count_pri64_printf(int idx, const char *str, u64 value, char *bf, int bfsize) |
1341 | { |
1342 | return scnprintf(bf, bfsize, "%s%s:%" PRId64 "" , (idx) ? " " : " (" , str, value); |
1343 | } |
1344 | |
1345 | static int count_float_printf(int idx, const char *str, float value, |
1346 | char *bf, int bfsize, float threshold) |
1347 | { |
1348 | if (threshold != 0.0 && value < threshold) |
1349 | return 0; |
1350 | |
1351 | return scnprintf(buf: bf, size: bfsize, fmt: "%s%s:%.1f%%" , (idx) ? " " : " (" , str, value); |
1352 | } |
1353 | |
1354 | static int branch_to_str(char *bf, int bfsize, |
1355 | u64 branch_count, u64 predicted_count, |
1356 | u64 abort_count, |
1357 | const struct branch_type_stat *brtype_stat) |
1358 | { |
1359 | int printed, i = 0; |
1360 | |
1361 | printed = branch_type_str(st: brtype_stat, bf, bfsize); |
1362 | if (printed) |
1363 | i++; |
1364 | |
1365 | if (predicted_count < branch_count) { |
1366 | printed += count_float_printf(idx: i++, str: "predicted" , |
1367 | value: predicted_count * 100.0 / branch_count, |
1368 | bf: bf + printed, bfsize: bfsize - printed, threshold: 0.0); |
1369 | } |
1370 | |
1371 | if (abort_count) { |
1372 | printed += count_float_printf(idx: i++, str: "abort" , |
1373 | value: abort_count * 100.0 / branch_count, |
1374 | bf: bf + printed, bfsize: bfsize - printed, threshold: 0.1); |
1375 | } |
1376 | |
1377 | if (i) |
1378 | printed += scnprintf(buf: bf + printed, size: bfsize - printed, fmt: ")" ); |
1379 | |
1380 | return printed; |
1381 | } |
1382 | |
1383 | static int branch_from_str(char *bf, int bfsize, |
1384 | u64 branch_count, |
1385 | u64 cycles_count, u64 iter_count, |
1386 | u64 iter_cycles, u64 from_count) |
1387 | { |
1388 | int printed = 0, i = 0; |
1389 | u64 cycles, v = 0; |
1390 | |
1391 | cycles = cycles_count / branch_count; |
1392 | if (cycles) { |
1393 | printed += count_pri64_printf(idx: i++, str: "cycles" , |
1394 | value: cycles, |
1395 | bf: bf + printed, bfsize: bfsize - printed); |
1396 | } |
1397 | |
1398 | if (iter_count && from_count) { |
1399 | v = iter_count / from_count; |
1400 | if (v) { |
1401 | printed += count_pri64_printf(idx: i++, str: "iter" , |
1402 | value: v, bf: bf + printed, bfsize: bfsize - printed); |
1403 | |
1404 | printed += count_pri64_printf(idx: i++, str: "avg_cycles" , |
1405 | value: iter_cycles / iter_count, |
1406 | bf: bf + printed, bfsize: bfsize - printed); |
1407 | } |
1408 | } |
1409 | |
1410 | if (i) |
1411 | printed += scnprintf(buf: bf + printed, size: bfsize - printed, fmt: ")" ); |
1412 | |
1413 | return printed; |
1414 | } |
1415 | |
1416 | static int counts_str_build(char *bf, int bfsize, |
1417 | u64 branch_count, u64 predicted_count, |
1418 | u64 abort_count, u64 cycles_count, |
1419 | u64 iter_count, u64 iter_cycles, |
1420 | u64 from_count, |
1421 | const struct branch_type_stat *brtype_stat) |
1422 | { |
1423 | int printed; |
1424 | |
1425 | if (branch_count == 0) |
1426 | return scnprintf(buf: bf, size: bfsize, fmt: " (calltrace)" ); |
1427 | |
1428 | if (brtype_stat->branch_to) { |
1429 | printed = branch_to_str(bf, bfsize, branch_count, |
1430 | predicted_count, abort_count, brtype_stat); |
1431 | } else { |
1432 | printed = branch_from_str(bf, bfsize, branch_count, |
1433 | cycles_count, iter_count, iter_cycles, |
1434 | from_count); |
1435 | } |
1436 | |
1437 | if (!printed) |
1438 | bf[0] = 0; |
1439 | |
1440 | return printed; |
1441 | } |
1442 | |
1443 | static int callchain_counts_printf(FILE *fp, char *bf, int bfsize, |
1444 | u64 branch_count, u64 predicted_count, |
1445 | u64 abort_count, u64 cycles_count, |
1446 | u64 iter_count, u64 iter_cycles, |
1447 | u64 from_count, |
1448 | const struct branch_type_stat *brtype_stat) |
1449 | { |
1450 | char str[256]; |
1451 | |
1452 | counts_str_build(bf: str, bfsize: sizeof(str), branch_count, |
1453 | predicted_count, abort_count, cycles_count, |
1454 | iter_count, iter_cycles, from_count, brtype_stat); |
1455 | |
1456 | if (fp) |
1457 | return fprintf(fp, "%s" , str); |
1458 | |
1459 | return scnprintf(buf: bf, size: bfsize, fmt: "%s" , str); |
1460 | } |
1461 | |
1462 | int callchain_list_counts__printf_value(struct callchain_list *clist, |
1463 | FILE *fp, char *bf, int bfsize) |
1464 | { |
1465 | static const struct branch_type_stat empty_brtype_stat = {}; |
1466 | const struct branch_type_stat *brtype_stat; |
1467 | u64 branch_count, predicted_count; |
1468 | u64 abort_count, cycles_count; |
1469 | u64 iter_count, iter_cycles; |
1470 | u64 from_count; |
1471 | |
1472 | brtype_stat = clist->brtype_stat ?: &empty_brtype_stat; |
1473 | branch_count = clist->branch_count; |
1474 | predicted_count = clist->predicted_count; |
1475 | abort_count = clist->abort_count; |
1476 | cycles_count = clist->cycles_count; |
1477 | iter_count = clist->iter_count; |
1478 | iter_cycles = clist->iter_cycles; |
1479 | from_count = clist->from_count; |
1480 | |
1481 | return callchain_counts_printf(fp, bf, bfsize, branch_count, |
1482 | predicted_count, abort_count, |
1483 | cycles_count, iter_count, iter_cycles, |
1484 | from_count, brtype_stat); |
1485 | } |
1486 | |
1487 | static void free_callchain_node(struct callchain_node *node) |
1488 | { |
1489 | struct callchain_list *list, *tmp; |
1490 | struct callchain_node *child; |
1491 | struct rb_node *n; |
1492 | |
1493 | list_for_each_entry_safe(list, tmp, &node->parent_val, list) { |
1494 | list_del_init(entry: &list->list); |
1495 | map_symbol__exit(ms: &list->ms); |
1496 | zfree(&list->brtype_stat); |
1497 | free(list); |
1498 | } |
1499 | |
1500 | list_for_each_entry_safe(list, tmp, &node->val, list) { |
1501 | list_del_init(entry: &list->list); |
1502 | map_symbol__exit(ms: &list->ms); |
1503 | zfree(&list->brtype_stat); |
1504 | free(list); |
1505 | } |
1506 | |
1507 | n = rb_first(&node->rb_root_in); |
1508 | while (n) { |
1509 | child = container_of(n, struct callchain_node, rb_node_in); |
1510 | n = rb_next(n); |
1511 | rb_erase(&child->rb_node_in, &node->rb_root_in); |
1512 | |
1513 | free_callchain_node(node: child); |
1514 | free(child); |
1515 | } |
1516 | } |
1517 | |
1518 | void free_callchain(struct callchain_root *root) |
1519 | { |
1520 | if (!symbol_conf.use_callchain) |
1521 | return; |
1522 | |
1523 | free_callchain_node(node: &root->node); |
1524 | } |
1525 | |
1526 | static u64 decay_callchain_node(struct callchain_node *node) |
1527 | { |
1528 | struct callchain_node *child; |
1529 | struct rb_node *n; |
1530 | u64 child_hits = 0; |
1531 | |
1532 | n = rb_first(&node->rb_root_in); |
1533 | while (n) { |
1534 | child = container_of(n, struct callchain_node, rb_node_in); |
1535 | |
1536 | child_hits += decay_callchain_node(node: child); |
1537 | n = rb_next(n); |
1538 | } |
1539 | |
1540 | node->hit = (node->hit * 7) / 8; |
1541 | node->children_hit = child_hits; |
1542 | |
1543 | return node->hit; |
1544 | } |
1545 | |
1546 | void decay_callchain(struct callchain_root *root) |
1547 | { |
1548 | if (!symbol_conf.use_callchain) |
1549 | return; |
1550 | |
1551 | decay_callchain_node(node: &root->node); |
1552 | } |
1553 | |
1554 | int callchain_node__make_parent_list(struct callchain_node *node) |
1555 | { |
1556 | struct callchain_node *parent = node->parent; |
1557 | struct callchain_list *chain, *new; |
1558 | LIST_HEAD(head); |
1559 | |
1560 | while (parent) { |
1561 | list_for_each_entry_reverse(chain, &parent->val, list) { |
1562 | new = malloc(sizeof(*new)); |
1563 | if (new == NULL) |
1564 | goto out; |
1565 | *new = *chain; |
1566 | new->has_children = false; |
1567 | new->ms.map = map__get(map: new->ms.map); |
1568 | list_add_tail(new: &new->list, head: &head); |
1569 | } |
1570 | parent = parent->parent; |
1571 | } |
1572 | |
1573 | list_for_each_entry_safe_reverse(chain, new, &head, list) |
1574 | list_move_tail(list: &chain->list, head: &node->parent_val); |
1575 | |
1576 | if (!list_empty(head: &node->parent_val)) { |
1577 | chain = list_first_entry(&node->parent_val, struct callchain_list, list); |
1578 | chain->has_children = rb_prev(&node->rb_node) || rb_next(&node->rb_node); |
1579 | |
1580 | chain = list_first_entry(&node->val, struct callchain_list, list); |
1581 | chain->has_children = false; |
1582 | } |
1583 | return 0; |
1584 | |
1585 | out: |
1586 | list_for_each_entry_safe(chain, new, &head, list) { |
1587 | list_del_init(entry: &chain->list); |
1588 | map_symbol__exit(ms: &chain->ms); |
1589 | zfree(&chain->brtype_stat); |
1590 | free(chain); |
1591 | } |
1592 | return -ENOMEM; |
1593 | } |
1594 | |
1595 | static void callchain_cursor__delete(void *vcursor) |
1596 | { |
1597 | struct callchain_cursor *cursor = vcursor; |
1598 | struct callchain_cursor_node *node, *next; |
1599 | |
1600 | callchain_cursor_reset(cursor); |
1601 | for (node = cursor->first; node != NULL; node = next) { |
1602 | next = node->next; |
1603 | free(node); |
1604 | } |
1605 | free(cursor); |
1606 | } |
1607 | |
1608 | static void init_callchain_cursor_key(void) |
1609 | { |
1610 | if (pthread_key_create(&callchain_cursor, callchain_cursor__delete)) { |
1611 | pr_err("callchain cursor creation failed" ); |
1612 | abort(); |
1613 | } |
1614 | } |
1615 | |
1616 | struct callchain_cursor *get_tls_callchain_cursor(void) |
1617 | { |
1618 | static pthread_once_t once_control = PTHREAD_ONCE_INIT; |
1619 | struct callchain_cursor *cursor; |
1620 | |
1621 | pthread_once(&once_control, init_callchain_cursor_key); |
1622 | cursor = pthread_getspecific(callchain_cursor); |
1623 | if (!cursor) { |
1624 | cursor = zalloc(sizeof(*cursor)); |
1625 | if (!cursor) |
1626 | pr_debug3("%s: not enough memory\n" , __func__); |
1627 | pthread_setspecific(callchain_cursor, cursor); |
1628 | } |
1629 | return cursor; |
1630 | } |
1631 | |
1632 | int callchain_cursor__copy(struct callchain_cursor *dst, |
1633 | struct callchain_cursor *src) |
1634 | { |
1635 | int rc = 0; |
1636 | |
1637 | callchain_cursor_reset(cursor: dst); |
1638 | callchain_cursor_commit(cursor: src); |
1639 | |
1640 | while (true) { |
1641 | struct callchain_cursor_node *node; |
1642 | |
1643 | node = callchain_cursor_current(cursor: src); |
1644 | if (node == NULL) |
1645 | break; |
1646 | |
1647 | rc = callchain_cursor_append(cursor: dst, ip: node->ip, ms: &node->ms, |
1648 | branch: node->branch, flags: &node->branch_flags, |
1649 | nr_loop_iter: node->nr_loop_iter, |
1650 | iter_cycles: node->iter_cycles, |
1651 | branch_from: node->branch_from, srcline: node->srcline); |
1652 | if (rc) |
1653 | break; |
1654 | |
1655 | callchain_cursor_advance(cursor: src); |
1656 | } |
1657 | |
1658 | return rc; |
1659 | } |
1660 | |
1661 | /* |
1662 | * Initialize a cursor before adding entries inside, but keep |
1663 | * the previously allocated entries as a cache. |
1664 | */ |
1665 | void callchain_cursor_reset(struct callchain_cursor *cursor) |
1666 | { |
1667 | struct callchain_cursor_node *node; |
1668 | |
1669 | cursor->nr = 0; |
1670 | cursor->last = &cursor->first; |
1671 | |
1672 | for (node = cursor->first; node != NULL; node = node->next) |
1673 | map_symbol__exit(ms: &node->ms); |
1674 | } |
1675 | |
1676 | void callchain_param_setup(u64 sample_type, const char *arch) |
1677 | { |
1678 | if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) { |
1679 | if ((sample_type & PERF_SAMPLE_REGS_USER) && |
1680 | (sample_type & PERF_SAMPLE_STACK_USER)) { |
1681 | callchain_param.record_mode = CALLCHAIN_DWARF; |
1682 | dwarf_callchain_users = true; |
1683 | } else if (sample_type & PERF_SAMPLE_BRANCH_STACK) |
1684 | callchain_param.record_mode = CALLCHAIN_LBR; |
1685 | else |
1686 | callchain_param.record_mode = CALLCHAIN_FP; |
1687 | } |
1688 | |
1689 | /* |
1690 | * It's necessary to use libunwind to reliably determine the caller of |
1691 | * a leaf function on aarch64, as otherwise we cannot know whether to |
1692 | * start from the LR or FP. |
1693 | * |
1694 | * Always starting from the LR can result in duplicate or entirely |
1695 | * erroneous entries. Always skipping the LR and starting from the FP |
1696 | * can result in missing entries. |
1697 | */ |
1698 | if (callchain_param.record_mode == CALLCHAIN_FP && !strcmp(arch, "arm64" )) |
1699 | dwarf_callchain_users = true; |
1700 | } |
1701 | |
1702 | static bool chain_match(struct callchain_list *base_chain, |
1703 | struct callchain_list *pair_chain) |
1704 | { |
1705 | enum match_result match; |
1706 | |
1707 | match = match_chain_strings(left: base_chain->srcline, |
1708 | right: pair_chain->srcline); |
1709 | if (match != MATCH_ERROR) |
1710 | return match == MATCH_EQ; |
1711 | |
1712 | match = match_chain_dso_addresses(left_map: base_chain->ms.map, |
1713 | left_ip: base_chain->ip, |
1714 | right_map: pair_chain->ms.map, |
1715 | right_ip: pair_chain->ip); |
1716 | |
1717 | return match == MATCH_EQ; |
1718 | } |
1719 | |
1720 | bool callchain_cnode_matched(struct callchain_node *base_cnode, |
1721 | struct callchain_node *pair_cnode) |
1722 | { |
1723 | struct callchain_list *base_chain, *pair_chain; |
1724 | bool match = false; |
1725 | |
1726 | pair_chain = list_first_entry(&pair_cnode->val, |
1727 | struct callchain_list, |
1728 | list); |
1729 | |
1730 | list_for_each_entry(base_chain, &base_cnode->val, list) { |
1731 | if (&pair_chain->list == &pair_cnode->val) |
1732 | return false; |
1733 | |
1734 | if (!base_chain->srcline || !pair_chain->srcline) { |
1735 | pair_chain = list_next_entry(pair_chain, list); |
1736 | continue; |
1737 | } |
1738 | |
1739 | match = chain_match(base_chain, pair_chain); |
1740 | if (!match) |
1741 | return false; |
1742 | |
1743 | pair_chain = list_next_entry(pair_chain, list); |
1744 | } |
1745 | |
1746 | /* |
1747 | * Say chain1 is ABC, chain2 is ABCD, we consider they are |
1748 | * not fully matched. |
1749 | */ |
1750 | if (pair_chain && (&pair_chain->list != &pair_cnode->val)) |
1751 | return false; |
1752 | |
1753 | return match; |
1754 | } |
1755 | |
1756 | static u64 count_callchain_hits(struct hist_entry *he) |
1757 | { |
1758 | struct rb_root *root = &he->sorted_chain; |
1759 | struct rb_node *rb_node = rb_first(root); |
1760 | struct callchain_node *node; |
1761 | u64 chain_hits = 0; |
1762 | |
1763 | while (rb_node) { |
1764 | node = rb_entry(rb_node, struct callchain_node, rb_node); |
1765 | chain_hits += node->hit; |
1766 | rb_node = rb_next(rb_node); |
1767 | } |
1768 | |
1769 | return chain_hits; |
1770 | } |
1771 | |
1772 | u64 callchain_total_hits(struct hists *hists) |
1773 | { |
1774 | struct rb_node *next = rb_first_cached(&hists->entries); |
1775 | u64 chain_hits = 0; |
1776 | |
1777 | while (next) { |
1778 | struct hist_entry *he = rb_entry(next, struct hist_entry, |
1779 | rb_node); |
1780 | |
1781 | chain_hits += count_callchain_hits(he); |
1782 | next = rb_next(&he->rb_node); |
1783 | } |
1784 | |
1785 | return chain_hits; |
1786 | } |
1787 | |
1788 | s64 callchain_avg_cycles(struct callchain_node *cnode) |
1789 | { |
1790 | struct callchain_list *chain; |
1791 | s64 cycles = 0; |
1792 | |
1793 | list_for_each_entry(chain, &cnode->val, list) { |
1794 | if (chain->srcline && chain->branch_count) |
1795 | cycles += chain->cycles_count / chain->branch_count; |
1796 | } |
1797 | |
1798 | return cycles; |
1799 | } |
1800 | |