// SPDX-License-Identifier: GPL-2.0-only
/*
 * thread-stack.c: Synthesize a thread's stack using call / return events
 * Copyright (c) 2014, Intel Corporation.
 */

#include <linux/rbtree.h>
#include <linux/list.h>
#include <linux/log2.h>
#include <linux/zalloc.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include "thread.h"
#include "event.h"
#include "machine.h"
#include "env.h"
#include "debug.h"
#include "symbol.h"
#include "comm.h"
#include "call-path.h"
#include "thread-stack.h"

#define STACK_GROWTH 2048

/*
 * State of retpoline detection.
 *
 * RETPOLINE_NONE: no retpoline detection
 * X86_RETPOLINE_POSSIBLE: x86 retpoline possible
 * X86_RETPOLINE_DETECTED: x86 retpoline detected
 */
enum retpoline_state_t {
	RETPOLINE_NONE,
	X86_RETPOLINE_POSSIBLE,
	X86_RETPOLINE_DETECTED,
};

/**
 * struct thread_stack_entry - thread stack entry.
 * @ret_addr: return address
 * @timestamp: timestamp (if known)
 * @ref: external reference (e.g. db_id of sample)
 * @branch_count: the branch count when the entry was created
 * @insn_count: the instruction count when the entry was created
 * @cyc_count: the cycle count when the entry was created
 * @db_id: id used for db-export
 * @cp: call path
 * @no_call: a 'call' was not seen
 * @trace_end: a 'call' but trace ended
 * @non_call: a branch but not a 'call' to the start of a different symbol
 */
struct thread_stack_entry {
	u64 ret_addr;
	u64 timestamp;
	u64 ref;
	u64 branch_count;
	u64 insn_count;
	u64 cyc_count;
	u64 db_id;
	struct call_path *cp;
	bool no_call;
	bool trace_end;
	bool non_call;
};

/**
 * struct thread_stack - thread stack constructed from 'call' and 'return'
 *                       branch samples.
 * @stack: array that holds the stack
 * @cnt: number of entries in the stack
 * @sz: current maximum stack size
 * @trace_nr: current trace number
 * @branch_count: running branch count
 * @insn_count: running instruction count
 * @cyc_count: running cycle count
 * @kernel_start: kernel start address
 * @last_time: last timestamp
 * @crp: call/return processor
 * @comm: current comm
 * @arr_sz: size of array if this is the first element of an array
 * @rstate: used to detect retpolines
 * @br_stack_rb: branch stack (ring buffer)
 * @br_stack_sz: maximum branch stack size
 * @br_stack_pos: current position in @br_stack_rb
 * @mispred_all: mark all branches as mispredicted
 */
struct thread_stack {
	struct thread_stack_entry *stack;
	size_t cnt;
	size_t sz;
	u64 trace_nr;
	u64 branch_count;
	u64 insn_count;
	u64 cyc_count;
	u64 kernel_start;
	u64 last_time;
	struct call_return_processor *crp;
	struct comm *comm;
	unsigned int arr_sz;
	enum retpoline_state_t rstate;
	struct branch_stack *br_stack_rb;
	unsigned int br_stack_sz;
	unsigned int br_stack_pos;
	bool mispred_all;
};

/*
 * Assume pid == tid == 0 identifies the idle task as defined by
 * perf_session__register_idle_thread(). The idle task is really 1 task per cpu,
 * and therefore requires a stack for each cpu.
 */
static inline bool thread_stack__per_cpu(struct thread *thread)
{
	return !(thread__tid(thread) || thread__pid(thread));
}

static int thread_stack__grow(struct thread_stack *ts)
{
	struct thread_stack_entry *new_stack;
	size_t sz, new_sz;

	new_sz = ts->sz + STACK_GROWTH;
	sz = new_sz * sizeof(struct thread_stack_entry);

	new_stack = realloc(ts->stack, sz);
	if (!new_stack)
		return -ENOMEM;

	ts->stack = new_stack;
	ts->sz = new_sz;

	return 0;
}

static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
			      struct call_return_processor *crp,
			      bool callstack, unsigned int br_stack_sz)
{
	int err;

	if (callstack) {
		err = thread_stack__grow(ts);
		if (err)
			return err;
	}

	if (br_stack_sz) {
		size_t sz = sizeof(struct branch_stack);

		sz += br_stack_sz * sizeof(struct branch_entry);
		ts->br_stack_rb = zalloc(sz);
		if (!ts->br_stack_rb)
			return -ENOMEM;
		ts->br_stack_sz = br_stack_sz;
	}

	if (thread__maps(thread) && maps__machine(thread__maps(thread))) {
		struct machine *machine = maps__machine(thread__maps(thread));
		const char *arch = perf_env__arch(machine->env);

		ts->kernel_start = machine__kernel_start(machine);
		if (!strcmp(arch, "x86"))
			ts->rstate = X86_RETPOLINE_POSSIBLE;
	} else {
		ts->kernel_start = 1ULL << 63;
	}
	ts->crp = crp;

	return 0;
}

static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
					      struct call_return_processor *crp,
					      bool callstack,
					      unsigned int br_stack_sz)
{
	struct thread_stack *ts = thread__ts(thread), *new_ts;
	unsigned int old_sz = ts ? ts->arr_sz : 0;
	unsigned int new_sz = 1;

	if (thread_stack__per_cpu(thread) && cpu > 0)
		new_sz = roundup_pow_of_two(cpu + 1);

	if (!ts || new_sz > old_sz) {
		new_ts = calloc(new_sz, sizeof(*ts));
		if (!new_ts)
			return NULL;
		if (ts)
			memcpy(new_ts, ts, old_sz * sizeof(*ts));
		new_ts->arr_sz = new_sz;
		free(thread__ts(thread));
		thread__set_ts(thread, new_ts);
		ts = new_ts;
	}

	if (thread_stack__per_cpu(thread) && cpu > 0 &&
	    (unsigned int)cpu < ts->arr_sz)
		ts += cpu;

	if (!ts->stack &&
	    thread_stack__init(ts, thread, crp, callstack, br_stack_sz))
		return NULL;

	return ts;
}

static struct thread_stack *thread__cpu_stack(struct thread *thread, int cpu)
{
	struct thread_stack *ts = thread__ts(thread);

	if (cpu < 0)
		cpu = 0;

	if (!ts || (unsigned int)cpu >= ts->arr_sz)
		return NULL;

	ts += cpu;

	if (!ts->stack)
		return NULL;

	return ts;
}

static inline struct thread_stack *thread__stack(struct thread *thread,
						 int cpu)
{
	if (!thread)
		return NULL;

	if (thread_stack__per_cpu(thread))
		return thread__cpu_stack(thread, cpu);

	return thread__ts(thread);
}

static int thread_stack__push(struct thread_stack *ts, u64 ret_addr,
			      bool trace_end)
{
	int err = 0;

	if (ts->cnt == ts->sz) {
		err = thread_stack__grow(ts);
		if (err) {
			pr_warning("Out of memory: discarding thread stack\n");
			ts->cnt = 0;
		}
	}

	ts->stack[ts->cnt].trace_end = trace_end;
	ts->stack[ts->cnt++].ret_addr = ret_addr;

	return err;
}

static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
{
	size_t i;

	/*
	 * In some cases there may be functions which are not seen to return.
	 * For example when setjmp / longjmp has been used. Or the perf context
	 * switch in the kernel which doesn't stop and start tracing in exactly
	 * the same code path. When that happens the return address will be
	 * further down the stack. If the return address is not found at all,
	 * we assume the opposite (i.e. this is a return for a call that wasn't
	 * seen for some reason) and leave the stack alone.
	 */
	for (i = ts->cnt; i; ) {
		if (ts->stack[--i].ret_addr == ret_addr) {
			ts->cnt = i;
			return;
		}
	}
}

static void thread_stack__pop_trace_end(struct thread_stack *ts)
{
	size_t i;

	for (i = ts->cnt; i; ) {
		if (ts->stack[--i].trace_end)
			ts->cnt = i;
		else
			return;
	}
}

static bool thread_stack__in_kernel(struct thread_stack *ts)
{
	if (!ts->cnt)
		return false;

	return ts->stack[ts->cnt - 1].cp->in_kernel;
}

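/*
 * Report one completed call/return pair to the call/return processor
 * (e.g. for db export). The branch, instruction and cycle counts reported
 * are deltas between now and when the entry was pushed, so the processor
 * sees per-call counts rather than running totals.
 */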
static int thread_stack__call_return(struct thread *thread,
				     struct thread_stack *ts, size_t idx,
				     u64 timestamp, u64 ref, bool no_return)
{
	struct call_return_processor *crp = ts->crp;
	struct thread_stack_entry *tse;
	struct call_return cr = {
		.thread = thread,
		.comm = ts->comm,
		.db_id = 0,
	};
	u64 *parent_db_id;

	tse = &ts->stack[idx];
	cr.cp = tse->cp;
	cr.call_time = tse->timestamp;
	cr.return_time = timestamp;
	cr.branch_count = ts->branch_count - tse->branch_count;
	cr.insn_count = ts->insn_count - tse->insn_count;
	cr.cyc_count = ts->cyc_count - tse->cyc_count;
	cr.db_id = tse->db_id;
	cr.call_ref = tse->ref;
	cr.return_ref = ref;
	if (tse->no_call)
		cr.flags |= CALL_RETURN_NO_CALL;
	if (no_return)
		cr.flags |= CALL_RETURN_NO_RETURN;
	if (tse->non_call)
		cr.flags |= CALL_RETURN_NON_CALL;

	/*
	 * The parent db_id must be assigned before exporting the child. Note
	 * it is not possible to export the parent first because its
	 * information is not complete until its 'return' has been processed.
	 */
	parent_db_id = idx ? &(tse - 1)->db_id : NULL;

	return crp->process(&cr, parent_db_id, crp->data);
}

static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
{
	struct call_return_processor *crp = ts->crp;
	int err;

	if (!crp) {
		ts->cnt = 0;
		ts->br_stack_pos = 0;
		if (ts->br_stack_rb)
			ts->br_stack_rb->nr = 0;
		return 0;
	}

	while (ts->cnt) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						ts->last_time, 0, true);
		if (err) {
			pr_err("Error flushing thread stack!\n");
			ts->cnt = 0;
			return err;
		}
	}

	return 0;
}

int thread_stack__flush(struct thread *thread)
{
	struct thread_stack *ts = thread__ts(thread);
	unsigned int pos;
	int err = 0;

	if (ts) {
		for (pos = 0; pos < ts->arr_sz; pos++) {
			int ret = __thread_stack__flush(thread, ts + pos);

			if (ret)
				err = ret;
		}
	}

	return err;
}

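/*
 * The branch stack ring buffer is filled backwards: @br_stack_pos is
 * decremented (wrapping back to @br_stack_sz) before each new entry is
 * written, so the most recent branch is always at @br_stack_pos and older
 * branches follow it. That matches the newest-first ordering of a
 * struct branch_stack when it is later copied out for a sample.
 */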
static void thread_stack__update_br_stack(struct thread_stack *ts, u32 flags,
					  u64 from_ip, u64 to_ip)
{
	struct branch_stack *bs = ts->br_stack_rb;
	struct branch_entry *be;

	if (!ts->br_stack_pos)
		ts->br_stack_pos = ts->br_stack_sz;

	ts->br_stack_pos -= 1;

	be = &bs->entries[ts->br_stack_pos];
	be->from = from_ip;
	be->to = to_ip;
	be->flags.value = 0;
	be->flags.abort = !!(flags & PERF_IP_FLAG_TX_ABORT);
	be->flags.in_tx = !!(flags & PERF_IP_FLAG_IN_TX);
	/* No support for mispredict */
	be->flags.mispred = ts->mispred_all;

	if (bs->nr < ts->br_stack_sz)
		bs->nr += 1;
}

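/*
 * Update the thread stack (and, if requested, the branch stack ring buffer)
 * for one branch event. With no call/return processor, a 'call' pushes the
 * return address, a 'return' pops it, and trace begin/end events tidy up
 * entries that can no longer be matched.
 *
 * Illustrative (hypothetical) call from a branch-trace decoder that has
 * already resolved the branch and its thread:
 *
 *	err = thread_stack__event(thread, sample->cpu, sample->flags,
 *				  sample->ip, sample->addr, sample->insn_len,
 *				  trace_nr, callstack, br_stack_sz,
 *				  mispred_all);
 *
 * where trace_nr is the decoder's count of trace discontinuities.
 */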
int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
			u64 to_ip, u16 insn_len, u64 trace_nr, bool callstack,
			unsigned int br_stack_sz, bool mispred_all)
{
	struct thread_stack *ts = thread__stack(thread, cpu);

	if (!thread)
		return -EINVAL;

	if (!ts) {
		ts = thread_stack__new(thread, cpu, NULL, callstack, br_stack_sz);
		if (!ts) {
			pr_warning("Out of memory: no thread stack\n");
			return -ENOMEM;
		}
		ts->trace_nr = trace_nr;
		ts->mispred_all = mispred_all;
	}

	/*
	 * When the trace is discontinuous, the trace_nr changes. In that case
	 * the stack might be completely invalid. Better to report nothing than
	 * to report something misleading, so flush the stack.
	 */
	if (trace_nr != ts->trace_nr) {
		if (ts->trace_nr)
			__thread_stack__flush(thread, ts);
		ts->trace_nr = trace_nr;
	}

	if (br_stack_sz)
		thread_stack__update_br_stack(ts, flags, from_ip, to_ip);

	/*
	 * Stop here if thread_stack__process() is in use, or if a call stack
	 * is not being recorded.
	 */
	if (ts->crp || !callstack)
		return 0;

	if (flags & PERF_IP_FLAG_CALL) {
		u64 ret_addr;

		if (!to_ip)
			return 0;
		ret_addr = from_ip + insn_len;
		if (ret_addr == to_ip)
			return 0; /* Zero-length calls are excluded */
		return thread_stack__push(ts, ret_addr,
					  flags & PERF_IP_FLAG_TRACE_END);
	} else if (flags & PERF_IP_FLAG_TRACE_BEGIN) {
		/*
		 * If the caller did not change the trace number (which would
		 * have flushed the stack) then try to make sense of the stack.
		 * Possibly, tracing began after returning to the current
		 * address, so try to pop that. Also, a call made when the
		 * trace ended is not expected to return, so pop that too.
		 */
		thread_stack__pop(ts, to_ip);
		thread_stack__pop_trace_end(ts);
	} else if ((flags & PERF_IP_FLAG_RETURN) && from_ip) {
		thread_stack__pop(ts, to_ip);
	}

	return 0;
}

void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr)
{
	struct thread_stack *ts = thread__stack(thread, cpu);

	if (!ts)
		return;

	if (trace_nr != ts->trace_nr) {
		if (ts->trace_nr)
			__thread_stack__flush(thread, ts);
		ts->trace_nr = trace_nr;
	}
}

static void __thread_stack__free(struct thread *thread, struct thread_stack *ts)
{
	__thread_stack__flush(thread, ts);
	zfree(&ts->stack);
	zfree(&ts->br_stack_rb);
}

static void thread_stack__reset(struct thread *thread, struct thread_stack *ts)
{
	unsigned int arr_sz = ts->arr_sz;

	__thread_stack__free(thread, ts);
	memset(ts, 0, sizeof(*ts));
	ts->arr_sz = arr_sz;
}

void thread_stack__free(struct thread *thread)
{
	struct thread_stack *ts = thread__ts(thread);
	unsigned int pos;

	if (ts) {
		for (pos = 0; pos < ts->arr_sz; pos++)
			__thread_stack__free(thread, ts + pos);
		free(thread__ts(thread));
		thread__set_ts(thread, NULL);
	}
}

static inline u64 callchain_context(u64 ip, u64 kernel_start)
{
	return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
}

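/*
 * Synthesize a callchain from the current thread stack. The resulting ips[]
 * begins with a PERF_CONTEXT_* marker followed by the sampled ip, then the
 * cached return addresses from innermost to outermost; an additional context
 * marker is inserted whenever the chain crosses the user/kernel boundary.
 */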
void thread_stack__sample(struct thread *thread, int cpu,
			  struct ip_callchain *chain,
			  size_t sz, u64 ip, u64 kernel_start)
{
	struct thread_stack *ts = thread__stack(thread, cpu);
	u64 context = callchain_context(ip, kernel_start);
	u64 last_context;
	size_t i, j;

	if (sz < 2) {
		chain->nr = 0;
		return;
	}

	chain->ips[0] = context;
	chain->ips[1] = ip;

	if (!ts) {
		chain->nr = 2;
		return;
	}

	last_context = context;

	for (i = 2, j = 1; i < sz && j <= ts->cnt; i++, j++) {
		ip = ts->stack[ts->cnt - j].ret_addr;
		context = callchain_context(ip, kernel_start);
		if (context != last_context) {
			if (i >= sz - 1)
				break;
			chain->ips[i++] = context;
			last_context = context;
		}
		chain->ips[i] = ip;
	}

	chain->nr = i;
}

/*
 * Hardware sample records, created some time after the event occurred, need to
 * have subsequent addresses removed from the call chain.
 */
void thread_stack__sample_late(struct thread *thread, int cpu,
			       struct ip_callchain *chain, size_t sz,
			       u64 sample_ip, u64 kernel_start)
{
	struct thread_stack *ts = thread__stack(thread, cpu);
	u64 sample_context = callchain_context(sample_ip, kernel_start);
	u64 last_context, context, ip;
	size_t nr = 0, j;

	if (sz < 2) {
		chain->nr = 0;
		return;
	}

	if (!ts)
		goto out;

	/*
	 * When tracing kernel space, kernel addresses occur at the top of the
	 * call chain after the event occurred but before tracing stopped.
	 * Skip them.
	 */
	for (j = 1; j <= ts->cnt; j++) {
		ip = ts->stack[ts->cnt - j].ret_addr;
		context = callchain_context(ip, kernel_start);
		if (context == PERF_CONTEXT_USER ||
		    (context == sample_context && ip == sample_ip))
			break;
	}

	last_context = sample_ip; /* Use sample_ip as an invalid context */

	for (; nr < sz && j <= ts->cnt; nr++, j++) {
		ip = ts->stack[ts->cnt - j].ret_addr;
		context = callchain_context(ip, kernel_start);
		if (context != last_context) {
			if (nr >= sz - 1)
				break;
			chain->ips[nr++] = context;
			last_context = context;
		}
		chain->ips[nr] = ip;
	}
out:
	if (nr) {
		chain->nr = nr;
	} else {
		chain->ips[0] = sample_context;
		chain->ips[1] = sample_ip;
		chain->nr = 2;
	}
}

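/*
 * Copy the branch stack ring buffer into @dst, newest entry first. The copy
 * starts at @br_stack_pos and, if the ring buffer has wrapped, continues
 * from the beginning of the buffer, limited to @sz entries in total.
 */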
void thread_stack__br_sample(struct thread *thread, int cpu,
			     struct branch_stack *dst, unsigned int sz)
{
	struct thread_stack *ts = thread__stack(thread, cpu);
	const size_t bsz = sizeof(struct branch_entry);
	struct branch_stack *src;
	struct branch_entry *be;
	unsigned int nr;

	dst->nr = 0;

	if (!ts)
		return;

	src = ts->br_stack_rb;
	if (!src->nr)
		return;

	dst->nr = min((unsigned int)src->nr, sz);

	be = &dst->entries[0];
	nr = min(ts->br_stack_sz - ts->br_stack_pos, (unsigned int)dst->nr);
	memcpy(be, &src->entries[ts->br_stack_pos], bsz * nr);

	if (src->nr >= ts->br_stack_sz) {
		sz -= nr;
		be = &dst->entries[nr];
		nr = min(ts->br_stack_pos, sz);
		memcpy(be, &src->entries[0], bsz * nr);
	}
}

/* Start of user space branch entries */
static bool us_start(struct branch_entry *be, u64 kernel_start, bool *start)
{
	if (!*start)
		*start = be->to && be->to < kernel_start;

	return *start;
}

/*
 * Start of branch entries after the ip fell in between 2 branches, or user
 * space branch entries.
 */
static bool ks_start(struct branch_entry *be, u64 sample_ip, u64 kernel_start,
		     bool *start, struct branch_entry *nb)
{
	if (!*start) {
		*start = (nb && sample_ip >= be->to && sample_ip <= nb->from) ||
			 be->from < kernel_start ||
			 (be->to && be->to < kernel_start);
	}

	return *start;
}

/*
 * Hardware sample records, created some time after the event occurred, need to
 * have subsequent addresses removed from the branch stack.
 */
void thread_stack__br_sample_late(struct thread *thread, int cpu,
				  struct branch_stack *dst, unsigned int sz,
				  u64 ip, u64 kernel_start)
{
	struct thread_stack *ts = thread__stack(thread, cpu);
	struct branch_entry *d, *s, *spos, *ssz;
	struct branch_stack *src;
	unsigned int nr = 0;
	bool start = false;

	dst->nr = 0;

	if (!ts)
		return;

	src = ts->br_stack_rb;
	if (!src->nr)
		return;

	spos = &src->entries[ts->br_stack_pos];
	ssz = &src->entries[ts->br_stack_sz];

	d = &dst->entries[0];
	s = spos;

	if (ip < kernel_start) {
		/*
		 * User space sample: start copying branch entries when the
		 * branch is in user space.
		 */
		for (s = spos; s < ssz && nr < sz; s++) {
			if (us_start(s, kernel_start, &start)) {
				*d++ = *s;
				nr += 1;
			}
		}

		if (src->nr >= ts->br_stack_sz) {
			for (s = &src->entries[0]; s < spos && nr < sz; s++) {
				if (us_start(s, kernel_start, &start)) {
					*d++ = *s;
					nr += 1;
				}
			}
		}
	} else {
		struct branch_entry *nb = NULL;

		/*
		 * Kernel space sample: start copying branch entries when the ip
		 * falls in between 2 branches (or the branch is in user space
		 * because then the start must have been missed).
		 */
		for (s = spos; s < ssz && nr < sz; s++) {
			if (ks_start(s, ip, kernel_start, &start, nb)) {
				*d++ = *s;
				nr += 1;
			}
			nb = s;
		}

		if (src->nr >= ts->br_stack_sz) {
			for (s = &src->entries[0]; s < spos && nr < sz; s++) {
				if (ks_start(s, ip, kernel_start, &start, nb)) {
					*d++ = *s;
					nr += 1;
				}
				nb = s;
			}
		}
	}

	dst->nr = nr;
}

struct call_return_processor *
call_return_processor__new(int (*process)(struct call_return *cr, u64 *parent_db_id, void *data),
			   void *data)
{
	struct call_return_processor *crp;

	crp = zalloc(sizeof(struct call_return_processor));
	if (!crp)
		return NULL;
	crp->cpr = call_path_root__new();
	if (!crp->cpr)
		goto out_free;
	crp->process = process;
	crp->data = data;
	return crp;

out_free:
	free(crp);
	return NULL;
}

void call_return_processor__free(struct call_return_processor *crp)
{
	if (crp) {
		call_path_root__free(crp->cpr);
		free(crp);
	}
}

static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
				 u64 timestamp, u64 ref, struct call_path *cp,
				 bool no_call, bool trace_end)
{
	struct thread_stack_entry *tse;
	int err;

	if (!cp)
		return -ENOMEM;

	if (ts->cnt == ts->sz) {
		err = thread_stack__grow(ts);
		if (err)
			return err;
	}

	tse = &ts->stack[ts->cnt++];
	tse->ret_addr = ret_addr;
	tse->timestamp = timestamp;
	tse->ref = ref;
	tse->branch_count = ts->branch_count;
	tse->insn_count = ts->insn_count;
	tse->cyc_count = ts->cyc_count;
	tse->cp = cp;
	tse->no_call = no_call;
	tse->trace_end = trace_end;
	tse->non_call = false;
	tse->db_id = 0;

	return 0;
}

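/*
 * Pop entries matching a 'return'. Returns a negative error code on failure,
 * zero if a matching entry was found and reported, and 1 if no match was
 * found (the caller then treats it as a return without a preceding call).
 */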
static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
				u64 ret_addr, u64 timestamp, u64 ref,
				struct symbol *sym)
{
	int err;

	if (!ts->cnt)
		return 1;

	if (ts->cnt == 1) {
		struct thread_stack_entry *tse = &ts->stack[0];

		if (tse->cp->sym == sym)
			return thread_stack__call_return(thread, ts, --ts->cnt,
							 timestamp, ref, false);
	}

	if (ts->stack[ts->cnt - 1].ret_addr == ret_addr &&
	    !ts->stack[ts->cnt - 1].non_call) {
		return thread_stack__call_return(thread, ts, --ts->cnt,
						 timestamp, ref, false);
	} else {
		size_t i = ts->cnt - 1;

		while (i--) {
			if (ts->stack[i].ret_addr != ret_addr ||
			    ts->stack[i].non_call)
				continue;
			i += 1;
			while (ts->cnt > i) {
				err = thread_stack__call_return(thread, ts,
								--ts->cnt,
								timestamp, ref,
								true);
				if (err)
					return err;
			}
			return thread_stack__call_return(thread, ts, --ts->cnt,
							 timestamp, ref, false);
		}
	}

	return 1;
}

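/*
 * The stack is empty, so create a bottom-of-stack entry from the current
 * sample, marked 'no_call' because the call that led here was never seen.
 */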
static int thread_stack__bottom(struct thread_stack *ts,
				struct perf_sample *sample,
				struct addr_location *from_al,
				struct addr_location *to_al, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp;
	struct symbol *sym;
	u64 ip;

	if (sample->ip) {
		ip = sample->ip;
		sym = from_al->sym;
	} else if (sample->addr) {
		ip = sample->addr;
		sym = to_al->sym;
	} else {
		return 0;
	}

	cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
				ts->kernel_start);

	return thread_stack__push_cp(ts, ip, sample->time, ref, cp,
				     true, false);
}

static int thread_stack__pop_ks(struct thread *thread, struct thread_stack *ts,
				struct perf_sample *sample, u64 ref)
{
	u64 tm = sample->time;
	int err;

	/* Return to userspace, so pop all kernel addresses */
	while (thread_stack__in_kernel(ts)) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						tm, ref, true);
		if (err)
			return err;
	}

	return 0;
}

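/*
 * Handle a 'return' that does not match anything on the stack, typically
 * because the corresponding 'call' happened before tracing started or the
 * return address was manipulated (e.g. retpolines, setjmp / longjmp).
 */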
static int thread_stack__no_call_return(struct thread *thread,
					struct thread_stack *ts,
					struct perf_sample *sample,
					struct addr_location *from_al,
					struct addr_location *to_al, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *root = &cpr->call_path;
	struct symbol *fsym = from_al->sym;
	struct symbol *tsym = to_al->sym;
	struct call_path *cp, *parent;
	u64 ks = ts->kernel_start;
	u64 addr = sample->addr;
	u64 tm = sample->time;
	u64 ip = sample->ip;
	int err;

	if (ip >= ks && addr < ks) {
		/* Return to userspace, so pop all kernel addresses */
		err = thread_stack__pop_ks(thread, ts, sample, ref);
		if (err)
			return err;

		/* If the stack is empty, push the userspace address */
		if (!ts->cnt) {
			cp = call_path__findnew(cpr, root, tsym, addr, ks);
			return thread_stack__push_cp(ts, 0, tm, ref, cp, true,
						     false);
		}
	} else if (thread_stack__in_kernel(ts) && ip < ks) {
		/* Return to userspace, so pop all kernel addresses */
		err = thread_stack__pop_ks(thread, ts, sample, ref);
		if (err)
			return err;
	}

	if (ts->cnt)
		parent = ts->stack[ts->cnt - 1].cp;
	else
		parent = root;

	if (parent->sym == from_al->sym) {
		/*
		 * At the bottom of the stack, assume the missing 'call' was
		 * before the trace started. So, pop the current symbol and push
		 * the 'to' symbol.
		 */
		if (ts->cnt == 1) {
			err = thread_stack__call_return(thread, ts, --ts->cnt,
							tm, ref, false);
			if (err)
				return err;
		}

		if (!ts->cnt) {
			cp = call_path__findnew(cpr, root, tsym, addr, ks);

			return thread_stack__push_cp(ts, addr, tm, ref, cp,
						     true, false);
		}

		/*
		 * Otherwise assume the 'return' is being used as a jump (e.g.
		 * retpoline) and just push the 'to' symbol.
		 */
		cp = call_path__findnew(cpr, parent, tsym, addr, ks);

		err = thread_stack__push_cp(ts, 0, tm, ref, cp, true, false);
		if (!err)
			ts->stack[ts->cnt - 1].non_call = true;

		return err;
	}

	/*
	 * Assume 'parent' has not yet returned, so push 'to', and then push and
	 * pop 'from'.
	 */

	cp = call_path__findnew(cpr, parent, tsym, addr, ks);

	err = thread_stack__push_cp(ts, addr, tm, ref, cp, true, false);
	if (err)
		return err;

	cp = call_path__findnew(cpr, cp, fsym, ip, ks);

	err = thread_stack__push_cp(ts, ip, tm, ref, cp, true, false);
	if (err)
		return err;

	return thread_stack__call_return(thread, ts, --ts->cnt, tm, ref, false);
}

static int thread_stack__trace_begin(struct thread *thread,
				     struct thread_stack *ts, u64 timestamp,
				     u64 ref)
{
	struct thread_stack_entry *tse;
	int err;

	if (!ts->cnt)
		return 0;

	/* Pop trace end */
	tse = &ts->stack[ts->cnt - 1];
	if (tse->trace_end) {
		err = thread_stack__call_return(thread, ts, --ts->cnt,
						timestamp, ref, false);
		if (err)
			return err;
	}

	return 0;
}

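/*
 * A 'trace end' is recorded by pushing a synthetic entry so that, if tracing
 * resumes in the same place, thread_stack__trace_begin() can pop it again and
 * the gap in the trace does not unbalance the stack.
 */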
static int thread_stack__trace_end(struct thread_stack *ts,
				   struct perf_sample *sample, u64 ref)
{
	struct call_path_root *cpr = ts->crp->cpr;
	struct call_path *cp;
	u64 ret_addr;

	/* No point having 'trace end' on the bottom of the stack */
	if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
		return 0;

	cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
				ts->kernel_start);

	ret_addr = sample->ip + sample->insn_len;

	return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
				     false, true);
}

static bool is_x86_retpoline(const char *name)
{
	return strstr(name, "__x86_indirect_thunk_") == name;
}

/*
 * x86 retpoline functions pollute the call graph. This function removes them.
 * This does not handle function return thunks, nor is there any improvement
 * for the handling of inline thunks or extern thunks.
 */
static int thread_stack__x86_retpoline(struct thread_stack *ts,
				       struct perf_sample *sample,
				       struct addr_location *to_al)
{
	struct thread_stack_entry *tse = &ts->stack[ts->cnt - 1];
	struct call_path_root *cpr = ts->crp->cpr;
	struct symbol *sym = tse->cp->sym;
	struct symbol *tsym = to_al->sym;
	struct call_path *cp;

	if (sym && is_x86_retpoline(sym->name)) {
		/*
		 * This is a x86 retpoline fn. It pollutes the call graph by
		 * showing up everywhere there is an indirect branch, but does
		 * not itself mean anything. Here the top-of-stack is removed,
		 * by decrementing the stack count, and then further down, the
		 * resulting top-of-stack is replaced with the actual target.
		 * The result is that the retpoline functions will no longer
		 * appear in the call graph. Note this only affects the call
		 * graph, since all the original branches are left unchanged.
		 */
		ts->cnt -= 1;
		sym = ts->stack[ts->cnt - 2].cp->sym;
		if (sym && sym == tsym && to_al->addr != tsym->start) {
			/*
			 * Target is back to the middle of the symbol we came
			 * from so assume it is an indirect jmp and forget it
			 * altogether.
			 */
			ts->cnt -= 1;
			return 0;
		}
	} else if (sym && sym == tsym) {
		/*
		 * Target is back to the symbol we came from so assume it is an
		 * indirect jmp and forget it altogether.
		 */
		ts->cnt -= 1;
		return 0;
	}

	cp = call_path__findnew(cpr, ts->stack[ts->cnt - 2].cp, tsym,
				sample->addr, ts->kernel_start);
	if (!cp)
		return -ENOMEM;

	/* Replace the top-of-stack with the actual target */
	ts->stack[ts->cnt - 1].cp = cp;

	return 0;
}

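/*
 * Process one branch sample for the call/return processor. Dispatches on
 * sample->flags: 'call' pushes a new call path, 'return' pops back to the
 * matching entry, trace begin/end adjust the stack around trace gaps, and a
 * branch to the start of a different symbol is recorded as a 'non_call'
 * entry so that jumps used in place of calls still appear in the call graph.
 */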
int thread_stack__process(struct thread *thread, struct comm *comm,
			  struct perf_sample *sample,
			  struct addr_location *from_al,
			  struct addr_location *to_al, u64 ref,
			  struct call_return_processor *crp)
{
	struct thread_stack *ts = thread__stack(thread, sample->cpu);
	enum retpoline_state_t rstate;
	int err = 0;

	if (ts && !ts->crp) {
		/* Supersede thread_stack__event() */
		thread_stack__reset(thread, ts);
		ts = NULL;
	}

	if (!ts) {
		ts = thread_stack__new(thread, sample->cpu, crp, true, 0);
		if (!ts)
			return -ENOMEM;
		ts->comm = comm;
	}

	rstate = ts->rstate;
	if (rstate == X86_RETPOLINE_DETECTED)
		ts->rstate = X86_RETPOLINE_POSSIBLE;

	/* Flush stack on exec */
	if (ts->comm != comm && thread__pid(thread) == thread__tid(thread)) {
		err = __thread_stack__flush(thread, ts);
		if (err)
			return err;
		ts->comm = comm;
	}

	/* If the stack is empty, put the current symbol on the stack */
	if (!ts->cnt) {
		err = thread_stack__bottom(ts, sample, from_al, to_al, ref);
		if (err)
			return err;
	}

	ts->branch_count += 1;
	ts->insn_count += sample->insn_cnt;
	ts->cyc_count += sample->cyc_cnt;
	ts->last_time = sample->time;

	if (sample->flags & PERF_IP_FLAG_CALL) {
		bool trace_end = sample->flags & PERF_IP_FLAG_TRACE_END;
		struct call_path_root *cpr = ts->crp->cpr;
		struct call_path *cp;
		u64 ret_addr;

		if (!sample->ip || !sample->addr)
			return 0;

		ret_addr = sample->ip + sample->insn_len;
		if (ret_addr == sample->addr)
			return 0; /* Zero-length calls are excluded */

		cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
					to_al->sym, sample->addr,
					ts->kernel_start);
		err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
					    cp, false, trace_end);

		/*
		 * A call to the same symbol but not the start of the symbol,
		 * may be the start of a x86 retpoline.
		 */
		if (!err && rstate == X86_RETPOLINE_POSSIBLE && to_al->sym &&
		    from_al->sym == to_al->sym &&
		    to_al->addr != to_al->sym->start)
			ts->rstate = X86_RETPOLINE_DETECTED;

	} else if (sample->flags & PERF_IP_FLAG_RETURN) {
		if (!sample->addr) {
			u32 return_from_kernel = PERF_IP_FLAG_SYSCALLRET |
						 PERF_IP_FLAG_INTERRUPT;

			if (!(sample->flags & return_from_kernel))
				return 0;

			/* Pop kernel stack */
			return thread_stack__pop_ks(thread, ts, sample, ref);
		}

		if (!sample->ip)
			return 0;

		/* x86 retpoline 'return' doesn't match the stack */
		if (rstate == X86_RETPOLINE_DETECTED && ts->cnt > 2 &&
		    ts->stack[ts->cnt - 1].ret_addr != sample->addr)
			return thread_stack__x86_retpoline(ts, sample, to_al);

		err = thread_stack__pop_cp(thread, ts, sample->addr,
					   sample->time, ref, from_al->sym);
		if (err) {
			if (err < 0)
				return err;
			err = thread_stack__no_call_return(thread, ts, sample,
							   from_al, to_al, ref);
		}
	} else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) {
		err = thread_stack__trace_begin(thread, ts, sample->time, ref);
	} else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
		err = thread_stack__trace_end(ts, sample, ref);
	} else if (sample->flags & PERF_IP_FLAG_BRANCH &&
		   from_al->sym != to_al->sym && to_al->sym &&
		   to_al->addr == to_al->sym->start) {
		struct call_path_root *cpr = ts->crp->cpr;
		struct call_path *cp;

		/*
		 * The compiler might optimize a call/ret combination by making
		 * it a jmp. Make that visible by recording on the stack a
		 * branch to the start of a different symbol. Note, that means
		 * when a ret pops the stack, all jmps must be popped off first.
		 */
		cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
					to_al->sym, sample->addr,
					ts->kernel_start);
		err = thread_stack__push_cp(ts, 0, sample->time, ref, cp, false,
					    false);
		if (!err)
			ts->stack[ts->cnt - 1].non_call = true;
	}

	return err;
}

size_t thread_stack__depth(struct thread *thread, int cpu)
{
	struct thread_stack *ts = thread__stack(thread, cpu);

	if (!ts)
		return 0;
	return ts->cnt;
}