1 | /* |
2 | * builtin-trace.c |
3 | * |
4 | * Builtin 'trace' command: |
5 | * |
6 | * Display a continuously updated trace of any workload, CPU, specific PID, |
7 | * system wide, etc. Default format is loosely strace like, but any other |
8 | * event may be specified using --event. |
9 | * |
10 | * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> |
11 | * |
12 | * Initially based on the 'trace' prototype by Thomas Gleixner: |
13 | * |
14 | * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'") |
15 | */ |
16 | |
17 | #include "util/record.h" |
18 | #include <api/fs/tracing_path.h> |
19 | #ifdef HAVE_LIBBPF_SUPPORT |
20 | #include <bpf/bpf.h> |
21 | #include <bpf/libbpf.h> |
22 | #ifdef HAVE_BPF_SKEL |
23 | #include "bpf_skel/augmented_raw_syscalls.skel.h" |
24 | #endif |
25 | #endif |
26 | #include "util/bpf_map.h" |
27 | #include "util/rlimit.h" |
28 | #include "builtin.h" |
29 | #include "util/cgroup.h" |
30 | #include "util/color.h" |
31 | #include "util/config.h" |
32 | #include "util/debug.h" |
33 | #include "util/dso.h" |
34 | #include "util/env.h" |
35 | #include "util/event.h" |
36 | #include "util/evsel.h" |
37 | #include "util/evsel_fprintf.h" |
38 | #include "util/synthetic-events.h" |
39 | #include "util/evlist.h" |
40 | #include "util/evswitch.h" |
41 | #include "util/mmap.h" |
42 | #include <subcmd/pager.h> |
43 | #include <subcmd/exec-cmd.h> |
44 | #include "util/machine.h" |
45 | #include "util/map.h" |
46 | #include "util/symbol.h" |
47 | #include "util/path.h" |
48 | #include "util/session.h" |
49 | #include "util/thread.h" |
50 | #include <subcmd/parse-options.h> |
51 | #include "util/strlist.h" |
52 | #include "util/intlist.h" |
53 | #include "util/thread_map.h" |
54 | #include "util/stat.h" |
55 | #include "util/tool.h" |
56 | #include "util/util.h" |
57 | #include "trace/beauty/beauty.h" |
58 | #include "trace-event.h" |
59 | #include "util/parse-events.h" |
60 | #include "util/tracepoint.h" |
61 | #include "callchain.h" |
62 | #include "print_binary.h" |
63 | #include "string2.h" |
64 | #include "syscalltbl.h" |
65 | #include "rb_resort.h" |
66 | #include "../perf.h" |
67 | |
68 | #include <errno.h> |
69 | #include <inttypes.h> |
70 | #include <poll.h> |
71 | #include <signal.h> |
72 | #include <stdlib.h> |
73 | #include <string.h> |
74 | #include <linux/err.h> |
75 | #include <linux/filter.h> |
76 | #include <linux/kernel.h> |
77 | #include <linux/random.h> |
78 | #include <linux/stringify.h> |
79 | #include <linux/time64.h> |
80 | #include <linux/zalloc.h> |
81 | #include <fcntl.h> |
82 | #include <sys/sysmacros.h> |
83 | |
84 | #include <linux/ctype.h> |
85 | #include <perf/mmap.h> |
86 | |
87 | #ifdef HAVE_LIBTRACEEVENT |
88 | #include <traceevent/event-parse.h> |
89 | #endif |
90 | |
/* Fallback definition for build environments whose headers lack O_CLOEXEC. */
#ifndef O_CLOEXEC
# define O_CLOEXEC		02000000
#endif

/*
 * Fallback definition; used below as the offset of the
 * fcntl_linux_specific_cmds string table.
 */
#ifndef F_LINUX_SPECIFIC_BASE
# define F_LINUX_SPECIFIC_BASE	1024
#endif

/* Number of per-argument formatter slots in struct syscall_fmt::arg[]. */
#define RAW_SYSCALL_ARGS_NUM	6
100 | |
/*
 * Describes how one argument is converted to and from text.
 *
 * scnprintf: Format the argument into a buffer, returning the number of
 *	      characters printed.
 * strtoul: Go from a string to a value, e.g. for msr: MSR_FS_BASE to 0xc0000100
 * mask_val: Takes the raw value and returns a (possibly) modified one;
 *	     presumably used to hide bits before printing -- confirm with callers.
 * parm: Opaque data for the callbacks above, e.g. a struct strarray when set
 *	 up via the STRARRAY() helper macro.
 * name: Argument name; set explicitly in some syscall_fmts[] entries (see "clone").
 * show_zero: NOTE(review): looks like it forces printing of zero values for
 *	      this argument -- confirm against the code that reads it.
 */
struct syscall_arg_fmt {
	size_t	   (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	bool	   (*strtoul)(char *bf, size_t size, struct syscall_arg *arg, u64 *val);
	unsigned long (*mask_val)(struct syscall_arg *arg, unsigned long val);
	void	   *parm;
	const char *name;
	u16	   nr_entries; // for arrays
	bool	   show_zero;
};
113 | |
/*
 * Per-syscall formatting overrides; the entries of syscall_fmts[] are of
 * this type.
 */
struct syscall_fmt {
	const char *name;	/* syscall name, e.g. "access" */
	const char *alias;	/* alternative name, e.g. "newfstat" for "fstat" */
	struct {
		const char *sys_enter,
			   *sys_exit;
	}	   bpf_prog_name;	/* presumably names of BPF programs for enter/exit -- TODO confirm */
	struct syscall_arg_fmt arg[RAW_SYSCALL_ARGS_NUM]; /* one formatter slot per argument */
	u8	   nr_args;	/* set when the entry names its arguments itself (see "clone") */
	bool	   errpid;	/* NOTE(review): looks like "return value is a pid" -- confirm */
	bool	   timeout;	/* NOTE(review): semantics not visible here -- confirm */
	bool	   hexret;	/* NOTE(review): looks like "print return value in hex" (set for "brk") -- confirm */
};
127 | |
/*
 * State of the 'trace' builtin: the perf_tool it embeds, the events being
 * traced, filters and the many boolean knobs controlling the output.
 */
struct trace {
	struct perf_tool	tool;
	struct syscalltbl	*sctbl;
	struct {
		struct syscall  *table;
		/* The evsels used for syscall tracing. */
		struct {
			struct evsel  *sys_enter,
				*sys_exit,
				*bpf_output;
		}		events;
	} syscalls;
#ifdef HAVE_BPF_SKEL
	struct augmented_raw_syscalls_bpf *skel;
#endif
	struct record_opts	opts;
	struct evlist	*evlist;
	struct machine		*host;
	struct thread		*current;
	struct cgroup		*cgroup;
	u64			base_time;
	FILE			*output;
	unsigned long		nr_events;
	unsigned long		nr_events_printed;
	unsigned long		max_events;
	struct evswitch		evswitch;
	struct strlist		*ev_qualifier;
	/* Array of 'nr' ints; presumably ev_qualifier resolved to syscall ids -- TODO confirm. */
	struct {
		size_t		nr;
		int		*entries;
	}			ev_qualifier_ids;
	/* Array of 'nr' pids plus an associated BPF map. */
	struct {
		size_t		nr;
		pid_t		*entries;
		struct bpf_map  *map;
	}			filter_pids;
	double			duration_filter;
	double			runtime_ms;
	/* Counters, apparently reported with show_tool_stats -- TODO confirm. */
	struct {
		u64		vfs_getname,
				proc_getname;
	} stats;
	unsigned int		max_stack;
	unsigned int		min_stack;
	int			raw_augmented_syscalls_args_size;
	bool			raw_augmented_syscalls;
	bool			fd_path_disabled;
	bool			sort_events;
	bool			not_ev_qualifier;
	bool			live;
	bool			full_time;
	bool			sched;
	bool			multiple_threads;
	bool			summary;
	bool			summary_only;
	bool			errno_summary;
	bool			failure_only;
	bool			show_comm;
	bool			print_sample;
	bool			show_tool_stats;
	bool			trace_syscalls;
	bool			libtraceevent_print;
	bool			kernel_syscallchains;
	s16			args_alignment;
	bool			show_tstamp;
	bool			show_duration;
	bool			show_zeros;
	bool			show_arg_names;
	bool			show_string_prefix;
	bool			force;
	bool			vfs_getname;
	int			trace_pgfaults;
	char			*perfconfig_events;
	/* Ordered events queue and a u64 'last' marker (presumably a timestamp -- confirm). */
	struct {
		struct ordered_events	data;
		u64			last;
	} oe;
};
205 | |
/*
 * Accessor for one field of a tracepoint's raw payload: the field's byte
 * offset into sample->raw_data plus the reader used to fetch it. A field is
 * either an integer or a pointer into the payload, hence the union.
 */
struct tp_field {
	int offset;
	union {
		u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
		void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
	};
};
213 | |
/*
 * Generate tp_field__u<bits>(): copy an unsigned <bits>-wide integer found
 * field->offset bytes into sample->raw_data and return it widened to u64.
 * The value is fetched with memcpy(), which works for any payload alignment.
 */
#define TP_UINT_FIELD(bits) \
static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value; \
	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
	return value;  \
}

/* Readers for 8, 16, 32 and 64 bit fields in native byte order. */
TP_UINT_FIELD(8);
TP_UINT_FIELD(16);
TP_UINT_FIELD(32);
TP_UINT_FIELD(64);
226 | |
/*
 * Generate tp_field__swapped_u<bits>(): same as tp_field__u<bits>() but the
 * value is byte swapped with bswap_<bits>() before being returned. Selected
 * by __tp_field__init_uint() when needs_swap is set.
 */
#define TP_UINT_FIELD__SWAPPED(bits) \
static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
{ \
	u##bits value; \
	memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
	return bswap_##bits(value);\
}

/* No 8 bit variant: a single byte has nothing to swap. */
TP_UINT_FIELD__SWAPPED(16);
TP_UINT_FIELD__SWAPPED(32);
TP_UINT_FIELD__SWAPPED(64);
238 | |
239 | static int __tp_field__init_uint(struct tp_field *field, int size, int offset, bool needs_swap) |
240 | { |
241 | field->offset = offset; |
242 | |
243 | switch (size) { |
244 | case 1: |
245 | field->integer = tp_field__u8; |
246 | break; |
247 | case 2: |
248 | field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16; |
249 | break; |
250 | case 4: |
251 | field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32; |
252 | break; |
253 | case 8: |
254 | field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64; |
255 | break; |
256 | default: |
257 | return -1; |
258 | } |
259 | |
260 | return 0; |
261 | } |
262 | |
263 | static int tp_field__init_uint(struct tp_field *field, struct tep_format_field *format_field, bool needs_swap) |
264 | { |
265 | return __tp_field__init_uint(field, size: format_field->size, offset: format_field->offset, needs_swap); |
266 | } |
267 | |
268 | static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample) |
269 | { |
270 | return sample->raw_data + field->offset; |
271 | } |
272 | |
273 | static int __tp_field__init_ptr(struct tp_field *field, int offset) |
274 | { |
275 | field->offset = offset; |
276 | field->pointer = tp_field__ptr; |
277 | return 0; |
278 | } |
279 | |
280 | static int tp_field__init_ptr(struct tp_field *field, struct tep_format_field *format_field) |
281 | { |
282 | return __tp_field__init_ptr(field, offset: format_field->offset); |
283 | } |
284 | |
/*
 * Field accessors for a syscall tracepoint: the syscall id plus either the
 * arguments or the return value. 'args' and 'ret' share storage, so a given
 * event uses only one of them (presumably args on enter, ret on exit).
 */
struct syscall_tp {
	struct tp_field id;
	union {
		struct tp_field args, ret;
	};
};
291 | |
/*
 * The evsel->priv as used by 'perf trace'
 * sc:	for raw_syscalls:sys_{enter,exit} and syscalls:sys_{enter,exit}_SYSCALLNAME
 * fmt: for all the other tracepoints; a heap allocated array, released by
 *	evsel_trace__delete()
 */
struct evsel_trace {
	struct syscall_tp	sc;
	struct syscall_arg_fmt  *fmt;
};
301 | |
302 | static struct evsel_trace *evsel_trace__new(void) |
303 | { |
304 | return zalloc(sizeof(struct evsel_trace)); |
305 | } |
306 | |
307 | static void evsel_trace__delete(struct evsel_trace *et) |
308 | { |
309 | if (et == NULL) |
310 | return; |
311 | |
312 | zfree(&et->fmt); |
313 | free(et); |
314 | } |
315 | |
316 | /* |
317 | * Used with raw_syscalls:sys_{enter,exit} and with the |
318 | * syscalls:sys_{enter,exit}_SYSCALL tracepoints |
319 | */ |
320 | static inline struct syscall_tp *__evsel__syscall_tp(struct evsel *evsel) |
321 | { |
322 | struct evsel_trace *et = evsel->priv; |
323 | |
324 | return &et->sc; |
325 | } |
326 | |
327 | static struct syscall_tp *evsel__syscall_tp(struct evsel *evsel) |
328 | { |
329 | if (evsel->priv == NULL) { |
330 | evsel->priv = evsel_trace__new(); |
331 | if (evsel->priv == NULL) |
332 | return NULL; |
333 | } |
334 | |
335 | return __evsel__syscall_tp(evsel); |
336 | } |
337 | |
338 | /* |
339 | * Used with all the other tracepoints. |
340 | */ |
341 | static inline struct syscall_arg_fmt *__evsel__syscall_arg_fmt(struct evsel *evsel) |
342 | { |
343 | struct evsel_trace *et = evsel->priv; |
344 | |
345 | return et->fmt; |
346 | } |
347 | |
348 | static struct syscall_arg_fmt *evsel__syscall_arg_fmt(struct evsel *evsel) |
349 | { |
350 | struct evsel_trace *et = evsel->priv; |
351 | |
352 | if (evsel->priv == NULL) { |
353 | et = evsel->priv = evsel_trace__new(); |
354 | |
355 | if (et == NULL) |
356 | return NULL; |
357 | } |
358 | |
359 | if (et->fmt == NULL) { |
360 | et->fmt = calloc(evsel->tp_format->format.nr_fields, sizeof(struct syscall_arg_fmt)); |
361 | if (et->fmt == NULL) |
362 | goto out_delete; |
363 | } |
364 | |
365 | return __evsel__syscall_arg_fmt(evsel); |
366 | |
367 | out_delete: |
368 | evsel_trace__delete(et: evsel->priv); |
369 | evsel->priv = NULL; |
370 | return NULL; |
371 | } |
372 | |
373 | static int evsel__init_tp_uint_field(struct evsel *evsel, struct tp_field *field, const char *name) |
374 | { |
375 | struct tep_format_field *format_field = evsel__field(evsel, name); |
376 | |
377 | if (format_field == NULL) |
378 | return -1; |
379 | |
380 | return tp_field__init_uint(field, format_field, needs_swap: evsel->needs_swap); |
381 | } |
382 | |
/*
 * Initialize the integer member @name of the evsel's syscall_tp from the
 * tracepoint field of the same name: @name is used both as the struct
 * member and, stringified, as the field to look up. evsel->priv must
 * already be allocated. Evaluates to evsel__init_tp_uint_field()'s result.
 */
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
	({ struct syscall_tp *sc = __evsel__syscall_tp(evsel);\
	   evsel__init_tp_uint_field(evsel, &sc->name, #name); })
386 | |
387 | static int evsel__init_tp_ptr_field(struct evsel *evsel, struct tp_field *field, const char *name) |
388 | { |
389 | struct tep_format_field *format_field = evsel__field(evsel, name); |
390 | |
391 | if (format_field == NULL) |
392 | return -1; |
393 | |
394 | return tp_field__init_ptr(field, format_field); |
395 | } |
396 | |
/*
 * Pointer flavour of perf_evsel__init_sc_tp_uint_field(): @name is both the
 * syscall_tp member to initialize and, stringified, the tracepoint field to
 * look up. evsel->priv must already be allocated.
 */
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
	({ struct syscall_tp *sc = __evsel__syscall_tp(evsel);\
	   evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
400 | |
401 | static void evsel__delete_priv(struct evsel *evsel) |
402 | { |
403 | zfree(&evsel->priv); |
404 | evsel__delete(evsel); |
405 | } |
406 | |
407 | static int evsel__init_syscall_tp(struct evsel *evsel) |
408 | { |
409 | struct syscall_tp *sc = evsel__syscall_tp(evsel); |
410 | |
411 | if (sc != NULL) { |
412 | if (evsel__init_tp_uint_field(evsel, field: &sc->id, name: "__syscall_nr" ) && |
413 | evsel__init_tp_uint_field(evsel, field: &sc->id, name: "nr" )) |
414 | return -ENOENT; |
415 | |
416 | return 0; |
417 | } |
418 | |
419 | return -ENOMEM; |
420 | } |
421 | |
422 | static int evsel__init_augmented_syscall_tp(struct evsel *evsel, struct evsel *tp) |
423 | { |
424 | struct syscall_tp *sc = evsel__syscall_tp(evsel); |
425 | |
426 | if (sc != NULL) { |
427 | struct tep_format_field *syscall_id = evsel__field(evsel: tp, name: "id" ); |
428 | if (syscall_id == NULL) |
429 | syscall_id = evsel__field(evsel: tp, name: "__syscall_nr" ); |
430 | if (syscall_id == NULL || |
431 | __tp_field__init_uint(field: &sc->id, size: syscall_id->size, offset: syscall_id->offset, needs_swap: evsel->needs_swap)) |
432 | return -EINVAL; |
433 | |
434 | return 0; |
435 | } |
436 | |
437 | return -ENOMEM; |
438 | } |
439 | |
440 | static int evsel__init_augmented_syscall_tp_args(struct evsel *evsel) |
441 | { |
442 | struct syscall_tp *sc = __evsel__syscall_tp(evsel); |
443 | |
444 | return __tp_field__init_ptr(field: &sc->args, offset: sc->id.offset + sizeof(u64)); |
445 | } |
446 | |
447 | static int evsel__init_augmented_syscall_tp_ret(struct evsel *evsel) |
448 | { |
449 | struct syscall_tp *sc = __evsel__syscall_tp(evsel); |
450 | |
451 | return __tp_field__init_uint(field: &sc->ret, size: sizeof(u64), offset: sc->id.offset + sizeof(u64), needs_swap: evsel->needs_swap); |
452 | } |
453 | |
454 | static int evsel__init_raw_syscall_tp(struct evsel *evsel, void *handler) |
455 | { |
456 | if (evsel__syscall_tp(evsel) != NULL) { |
457 | if (perf_evsel__init_sc_tp_uint_field(evsel, id)) |
458 | return -ENOENT; |
459 | |
460 | evsel->handler = handler; |
461 | return 0; |
462 | } |
463 | |
464 | return -ENOMEM; |
465 | } |
466 | |
/*
 * Create the evsel for the raw_syscalls:@direction tracepoint, falling back
 * to syscalls:@direction, and initialize it with @handler.
 *
 * Returns the new evsel, or NULL if neither tracepoint could be created or
 * the initialization failed (the evsel is destroyed in that case).
 */
static struct evsel *perf_evsel__raw_syscall_newtp(const char *direction, void *handler)
{
	struct evsel *tp = evsel__newtp("raw_syscalls", direction);

	if (IS_ERR(tp)) {
		/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
		tp = evsel__newtp("syscalls", direction);
		if (IS_ERR(tp))
			return NULL;
	}

	if (evsel__init_raw_syscall_tp(tp, handler) != 0) {
		evsel__delete_priv(tp);
		return NULL;
	}

	return tp;
}
487 | |
/*
 * Read the integer member @name (e.g. id, ret) of the evsel's syscall_tp
 * from @sample by calling the reader installed in that tp_field.
 * The accessor must have been initialized beforehand.
 */
#define perf_evsel__sc_tp_uint(evsel, name, sample) \
	({ struct syscall_tp *fields = __evsel__syscall_tp(evsel); \
	   fields->name.integer(&fields->name, sample); })

/*
 * Pointer flavour of the above: evaluates to the address of member @name's
 * data inside @sample, as returned by the installed pointer reader.
 */
#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
	({ struct syscall_tp *fields = __evsel__syscall_tp(evsel); \
	   fields->name.pointer(&fields->name, sample); })
495 | |
496 | size_t strarray__scnprintf_suffix(struct strarray *sa, char *bf, size_t size, const char *intfmt, bool show_suffix, int val) |
497 | { |
498 | int idx = val - sa->offset; |
499 | |
500 | if (idx < 0 || idx >= sa->nr_entries || sa->entries[idx] == NULL) { |
501 | size_t printed = scnprintf(buf: bf, size, fmt: intfmt, val); |
502 | if (show_suffix) |
503 | printed += scnprintf(buf: bf + printed, size: size - printed, fmt: " /* %s??? */" , sa->prefix); |
504 | return printed; |
505 | } |
506 | |
507 | return scnprintf(buf: bf, size, fmt: "%s%s" , sa->entries[idx], show_suffix ? sa->prefix : "" ); |
508 | } |
509 | |
510 | size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, bool show_prefix, int val) |
511 | { |
512 | int idx = val - sa->offset; |
513 | |
514 | if (idx < 0 || idx >= sa->nr_entries || sa->entries[idx] == NULL) { |
515 | size_t printed = scnprintf(buf: bf, size, fmt: intfmt, val); |
516 | if (show_prefix) |
517 | printed += scnprintf(buf: bf + printed, size: size - printed, fmt: " /* %s??? */" , sa->prefix); |
518 | return printed; |
519 | } |
520 | |
521 | return scnprintf(buf: bf, size, fmt: "%s%s" , show_prefix ? sa->prefix : "" , sa->entries[idx]); |
522 | } |
523 | |
524 | static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size, |
525 | const char *intfmt, |
526 | struct syscall_arg *arg) |
527 | { |
528 | return strarray__scnprintf(sa: arg->parm, bf, size, intfmt, show_prefix: arg->show_string_prefix, val: arg->val); |
529 | } |
530 | |
/* String table formatter that prints unnamed values as a decimal int. */
static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
					      struct syscall_arg *arg)
{
	const char *intfmt = "%d";

	return __syscall_arg__scnprintf_strarray(bf, size, intfmt, arg);
}
536 | |
537 | #define SCA_STRARRAY syscall_arg__scnprintf_strarray |
538 | |
539 | bool syscall_arg__strtoul_strarray(char *bf, size_t size, struct syscall_arg *arg, u64 *ret) |
540 | { |
541 | return strarray__strtoul(sa: arg->parm, bf, size, ret); |
542 | } |
543 | |
544 | bool syscall_arg__strtoul_strarray_flags(char *bf, size_t size, struct syscall_arg *arg, u64 *ret) |
545 | { |
546 | return strarray__strtoul_flags(sa: arg->parm, bf, size, ret); |
547 | } |
548 | |
549 | bool syscall_arg__strtoul_strarrays(char *bf, size_t size, struct syscall_arg *arg, u64 *ret) |
550 | { |
551 | return strarrays__strtoul(sas: arg->parm, bf, size, ret); |
552 | } |
553 | |
554 | size_t syscall_arg__scnprintf_strarray_flags(char *bf, size_t size, struct syscall_arg *arg) |
555 | { |
556 | return strarray__scnprintf_flags(sa: arg->parm, bf, size, show_prefix: arg->show_string_prefix, flags: arg->val); |
557 | } |
558 | |
559 | size_t strarrays__scnprintf(struct strarrays *sas, char *bf, size_t size, const char *intfmt, bool show_prefix, int val) |
560 | { |
561 | size_t printed; |
562 | int i; |
563 | |
564 | for (i = 0; i < sas->nr_entries; ++i) { |
565 | struct strarray *sa = sas->entries[i]; |
566 | int idx = val - sa->offset; |
567 | |
568 | if (idx >= 0 && idx < sa->nr_entries) { |
569 | if (sa->entries[idx] == NULL) |
570 | break; |
571 | return scnprintf(buf: bf, size, fmt: "%s%s" , show_prefix ? sa->prefix : "" , sa->entries[idx]); |
572 | } |
573 | } |
574 | |
575 | printed = scnprintf(buf: bf, size, fmt: intfmt, val); |
576 | if (show_prefix) |
577 | printed += scnprintf(buf: bf + printed, size: size - printed, fmt: " /* %s??? */" , sas->entries[0]->prefix); |
578 | return printed; |
579 | } |
580 | |
581 | bool strarray__strtoul(struct strarray *sa, char *bf, size_t size, u64 *ret) |
582 | { |
583 | int i; |
584 | |
585 | for (i = 0; i < sa->nr_entries; ++i) { |
586 | if (sa->entries[i] && strncmp(sa->entries[i], bf, size) == 0 && sa->entries[i][size] == '\0') { |
587 | *ret = sa->offset + i; |
588 | return true; |
589 | } |
590 | } |
591 | |
592 | return false; |
593 | } |
594 | |
595 | bool strarray__strtoul_flags(struct strarray *sa, char *bf, size_t size, u64 *ret) |
596 | { |
597 | u64 val = 0; |
598 | char *tok = bf, *sep, *end; |
599 | |
600 | *ret = 0; |
601 | |
602 | while (size != 0) { |
603 | int toklen = size; |
604 | |
605 | sep = memchr(p: tok, c: '|', size); |
606 | if (sep != NULL) { |
607 | size -= sep - tok + 1; |
608 | |
609 | end = sep - 1; |
610 | while (end > tok && isspace(*end)) |
611 | --end; |
612 | |
613 | toklen = end - tok + 1; |
614 | } |
615 | |
616 | while (isspace(*tok)) |
617 | ++tok; |
618 | |
619 | if (isalpha(*tok) || *tok == '_') { |
620 | if (!strarray__strtoul(sa, bf: tok, size: toklen, ret: &val)) |
621 | return false; |
622 | } else |
623 | val = strtoul(tok, NULL, 0); |
624 | |
625 | *ret |= (1 << (val - 1)); |
626 | |
627 | if (sep == NULL) |
628 | break; |
629 | tok = sep + 1; |
630 | } |
631 | |
632 | return true; |
633 | } |
634 | |
635 | bool strarrays__strtoul(struct strarrays *sas, char *bf, size_t size, u64 *ret) |
636 | { |
637 | int i; |
638 | |
639 | for (i = 0; i < sas->nr_entries; ++i) { |
640 | struct strarray *sa = sas->entries[i]; |
641 | |
642 | if (strarray__strtoul(sa, bf, size, ret)) |
643 | return true; |
644 | } |
645 | |
646 | return false; |
647 | } |
648 | |
649 | size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size, |
650 | struct syscall_arg *arg) |
651 | { |
652 | return strarrays__scnprintf(sas: arg->parm, bf, size, intfmt: "%d" , show_prefix: arg->show_string_prefix, val: arg->val); |
653 | } |
654 | |
655 | #ifndef AT_FDCWD |
656 | #define AT_FDCWD -100 |
657 | #endif |
658 | |
659 | static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size, |
660 | struct syscall_arg *arg) |
661 | { |
662 | int fd = arg->val; |
663 | const char *prefix = "AT_FD" ; |
664 | |
665 | if (fd == AT_FDCWD) |
666 | return scnprintf(buf: bf, size, fmt: "%s%s" , arg->show_string_prefix ? prefix : "" , "CWD" ); |
667 | |
668 | return syscall_arg__scnprintf_fd(bf, size, arg); |
669 | } |
670 | |
671 | #define SCA_FDAT syscall_arg__scnprintf_fd_at |
672 | |
673 | static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, |
674 | struct syscall_arg *arg); |
675 | |
676 | #define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd |
677 | |
678 | size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg) |
679 | { |
680 | return scnprintf(buf: bf, size, fmt: "%#lx" , arg->val); |
681 | } |
682 | |
683 | size_t syscall_arg__scnprintf_ptr(char *bf, size_t size, struct syscall_arg *arg) |
684 | { |
685 | if (arg->val == 0) |
686 | return scnprintf(buf: bf, size, fmt: "NULL" ); |
687 | return syscall_arg__scnprintf_hex(bf, size, arg); |
688 | } |
689 | |
690 | size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg) |
691 | { |
692 | return scnprintf(buf: bf, size, fmt: "%d" , arg->val); |
693 | } |
694 | |
695 | size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *arg) |
696 | { |
697 | return scnprintf(buf: bf, size, fmt: "%ld" , arg->val); |
698 | } |
699 | |
700 | static size_t syscall_arg__scnprintf_char_array(char *bf, size_t size, struct syscall_arg *arg) |
701 | { |
702 | // XXX Hey, maybe for sched:sched_switch prev/next comm fields we can |
703 | // fill missing comms using thread__set_comm()... |
704 | // here or in a special syscall_arg__scnprintf_pid_sched_tp... |
705 | return scnprintf(buf: bf, size, fmt: "\"%-.*s\"" , arg->fmt->nr_entries ?: arg->len, arg->val); |
706 | } |
707 | |
708 | #define SCA_CHAR_ARRAY syscall_arg__scnprintf_char_array |
709 | |
/*
 * String tables used to print numeric syscall arguments symbolically.
 * The position of a name in its array, plus the table's offset where one is
 * given, is the numeric value it stands for; each DEFINE_STRARRAY*() wraps
 * an array into a strarray__<array> object together with the prefix that
 * can be shown in front of the names.
 */

/* bpf(2) commands. */
static const char *bpf_cmd[] = {
	"MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
	"MAP_GET_NEXT_KEY", "PROG_LOAD", "OBJ_PIN", "OBJ_GET", "PROG_ATTACH",
	"PROG_DETACH", "PROG_TEST_RUN", "PROG_GET_NEXT_ID", "MAP_GET_NEXT_ID",
	"PROG_GET_FD_BY_ID", "MAP_GET_FD_BY_ID", "OBJ_GET_INFO_BY_FD",
	"PROG_QUERY", "RAW_TRACEPOINT_OPEN", "BTF_LOAD", "BTF_GET_FD_BY_ID",
	"TASK_FD_QUERY", "MAP_LOOKUP_AND_DELETE_ELEM", "MAP_FREEZE",
	"BTF_GET_NEXT_ID", "MAP_LOOKUP_BATCH", "MAP_LOOKUP_AND_DELETE_BATCH",
	"MAP_UPDATE_BATCH", "MAP_DELETE_BATCH", "LINK_CREATE", "LINK_UPDATE",
	"LINK_GET_FD_BY_ID", "LINK_GET_NEXT_ID", "ENABLE_STATS", "ITER_CREATE",
	"LINK_DETACH", "PROG_BIND_MAP",
};
static DEFINE_STRARRAY(bpf_cmd, "BPF_");

/* fsmount(2) flags; used with STRARRAY_FLAGS(), entry 0 is left unset. */
static const char *fsmount_flags[] = {
	[1] = "CLOEXEC",
};
static DEFINE_STRARRAY(fsmount_flags, "FSMOUNT_");

/* Provides the fsconfig_cmds[] array wrapped below. */
#include "trace/beauty/generated/fsconfig_arrays.c"

static DEFINE_STRARRAY(fsconfig_cmds, "FSCONFIG_");

/* epoll_ctl(2) operations; the table starts at value 1. */
static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, "EPOLL_CTL_", 1);

/* Interval timers, see the "getitimer" entry in syscall_fmts[]. */
static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
static DEFINE_STRARRAY(itimers, "ITIMER_");

/* keyctl(2) operations. */
static const char *keyctl_options[] = {
	"GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
	"SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
	"INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
	"ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
	"INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
};
static DEFINE_STRARRAY(keyctl_options, "KEYCTL_");

/* lseek(2) whence values; DATA and HOLE only where the headers define them. */
static const char *whences[] = { "SET", "CUR", "END",
#ifdef SEEK_DATA
"DATA",
#endif
#ifdef SEEK_HOLE
"HOLE",
#endif
};
static DEFINE_STRARRAY(whences, "SEEK_");

/* fcntl(2) commands starting at value 0. */
static const char *fcntl_cmds[] = {
	"DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
	"SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
	"SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
	"GETOWNER_UIDS",
};
static DEFINE_STRARRAY(fcntl_cmds, "F_");

/* fcntl(2) commands starting at F_LINUX_SPECIFIC_BASE; indexes 3 and 4 are unnamed. */
static const char *fcntl_linux_specific_cmds[] = {
	"SETLEASE", "GETLEASE", "NOTIFY", [5] =	"CANCELLK", "DUPFD_CLOEXEC",
	"SETPIPE_SZ", "GETPIPE_SZ", "ADD_SEALS", "GET_SEALS",
	"GET_RW_HINT", "SET_RW_HINT", "GET_FILE_RW_HINT", "SET_FILE_RW_HINT",
};

static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, "F_", F_LINUX_SPECIFIC_BASE);

/* Both fcntl tables, combined for the "fcntl" entry in syscall_fmts[]. */
static struct strarray *fcntl_cmds_arrays[] = {
	&strarray__fcntl_cmds,
	&strarray__fcntl_linux_specific_cmds,
};

static DEFINE_STRARRAYS(fcntl_cmds_arrays);

/* Resource limit identifiers. */
static const char *rlimit_resources[] = {
	"CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
	"MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
	"RTTIME",
};
static DEFINE_STRARRAY(rlimit_resources, "RLIMIT_");

/* Signal mask operations. */
static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
static DEFINE_STRARRAY(sighow, "SIG_");

/* Clock ids, see the "clock_gettime" entry in syscall_fmts[]. */
static const char *clockid[] = {
	"REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
	"MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
	"REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
};
static DEFINE_STRARRAY(clockid, "CLOCK_");
797 | |
798 | static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size, |
799 | struct syscall_arg *arg) |
800 | { |
801 | bool show_prefix = arg->show_string_prefix; |
802 | const char *suffix = "_OK" ; |
803 | size_t printed = 0; |
804 | int mode = arg->val; |
805 | |
806 | if (mode == F_OK) /* 0 */ |
807 | return scnprintf(buf: bf, size, fmt: "F%s" , show_prefix ? suffix : "" ); |
808 | #define P_MODE(n) \ |
809 | if (mode & n##_OK) { \ |
810 | printed += scnprintf(bf + printed, size - printed, "%s%s", #n, show_prefix ? suffix : ""); \ |
811 | mode &= ~n##_OK; \ |
812 | } |
813 | |
814 | P_MODE(R); |
815 | P_MODE(W); |
816 | P_MODE(X); |
817 | #undef P_MODE |
818 | |
819 | if (mode) |
820 | printed += scnprintf(buf: bf + printed, size: size - printed, fmt: "|%#x" , mode); |
821 | |
822 | return printed; |
823 | } |
824 | |
825 | #define SCA_ACCMODE syscall_arg__scnprintf_access_mode |
826 | |
827 | static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, |
828 | struct syscall_arg *arg); |
829 | |
830 | #define SCA_FILENAME syscall_arg__scnprintf_filename |
831 | |
832 | static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size, |
833 | struct syscall_arg *arg) |
834 | { |
835 | bool show_prefix = arg->show_string_prefix; |
836 | const char *prefix = "O_" ; |
837 | int printed = 0, flags = arg->val; |
838 | |
839 | #define P_FLAG(n) \ |
840 | if (flags & O_##n) { \ |
841 | printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prefix : "", #n); \ |
842 | flags &= ~O_##n; \ |
843 | } |
844 | |
845 | P_FLAG(CLOEXEC); |
846 | P_FLAG(NONBLOCK); |
847 | #undef P_FLAG |
848 | |
849 | if (flags) |
850 | printed += scnprintf(buf: bf + printed, size: size - printed, fmt: "%s%#x" , printed ? "|" : "" , flags); |
851 | |
852 | return printed; |
853 | } |
854 | |
855 | #define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags |
856 | |
857 | #ifndef GRND_NONBLOCK |
858 | #define GRND_NONBLOCK 0x0001 |
859 | #endif |
860 | #ifndef GRND_RANDOM |
861 | #define GRND_RANDOM 0x0002 |
862 | #endif |
863 | |
864 | static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size, |
865 | struct syscall_arg *arg) |
866 | { |
867 | bool show_prefix = arg->show_string_prefix; |
868 | const char *prefix = "GRND_" ; |
869 | int printed = 0, flags = arg->val; |
870 | |
871 | #define P_FLAG(n) \ |
872 | if (flags & GRND_##n) { \ |
873 | printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prefix : "", #n); \ |
874 | flags &= ~GRND_##n; \ |
875 | } |
876 | |
877 | P_FLAG(RANDOM); |
878 | P_FLAG(NONBLOCK); |
879 | #undef P_FLAG |
880 | |
881 | if (flags) |
882 | printed += scnprintf(buf: bf + printed, size: size - printed, fmt: "%s%#x" , printed ? "|" : "" , flags); |
883 | |
884 | return printed; |
885 | } |
886 | |
887 | #define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags |
888 | |
/*
 * Initializer for a struct syscall_arg_fmt whose value is printed and parsed
 * through the string table strarray__<array>. The 'name' parameter is not
 * used by the expansion; it only documents which argument the entry is for.
 */
#define STRARRAY(name, array) \
	  { .scnprintf	= SCA_STRARRAY, \
	    .strtoul	= STUL_STRARRAY, \
	    .parm	= &strarray__##array, }

/*
 * Same as STRARRAY() but using the flags variants of the formatter and
 * parser. 'name' is likewise unused by the expansion.
 */
#define STRARRAY_FLAGS(name, array) \
	  { .scnprintf	= SCA_STRARRAY_FLAGS, \
	    .strtoul	= STUL_STRARRAY_FLAGS, \
	    .parm	= &strarray__##array, }
898 | |
899 | #include "trace/beauty/arch_errno_names.c" |
900 | #include "trace/beauty/eventfd.c" |
901 | #include "trace/beauty/futex_op.c" |
902 | #include "trace/beauty/futex_val3.c" |
903 | #include "trace/beauty/mmap.c" |
904 | #include "trace/beauty/mode_t.c" |
905 | #include "trace/beauty/msg_flags.c" |
906 | #include "trace/beauty/open_flags.c" |
907 | #include "trace/beauty/perf_event_open.c" |
908 | #include "trace/beauty/pid.c" |
909 | #include "trace/beauty/sched_policy.c" |
910 | #include "trace/beauty/seccomp.c" |
911 | #include "trace/beauty/signum.c" |
912 | #include "trace/beauty/socket_type.c" |
913 | #include "trace/beauty/waitid_options.c" |
914 | |
915 | static const struct syscall_fmt syscall_fmts[] = { |
916 | { .name = "access" , |
917 | .arg = { [1] = { .scnprintf = SCA_ACCMODE, /* mode */ }, }, }, |
918 | { .name = "arch_prctl" , |
919 | .arg = { [0] = { .scnprintf = SCA_X86_ARCH_PRCTL_CODE, /* code */ }, |
920 | [1] = { .scnprintf = SCA_PTR, /* arg2 */ }, }, }, |
921 | { .name = "bind" , |
922 | .arg = { [0] = { .scnprintf = SCA_INT, /* fd */ }, |
923 | [1] = { .scnprintf = SCA_SOCKADDR, /* umyaddr */ }, |
924 | [2] = { .scnprintf = SCA_INT, /* addrlen */ }, }, }, |
925 | { .name = "bpf" , |
926 | .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, }, |
927 | { .name = "brk" , .hexret = true, |
928 | .arg = { [0] = { .scnprintf = SCA_PTR, /* brk */ }, }, }, |
929 | { .name = "clock_gettime" , |
930 | .arg = { [0] = STRARRAY(clk_id, clockid), }, }, |
931 | { .name = "clock_nanosleep" , |
932 | .arg = { [2] = { .scnprintf = SCA_TIMESPEC, /* rqtp */ }, }, }, |
933 | { .name = "clone" , .errpid = true, .nr_args = 5, |
934 | .arg = { [0] = { .name = "flags" , .scnprintf = SCA_CLONE_FLAGS, }, |
935 | [1] = { .name = "child_stack" , .scnprintf = SCA_HEX, }, |
936 | [2] = { .name = "parent_tidptr" , .scnprintf = SCA_HEX, }, |
937 | [3] = { .name = "child_tidptr" , .scnprintf = SCA_HEX, }, |
938 | [4] = { .name = "tls" , .scnprintf = SCA_HEX, }, }, }, |
939 | { .name = "close" , |
940 | .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, }, |
941 | { .name = "connect" , |
942 | .arg = { [0] = { .scnprintf = SCA_INT, /* fd */ }, |
943 | [1] = { .scnprintf = SCA_SOCKADDR, /* servaddr */ }, |
944 | [2] = { .scnprintf = SCA_INT, /* addrlen */ }, }, }, |
945 | { .name = "epoll_ctl" , |
946 | .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, }, |
947 | { .name = "eventfd2" , |
948 | .arg = { [1] = { .scnprintf = SCA_EFD_FLAGS, /* flags */ }, }, }, |
949 | { .name = "fchmodat" , |
950 | .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, |
951 | { .name = "fchownat" , |
952 | .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, |
953 | { .name = "fcntl" , |
954 | .arg = { [1] = { .scnprintf = SCA_FCNTL_CMD, /* cmd */ |
955 | .strtoul = STUL_STRARRAYS, |
956 | .parm = &strarrays__fcntl_cmds_arrays, |
957 | .show_zero = true, }, |
958 | [2] = { .scnprintf = SCA_FCNTL_ARG, /* arg */ }, }, }, |
959 | { .name = "flock" , |
960 | .arg = { [1] = { .scnprintf = SCA_FLOCK, /* cmd */ }, }, }, |
961 | { .name = "fsconfig" , |
962 | .arg = { [1] = STRARRAY(cmd, fsconfig_cmds), }, }, |
963 | { .name = "fsmount" , |
964 | .arg = { [1] = STRARRAY_FLAGS(flags, fsmount_flags), |
965 | [2] = { .scnprintf = SCA_FSMOUNT_ATTR_FLAGS, /* attr_flags */ }, }, }, |
966 | { .name = "fspick" , |
967 | .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, |
968 | [1] = { .scnprintf = SCA_FILENAME, /* path */ }, |
969 | [2] = { .scnprintf = SCA_FSPICK_FLAGS, /* flags */ }, }, }, |
970 | { .name = "fstat" , .alias = "newfstat" , }, |
971 | { .name = "fstatat" , .alias = "newfstatat" , }, |
972 | { .name = "futex" , |
973 | .arg = { [1] = { .scnprintf = SCA_FUTEX_OP, /* op */ }, |
974 | [5] = { .scnprintf = SCA_FUTEX_VAL3, /* val3 */ }, }, }, |
975 | { .name = "futimesat" , |
976 | .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, |
977 | { .name = "getitimer" , |
978 | .arg = { [0] = STRARRAY(which, itimers), }, }, |
979 | { .name = "getpid" , .errpid = true, }, |
980 | { .name = "getpgid" , .errpid = true, }, |
981 | { .name = "getppid" , .errpid = true, }, |
982 | { .name = "getrandom" , |
983 | .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, /* flags */ }, }, }, |
984 | { .name = "getrlimit" , |
985 | .arg = { [0] = STRARRAY(resource, rlimit_resources), }, }, |
986 | { .name = "getsockopt" , |
987 | .arg = { [1] = STRARRAY(level, socket_level), }, }, |
988 | { .name = "gettid" , .errpid = true, }, |
989 | { .name = "ioctl" , |
990 | .arg = { |
991 | #if defined(__i386__) || defined(__x86_64__) |
992 | /* |
993 | * FIXME: Make this available to all arches. |
994 | */ |
995 | [1] = { .scnprintf = SCA_IOCTL_CMD, /* cmd */ }, |
996 | [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, }, |
997 | #else |
998 | [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, }, |
999 | #endif |
1000 | { .name = "kcmp" , .nr_args = 5, |
1001 | .arg = { [0] = { .name = "pid1" , .scnprintf = SCA_PID, }, |
1002 | [1] = { .name = "pid2" , .scnprintf = SCA_PID, }, |
1003 | [2] = { .name = "type" , .scnprintf = SCA_KCMP_TYPE, }, |
1004 | [3] = { .name = "idx1" , .scnprintf = SCA_KCMP_IDX, }, |
1005 | [4] = { .name = "idx2" , .scnprintf = SCA_KCMP_IDX, }, }, }, |
1006 | { .name = "keyctl" , |
1007 | .arg = { [0] = STRARRAY(option, keyctl_options), }, }, |
1008 | { .name = "kill" , |
1009 | .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, |
1010 | { .name = "linkat" , |
1011 | .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, |
1012 | { .name = "lseek" , |
1013 | .arg = { [2] = STRARRAY(whence, whences), }, }, |
1014 | { .name = "lstat" , .alias = "newlstat" , }, |
1015 | { .name = "madvise" , |
1016 | .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ }, |
1017 | [2] = { .scnprintf = SCA_MADV_BHV, /* behavior */ }, }, }, |
1018 | { .name = "mkdirat" , |
1019 | .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, |
1020 | { .name = "mknodat" , |
1021 | .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, |
1022 | { .name = "mmap" , .hexret = true, |
1023 | /* The standard mmap maps to old_mmap on s390x */ |
1024 | #if defined(__s390x__) |
1025 | .alias = "old_mmap" , |
1026 | #endif |
1027 | .arg = { [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, |
1028 | [3] = { .scnprintf = SCA_MMAP_FLAGS, /* flags */ |
1029 | .strtoul = STUL_STRARRAY_FLAGS, |
1030 | .parm = &strarray__mmap_flags, }, |
1031 | [5] = { .scnprintf = SCA_HEX, /* offset */ }, }, }, |
1032 | { .name = "mount" , |
1033 | .arg = { [0] = { .scnprintf = SCA_FILENAME, /* dev_name */ }, |
1034 | [3] = { .scnprintf = SCA_MOUNT_FLAGS, /* flags */ |
1035 | .mask_val = SCAMV_MOUNT_FLAGS, /* flags */ }, }, }, |
1036 | { .name = "move_mount" , |
1037 | .arg = { [0] = { .scnprintf = SCA_FDAT, /* from_dfd */ }, |
1038 | [1] = { .scnprintf = SCA_FILENAME, /* from_pathname */ }, |
1039 | [2] = { .scnprintf = SCA_FDAT, /* to_dfd */ }, |
1040 | [3] = { .scnprintf = SCA_FILENAME, /* to_pathname */ }, |
1041 | [4] = { .scnprintf = SCA_MOVE_MOUNT_FLAGS, /* flags */ }, }, }, |
1042 | { .name = "mprotect" , |
1043 | .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ }, |
1044 | [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, }, }, |
1045 | { .name = "mq_unlink" , |
1046 | .arg = { [0] = { .scnprintf = SCA_FILENAME, /* u_name */ }, }, }, |
1047 | { .name = "mremap" , .hexret = true, |
1048 | .arg = { [3] = { .scnprintf = SCA_MREMAP_FLAGS, /* flags */ }, }, }, |
1049 | { .name = "name_to_handle_at" , |
1050 | .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, |
1051 | { .name = "newfstatat" , |
1052 | .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, |
1053 | { .name = "open" , |
1054 | .arg = { [1] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, }, |
1055 | { .name = "open_by_handle_at" , |
1056 | .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, |
1057 | [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, }, |
1058 | { .name = "openat" , |
1059 | .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, |
1060 | [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, }, |
1061 | { .name = "perf_event_open" , |
1062 | .arg = { [0] = { .scnprintf = SCA_PERF_ATTR, /* attr */ }, |
1063 | [2] = { .scnprintf = SCA_INT, /* cpu */ }, |
1064 | [3] = { .scnprintf = SCA_FD, /* group_fd */ }, |
1065 | [4] = { .scnprintf = SCA_PERF_FLAGS, /* flags */ }, }, }, |
1066 | { .name = "pipe2" , |
1067 | .arg = { [1] = { .scnprintf = SCA_PIPE_FLAGS, /* flags */ }, }, }, |
1068 | { .name = "pkey_alloc" , |
1069 | .arg = { [1] = { .scnprintf = SCA_PKEY_ALLOC_ACCESS_RIGHTS, /* access_rights */ }, }, }, |
1070 | { .name = "pkey_free" , |
1071 | .arg = { [0] = { .scnprintf = SCA_INT, /* key */ }, }, }, |
1072 | { .name = "pkey_mprotect" , |
1073 | .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ }, |
1074 | [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, |
1075 | [3] = { .scnprintf = SCA_INT, /* pkey */ }, }, }, |
1076 | { .name = "poll" , .timeout = true, }, |
1077 | { .name = "ppoll" , .timeout = true, }, |
1078 | { .name = "prctl" , |
1079 | .arg = { [0] = { .scnprintf = SCA_PRCTL_OPTION, /* option */ |
1080 | .strtoul = STUL_STRARRAY, |
1081 | .parm = &strarray__prctl_options, }, |
1082 | [1] = { .scnprintf = SCA_PRCTL_ARG2, /* arg2 */ }, |
1083 | [2] = { .scnprintf = SCA_PRCTL_ARG3, /* arg3 */ }, }, }, |
1084 | { .name = "pread" , .alias = "pread64" , }, |
1085 | { .name = "preadv" , .alias = "pread" , }, |
1086 | { .name = "prlimit64" , |
1087 | .arg = { [1] = STRARRAY(resource, rlimit_resources), }, }, |
1088 | { .name = "pwrite" , .alias = "pwrite64" , }, |
1089 | { .name = "readlinkat" , |
1090 | .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, |
1091 | { .name = "recvfrom" , |
1092 | .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, |
1093 | { .name = "recvmmsg" , |
1094 | .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, |
1095 | { .name = "recvmsg" , |
1096 | .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, |
1097 | { .name = "renameat" , |
1098 | .arg = { [0] = { .scnprintf = SCA_FDAT, /* olddirfd */ }, |
1099 | [2] = { .scnprintf = SCA_FDAT, /* newdirfd */ }, }, }, |
1100 | { .name = "renameat2" , |
1101 | .arg = { [0] = { .scnprintf = SCA_FDAT, /* olddirfd */ }, |
1102 | [2] = { .scnprintf = SCA_FDAT, /* newdirfd */ }, |
1103 | [4] = { .scnprintf = SCA_RENAMEAT2_FLAGS, /* flags */ }, }, }, |
1104 | { .name = "rt_sigaction" , |
1105 | .arg = { [0] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, |
1106 | { .name = "rt_sigprocmask" , |
1107 | .arg = { [0] = STRARRAY(how, sighow), }, }, |
1108 | { .name = "rt_sigqueueinfo" , |
1109 | .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, |
1110 | { .name = "rt_tgsigqueueinfo" , |
1111 | .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, |
1112 | { .name = "sched_setscheduler" , |
1113 | .arg = { [1] = { .scnprintf = SCA_SCHED_POLICY, /* policy */ }, }, }, |
1114 | { .name = "seccomp" , |
1115 | .arg = { [0] = { .scnprintf = SCA_SECCOMP_OP, /* op */ }, |
1116 | [1] = { .scnprintf = SCA_SECCOMP_FLAGS, /* flags */ }, }, }, |
1117 | { .name = "select" , .timeout = true, }, |
1118 | { .name = "sendfile" , .alias = "sendfile64" , }, |
1119 | { .name = "sendmmsg" , |
1120 | .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, |
1121 | { .name = "sendmsg" , |
1122 | .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, }, |
1123 | { .name = "sendto" , |
1124 | .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, |
1125 | [4] = { .scnprintf = SCA_SOCKADDR, /* addr */ }, }, }, |
1126 | { .name = "set_tid_address" , .errpid = true, }, |
1127 | { .name = "setitimer" , |
1128 | .arg = { [0] = STRARRAY(which, itimers), }, }, |
1129 | { .name = "setrlimit" , |
1130 | .arg = { [0] = STRARRAY(resource, rlimit_resources), }, }, |
1131 | { .name = "setsockopt" , |
1132 | .arg = { [1] = STRARRAY(level, socket_level), }, }, |
1133 | { .name = "socket" , |
1134 | .arg = { [0] = STRARRAY(family, socket_families), |
1135 | [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, |
1136 | [2] = { .scnprintf = SCA_SK_PROTO, /* protocol */ }, }, }, |
1137 | { .name = "socketpair" , |
1138 | .arg = { [0] = STRARRAY(family, socket_families), |
1139 | [1] = { .scnprintf = SCA_SK_TYPE, /* type */ }, |
1140 | [2] = { .scnprintf = SCA_SK_PROTO, /* protocol */ }, }, }, |
1141 | { .name = "stat" , .alias = "newstat" , }, |
1142 | { .name = "statx" , |
1143 | .arg = { [0] = { .scnprintf = SCA_FDAT, /* fdat */ }, |
1144 | [2] = { .scnprintf = SCA_STATX_FLAGS, /* flags */ } , |
1145 | [3] = { .scnprintf = SCA_STATX_MASK, /* mask */ }, }, }, |
1146 | { .name = "swapoff" , |
1147 | .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, }, |
1148 | { .name = "swapon" , |
1149 | .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, }, |
1150 | { .name = "symlinkat" , |
1151 | .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, |
1152 | { .name = "sync_file_range" , |
1153 | .arg = { [3] = { .scnprintf = SCA_SYNC_FILE_RANGE_FLAGS, /* flags */ }, }, }, |
1154 | { .name = "tgkill" , |
1155 | .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, |
1156 | { .name = "tkill" , |
1157 | .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, |
1158 | { .name = "umount2" , .alias = "umount" , |
1159 | .arg = { [0] = { .scnprintf = SCA_FILENAME, /* name */ }, }, }, |
1160 | { .name = "uname" , .alias = "newuname" , }, |
1161 | { .name = "unlinkat" , |
1162 | .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, |
1163 | { .name = "utimensat" , |
1164 | .arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ }, }, }, |
1165 | { .name = "wait4" , .errpid = true, |
1166 | .arg = { [2] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, }, |
1167 | { .name = "waitid" , .errpid = true, |
1168 | .arg = { [3] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, }, |
1169 | }; |
1170 | |
1171 | static int syscall_fmt__cmp(const void *name, const void *fmtp) |
1172 | { |
1173 | const struct syscall_fmt *fmt = fmtp; |
1174 | return strcmp(name, fmt->name); |
1175 | } |
1176 | |
1177 | static const struct syscall_fmt *__syscall_fmt__find(const struct syscall_fmt *fmts, |
1178 | const int nmemb, |
1179 | const char *name) |
1180 | { |
1181 | return bsearch(key: name, base: fmts, num: nmemb, size: sizeof(struct syscall_fmt), cmp: syscall_fmt__cmp); |
1182 | } |
1183 | |
1184 | static const struct syscall_fmt *syscall_fmt__find(const char *name) |
1185 | { |
1186 | const int nmemb = ARRAY_SIZE(syscall_fmts); |
1187 | return __syscall_fmt__find(fmts: syscall_fmts, nmemb, name); |
1188 | } |
1189 | |
1190 | static const struct syscall_fmt *__syscall_fmt__find_by_alias(const struct syscall_fmt *fmts, |
1191 | const int nmemb, const char *alias) |
1192 | { |
1193 | int i; |
1194 | |
1195 | for (i = 0; i < nmemb; ++i) { |
1196 | if (fmts[i].alias && strcmp(fmts[i].alias, alias) == 0) |
1197 | return &fmts[i]; |
1198 | } |
1199 | |
1200 | return NULL; |
1201 | } |
1202 | |
1203 | static const struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias) |
1204 | { |
1205 | const int nmemb = ARRAY_SIZE(syscall_fmts); |
1206 | return __syscall_fmt__find_by_alias(fmts: syscall_fmts, nmemb, alias); |
1207 | } |
1208 | |
/*
 * Per-syscall information.
 *
 * is_exit: is this "exit" or "exit_group"?
 * is_open: is this "open" or "openat"? To associate the fd returned in sys_exit with the pathname in sys_enter.
 * args_size: sum of the sizes of the syscall arguments, anything after that is augmented stuff: pathname for openat, etc.
 * nonexistent: Just a hole in the syscall table, syscall id not allocated
 */
struct syscall {
	struct tep_event    *tp_format;
	int		    nr_args;	/* number of entries in arg_fmt, see syscall__alloc_arg_fmts() */
	int		    args_size;	/* offset + size of the last field, see syscall__set_arg_fmts() */
	struct {
		struct bpf_program *sys_enter,
				   *sys_exit;
	}		    bpf_prog;
	bool		    is_exit;
	bool		    is_open;
	bool		    nonexistent;
	struct tep_format_field *args;	/* field list walked by syscall_arg_fmt__init_array() */
	const char	    *name;
	const struct syscall_fmt  *fmt;	/* optional formatting overrides, may be NULL */
	struct syscall_arg_fmt *arg_fmt; /* calloc()ed array, nr_args entries */
};
1231 | |
1232 | /* |
1233 | * We need to have this 'calculated' boolean because in some cases we really |
1234 | * don't know what is the duration of a syscall, for instance, when we start |
1235 | * a session and some threads are waiting for a syscall to finish, say 'poll', |
1236 | * in which case all we can do is to print "( ? ) for duration and for the |
1237 | * start timestamp. |
1238 | */ |
1239 | static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp) |
1240 | { |
1241 | double duration = (double)t / NSEC_PER_MSEC; |
1242 | size_t printed = fprintf(fp, "(" ); |
1243 | |
1244 | if (!calculated) |
1245 | printed += fprintf(fp, " " ); |
1246 | else if (duration >= 1.0) |
1247 | printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms" , duration); |
1248 | else if (duration >= 0.01) |
1249 | printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms" , duration); |
1250 | else |
1251 | printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms" , duration); |
1252 | return printed + fprintf(fp, "): " ); |
1253 | } |
1254 | |
/**
 * Per-thread tracing state, stored as the thread's priv area (see
 * thread__trace()) and released by thread_trace__delete().
 *
 * filename.ptr: The filename char pointer that will be vfs_getname'd
 * filename.entry_str_pos: Where to insert the string translated from
 *                         filename.ptr by the vfs_getname tracepoint/kprobe.
 * ret_scnprintf: syscall args may set this to a different syscall return
 *                formatter, for instance, fcntl may return fds, file flags, etc.
 */
struct thread_trace {
	u64		  entry_time;
	bool		  entry_pending;
	unsigned long	  nr_events;	/* bumped on each thread__trace() call */
	unsigned long	  pfmaj, pfmin;
	char		  *entry_str;	/* freed in thread_trace__delete() */
	double		  runtime_ms;
	size_t		  (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
	struct {
		unsigned long ptr;
		short int     entry_str_pos;
		bool	      pending_open;
		unsigned int  namelen;
		char	      *name;
	} filename;
	struct {
		int	      max;	/* highest valid index in table, -1 when empty */
		struct file   *table;	/* indexed by fd, grown by thread_trace__files_entry() */
	} files;

	struct intlist *syscall_stats;	/* allocated in thread_trace__new() */
};
1284 | |
1285 | static struct thread_trace *thread_trace__new(void) |
1286 | { |
1287 | struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace)); |
1288 | |
1289 | if (ttrace) { |
1290 | ttrace->files.max = -1; |
1291 | ttrace->syscall_stats = intlist__new(NULL); |
1292 | } |
1293 | |
1294 | return ttrace; |
1295 | } |
1296 | |
1297 | static void thread_trace__free_files(struct thread_trace *ttrace); |
1298 | |
1299 | static void thread_trace__delete(void *pttrace) |
1300 | { |
1301 | struct thread_trace *ttrace = pttrace; |
1302 | |
1303 | if (!ttrace) |
1304 | return; |
1305 | |
1306 | intlist__delete(ilist: ttrace->syscall_stats); |
1307 | ttrace->syscall_stats = NULL; |
1308 | thread_trace__free_files(ttrace); |
1309 | zfree(&ttrace->entry_str); |
1310 | free(ttrace); |
1311 | } |
1312 | |
1313 | static struct thread_trace *thread__trace(struct thread *thread, FILE *fp) |
1314 | { |
1315 | struct thread_trace *ttrace; |
1316 | |
1317 | if (thread == NULL) |
1318 | goto fail; |
1319 | |
1320 | if (thread__priv(thread) == NULL) |
1321 | thread__set_priv(thread, p: thread_trace__new()); |
1322 | |
1323 | if (thread__priv(thread) == NULL) |
1324 | goto fail; |
1325 | |
1326 | ttrace = thread__priv(thread); |
1327 | ++ttrace->nr_events; |
1328 | |
1329 | return ttrace; |
1330 | fail: |
1331 | color_fprintf(fp, PERF_COLOR_RED, |
1332 | "WARNING: not enough memory, dropping samples!\n" ); |
1333 | return NULL; |
1334 | } |
1335 | |
1336 | |
1337 | void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg, |
1338 | size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg)) |
1339 | { |
1340 | struct thread_trace *ttrace = thread__priv(thread: arg->thread); |
1341 | |
1342 | ttrace->ret_scnprintf = ret_scnprintf; |
1343 | } |
1344 | |
/*
 * Bit flags. NOTE(review): presumably they select tracing of major/minor
 * page faults (cf. thread_trace::pfmaj/pfmin) -- confirm at the users.
 */
#define TRACE_PFMAJ		(1 << 0)
#define TRACE_PFMIN		(1 << 1)

/* NOTE(review): presumably the size of thread_trace::entry_str -- confirm at the allocation site. */
static const size_t trace__entry_str_size = 2048;
1349 | |
1350 | static void thread_trace__free_files(struct thread_trace *ttrace) |
1351 | { |
1352 | for (int i = 0; i < ttrace->files.max; ++i) { |
1353 | struct file *file = ttrace->files.table + i; |
1354 | zfree(&file->pathname); |
1355 | } |
1356 | |
1357 | zfree(&ttrace->files.table); |
1358 | ttrace->files.max = -1; |
1359 | } |
1360 | |
1361 | static struct file *thread_trace__files_entry(struct thread_trace *ttrace, int fd) |
1362 | { |
1363 | if (fd < 0) |
1364 | return NULL; |
1365 | |
1366 | if (fd > ttrace->files.max) { |
1367 | struct file *nfiles = realloc(ttrace->files.table, (fd + 1) * sizeof(struct file)); |
1368 | |
1369 | if (nfiles == NULL) |
1370 | return NULL; |
1371 | |
1372 | if (ttrace->files.max != -1) { |
1373 | memset(nfiles + ttrace->files.max + 1, 0, |
1374 | (fd - ttrace->files.max) * sizeof(struct file)); |
1375 | } else { |
1376 | memset(nfiles, 0, (fd + 1) * sizeof(struct file)); |
1377 | } |
1378 | |
1379 | ttrace->files.table = nfiles; |
1380 | ttrace->files.max = fd; |
1381 | } |
1382 | |
1383 | return ttrace->files.table + fd; |
1384 | } |
1385 | |
/* Same as thread_trace__files_entry(), using the priv area of @thread. */
struct file *thread__files_entry(struct thread *thread, int fd)
{
	struct thread_trace *ttrace = thread__priv(thread);

	return thread_trace__files_entry(ttrace, fd);
}
1390 | |
1391 | static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname) |
1392 | { |
1393 | struct thread_trace *ttrace = thread__priv(thread); |
1394 | struct file *file = thread_trace__files_entry(ttrace, fd); |
1395 | |
1396 | if (file != NULL) { |
1397 | struct stat st; |
1398 | if (stat(pathname, &st) == 0) |
1399 | file->dev_maj = major(st.st_rdev); |
1400 | file->pathname = strdup(pathname); |
1401 | if (file->pathname) |
1402 | return 0; |
1403 | } |
1404 | |
1405 | return -1; |
1406 | } |
1407 | |
1408 | static int thread__read_fd_path(struct thread *thread, int fd) |
1409 | { |
1410 | char linkname[PATH_MAX], pathname[PATH_MAX]; |
1411 | struct stat st; |
1412 | int ret; |
1413 | |
1414 | if (thread__pid(thread) == thread__tid(thread)) { |
1415 | scnprintf(buf: linkname, size: sizeof(linkname), |
1416 | fmt: "/proc/%d/fd/%d" , thread__pid(thread), fd); |
1417 | } else { |
1418 | scnprintf(buf: linkname, size: sizeof(linkname), |
1419 | fmt: "/proc/%d/task/%d/fd/%d" , |
1420 | thread__pid(thread), thread__tid(thread), fd); |
1421 | } |
1422 | |
1423 | if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname)) |
1424 | return -1; |
1425 | |
1426 | ret = readlink(linkname, pathname, sizeof(pathname)); |
1427 | |
1428 | if (ret < 0 || ret > st.st_size) |
1429 | return -1; |
1430 | |
1431 | pathname[ret] = '\0'; |
1432 | return trace__set_fd_pathname(thread, fd, pathname); |
1433 | } |
1434 | |
1435 | static const char *thread__fd_path(struct thread *thread, int fd, |
1436 | struct trace *trace) |
1437 | { |
1438 | struct thread_trace *ttrace = thread__priv(thread); |
1439 | |
1440 | if (ttrace == NULL || trace->fd_path_disabled) |
1441 | return NULL; |
1442 | |
1443 | if (fd < 0) |
1444 | return NULL; |
1445 | |
1446 | if ((fd > ttrace->files.max || ttrace->files.table[fd].pathname == NULL)) { |
1447 | if (!trace->live) |
1448 | return NULL; |
1449 | ++trace->stats.proc_getname; |
1450 | if (thread__read_fd_path(thread, fd)) |
1451 | return NULL; |
1452 | } |
1453 | |
1454 | return ttrace->files.table[fd].pathname; |
1455 | } |
1456 | |
1457 | size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg) |
1458 | { |
1459 | int fd = arg->val; |
1460 | size_t printed = scnprintf(buf: bf, size, fmt: "%d" , fd); |
1461 | const char *path = thread__fd_path(thread: arg->thread, fd, trace: arg->trace); |
1462 | |
1463 | if (path) |
1464 | printed += scnprintf(buf: bf + printed, size: size - printed, fmt: "<%s>" , path); |
1465 | |
1466 | return printed; |
1467 | } |
1468 | |
1469 | size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_t size) |
1470 | { |
1471 | size_t printed = scnprintf(buf: bf, size, fmt: "%d" , fd); |
1472 | struct thread *thread = machine__find_thread(machine: trace->host, pid, tid: pid); |
1473 | |
1474 | if (thread) { |
1475 | const char *path = thread__fd_path(thread, fd, trace); |
1476 | |
1477 | if (path) |
1478 | printed += scnprintf(buf: bf + printed, size: size - printed, fmt: "<%s>" , path); |
1479 | |
1480 | thread__put(thread); |
1481 | } |
1482 | |
1483 | return printed; |
1484 | } |
1485 | |
1486 | static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, |
1487 | struct syscall_arg *arg) |
1488 | { |
1489 | int fd = arg->val; |
1490 | size_t printed = syscall_arg__scnprintf_fd(bf, size, arg); |
1491 | struct thread_trace *ttrace = thread__priv(thread: arg->thread); |
1492 | |
1493 | if (ttrace && fd >= 0 && fd <= ttrace->files.max) |
1494 | zfree(&ttrace->files.table[fd].pathname); |
1495 | |
1496 | return printed; |
1497 | } |
1498 | |
1499 | static void thread__set_filename_pos(struct thread *thread, const char *bf, |
1500 | unsigned long ptr) |
1501 | { |
1502 | struct thread_trace *ttrace = thread__priv(thread); |
1503 | |
1504 | ttrace->filename.ptr = ptr; |
1505 | ttrace->filename.entry_str_pos = bf - ttrace->entry_str; |
1506 | } |
1507 | |
1508 | static size_t syscall_arg__scnprintf_augmented_string(struct syscall_arg *arg, char *bf, size_t size) |
1509 | { |
1510 | struct augmented_arg *augmented_arg = arg->augmented.args; |
1511 | size_t printed = scnprintf(buf: bf, size, fmt: "\"%.*s\"" , augmented_arg->size, augmented_arg->value); |
1512 | /* |
1513 | * So that the next arg with a payload can consume its augmented arg, i.e. for rename* syscalls |
1514 | * we would have two strings, each prefixed by its size. |
1515 | */ |
1516 | int consumed = sizeof(*augmented_arg) + augmented_arg->size; |
1517 | |
1518 | arg->augmented.args = ((void *)arg->augmented.args) + consumed; |
1519 | arg->augmented.size -= consumed; |
1520 | |
1521 | return printed; |
1522 | } |
1523 | |
1524 | static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, |
1525 | struct syscall_arg *arg) |
1526 | { |
1527 | unsigned long ptr = arg->val; |
1528 | |
1529 | if (arg->augmented.args) |
1530 | return syscall_arg__scnprintf_augmented_string(arg, bf, size); |
1531 | |
1532 | if (!arg->trace->vfs_getname) |
1533 | return scnprintf(buf: bf, size, fmt: "%#x" , ptr); |
1534 | |
1535 | thread__set_filename_pos(thread: arg->thread, bf, ptr); |
1536 | return 0; |
1537 | } |
1538 | |
1539 | static bool trace__filter_duration(struct trace *trace, double t) |
1540 | { |
1541 | return t < (trace->duration_filter * NSEC_PER_MSEC); |
1542 | } |
1543 | |
1544 | static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) |
1545 | { |
1546 | double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC; |
1547 | |
1548 | return fprintf(fp, "%10.3f " , ts); |
1549 | } |
1550 | |
1551 | /* |
1552 | * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are |
1553 | * using ttrace->entry_time for a thread that receives a sys_exit without |
1554 | * first having received a sys_enter ("poll" issued before tracing session |
1555 | * starts, lost sys_enter exit due to ring buffer overflow). |
1556 | */ |
1557 | static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) |
1558 | { |
1559 | if (tstamp > 0) |
1560 | return __trace__fprintf_tstamp(trace, tstamp, fp); |
1561 | |
1562 | return fprintf(fp, " ? " ); |
1563 | } |
1564 | |
/* pid that sighandler_chld() compares against the sender of the signal, -1 until set */
static pid_t workload_pid = -1;
/* set by both signal handlers below */
static volatile sig_atomic_t done = false;
/* set only by sighandler_interrupt() */
static volatile sig_atomic_t interrupted = false;
1568 | |
1569 | static void sighandler_interrupt(int sig __maybe_unused) |
1570 | { |
1571 | done = interrupted = true; |
1572 | } |
1573 | |
1574 | static void sighandler_chld(int sig __maybe_unused, siginfo_t *info, |
1575 | void *context __maybe_unused) |
1576 | { |
1577 | if (info->si_pid == workload_pid) |
1578 | done = true; |
1579 | } |
1580 | |
1581 | static size_t trace__fprintf_comm_tid(struct trace *trace, struct thread *thread, FILE *fp) |
1582 | { |
1583 | size_t printed = 0; |
1584 | |
1585 | if (trace->multiple_threads) { |
1586 | if (trace->show_comm) |
1587 | printed += fprintf(fp, "%.14s/" , thread__comm_str(thread)); |
1588 | printed += fprintf(fp, "%d " , thread__tid(thread)); |
1589 | } |
1590 | |
1591 | return printed; |
1592 | } |
1593 | |
1594 | static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread, |
1595 | u64 duration, bool duration_calculated, u64 tstamp, FILE *fp) |
1596 | { |
1597 | size_t printed = 0; |
1598 | |
1599 | if (trace->show_tstamp) |
1600 | printed = trace__fprintf_tstamp(trace, tstamp, fp); |
1601 | if (trace->show_duration) |
1602 | printed += fprintf_duration(duration, duration_calculated, fp); |
1603 | return printed + trace__fprintf_comm_tid(trace, thread, fp); |
1604 | } |
1605 | |
1606 | static int trace__process_event(struct trace *trace, struct machine *machine, |
1607 | union perf_event *event, struct perf_sample *sample) |
1608 | { |
1609 | int ret = 0; |
1610 | |
1611 | switch (event->header.type) { |
1612 | case PERF_RECORD_LOST: |
1613 | color_fprintf(trace->output, PERF_COLOR_RED, |
1614 | "LOST %" PRIu64 " events!\n" , event->lost.lost); |
1615 | ret = machine__process_lost_event(machine, event, sample); |
1616 | break; |
1617 | default: |
1618 | ret = machine__process_event(machine, event, sample); |
1619 | break; |
1620 | } |
1621 | |
1622 | return ret; |
1623 | } |
1624 | |
1625 | static int trace__tool_process(struct perf_tool *tool, |
1626 | union perf_event *event, |
1627 | struct perf_sample *sample, |
1628 | struct machine *machine) |
1629 | { |
1630 | struct trace *trace = container_of(tool, struct trace, tool); |
1631 | return trace__process_event(trace, machine, event, sample); |
1632 | } |
1633 | |
1634 | static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp) |
1635 | { |
1636 | struct machine *machine = vmachine; |
1637 | |
1638 | if (machine->kptr_restrict_warned) |
1639 | return NULL; |
1640 | |
1641 | if (symbol_conf.kptr_restrict) { |
1642 | pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n" |
1643 | "Check /proc/sys/kernel/kptr_restrict and /proc/sys/kernel/perf_event_paranoid.\n\n" |
1644 | "Kernel samples will not be resolved.\n" ); |
1645 | machine->kptr_restrict_warned = true; |
1646 | return NULL; |
1647 | } |
1648 | |
1649 | return machine__resolve_kernel_addr(vmachine, addrp, modp); |
1650 | } |
1651 | |
1652 | static int trace__symbols_init(struct trace *trace, struct evlist *evlist) |
1653 | { |
1654 | int err = symbol__init(NULL); |
1655 | |
1656 | if (err) |
1657 | return err; |
1658 | |
1659 | trace->host = machine__new_host(); |
1660 | if (trace->host == NULL) |
1661 | return -ENOMEM; |
1662 | |
1663 | thread__set_priv_destructor(destructor: thread_trace__delete); |
1664 | |
1665 | err = trace_event__register_resolver(machine: trace->host, func: trace__machine__resolve_kernel_addr); |
1666 | if (err < 0) |
1667 | goto out; |
1668 | |
1669 | err = __machine__synthesize_threads(machine: trace->host, tool: &trace->tool, target: &trace->opts.target, |
1670 | threads: evlist->core.threads, process: trace__tool_process, |
1671 | needs_mmap: true, data_mmap: false, nr_threads_synthesize: 1); |
1672 | out: |
1673 | if (err) |
1674 | symbol__exit(); |
1675 | |
1676 | return err; |
1677 | } |
1678 | |
1679 | static void trace__symbols__exit(struct trace *trace) |
1680 | { |
1681 | machine__exit(machine: trace->host); |
1682 | trace->host = NULL; |
1683 | |
1684 | symbol__exit(); |
1685 | } |
1686 | |
1687 | static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args) |
1688 | { |
1689 | int idx; |
1690 | |
1691 | if (nr_args == RAW_SYSCALL_ARGS_NUM && sc->fmt && sc->fmt->nr_args != 0) |
1692 | nr_args = sc->fmt->nr_args; |
1693 | |
1694 | sc->arg_fmt = calloc(nr_args, sizeof(*sc->arg_fmt)); |
1695 | if (sc->arg_fmt == NULL) |
1696 | return -1; |
1697 | |
1698 | for (idx = 0; idx < nr_args; ++idx) { |
1699 | if (sc->fmt) |
1700 | sc->arg_fmt[idx] = sc->fmt->arg[idx]; |
1701 | } |
1702 | |
1703 | sc->nr_args = nr_args; |
1704 | return 0; |
1705 | } |
1706 | |
/*
 * Argument formatters selected by the argument (field) name alone, used as
 * the last resort in syscall_arg_fmt__init_array().
 *
 * Searched with bsearch() by __syscall_arg_fmt__find_by_name(), so it has to
 * be kept sorted by .name.
 */
static const struct syscall_arg_fmt syscall_arg_fmts__by_name[] = {
	{ .name = "msr",	.scnprintf = SCA_X86_MSR,	  .strtoul = STUL_X86_MSR,	   },
	{ .name = "vector",	.scnprintf = SCA_X86_IRQ_VECTORS, .strtoul = STUL_X86_IRQ_VECTORS, },
};
1711 | |
1712 | static int syscall_arg_fmt__cmp(const void *name, const void *fmtp) |
1713 | { |
1714 | const struct syscall_arg_fmt *fmt = fmtp; |
1715 | return strcmp(name, fmt->name); |
1716 | } |
1717 | |
1718 | static const struct syscall_arg_fmt * |
1719 | __syscall_arg_fmt__find_by_name(const struct syscall_arg_fmt *fmts, const int nmemb, |
1720 | const char *name) |
1721 | { |
1722 | return bsearch(key: name, base: fmts, num: nmemb, size: sizeof(struct syscall_arg_fmt), cmp: syscall_arg_fmt__cmp); |
1723 | } |
1724 | |
1725 | static const struct syscall_arg_fmt *syscall_arg_fmt__find_by_name(const char *name) |
1726 | { |
1727 | const int nmemb = ARRAY_SIZE(syscall_arg_fmts__by_name); |
1728 | return __syscall_arg_fmt__find_by_name(fmts: syscall_arg_fmts__by_name, nmemb, name); |
1729 | } |
1730 | |
1731 | static struct tep_format_field * |
1732 | syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field *field) |
1733 | { |
1734 | struct tep_format_field *last_field = NULL; |
1735 | int len; |
1736 | |
1737 | for (; field; field = field->next, ++arg) { |
1738 | last_field = field; |
1739 | |
1740 | if (arg->scnprintf) |
1741 | continue; |
1742 | |
1743 | len = strlen(field->name); |
1744 | |
1745 | if (strcmp(field->type, "const char *" ) == 0 && |
1746 | ((len >= 4 && strcmp(field->name + len - 4, "name" ) == 0) || |
1747 | strstr(field->name, "path" ) != NULL)) |
1748 | arg->scnprintf = SCA_FILENAME; |
1749 | else if ((field->flags & TEP_FIELD_IS_POINTER) || strstr(field->name, "addr" )) |
1750 | arg->scnprintf = SCA_PTR; |
1751 | else if (strcmp(field->type, "pid_t" ) == 0) |
1752 | arg->scnprintf = SCA_PID; |
1753 | else if (strcmp(field->type, "umode_t" ) == 0) |
1754 | arg->scnprintf = SCA_MODE_T; |
1755 | else if ((field->flags & TEP_FIELD_IS_ARRAY) && strstr(field->type, "char" )) { |
1756 | arg->scnprintf = SCA_CHAR_ARRAY; |
1757 | arg->nr_entries = field->arraylen; |
1758 | } else if ((strcmp(field->type, "int" ) == 0 || |
1759 | strcmp(field->type, "unsigned int" ) == 0 || |
1760 | strcmp(field->type, "long" ) == 0) && |
1761 | len >= 2 && strcmp(field->name + len - 2, "fd" ) == 0) { |
1762 | /* |
1763 | * /sys/kernel/tracing/events/syscalls/sys_enter* |
1764 | * grep -E 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c |
1765 | * 65 int |
1766 | * 23 unsigned int |
1767 | * 7 unsigned long |
1768 | */ |
1769 | arg->scnprintf = SCA_FD; |
1770 | } else { |
1771 | const struct syscall_arg_fmt *fmt = |
1772 | syscall_arg_fmt__find_by_name(name: field->name); |
1773 | |
1774 | if (fmt) { |
1775 | arg->scnprintf = fmt->scnprintf; |
1776 | arg->strtoul = fmt->strtoul; |
1777 | } |
1778 | } |
1779 | } |
1780 | |
1781 | return last_field; |
1782 | } |
1783 | |
1784 | static int syscall__set_arg_fmts(struct syscall *sc) |
1785 | { |
1786 | struct tep_format_field *last_field = syscall_arg_fmt__init_array(arg: sc->arg_fmt, field: sc->args); |
1787 | |
1788 | if (last_field) |
1789 | sc->args_size = last_field->offset + last_field->size; |
1790 | |
1791 | return 0; |
1792 | } |
1793 | |
1794 | static int trace__read_syscall_info(struct trace *trace, int id) |
1795 | { |
1796 | char tp_name[128]; |
1797 | struct syscall *sc; |
1798 | const char *name = syscalltbl__name(trace->sctbl, id); |
1799 | |
1800 | #ifdef HAVE_SYSCALL_TABLE_SUPPORT |
1801 | if (trace->syscalls.table == NULL) { |
1802 | trace->syscalls.table = calloc(trace->sctbl->syscalls.max_id + 1, sizeof(*sc)); |
1803 | if (trace->syscalls.table == NULL) |
1804 | return -ENOMEM; |
1805 | } |
1806 | #else |
1807 | if (id > trace->sctbl->syscalls.max_id || (id == 0 && trace->syscalls.table == NULL)) { |
1808 | // When using libaudit we don't know beforehand what is the max syscall id |
1809 | struct syscall *table = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc)); |
1810 | |
1811 | if (table == NULL) |
1812 | return -ENOMEM; |
1813 | |
1814 | // Need to memset from offset 0 and +1 members if brand new |
1815 | if (trace->syscalls.table == NULL) |
1816 | memset(table, 0, (id + 1) * sizeof(*sc)); |
1817 | else |
1818 | memset(table + trace->sctbl->syscalls.max_id + 1, 0, (id - trace->sctbl->syscalls.max_id) * sizeof(*sc)); |
1819 | |
1820 | trace->syscalls.table = table; |
1821 | trace->sctbl->syscalls.max_id = id; |
1822 | } |
1823 | #endif |
1824 | sc = trace->syscalls.table + id; |
1825 | if (sc->nonexistent) |
1826 | return -EEXIST; |
1827 | |
1828 | if (name == NULL) { |
1829 | sc->nonexistent = true; |
1830 | return -EEXIST; |
1831 | } |
1832 | |
1833 | sc->name = name; |
1834 | sc->fmt = syscall_fmt__find(name: sc->name); |
1835 | |
1836 | snprintf(buf: tp_name, size: sizeof(tp_name), fmt: "sys_enter_%s" , sc->name); |
1837 | sc->tp_format = trace_event__tp_format(sys: "syscalls" , name: tp_name); |
1838 | |
1839 | if (IS_ERR(ptr: sc->tp_format) && sc->fmt && sc->fmt->alias) { |
1840 | snprintf(buf: tp_name, size: sizeof(tp_name), fmt: "sys_enter_%s" , sc->fmt->alias); |
1841 | sc->tp_format = trace_event__tp_format(sys: "syscalls" , name: tp_name); |
1842 | } |
1843 | |
1844 | /* |
1845 | * Fails to read trace point format via sysfs node, so the trace point |
1846 | * doesn't exist. Set the 'nonexistent' flag as true. |
1847 | */ |
1848 | if (IS_ERR(ptr: sc->tp_format)) { |
1849 | sc->nonexistent = true; |
1850 | return PTR_ERR(ptr: sc->tp_format); |
1851 | } |
1852 | |
1853 | if (syscall__alloc_arg_fmts(sc, nr_args: IS_ERR(ptr: sc->tp_format) ? |
1854 | RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields)) |
1855 | return -ENOMEM; |
1856 | |
1857 | sc->args = sc->tp_format->format.fields; |
1858 | /* |
1859 | * We need to check and discard the first variable '__syscall_nr' |
1860 | * or 'nr' that mean the syscall number. It is needless here. |
1861 | * So drop '__syscall_nr' or 'nr' field but does not exist on older kernels. |
1862 | */ |
1863 | if (sc->args && (!strcmp(sc->args->name, "__syscall_nr" ) || !strcmp(sc->args->name, "nr" ))) { |
1864 | sc->args = sc->args->next; |
1865 | --sc->nr_args; |
1866 | } |
1867 | |
1868 | sc->is_exit = !strcmp(name, "exit_group" ) || !strcmp(name, "exit" ); |
1869 | sc->is_open = !strcmp(name, "open" ) || !strcmp(name, "openat" ); |
1870 | |
1871 | return syscall__set_arg_fmts(sc); |
1872 | } |
1873 | |
1874 | static int evsel__init_tp_arg_scnprintf(struct evsel *evsel) |
1875 | { |
1876 | struct syscall_arg_fmt *fmt = evsel__syscall_arg_fmt(evsel); |
1877 | |
1878 | if (fmt != NULL) { |
1879 | syscall_arg_fmt__init_array(arg: fmt, field: evsel->tp_format->format.fields); |
1880 | return 0; |
1881 | } |
1882 | |
1883 | return -ENOMEM; |
1884 | } |
1885 | |
/*
 * qsort()/bsearch() comparator for arrays of int.
 *
 * Returns a negative, zero or positive value when *a is respectively less
 * than, equal to or greater than *b. The result is built from comparisons
 * rather than a subtraction, as '*a - *b' overflows (undefined behaviour,
 * wrong sign in practice) when the operands are far apart, e.g. INT_MIN
 * and 1.
 */
static int intcmp(const void *a, const void *b)
{
	const int one = *(const int *)a, another = *(const int *)b;

	return (one > another) - (one < another);
}
1892 | |
1893 | static int trace__validate_ev_qualifier(struct trace *trace) |
1894 | { |
1895 | int err = 0; |
1896 | bool printed_invalid_prefix = false; |
1897 | struct str_node *pos; |
1898 | size_t nr_used = 0, nr_allocated = strlist__nr_entries(slist: trace->ev_qualifier); |
1899 | |
1900 | trace->ev_qualifier_ids.entries = malloc(nr_allocated * |
1901 | sizeof(trace->ev_qualifier_ids.entries[0])); |
1902 | |
1903 | if (trace->ev_qualifier_ids.entries == NULL) { |
1904 | fputs("Error:\tNot enough memory for allocating events qualifier ids\n" , |
1905 | trace->output); |
1906 | err = -EINVAL; |
1907 | goto out; |
1908 | } |
1909 | |
1910 | strlist__for_each_entry(pos, trace->ev_qualifier) { |
1911 | const char *sc = pos->s; |
1912 | int id = syscalltbl__id(trace->sctbl, sc), match_next = -1; |
1913 | |
1914 | if (id < 0) { |
1915 | id = syscalltbl__strglobmatch_first(trace->sctbl, sc, &match_next); |
1916 | if (id >= 0) |
1917 | goto matches; |
1918 | |
1919 | if (!printed_invalid_prefix) { |
1920 | pr_debug("Skipping unknown syscalls: " ); |
1921 | printed_invalid_prefix = true; |
1922 | } else { |
1923 | pr_debug(", " ); |
1924 | } |
1925 | |
1926 | pr_debug("%s" , sc); |
1927 | continue; |
1928 | } |
1929 | matches: |
1930 | trace->ev_qualifier_ids.entries[nr_used++] = id; |
1931 | if (match_next == -1) |
1932 | continue; |
1933 | |
1934 | while (1) { |
1935 | id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next); |
1936 | if (id < 0) |
1937 | break; |
1938 | if (nr_allocated == nr_used) { |
1939 | void *entries; |
1940 | |
1941 | nr_allocated += 8; |
1942 | entries = realloc(trace->ev_qualifier_ids.entries, |
1943 | nr_allocated * sizeof(trace->ev_qualifier_ids.entries[0])); |
1944 | if (entries == NULL) { |
1945 | err = -ENOMEM; |
1946 | fputs("\nError:\t Not enough memory for parsing\n" , trace->output); |
1947 | goto out_free; |
1948 | } |
1949 | trace->ev_qualifier_ids.entries = entries; |
1950 | } |
1951 | trace->ev_qualifier_ids.entries[nr_used++] = id; |
1952 | } |
1953 | } |
1954 | |
1955 | trace->ev_qualifier_ids.nr = nr_used; |
1956 | qsort(trace->ev_qualifier_ids.entries, nr_used, sizeof(int), intcmp); |
1957 | out: |
1958 | if (printed_invalid_prefix) |
1959 | pr_debug("\n" ); |
1960 | return err; |
1961 | out_free: |
1962 | zfree(&trace->ev_qualifier_ids.entries); |
1963 | trace->ev_qualifier_ids.nr = 0; |
1964 | goto out; |
1965 | } |
1966 | |
1967 | static __maybe_unused bool trace__syscall_enabled(struct trace *trace, int id) |
1968 | { |
1969 | bool in_ev_qualifier; |
1970 | |
1971 | if (trace->ev_qualifier_ids.nr == 0) |
1972 | return true; |
1973 | |
1974 | in_ev_qualifier = bsearch(key: &id, base: trace->ev_qualifier_ids.entries, |
1975 | num: trace->ev_qualifier_ids.nr, size: sizeof(int), cmp: intcmp) != NULL; |
1976 | |
1977 | if (in_ev_qualifier) |
1978 | return !trace->not_ev_qualifier; |
1979 | |
1980 | return trace->not_ev_qualifier; |
1981 | } |
1982 | |
1983 | /* |
1984 | * args is to be interpreted as a series of longs but we need to handle |
1985 | * 8-byte unaligned accesses. args points to raw_data within the event |
1986 | * and raw_data is guaranteed to be 8-byte unaligned because it is |
1987 | * preceded by raw_size which is a u32. So we need to copy args to a temp |
1988 | * variable to read it. Most notably this avoids extended load instructions |
1989 | * on unaligned addresses |
1990 | */ |
1991 | unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx) |
1992 | { |
1993 | unsigned long val; |
1994 | unsigned char *p = arg->args + sizeof(unsigned long) * idx; |
1995 | |
1996 | memcpy(&val, p, sizeof(val)); |
1997 | return val; |
1998 | } |
1999 | |
2000 | static size_t syscall__scnprintf_name(struct syscall *sc, char *bf, size_t size, |
2001 | struct syscall_arg *arg) |
2002 | { |
2003 | if (sc->arg_fmt && sc->arg_fmt[arg->idx].name) |
2004 | return scnprintf(buf: bf, size, fmt: "%s: " , sc->arg_fmt[arg->idx].name); |
2005 | |
2006 | return scnprintf(buf: bf, size, fmt: "arg%d: " , arg->idx); |
2007 | } |
2008 | |
2009 | /* |
2010 | * Check if the value is in fact zero, i.e. mask whatever needs masking, such |
2011 | * as mount 'flags' argument that needs ignoring some magic flag, see comment |
2012 | * in tools/perf/trace/beauty/mount_flags.c |
2013 | */ |
2014 | static unsigned long syscall_arg_fmt__mask_val(struct syscall_arg_fmt *fmt, struct syscall_arg *arg, unsigned long val) |
2015 | { |
2016 | if (fmt && fmt->mask_val) |
2017 | return fmt->mask_val(arg, val); |
2018 | |
2019 | return val; |
2020 | } |
2021 | |
2022 | static size_t syscall_arg_fmt__scnprintf_val(struct syscall_arg_fmt *fmt, char *bf, size_t size, |
2023 | struct syscall_arg *arg, unsigned long val) |
2024 | { |
2025 | if (fmt && fmt->scnprintf) { |
2026 | arg->val = val; |
2027 | if (fmt->parm) |
2028 | arg->parm = fmt->parm; |
2029 | return fmt->scnprintf(bf, size, arg); |
2030 | } |
2031 | return scnprintf(buf: bf, size, fmt: "%ld" , val); |
2032 | } |
2033 | |
2034 | static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, |
2035 | unsigned char *args, void *augmented_args, int augmented_args_size, |
2036 | struct trace *trace, struct thread *thread) |
2037 | { |
2038 | size_t printed = 0; |
2039 | unsigned long val; |
2040 | u8 bit = 1; |
2041 | struct syscall_arg arg = { |
2042 | .args = args, |
2043 | .augmented = { |
2044 | .size = augmented_args_size, |
2045 | .args = augmented_args, |
2046 | }, |
2047 | .idx = 0, |
2048 | .mask = 0, |
2049 | .trace = trace, |
2050 | .thread = thread, |
2051 | .show_string_prefix = trace->show_string_prefix, |
2052 | }; |
2053 | struct thread_trace *ttrace = thread__priv(thread); |
2054 | |
2055 | /* |
2056 | * Things like fcntl will set this in its 'cmd' formatter to pick the |
2057 | * right formatter for the return value (an fd? file flags?), which is |
2058 | * not needed for syscalls that always return a given type, say an fd. |
2059 | */ |
2060 | ttrace->ret_scnprintf = NULL; |
2061 | |
2062 | if (sc->args != NULL) { |
2063 | struct tep_format_field *field; |
2064 | |
2065 | for (field = sc->args; field; |
2066 | field = field->next, ++arg.idx, bit <<= 1) { |
2067 | if (arg.mask & bit) |
2068 | continue; |
2069 | |
2070 | arg.fmt = &sc->arg_fmt[arg.idx]; |
2071 | val = syscall_arg__val(arg: &arg, idx: arg.idx); |
2072 | /* |
2073 | * Some syscall args need some mask, most don't and |
2074 | * return val untouched. |
2075 | */ |
2076 | val = syscall_arg_fmt__mask_val(fmt: &sc->arg_fmt[arg.idx], arg: &arg, val); |
2077 | |
2078 | /* |
2079 | * Suppress this argument if its value is zero and |
2080 | * and we don't have a string associated in an |
2081 | * strarray for it. |
2082 | */ |
2083 | if (val == 0 && |
2084 | !trace->show_zeros && |
2085 | !(sc->arg_fmt && |
2086 | (sc->arg_fmt[arg.idx].show_zero || |
2087 | sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAY || |
2088 | sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAYS) && |
2089 | sc->arg_fmt[arg.idx].parm)) |
2090 | continue; |
2091 | |
2092 | printed += scnprintf(buf: bf + printed, size: size - printed, fmt: "%s" , printed ? ", " : "" ); |
2093 | |
2094 | if (trace->show_arg_names) |
2095 | printed += scnprintf(buf: bf + printed, size: size - printed, fmt: "%s: " , field->name); |
2096 | |
2097 | printed += syscall_arg_fmt__scnprintf_val(fmt: &sc->arg_fmt[arg.idx], |
2098 | bf: bf + printed, size: size - printed, arg: &arg, val); |
2099 | } |
2100 | } else if (IS_ERR(ptr: sc->tp_format)) { |
2101 | /* |
2102 | * If we managed to read the tracepoint /format file, then we |
2103 | * may end up not having any args, like with gettid(), so only |
2104 | * print the raw args when we didn't manage to read it. |
2105 | */ |
2106 | while (arg.idx < sc->nr_args) { |
2107 | if (arg.mask & bit) |
2108 | goto next_arg; |
2109 | val = syscall_arg__val(arg: &arg, idx: arg.idx); |
2110 | if (printed) |
2111 | printed += scnprintf(buf: bf + printed, size: size - printed, fmt: ", " ); |
2112 | printed += syscall__scnprintf_name(sc, bf: bf + printed, size: size - printed, arg: &arg); |
2113 | printed += syscall_arg_fmt__scnprintf_val(fmt: &sc->arg_fmt[arg.idx], bf: bf + printed, size: size - printed, arg: &arg, val); |
2114 | next_arg: |
2115 | ++arg.idx; |
2116 | bit <<= 1; |
2117 | } |
2118 | } |
2119 | |
2120 | return printed; |
2121 | } |
2122 | |
/* Signature shared by the per tracepoint sample handlers in this file. */
typedef int (*tracepoint_handler)(struct trace *trace,
				  struct evsel *evsel,
				  union perf_event *event,
				  struct perf_sample *sample);
2126 | |
2127 | static struct syscall *trace__syscall_info(struct trace *trace, |
2128 | struct evsel *evsel, int id) |
2129 | { |
2130 | int err = 0; |
2131 | |
2132 | if (id < 0) { |
2133 | |
2134 | /* |
2135 | * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried |
2136 | * before that, leaving at a higher verbosity level till that is |
2137 | * explained. Reproduced with plain ftrace with: |
2138 | * |
2139 | * echo 1 > /t/events/raw_syscalls/sys_exit/enable |
2140 | * grep "NR -1 " /t/trace_pipe |
2141 | * |
2142 | * After generating some load on the machine. |
2143 | */ |
2144 | if (verbose > 1) { |
2145 | static u64 n; |
2146 | fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n" , |
2147 | id, evsel__name(evsel), ++n); |
2148 | } |
2149 | return NULL; |
2150 | } |
2151 | |
2152 | err = -EINVAL; |
2153 | |
2154 | #ifdef HAVE_SYSCALL_TABLE_SUPPORT |
2155 | if (id > trace->sctbl->syscalls.max_id) { |
2156 | #else |
2157 | if (id >= trace->sctbl->syscalls.max_id) { |
2158 | /* |
2159 | * With libaudit we don't know beforehand what is the max_id, |
2160 | * so we let trace__read_syscall_info() figure that out as we |
2161 | * go on reading syscalls. |
2162 | */ |
2163 | err = trace__read_syscall_info(trace, id); |
2164 | if (err) |
2165 | #endif |
2166 | goto out_cant_read; |
2167 | } |
2168 | |
2169 | if ((trace->syscalls.table == NULL || trace->syscalls.table[id].name == NULL) && |
2170 | (err = trace__read_syscall_info(trace, id)) != 0) |
2171 | goto out_cant_read; |
2172 | |
2173 | if (trace->syscalls.table && trace->syscalls.table[id].nonexistent) |
2174 | goto out_cant_read; |
2175 | |
2176 | return &trace->syscalls.table[id]; |
2177 | |
2178 | out_cant_read: |
2179 | if (verbose > 0) { |
2180 | char sbuf[STRERR_BUFSIZE]; |
2181 | fprintf(trace->output, "Problems reading syscall %d: %d (%s)" , id, -err, str_error_r(-err, sbuf, sizeof(sbuf))); |
2182 | if (id <= trace->sctbl->syscalls.max_id && trace->syscalls.table[id].name != NULL) |
2183 | fprintf(trace->output, "(%s)" , trace->syscalls.table[id].name); |
2184 | fputs(" information\n" , trace->output); |
2185 | } |
2186 | return NULL; |
2187 | } |
2188 | |
/*
 * Per thread, per syscall statistics, hung off the ->priv of the int_node
 * keyed by syscall id in ttrace->syscall_stats, see thread__update_stats().
 */
struct syscall_stats {
	struct stats stats;	/* fed with the duration of each call */
	u64	     nr_failures; /* calls that returned a negative value */
	int	     max_errno;	/* number of entries in errnos[] */
	u32	     *errnos;	/* errnos[e - 1] counts failures with errno e */
};
2195 | |
2196 | static void thread__update_stats(struct thread *thread, struct thread_trace *ttrace, |
2197 | int id, struct perf_sample *sample, long err, bool errno_summary) |
2198 | { |
2199 | struct int_node *inode; |
2200 | struct syscall_stats *stats; |
2201 | u64 duration = 0; |
2202 | |
2203 | inode = intlist__findnew(ilist: ttrace->syscall_stats, i: id); |
2204 | if (inode == NULL) |
2205 | return; |
2206 | |
2207 | stats = inode->priv; |
2208 | if (stats == NULL) { |
2209 | stats = zalloc(sizeof(*stats)); |
2210 | if (stats == NULL) |
2211 | return; |
2212 | |
2213 | init_stats(stats: &stats->stats); |
2214 | inode->priv = stats; |
2215 | } |
2216 | |
2217 | if (ttrace->entry_time && sample->time > ttrace->entry_time) |
2218 | duration = sample->time - ttrace->entry_time; |
2219 | |
2220 | update_stats(stats: &stats->stats, val: duration); |
2221 | |
2222 | if (err < 0) { |
2223 | ++stats->nr_failures; |
2224 | |
2225 | if (!errno_summary) |
2226 | return; |
2227 | |
2228 | err = -err; |
2229 | if (err > stats->max_errno) { |
2230 | u32 *new_errnos = realloc(stats->errnos, err * sizeof(u32)); |
2231 | |
2232 | if (new_errnos) { |
2233 | memset(new_errnos + stats->max_errno, 0, (err - stats->max_errno) * sizeof(u32)); |
2234 | } else { |
2235 | pr_debug("Not enough memory for errno stats for thread \"%s\"(%d/%d), results will be incomplete\n" , |
2236 | thread__comm_str(thread), thread__pid(thread), |
2237 | thread__tid(thread)); |
2238 | return; |
2239 | } |
2240 | |
2241 | stats->errnos = new_errnos; |
2242 | stats->max_errno = err; |
2243 | } |
2244 | |
2245 | ++stats->errnos[err - 1]; |
2246 | } |
2247 | } |
2248 | |
2249 | static int trace__printf_interrupted_entry(struct trace *trace) |
2250 | { |
2251 | struct thread_trace *ttrace; |
2252 | size_t printed; |
2253 | int len; |
2254 | |
2255 | if (trace->failure_only || trace->current == NULL) |
2256 | return 0; |
2257 | |
2258 | ttrace = thread__priv(thread: trace->current); |
2259 | |
2260 | if (!ttrace->entry_pending) |
2261 | return 0; |
2262 | |
2263 | printed = trace__fprintf_entry_head(trace, trace->current, 0, false, ttrace->entry_time, trace->output); |
2264 | printed += len = fprintf(trace->output, "%s)" , ttrace->entry_str); |
2265 | |
2266 | if (len < trace->args_alignment - 4) |
2267 | printed += fprintf(trace->output, "%-*s" , trace->args_alignment - 4 - len, " " ); |
2268 | |
2269 | printed += fprintf(trace->output, " ...\n" ); |
2270 | |
2271 | ttrace->entry_pending = false; |
2272 | ++trace->nr_events_printed; |
2273 | |
2274 | return printed; |
2275 | } |
2276 | |
2277 | static int trace__fprintf_sample(struct trace *trace, struct evsel *evsel, |
2278 | struct perf_sample *sample, struct thread *thread) |
2279 | { |
2280 | int printed = 0; |
2281 | |
2282 | if (trace->print_sample) { |
2283 | double ts = (double)sample->time / NSEC_PER_MSEC; |
2284 | |
2285 | printed += fprintf(trace->output, "%22s %10.3f %s %d/%d [%d]\n" , |
2286 | evsel__name(evsel), ts, |
2287 | thread__comm_str(thread), |
2288 | sample->pid, sample->tid, sample->cpu); |
2289 | } |
2290 | |
2291 | return printed; |
2292 | } |
2293 | |
2294 | static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size, int raw_augmented_args_size) |
2295 | { |
2296 | void *augmented_args = NULL; |
2297 | /* |
2298 | * For now with BPF raw_augmented we hook into raw_syscalls:sys_enter |
2299 | * and there we get all 6 syscall args plus the tracepoint common fields |
2300 | * that gets calculated at the start and the syscall_nr (another long). |
2301 | * So we check if that is the case and if so don't look after the |
2302 | * sc->args_size but always after the full raw_syscalls:sys_enter payload, |
2303 | * which is fixed. |
2304 | * |
2305 | * We'll revisit this later to pass s->args_size to the BPF augmenter |
2306 | * (now tools/perf/examples/bpf/augmented_raw_syscalls.c, so that it |
2307 | * copies only what we need for each syscall, like what happens when we |
2308 | * use syscalls:sys_enter_NAME, so that we reduce the kernel/userspace |
2309 | * traffic to just what is needed for each syscall. |
2310 | */ |
2311 | int args_size = raw_augmented_args_size ?: sc->args_size; |
2312 | |
2313 | *augmented_args_size = sample->raw_size - args_size; |
2314 | if (*augmented_args_size > 0) |
2315 | augmented_args = sample->raw_data + args_size; |
2316 | |
2317 | return augmented_args; |
2318 | } |
2319 | |
2320 | static void syscall__exit(struct syscall *sc) |
2321 | { |
2322 | if (!sc) |
2323 | return; |
2324 | |
2325 | zfree(&sc->arg_fmt); |
2326 | } |
2327 | |
2328 | static int trace__sys_enter(struct trace *trace, struct evsel *evsel, |
2329 | union perf_event *event __maybe_unused, |
2330 | struct perf_sample *sample) |
2331 | { |
2332 | char *msg; |
2333 | void *args; |
2334 | int printed = 0; |
2335 | struct thread *thread; |
2336 | int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1; |
2337 | int augmented_args_size = 0; |
2338 | void *augmented_args = NULL; |
2339 | struct syscall *sc = trace__syscall_info(trace, evsel, id); |
2340 | struct thread_trace *ttrace; |
2341 | |
2342 | if (sc == NULL) |
2343 | return -1; |
2344 | |
2345 | thread = machine__findnew_thread(machine: trace->host, pid: sample->pid, tid: sample->tid); |
2346 | ttrace = thread__trace(thread, trace->output); |
2347 | if (ttrace == NULL) |
2348 | goto out_put; |
2349 | |
2350 | trace__fprintf_sample(trace, evsel, sample, thread); |
2351 | |
2352 | args = perf_evsel__sc_tp_ptr(evsel, args, sample); |
2353 | |
2354 | if (ttrace->entry_str == NULL) { |
2355 | ttrace->entry_str = malloc(trace__entry_str_size); |
2356 | if (!ttrace->entry_str) |
2357 | goto out_put; |
2358 | } |
2359 | |
2360 | if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) |
2361 | trace__printf_interrupted_entry(trace); |
2362 | /* |
2363 | * If this is raw_syscalls.sys_enter, then it always comes with the 6 possible |
2364 | * arguments, even if the syscall being handled, say "openat", uses only 4 arguments |
2365 | * this breaks syscall__augmented_args() check for augmented args, as we calculate |
2366 | * syscall->args_size using each syscalls:sys_enter_NAME tracefs format file, |
2367 | * so when handling, say the openat syscall, we end up getting 6 args for the |
2368 | * raw_syscalls:sys_enter event, when we expected just 4, we end up mistakenly |
2369 | * thinking that the extra 2 u64 args are the augmented filename, so just check |
2370 | * here and avoid using augmented syscalls when the evsel is the raw_syscalls one. |
2371 | */ |
2372 | if (evsel != trace->syscalls.events.sys_enter) |
2373 | augmented_args = syscall__augmented_args(sc, sample, augmented_args_size: &augmented_args_size, raw_augmented_args_size: trace->raw_augmented_syscalls_args_size); |
2374 | ttrace->entry_time = sample->time; |
2375 | msg = ttrace->entry_str; |
2376 | printed += scnprintf(buf: msg + printed, size: trace__entry_str_size - printed, fmt: "%s(" , sc->name); |
2377 | |
2378 | printed += syscall__scnprintf_args(sc, bf: msg + printed, size: trace__entry_str_size - printed, |
2379 | args, augmented_args, augmented_args_size, trace, thread); |
2380 | |
2381 | if (sc->is_exit) { |
2382 | if (!(trace->duration_filter || trace->summary_only || trace->failure_only || trace->min_stack)) { |
2383 | int alignment = 0; |
2384 | |
2385 | trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output); |
2386 | printed = fprintf(trace->output, "%s)" , ttrace->entry_str); |
2387 | if (trace->args_alignment > printed) |
2388 | alignment = trace->args_alignment - printed; |
2389 | fprintf(trace->output, "%*s= ?\n" , alignment, " " ); |
2390 | } |
2391 | } else { |
2392 | ttrace->entry_pending = true; |
2393 | /* See trace__vfs_getname & trace__sys_exit */ |
2394 | ttrace->filename.pending_open = false; |
2395 | } |
2396 | |
2397 | if (trace->current != thread) { |
2398 | thread__put(thread: trace->current); |
2399 | trace->current = thread__get(thread); |
2400 | } |
2401 | err = 0; |
2402 | out_put: |
2403 | thread__put(thread); |
2404 | return err; |
2405 | } |
2406 | |
2407 | static int trace__fprintf_sys_enter(struct trace *trace, struct evsel *evsel, |
2408 | struct perf_sample *sample) |
2409 | { |
2410 | struct thread_trace *ttrace; |
2411 | struct thread *thread; |
2412 | int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1; |
2413 | struct syscall *sc = trace__syscall_info(trace, evsel, id); |
2414 | char msg[1024]; |
2415 | void *args, *augmented_args = NULL; |
2416 | int augmented_args_size; |
2417 | |
2418 | if (sc == NULL) |
2419 | return -1; |
2420 | |
2421 | thread = machine__findnew_thread(machine: trace->host, pid: sample->pid, tid: sample->tid); |
2422 | ttrace = thread__trace(thread, trace->output); |
2423 | /* |
2424 | * We need to get ttrace just to make sure it is there when syscall__scnprintf_args() |
2425 | * and the rest of the beautifiers accessing it via struct syscall_arg touches it. |
2426 | */ |
2427 | if (ttrace == NULL) |
2428 | goto out_put; |
2429 | |
2430 | args = perf_evsel__sc_tp_ptr(evsel, args, sample); |
2431 | augmented_args = syscall__augmented_args(sc, sample, augmented_args_size: &augmented_args_size, raw_augmented_args_size: trace->raw_augmented_syscalls_args_size); |
2432 | syscall__scnprintf_args(sc, bf: msg, size: sizeof(msg), args, augmented_args, augmented_args_size, trace, thread); |
2433 | fprintf(trace->output, "%s" , msg); |
2434 | err = 0; |
2435 | out_put: |
2436 | thread__put(thread); |
2437 | return err; |
2438 | } |
2439 | |
2440 | static int trace__resolve_callchain(struct trace *trace, struct evsel *evsel, |
2441 | struct perf_sample *sample, |
2442 | struct callchain_cursor *cursor) |
2443 | { |
2444 | struct addr_location al; |
2445 | int max_stack = evsel->core.attr.sample_max_stack ? |
2446 | evsel->core.attr.sample_max_stack : |
2447 | trace->max_stack; |
2448 | int err = -1; |
2449 | |
2450 | addr_location__init(al: &al); |
2451 | if (machine__resolve(machine: trace->host, al: &al, sample) < 0) |
2452 | goto out; |
2453 | |
2454 | err = thread__resolve_callchain(thread: al.thread, cursor, evsel, sample, NULL, NULL, max_stack); |
2455 | out: |
2456 | addr_location__exit(al: &al); |
2457 | return err; |
2458 | } |
2459 | |
2460 | static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample) |
2461 | { |
2462 | /* TODO: user-configurable print_opts */ |
2463 | const unsigned int print_opts = EVSEL__PRINT_SYM | |
2464 | EVSEL__PRINT_DSO | |
2465 | EVSEL__PRINT_UNKNOWN_AS_ADDR; |
2466 | |
2467 | return sample__fprintf_callchain(sample, 38, print_opts, get_tls_callchain_cursor(), symbol_conf.bt_stop_list, trace->output); |
2468 | } |
2469 | |
/*
 * Map errno value @err to its name using the errno table for the
 * architecture recorded in the perf environment of @evsel.
 */
static const char *errno_to_name(struct evsel *evsel, int err)
{
	const char *arch_name = perf_env__arch(evsel__env(evsel));

	return arch_syscalls__strerrno(arch_name, err);
}
2477 | |
2478 | static int trace__sys_exit(struct trace *trace, struct evsel *evsel, |
2479 | union perf_event *event __maybe_unused, |
2480 | struct perf_sample *sample) |
2481 | { |
2482 | long ret; |
2483 | u64 duration = 0; |
2484 | bool duration_calculated = false; |
2485 | struct thread *thread; |
2486 | int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0, printed = 0; |
2487 | int alignment = trace->args_alignment; |
2488 | struct syscall *sc = trace__syscall_info(trace, evsel, id); |
2489 | struct thread_trace *ttrace; |
2490 | |
2491 | if (sc == NULL) |
2492 | return -1; |
2493 | |
2494 | thread = machine__findnew_thread(machine: trace->host, pid: sample->pid, tid: sample->tid); |
2495 | ttrace = thread__trace(thread, trace->output); |
2496 | if (ttrace == NULL) |
2497 | goto out_put; |
2498 | |
2499 | trace__fprintf_sample(trace, evsel, sample, thread); |
2500 | |
2501 | ret = perf_evsel__sc_tp_uint(evsel, ret, sample); |
2502 | |
2503 | if (trace->summary) |
2504 | thread__update_stats(thread, ttrace, id, sample, err: ret, errno_summary: trace->errno_summary); |
2505 | |
2506 | if (!trace->fd_path_disabled && sc->is_open && ret >= 0 && ttrace->filename.pending_open) { |
2507 | trace__set_fd_pathname(thread, fd: ret, pathname: ttrace->filename.name); |
2508 | ttrace->filename.pending_open = false; |
2509 | ++trace->stats.vfs_getname; |
2510 | } |
2511 | |
2512 | if (ttrace->entry_time) { |
2513 | duration = sample->time - ttrace->entry_time; |
2514 | if (trace__filter_duration(trace, t: duration)) |
2515 | goto out; |
2516 | duration_calculated = true; |
2517 | } else if (trace->duration_filter) |
2518 | goto out; |
2519 | |
2520 | if (sample->callchain) { |
2521 | struct callchain_cursor *cursor = get_tls_callchain_cursor(); |
2522 | |
2523 | callchain_ret = trace__resolve_callchain(trace, evsel, sample, cursor); |
2524 | if (callchain_ret == 0) { |
2525 | if (cursor->nr < trace->min_stack) |
2526 | goto out; |
2527 | callchain_ret = 1; |
2528 | } |
2529 | } |
2530 | |
2531 | if (trace->summary_only || (ret >= 0 && trace->failure_only)) |
2532 | goto out; |
2533 | |
2534 | trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output); |
2535 | |
2536 | if (ttrace->entry_pending) { |
2537 | printed = fprintf(trace->output, "%s" , ttrace->entry_str); |
2538 | } else { |
2539 | printed += fprintf(trace->output, " ... [" ); |
2540 | color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued" ); |
2541 | printed += 9; |
2542 | printed += fprintf(trace->output, "]: %s()" , sc->name); |
2543 | } |
2544 | |
2545 | printed++; /* the closing ')' */ |
2546 | |
2547 | if (alignment > printed) |
2548 | alignment -= printed; |
2549 | else |
2550 | alignment = 0; |
2551 | |
2552 | fprintf(trace->output, ")%*s= " , alignment, " " ); |
2553 | |
2554 | if (sc->fmt == NULL) { |
2555 | if (ret < 0) |
2556 | goto errno_print; |
2557 | signed_print: |
2558 | fprintf(trace->output, "%ld" , ret); |
2559 | } else if (ret < 0) { |
2560 | errno_print: { |
2561 | char bf[STRERR_BUFSIZE]; |
2562 | const char *emsg = str_error_r(-ret, bf, sizeof(bf)), |
2563 | *e = errno_to_name(evsel, err: -ret); |
2564 | |
2565 | fprintf(trace->output, "-1 %s (%s)" , e, emsg); |
2566 | } |
2567 | } else if (ret == 0 && sc->fmt->timeout) |
2568 | fprintf(trace->output, "0 (Timeout)" ); |
2569 | else if (ttrace->ret_scnprintf) { |
2570 | char bf[1024]; |
2571 | struct syscall_arg arg = { |
2572 | .val = ret, |
2573 | .thread = thread, |
2574 | .trace = trace, |
2575 | }; |
2576 | ttrace->ret_scnprintf(bf, sizeof(bf), &arg); |
2577 | ttrace->ret_scnprintf = NULL; |
2578 | fprintf(trace->output, "%s" , bf); |
2579 | } else if (sc->fmt->hexret) |
2580 | fprintf(trace->output, "%#lx" , ret); |
2581 | else if (sc->fmt->errpid) { |
2582 | struct thread *child = machine__find_thread(machine: trace->host, pid: ret, tid: ret); |
2583 | |
2584 | if (child != NULL) { |
2585 | fprintf(trace->output, "%ld" , ret); |
2586 | if (thread__comm_set(thread: child)) |
2587 | fprintf(trace->output, " (%s)" , thread__comm_str(thread: child)); |
2588 | thread__put(thread: child); |
2589 | } |
2590 | } else |
2591 | goto signed_print; |
2592 | |
2593 | fputc('\n', trace->output); |
2594 | |
2595 | /* |
2596 | * We only consider an 'event' for the sake of --max-events a non-filtered |
2597 | * sys_enter + sys_exit and other tracepoint events. |
2598 | */ |
2599 | if (++trace->nr_events_printed == trace->max_events && trace->max_events != ULONG_MAX) |
2600 | interrupted = true; |
2601 | |
2602 | if (callchain_ret > 0) |
2603 | trace__fprintf_callchain(trace, sample); |
2604 | else if (callchain_ret < 0) |
2605 | pr_err("Problem processing %s callchain, skipping...\n" , evsel__name(evsel)); |
2606 | out: |
2607 | ttrace->entry_pending = false; |
2608 | err = 0; |
2609 | out_put: |
2610 | thread__put(thread); |
2611 | return err; |
2612 | } |
2613 | |
2614 | static int trace__vfs_getname(struct trace *trace, struct evsel *evsel, |
2615 | union perf_event *event __maybe_unused, |
2616 | struct perf_sample *sample) |
2617 | { |
2618 | struct thread *thread = machine__findnew_thread(machine: trace->host, pid: sample->pid, tid: sample->tid); |
2619 | struct thread_trace *ttrace; |
2620 | size_t filename_len, entry_str_len, to_move; |
2621 | ssize_t remaining_space; |
2622 | char *pos; |
2623 | const char *filename = evsel__rawptr(evsel, sample, "pathname" ); |
2624 | |
2625 | if (!thread) |
2626 | goto out; |
2627 | |
2628 | ttrace = thread__priv(thread); |
2629 | if (!ttrace) |
2630 | goto out_put; |
2631 | |
2632 | filename_len = strlen(filename); |
2633 | if (filename_len == 0) |
2634 | goto out_put; |
2635 | |
2636 | if (ttrace->filename.namelen < filename_len) { |
2637 | char *f = realloc(ttrace->filename.name, filename_len + 1); |
2638 | |
2639 | if (f == NULL) |
2640 | goto out_put; |
2641 | |
2642 | ttrace->filename.namelen = filename_len; |
2643 | ttrace->filename.name = f; |
2644 | } |
2645 | |
2646 | strcpy(p: ttrace->filename.name, q: filename); |
2647 | ttrace->filename.pending_open = true; |
2648 | |
2649 | if (!ttrace->filename.ptr) |
2650 | goto out_put; |
2651 | |
2652 | entry_str_len = strlen(ttrace->entry_str); |
2653 | remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */ |
2654 | if (remaining_space <= 0) |
2655 | goto out_put; |
2656 | |
2657 | if (filename_len > (size_t)remaining_space) { |
2658 | filename += filename_len - remaining_space; |
2659 | filename_len = remaining_space; |
2660 | } |
2661 | |
2662 | to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */ |
2663 | pos = ttrace->entry_str + ttrace->filename.entry_str_pos; |
2664 | memmove(pos + filename_len, pos, to_move); |
2665 | memcpy(pos, filename, filename_len); |
2666 | |
2667 | ttrace->filename.ptr = 0; |
2668 | ttrace->filename.entry_str_pos = 0; |
2669 | out_put: |
2670 | thread__put(thread); |
2671 | out: |
2672 | return 0; |
2673 | } |
2674 | |
2675 | static int trace__sched_stat_runtime(struct trace *trace, struct evsel *evsel, |
2676 | union perf_event *event __maybe_unused, |
2677 | struct perf_sample *sample) |
2678 | { |
2679 | u64 runtime = evsel__intval(evsel, sample, "runtime" ); |
2680 | double runtime_ms = (double)runtime / NSEC_PER_MSEC; |
2681 | struct thread *thread = machine__findnew_thread(machine: trace->host, |
2682 | pid: sample->pid, |
2683 | tid: sample->tid); |
2684 | struct thread_trace *ttrace = thread__trace(thread, trace->output); |
2685 | |
2686 | if (ttrace == NULL) |
2687 | goto out_dump; |
2688 | |
2689 | ttrace->runtime_ms += runtime_ms; |
2690 | trace->runtime_ms += runtime_ms; |
2691 | out_put: |
2692 | thread__put(thread); |
2693 | return 0; |
2694 | |
2695 | out_dump: |
2696 | fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n" , |
2697 | evsel->name, |
2698 | evsel__strval(evsel, sample, "comm" ), |
2699 | (pid_t)evsel__intval(evsel, sample, "pid" ), |
2700 | runtime, |
2701 | evsel__intval(evsel, sample, "vruntime" )); |
2702 | goto out_put; |
2703 | } |
2704 | |
2705 | static int bpf_output__printer(enum binary_printer_ops op, |
2706 | unsigned int val, void * __maybe_unused, FILE *fp) |
2707 | { |
2708 | unsigned char ch = (unsigned char)val; |
2709 | |
2710 | switch (op) { |
2711 | case BINARY_PRINT_CHAR_DATA: |
2712 | return fprintf(fp, "%c" , isprint(ch) ? ch : '.'); |
2713 | case BINARY_PRINT_DATA_BEGIN: |
2714 | case BINARY_PRINT_LINE_BEGIN: |
2715 | case BINARY_PRINT_ADDR: |
2716 | case BINARY_PRINT_NUM_DATA: |
2717 | case BINARY_PRINT_NUM_PAD: |
2718 | case BINARY_PRINT_SEP: |
2719 | case BINARY_PRINT_CHAR_PAD: |
2720 | case BINARY_PRINT_LINE_END: |
2721 | case BINARY_PRINT_DATA_END: |
2722 | default: |
2723 | break; |
2724 | } |
2725 | |
2726 | return 0; |
2727 | } |
2728 | |
2729 | static void bpf_output__fprintf(struct trace *trace, |
2730 | struct perf_sample *sample) |
2731 | { |
2732 | binary__fprintf(sample->raw_data, sample->raw_size, 8, |
2733 | bpf_output__printer, NULL, trace->output); |
2734 | ++trace->nr_events_printed; |
2735 | } |
2736 | |
2737 | static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel, struct perf_sample *sample, |
2738 | struct thread *thread, void *augmented_args, int augmented_args_size) |
2739 | { |
2740 | char bf[2048]; |
2741 | size_t size = sizeof(bf); |
2742 | struct tep_format_field *field = evsel->tp_format->format.fields; |
2743 | struct syscall_arg_fmt *arg = __evsel__syscall_arg_fmt(evsel); |
2744 | size_t printed = 0; |
2745 | unsigned long val; |
2746 | u8 bit = 1; |
2747 | struct syscall_arg syscall_arg = { |
2748 | .augmented = { |
2749 | .size = augmented_args_size, |
2750 | .args = augmented_args, |
2751 | }, |
2752 | .idx = 0, |
2753 | .mask = 0, |
2754 | .trace = trace, |
2755 | .thread = thread, |
2756 | .show_string_prefix = trace->show_string_prefix, |
2757 | }; |
2758 | |
2759 | for (; field && arg; field = field->next, ++syscall_arg.idx, bit <<= 1, ++arg) { |
2760 | if (syscall_arg.mask & bit) |
2761 | continue; |
2762 | |
2763 | syscall_arg.len = 0; |
2764 | syscall_arg.fmt = arg; |
2765 | if (field->flags & TEP_FIELD_IS_ARRAY) { |
2766 | int offset = field->offset; |
2767 | |
2768 | if (field->flags & TEP_FIELD_IS_DYNAMIC) { |
2769 | offset = format_field__intval(field, sample, needs_swap: evsel->needs_swap); |
2770 | syscall_arg.len = offset >> 16; |
2771 | offset &= 0xffff; |
2772 | if (tep_field_is_relative(flags: field->flags)) |
2773 | offset += field->offset + field->size; |
2774 | } |
2775 | |
2776 | val = (uintptr_t)(sample->raw_data + offset); |
2777 | } else |
2778 | val = format_field__intval(field, sample, needs_swap: evsel->needs_swap); |
2779 | /* |
2780 | * Some syscall args need some mask, most don't and |
2781 | * return val untouched. |
2782 | */ |
2783 | val = syscall_arg_fmt__mask_val(fmt: arg, arg: &syscall_arg, val); |
2784 | |
2785 | /* |
2786 | * Suppress this argument if its value is zero and |
2787 | * we don't have a string associated in an |
2788 | * strarray for it. |
2789 | */ |
2790 | if (val == 0 && |
2791 | !trace->show_zeros && |
2792 | !((arg->show_zero || |
2793 | arg->scnprintf == SCA_STRARRAY || |
2794 | arg->scnprintf == SCA_STRARRAYS) && |
2795 | arg->parm)) |
2796 | continue; |
2797 | |
2798 | printed += scnprintf(buf: bf + printed, size: size - printed, fmt: "%s" , printed ? ", " : "" ); |
2799 | |
2800 | if (trace->show_arg_names) |
2801 | printed += scnprintf(buf: bf + printed, size: size - printed, fmt: "%s: " , field->name); |
2802 | |
2803 | printed += syscall_arg_fmt__scnprintf_val(fmt: arg, bf: bf + printed, size: size - printed, arg: &syscall_arg, val); |
2804 | } |
2805 | |
2806 | return printed + fprintf(trace->output, "%s" , bf); |
2807 | } |
2808 | |
2809 | static int trace__event_handler(struct trace *trace, struct evsel *evsel, |
2810 | union perf_event *event __maybe_unused, |
2811 | struct perf_sample *sample) |
2812 | { |
2813 | struct thread *thread; |
2814 | int callchain_ret = 0; |
2815 | /* |
2816 | * Check if we called perf_evsel__disable(evsel) due to, for instance, |
2817 | * this event's max_events having been hit and this is an entry coming |
2818 | * from the ring buffer that we should discard, since the max events |
2819 | * have already been considered/printed. |
2820 | */ |
2821 | if (evsel->disabled) |
2822 | return 0; |
2823 | |
2824 | thread = machine__findnew_thread(machine: trace->host, pid: sample->pid, tid: sample->tid); |
2825 | |
2826 | if (sample->callchain) { |
2827 | struct callchain_cursor *cursor = get_tls_callchain_cursor(); |
2828 | |
2829 | callchain_ret = trace__resolve_callchain(trace, evsel, sample, cursor); |
2830 | if (callchain_ret == 0) { |
2831 | if (cursor->nr < trace->min_stack) |
2832 | goto out; |
2833 | callchain_ret = 1; |
2834 | } |
2835 | } |
2836 | |
2837 | trace__printf_interrupted_entry(trace); |
2838 | trace__fprintf_tstamp(trace, sample->time, trace->output); |
2839 | |
2840 | if (trace->trace_syscalls && trace->show_duration) |
2841 | fprintf(trace->output, "( ): " ); |
2842 | |
2843 | if (thread) |
2844 | trace__fprintf_comm_tid(trace, thread, trace->output); |
2845 | |
2846 | if (evsel == trace->syscalls.events.bpf_output) { |
2847 | int id = perf_evsel__sc_tp_uint(evsel, id, sample); |
2848 | struct syscall *sc = trace__syscall_info(trace, evsel, id); |
2849 | |
2850 | if (sc) { |
2851 | fprintf(trace->output, "%s(" , sc->name); |
2852 | trace__fprintf_sys_enter(trace, evsel, sample); |
2853 | fputc(')', trace->output); |
2854 | goto newline; |
2855 | } |
2856 | |
2857 | /* |
2858 | * XXX: Not having the associated syscall info or not finding/adding |
2859 | * the thread should never happen, but if it does... |
2860 | * fall thru and print it as a bpf_output event. |
2861 | */ |
2862 | } |
2863 | |
2864 | fprintf(trace->output, "%s(" , evsel->name); |
2865 | |
2866 | if (evsel__is_bpf_output(evsel)) { |
2867 | bpf_output__fprintf(trace, sample); |
2868 | } else if (evsel->tp_format) { |
2869 | if (strncmp(evsel->tp_format->name, "sys_enter_" , 10) || |
2870 | trace__fprintf_sys_enter(trace, evsel, sample)) { |
2871 | if (trace->libtraceevent_print) { |
2872 | event_format__fprintf(evsel->tp_format, sample->cpu, |
2873 | sample->raw_data, sample->raw_size, |
2874 | trace->output); |
2875 | } else { |
2876 | trace__fprintf_tp_fields(trace, evsel, sample, thread, NULL, augmented_args_size: 0); |
2877 | } |
2878 | } |
2879 | } |
2880 | |
2881 | newline: |
2882 | fprintf(trace->output, ")\n" ); |
2883 | |
2884 | if (callchain_ret > 0) |
2885 | trace__fprintf_callchain(trace, sample); |
2886 | else if (callchain_ret < 0) |
2887 | pr_err("Problem processing %s callchain, skipping...\n" , evsel__name(evsel)); |
2888 | |
2889 | ++trace->nr_events_printed; |
2890 | |
2891 | if (evsel->max_events != ULONG_MAX && ++evsel->nr_events_printed == evsel->max_events) { |
2892 | evsel__disable(evsel); |
2893 | evsel__close(evsel); |
2894 | } |
2895 | out: |
2896 | thread__put(thread); |
2897 | return 0; |
2898 | } |
2899 | |
2900 | static void print_location(FILE *f, struct perf_sample *sample, |
2901 | struct addr_location *al, |
2902 | bool print_dso, bool print_sym) |
2903 | { |
2904 | |
2905 | if ((verbose > 0 || print_dso) && al->map) |
2906 | fprintf(f, "%s@" , map__dso(map: al->map)->long_name); |
2907 | |
2908 | if ((verbose > 0 || print_sym) && al->sym) |
2909 | fprintf(f, "%s+0x%" PRIx64, al->sym->name, |
2910 | al->addr - al->sym->start); |
2911 | else if (al->map) |
2912 | fprintf(f, "0x%" PRIx64, al->addr); |
2913 | else |
2914 | fprintf(f, "0x%" PRIx64, sample->addr); |
2915 | } |
2916 | |
2917 | static int trace__pgfault(struct trace *trace, |
2918 | struct evsel *evsel, |
2919 | union perf_event *event __maybe_unused, |
2920 | struct perf_sample *sample) |
2921 | { |
2922 | struct thread *thread; |
2923 | struct addr_location al; |
2924 | char map_type = 'd'; |
2925 | struct thread_trace *ttrace; |
2926 | int err = -1; |
2927 | int callchain_ret = 0; |
2928 | |
2929 | addr_location__init(al: &al); |
2930 | thread = machine__findnew_thread(machine: trace->host, pid: sample->pid, tid: sample->tid); |
2931 | |
2932 | if (sample->callchain) { |
2933 | struct callchain_cursor *cursor = get_tls_callchain_cursor(); |
2934 | |
2935 | callchain_ret = trace__resolve_callchain(trace, evsel, sample, cursor); |
2936 | if (callchain_ret == 0) { |
2937 | if (cursor->nr < trace->min_stack) |
2938 | goto out_put; |
2939 | callchain_ret = 1; |
2940 | } |
2941 | } |
2942 | |
2943 | ttrace = thread__trace(thread, trace->output); |
2944 | if (ttrace == NULL) |
2945 | goto out_put; |
2946 | |
2947 | if (evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ) |
2948 | ttrace->pfmaj++; |
2949 | else |
2950 | ttrace->pfmin++; |
2951 | |
2952 | if (trace->summary_only) |
2953 | goto out; |
2954 | |
2955 | thread__find_symbol(thread, cpumode: sample->cpumode, addr: sample->ip, al: &al); |
2956 | |
2957 | trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output); |
2958 | |
2959 | fprintf(trace->output, "%sfault [" , |
2960 | evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ? |
2961 | "maj" : "min" ); |
2962 | |
2963 | print_location(trace->output, sample, &al, false, true); |
2964 | |
2965 | fprintf(trace->output, "] => " ); |
2966 | |
2967 | thread__find_symbol(thread, cpumode: sample->cpumode, addr: sample->addr, al: &al); |
2968 | |
2969 | if (!al.map) { |
2970 | thread__find_symbol(thread, cpumode: sample->cpumode, addr: sample->addr, al: &al); |
2971 | |
2972 | if (al.map) |
2973 | map_type = 'x'; |
2974 | else |
2975 | map_type = '?'; |
2976 | } |
2977 | |
2978 | print_location(trace->output, sample, &al, true, false); |
2979 | |
2980 | fprintf(trace->output, " (%c%c)\n" , map_type, al.level); |
2981 | |
2982 | if (callchain_ret > 0) |
2983 | trace__fprintf_callchain(trace, sample); |
2984 | else if (callchain_ret < 0) |
2985 | pr_err("Problem processing %s callchain, skipping...\n" , evsel__name(evsel)); |
2986 | |
2987 | ++trace->nr_events_printed; |
2988 | out: |
2989 | err = 0; |
2990 | out_put: |
2991 | thread__put(thread); |
2992 | addr_location__exit(al: &al); |
2993 | return err; |
2994 | } |
2995 | |
2996 | static void trace__set_base_time(struct trace *trace, |
2997 | struct evsel *evsel, |
2998 | struct perf_sample *sample) |
2999 | { |
3000 | /* |
3001 | * BPF events were not setting PERF_SAMPLE_TIME, so be more robust |
3002 | * and don't use sample->time unconditionally, we may end up having |
3003 | * some other event in the future without PERF_SAMPLE_TIME for good |
3004 | * reason, i.e. we may not be interested in its timestamps, just in |
3005 | * it taking place, picking some piece of information when it |
3006 | * appears in our event stream (vfs_getname comes to mind). |
3007 | */ |
3008 | if (trace->base_time == 0 && !trace->full_time && |
3009 | (evsel->core.attr.sample_type & PERF_SAMPLE_TIME)) |
3010 | trace->base_time = sample->time; |
3011 | } |
3012 | |
3013 | static int trace__process_sample(struct perf_tool *tool, |
3014 | union perf_event *event, |
3015 | struct perf_sample *sample, |
3016 | struct evsel *evsel, |
3017 | struct machine *machine __maybe_unused) |
3018 | { |
3019 | struct trace *trace = container_of(tool, struct trace, tool); |
3020 | struct thread *thread; |
3021 | int err = 0; |
3022 | |
3023 | tracepoint_handler handler = evsel->handler; |
3024 | |
3025 | thread = machine__findnew_thread(machine: trace->host, pid: sample->pid, tid: sample->tid); |
3026 | if (thread && thread__is_filtered(thread)) |
3027 | goto out; |
3028 | |
3029 | trace__set_base_time(trace, evsel, sample); |
3030 | |
3031 | if (handler) { |
3032 | ++trace->nr_events; |
3033 | handler(trace, evsel, event, sample); |
3034 | } |
3035 | out: |
3036 | thread__put(thread); |
3037 | return err; |
3038 | } |
3039 | |
3040 | static int trace__record(struct trace *trace, int argc, const char **argv) |
3041 | { |
3042 | unsigned int rec_argc, i, j; |
3043 | const char **rec_argv; |
3044 | const char * const record_args[] = { |
3045 | "record" , |
3046 | "-R" , |
3047 | "-m" , "1024" , |
3048 | "-c" , "1" , |
3049 | }; |
3050 | pid_t pid = getpid(); |
3051 | char *filter = asprintf__tp_filter_pids(1, &pid); |
3052 | const char * const sc_args[] = { "-e" , }; |
3053 | unsigned int sc_args_nr = ARRAY_SIZE(sc_args); |
3054 | const char * const majpf_args[] = { "-e" , "major-faults" }; |
3055 | unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args); |
3056 | const char * const minpf_args[] = { "-e" , "minor-faults" }; |
3057 | unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args); |
3058 | int err = -1; |
3059 | |
3060 | /* +3 is for the event string below and the pid filter */ |
3061 | rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 3 + |
3062 | majpf_args_nr + minpf_args_nr + argc; |
3063 | rec_argv = calloc(rec_argc + 1, sizeof(char *)); |
3064 | |
3065 | if (rec_argv == NULL || filter == NULL) |
3066 | goto out_free; |
3067 | |
3068 | j = 0; |
3069 | for (i = 0; i < ARRAY_SIZE(record_args); i++) |
3070 | rec_argv[j++] = record_args[i]; |
3071 | |
3072 | if (trace->trace_syscalls) { |
3073 | for (i = 0; i < sc_args_nr; i++) |
3074 | rec_argv[j++] = sc_args[i]; |
3075 | |
3076 | /* event string may be different for older kernels - e.g., RHEL6 */ |
3077 | if (is_valid_tracepoint(event_string: "raw_syscalls:sys_enter" )) |
3078 | rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit" ; |
3079 | else if (is_valid_tracepoint(event_string: "syscalls:sys_enter" )) |
3080 | rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit" ; |
3081 | else { |
3082 | pr_err("Neither raw_syscalls nor syscalls events exist.\n" ); |
3083 | goto out_free; |
3084 | } |
3085 | } |
3086 | |
3087 | rec_argv[j++] = "--filter" ; |
3088 | rec_argv[j++] = filter; |
3089 | |
3090 | if (trace->trace_pgfaults & TRACE_PFMAJ) |
3091 | for (i = 0; i < majpf_args_nr; i++) |
3092 | rec_argv[j++] = majpf_args[i]; |
3093 | |
3094 | if (trace->trace_pgfaults & TRACE_PFMIN) |
3095 | for (i = 0; i < minpf_args_nr; i++) |
3096 | rec_argv[j++] = minpf_args[i]; |
3097 | |
3098 | for (i = 0; i < (unsigned int)argc; i++) |
3099 | rec_argv[j++] = argv[i]; |
3100 | |
3101 | err = cmd_record(argc: j, argv: rec_argv); |
3102 | out_free: |
3103 | free(filter); |
3104 | free(rec_argv); |
3105 | return err; |
3106 | } |
3107 | |
3108 | static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp); |
3109 | |
3110 | static bool evlist__add_vfs_getname(struct evlist *evlist) |
3111 | { |
3112 | bool found = false; |
3113 | struct evsel *evsel, *tmp; |
3114 | struct parse_events_error err; |
3115 | int ret; |
3116 | |
3117 | parse_events_error__init(err: &err); |
3118 | ret = parse_events(evlist, str: "probe:vfs_getname*" , err: &err); |
3119 | parse_events_error__exit(err: &err); |
3120 | if (ret) |
3121 | return false; |
3122 | |
3123 | evlist__for_each_entry_safe(evlist, evsel, tmp) { |
3124 | if (!strstarts(str: evsel__name(evsel), prefix: "probe:vfs_getname" )) |
3125 | continue; |
3126 | |
3127 | if (evsel__field(evsel, name: "pathname" )) { |
3128 | evsel->handler = trace__vfs_getname; |
3129 | found = true; |
3130 | continue; |
3131 | } |
3132 | |
3133 | list_del_init(entry: &evsel->core.node); |
3134 | evsel->evlist = NULL; |
3135 | evsel__delete(evsel); |
3136 | } |
3137 | |
3138 | return found; |
3139 | } |
3140 | |
3141 | static struct evsel *evsel__new_pgfault(u64 config) |
3142 | { |
3143 | struct evsel *evsel; |
3144 | struct perf_event_attr attr = { |
3145 | .type = PERF_TYPE_SOFTWARE, |
3146 | .mmap_data = 1, |
3147 | }; |
3148 | |
3149 | attr.config = config; |
3150 | attr.sample_period = 1; |
3151 | |
3152 | event_attr_init(attr: &attr); |
3153 | |
3154 | evsel = evsel__new(attr: &attr); |
3155 | if (evsel) |
3156 | evsel->handler = trace__pgfault; |
3157 | |
3158 | return evsel; |
3159 | } |
3160 | |
3161 | static void evlist__free_syscall_tp_fields(struct evlist *evlist) |
3162 | { |
3163 | struct evsel *evsel; |
3164 | |
3165 | evlist__for_each_entry(evlist, evsel) { |
3166 | evsel_trace__delete(et: evsel->priv); |
3167 | evsel->priv = NULL; |
3168 | } |
3169 | } |
3170 | |
3171 | static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample) |
3172 | { |
3173 | const u32 type = event->header.type; |
3174 | struct evsel *evsel; |
3175 | |
3176 | if (type != PERF_RECORD_SAMPLE) { |
3177 | trace__process_event(trace, machine: trace->host, event, sample); |
3178 | return; |
3179 | } |
3180 | |
3181 | evsel = evlist__id2evsel(evlist: trace->evlist, id: sample->id); |
3182 | if (evsel == NULL) { |
3183 | fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n" , sample->id); |
3184 | return; |
3185 | } |
3186 | |
3187 | if (evswitch__discard(evswitch: &trace->evswitch, evsel)) |
3188 | return; |
3189 | |
3190 | trace__set_base_time(trace, evsel, sample); |
3191 | |
3192 | if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT && |
3193 | sample->raw_data == NULL) { |
3194 | fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n" , |
3195 | evsel__name(evsel), sample->tid, |
3196 | sample->cpu, sample->raw_size); |
3197 | } else { |
3198 | tracepoint_handler handler = evsel->handler; |
3199 | handler(trace, evsel, event, sample); |
3200 | } |
3201 | |
3202 | if (trace->nr_events_printed >= trace->max_events && trace->max_events != ULONG_MAX) |
3203 | interrupted = true; |
3204 | } |
3205 | |
3206 | static int trace__add_syscall_newtp(struct trace *trace) |
3207 | { |
3208 | int ret = -1; |
3209 | struct evlist *evlist = trace->evlist; |
3210 | struct evsel *sys_enter, *sys_exit; |
3211 | |
3212 | sys_enter = perf_evsel__raw_syscall_newtp(direction: "sys_enter" , handler: trace__sys_enter); |
3213 | if (sys_enter == NULL) |
3214 | goto out; |
3215 | |
3216 | if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args)) |
3217 | goto out_delete_sys_enter; |
3218 | |
3219 | sys_exit = perf_evsel__raw_syscall_newtp(direction: "sys_exit" , handler: trace__sys_exit); |
3220 | if (sys_exit == NULL) |
3221 | goto out_delete_sys_enter; |
3222 | |
3223 | if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret)) |
3224 | goto out_delete_sys_exit; |
3225 | |
3226 | evsel__config_callchain(evsel: sys_enter, opts: &trace->opts, callchain: &callchain_param); |
3227 | evsel__config_callchain(evsel: sys_exit, opts: &trace->opts, callchain: &callchain_param); |
3228 | |
3229 | evlist__add(evlist, entry: sys_enter); |
3230 | evlist__add(evlist, entry: sys_exit); |
3231 | |
3232 | if (callchain_param.enabled && !trace->kernel_syscallchains) { |
3233 | /* |
3234 | * We're interested only in the user space callchain |
3235 | * leading to the syscall, allow overriding that for |
3236 | * debugging reasons using --kernel_syscall_callchains |
3237 | */ |
3238 | sys_exit->core.attr.exclude_callchain_kernel = 1; |
3239 | } |
3240 | |
3241 | trace->syscalls.events.sys_enter = sys_enter; |
3242 | trace->syscalls.events.sys_exit = sys_exit; |
3243 | |
3244 | ret = 0; |
3245 | out: |
3246 | return ret; |
3247 | |
3248 | out_delete_sys_exit: |
3249 | evsel__delete_priv(evsel: sys_exit); |
3250 | out_delete_sys_enter: |
3251 | evsel__delete_priv(evsel: sys_enter); |
3252 | goto out; |
3253 | } |
3254 | |
3255 | static int trace__set_ev_qualifier_tp_filter(struct trace *trace) |
3256 | { |
3257 | int err = -1; |
3258 | struct evsel *sys_exit; |
3259 | char *filter = asprintf_expr_inout_ints("id" , !trace->not_ev_qualifier, |
3260 | trace->ev_qualifier_ids.nr, |
3261 | trace->ev_qualifier_ids.entries); |
3262 | |
3263 | if (filter == NULL) |
3264 | goto out_enomem; |
3265 | |
3266 | if (!evsel__append_tp_filter(evsel: trace->syscalls.events.sys_enter, filter)) { |
3267 | sys_exit = trace->syscalls.events.sys_exit; |
3268 | err = evsel__append_tp_filter(evsel: sys_exit, filter); |
3269 | } |
3270 | |
3271 | free(filter); |
3272 | out: |
3273 | return err; |
3274 | out_enomem: |
3275 | errno = ENOMEM; |
3276 | goto out; |
3277 | } |
3278 | |
3279 | #ifdef HAVE_BPF_SKEL |
3280 | static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name) |
3281 | { |
3282 | struct bpf_program *pos, *prog = NULL; |
3283 | const char *sec_name; |
3284 | |
3285 | if (trace->skel->obj == NULL) |
3286 | return NULL; |
3287 | |
3288 | bpf_object__for_each_program(pos, trace->skel->obj) { |
3289 | sec_name = bpf_program__section_name(pos); |
3290 | if (sec_name && !strcmp(sec_name, name)) { |
3291 | prog = pos; |
3292 | break; |
3293 | } |
3294 | } |
3295 | |
3296 | return prog; |
3297 | } |
3298 | |
3299 | static struct bpf_program *trace__find_syscall_bpf_prog(struct trace *trace, struct syscall *sc, |
3300 | const char *prog_name, const char *type) |
3301 | { |
3302 | struct bpf_program *prog; |
3303 | |
3304 | if (prog_name == NULL) { |
3305 | char default_prog_name[256]; |
3306 | scnprintf(default_prog_name, sizeof(default_prog_name), "tp/syscalls/sys_%s_%s" , type, sc->name); |
3307 | prog = trace__find_bpf_program_by_title(trace, default_prog_name); |
3308 | if (prog != NULL) |
3309 | goto out_found; |
3310 | if (sc->fmt && sc->fmt->alias) { |
3311 | scnprintf(default_prog_name, sizeof(default_prog_name), "tp/syscalls/sys_%s_%s" , type, sc->fmt->alias); |
3312 | prog = trace__find_bpf_program_by_title(trace, default_prog_name); |
3313 | if (prog != NULL) |
3314 | goto out_found; |
3315 | } |
3316 | goto out_unaugmented; |
3317 | } |
3318 | |
3319 | prog = trace__find_bpf_program_by_title(trace, prog_name); |
3320 | |
3321 | if (prog != NULL) { |
3322 | out_found: |
3323 | return prog; |
3324 | } |
3325 | |
3326 | pr_debug("Couldn't find BPF prog \"%s\" to associate with syscalls:sys_%s_%s, not augmenting it\n" , |
3327 | prog_name, type, sc->name); |
3328 | out_unaugmented: |
3329 | return trace->skel->progs.syscall_unaugmented; |
3330 | } |
3331 | |
3332 | static void trace__init_syscall_bpf_progs(struct trace *trace, int id) |
3333 | { |
3334 | struct syscall *sc = trace__syscall_info(trace, NULL, id); |
3335 | |
3336 | if (sc == NULL) |
3337 | return; |
3338 | |
3339 | sc->bpf_prog.sys_enter = trace__find_syscall_bpf_prog(trace, sc, sc->fmt ? sc->fmt->bpf_prog_name.sys_enter : NULL, "enter" ); |
3340 | sc->bpf_prog.sys_exit = trace__find_syscall_bpf_prog(trace, sc, sc->fmt ? sc->fmt->bpf_prog_name.sys_exit : NULL, "exit" ); |
3341 | } |
3342 | |
3343 | static int trace__bpf_prog_sys_enter_fd(struct trace *trace, int id) |
3344 | { |
3345 | struct syscall *sc = trace__syscall_info(trace, NULL, id); |
3346 | return sc ? bpf_program__fd(sc->bpf_prog.sys_enter) : bpf_program__fd(trace->skel->progs.syscall_unaugmented); |
3347 | } |
3348 | |
3349 | static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int id) |
3350 | { |
3351 | struct syscall *sc = trace__syscall_info(trace, NULL, id); |
3352 | return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->skel->progs.syscall_unaugmented); |
3353 | } |
3354 | |
3355 | static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *sc) |
3356 | { |
3357 | struct tep_format_field *field, *candidate_field; |
3358 | int id; |
3359 | |
3360 | /* |
3361 | * We're only interested in syscalls that have a pointer: |
3362 | */ |
3363 | for (field = sc->args; field; field = field->next) { |
3364 | if (field->flags & TEP_FIELD_IS_POINTER) |
3365 | goto try_to_find_pair; |
3366 | } |
3367 | |
3368 | return NULL; |
3369 | |
3370 | try_to_find_pair: |
3371 | for (id = 0; id < trace->sctbl->syscalls.nr_entries; ++id) { |
3372 | struct syscall *pair = trace__syscall_info(trace, NULL, id); |
3373 | struct bpf_program *pair_prog; |
3374 | bool is_candidate = false; |
3375 | |
3376 | if (pair == NULL || pair == sc || |
3377 | pair->bpf_prog.sys_enter == trace->skel->progs.syscall_unaugmented) |
3378 | continue; |
3379 | |
3380 | for (field = sc->args, candidate_field = pair->args; |
3381 | field && candidate_field; field = field->next, candidate_field = candidate_field->next) { |
3382 | bool is_pointer = field->flags & TEP_FIELD_IS_POINTER, |
3383 | candidate_is_pointer = candidate_field->flags & TEP_FIELD_IS_POINTER; |
3384 | |
3385 | if (is_pointer) { |
3386 | if (!candidate_is_pointer) { |
3387 | // The candidate just doesn't copies our pointer arg, might copy other pointers we want. |
3388 | continue; |
3389 | } |
3390 | } else { |
3391 | if (candidate_is_pointer) { |
3392 | // The candidate might copy a pointer we don't have, skip it. |
3393 | goto next_candidate; |
3394 | } |
3395 | continue; |
3396 | } |
3397 | |
3398 | if (strcmp(field->type, candidate_field->type)) |
3399 | goto next_candidate; |
3400 | |
3401 | /* |
3402 | * This is limited in the BPF program but sys_write |
3403 | * uses "const char *" for its "buf" arg so we need to |
3404 | * use some heuristic that is kinda future proof... |
3405 | */ |
3406 | if (strcmp(field->type, "const char *" ) == 0 && |
3407 | !(strstr(field->name, "name" ) || |
3408 | strstr(field->name, "path" ) || |
3409 | strstr(field->name, "file" ) || |
3410 | strstr(field->name, "root" ) || |
3411 | strstr(field->name, "description" ))) |
3412 | goto next_candidate; |
3413 | |
3414 | is_candidate = true; |
3415 | } |
3416 | |
3417 | if (!is_candidate) |
3418 | goto next_candidate; |
3419 | |
3420 | /* |
3421 | * Check if the tentative pair syscall augmenter has more pointers, if it has, |
3422 | * then it may be collecting that and we then can't use it, as it would collect |
3423 | * more than what is common to the two syscalls. |
3424 | */ |
3425 | if (candidate_field) { |
3426 | for (candidate_field = candidate_field->next; candidate_field; candidate_field = candidate_field->next) |
3427 | if (candidate_field->flags & TEP_FIELD_IS_POINTER) |
3428 | goto next_candidate; |
3429 | } |
3430 | |
3431 | pair_prog = pair->bpf_prog.sys_enter; |
3432 | /* |
3433 | * If the pair isn't enabled, then its bpf_prog.sys_enter will not |
3434 | * have been searched for, so search it here and if it returns the |
3435 | * unaugmented one, then ignore it, otherwise we'll reuse that BPF |
3436 | * program for a filtered syscall on a non-filtered one. |
3437 | * |
3438 | * For instance, we have "!syscalls:sys_enter_renameat" and that is |
3439 | * useful for "renameat2". |
3440 | */ |
3441 | if (pair_prog == NULL) { |
3442 | pair_prog = trace__find_syscall_bpf_prog(trace, pair, pair->fmt ? pair->fmt->bpf_prog_name.sys_enter : NULL, "enter" ); |
3443 | if (pair_prog == trace->skel->progs.syscall_unaugmented) |
3444 | goto next_candidate; |
3445 | } |
3446 | |
3447 | pr_debug("Reusing \"%s\" BPF sys_enter augmenter for \"%s\"\n" , pair->name, sc->name); |
3448 | return pair_prog; |
3449 | next_candidate: |
3450 | continue; |
3451 | } |
3452 | |
3453 | return NULL; |
3454 | } |
3455 | |
3456 | static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace) |
3457 | { |
3458 | int map_enter_fd = bpf_map__fd(trace->skel->maps.syscalls_sys_enter); |
3459 | int map_exit_fd = bpf_map__fd(trace->skel->maps.syscalls_sys_exit); |
3460 | int err = 0, key; |
3461 | |
3462 | for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) { |
3463 | int prog_fd; |
3464 | |
3465 | if (!trace__syscall_enabled(trace, key)) |
3466 | continue; |
3467 | |
3468 | trace__init_syscall_bpf_progs(trace, key); |
3469 | |
3470 | // It'll get at least the "!raw_syscalls:unaugmented" |
3471 | prog_fd = trace__bpf_prog_sys_enter_fd(trace, key); |
3472 | err = bpf_map_update_elem(map_enter_fd, &key, &prog_fd, BPF_ANY); |
3473 | if (err) |
3474 | break; |
3475 | prog_fd = trace__bpf_prog_sys_exit_fd(trace, key); |
3476 | err = bpf_map_update_elem(map_exit_fd, &key, &prog_fd, BPF_ANY); |
3477 | if (err) |
3478 | break; |
3479 | } |
3480 | |
3481 | /* |
3482 | * Now lets do a second pass looking for enabled syscalls without |
3483 | * an augmenter that have a signature that is a superset of another |
3484 | * syscall with an augmenter so that we can auto-reuse it. |
3485 | * |
3486 | * I.e. if we have an augmenter for the "open" syscall that has |
3487 | * this signature: |
3488 | * |
3489 | * int open(const char *pathname, int flags, mode_t mode); |
3490 | * |
3491 | * I.e. that will collect just the first string argument, then we |
3492 | * can reuse it for the 'creat' syscall, that has this signature: |
3493 | * |
3494 | * int creat(const char *pathname, mode_t mode); |
3495 | * |
3496 | * and for: |
3497 | * |
3498 | * int stat(const char *pathname, struct stat *statbuf); |
3499 | * int lstat(const char *pathname, struct stat *statbuf); |
3500 | * |
3501 | * Because the 'open' augmenter will collect the first arg as a string, |
3502 | * and leave alone all the other args, which already helps with |
3503 | * beautifying 'stat' and 'lstat''s pathname arg. |
3504 | * |
3505 | * Then, in time, when 'stat' gets an augmenter that collects both |
3506 | * first and second arg (this one on the raw_syscalls:sys_exit prog |
3507 | * array tail call, then that one will be used. |
3508 | */ |
3509 | for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) { |
3510 | struct syscall *sc = trace__syscall_info(trace, NULL, key); |
3511 | struct bpf_program *pair_prog; |
3512 | int prog_fd; |
3513 | |
3514 | if (sc == NULL || sc->bpf_prog.sys_enter == NULL) |
3515 | continue; |
3516 | |
3517 | /* |
3518 | * For now we're just reusing the sys_enter prog, and if it |
3519 | * already has an augmenter, we don't need to find one. |
3520 | */ |
3521 | if (sc->bpf_prog.sys_enter != trace->skel->progs.syscall_unaugmented) |
3522 | continue; |
3523 | |
3524 | /* |
3525 | * Look at all the other syscalls for one that has a signature |
3526 | * that is close enough that we can share: |
3527 | */ |
3528 | pair_prog = trace__find_usable_bpf_prog_entry(trace, sc); |
3529 | if (pair_prog == NULL) |
3530 | continue; |
3531 | |
3532 | sc->bpf_prog.sys_enter = pair_prog; |
3533 | |
3534 | /* |
3535 | * Update the BPF_MAP_TYPE_PROG_SHARED for raw_syscalls:sys_enter |
3536 | * with the fd for the program we're reusing: |
3537 | */ |
3538 | prog_fd = bpf_program__fd(sc->bpf_prog.sys_enter); |
3539 | err = bpf_map_update_elem(map_enter_fd, &key, &prog_fd, BPF_ANY); |
3540 | if (err) |
3541 | break; |
3542 | } |
3543 | |
3544 | return err; |
3545 | } |
3546 | #endif // HAVE_BPF_SKEL |
3547 | |
3548 | static int trace__set_ev_qualifier_filter(struct trace *trace) |
3549 | { |
3550 | if (trace->syscalls.events.sys_enter) |
3551 | return trace__set_ev_qualifier_tp_filter(trace); |
3552 | return 0; |
3553 | } |
3554 | |
3555 | static int bpf_map__set_filter_pids(struct bpf_map *map __maybe_unused, |
3556 | size_t npids __maybe_unused, pid_t *pids __maybe_unused) |
3557 | { |
3558 | int err = 0; |
3559 | #ifdef HAVE_LIBBPF_SUPPORT |
3560 | bool value = true; |
3561 | int map_fd = bpf_map__fd(map); |
3562 | size_t i; |
3563 | |
3564 | for (i = 0; i < npids; ++i) { |
3565 | err = bpf_map_update_elem(map_fd, &pids[i], &value, BPF_ANY); |
3566 | if (err) |
3567 | break; |
3568 | } |
3569 | #endif |
3570 | return err; |
3571 | } |
3572 | |
3573 | static int trace__set_filter_loop_pids(struct trace *trace) |
3574 | { |
3575 | unsigned int nr = 1, err; |
3576 | pid_t pids[32] = { |
3577 | getpid(), |
3578 | }; |
3579 | struct thread *thread = machine__find_thread(machine: trace->host, pid: pids[0], tid: pids[0]); |
3580 | |
3581 | while (thread && nr < ARRAY_SIZE(pids)) { |
3582 | struct thread *parent = machine__find_thread(machine: trace->host, |
3583 | pid: thread__ppid(thread), |
3584 | tid: thread__ppid(thread)); |
3585 | |
3586 | if (parent == NULL) |
3587 | break; |
3588 | |
3589 | if (!strcmp(thread__comm_str(thread: parent), "sshd" ) || |
3590 | strstarts(str: thread__comm_str(thread: parent), prefix: "gnome-terminal" )) { |
3591 | pids[nr++] = thread__tid(thread: parent); |
3592 | break; |
3593 | } |
3594 | thread = parent; |
3595 | } |
3596 | |
3597 | err = evlist__append_tp_filter_pids(evlist: trace->evlist, npids: nr, pids); |
3598 | if (!err && trace->filter_pids.map) |
3599 | err = bpf_map__set_filter_pids(map: trace->filter_pids.map, npids: nr, pids); |
3600 | |
3601 | return err; |
3602 | } |
3603 | |
3604 | static int trace__set_filter_pids(struct trace *trace) |
3605 | { |
3606 | int err = 0; |
3607 | /* |
3608 | * Better not use !target__has_task() here because we need to cover the |
3609 | * case where no threads were specified in the command line, but a |
3610 | * workload was, and in that case we will fill in the thread_map when |
3611 | * we fork the workload in evlist__prepare_workload. |
3612 | */ |
3613 | if (trace->filter_pids.nr > 0) { |
3614 | err = evlist__append_tp_filter_pids(evlist: trace->evlist, npids: trace->filter_pids.nr, |
3615 | pids: trace->filter_pids.entries); |
3616 | if (!err && trace->filter_pids.map) { |
3617 | err = bpf_map__set_filter_pids(map: trace->filter_pids.map, npids: trace->filter_pids.nr, |
3618 | pids: trace->filter_pids.entries); |
3619 | } |
3620 | } else if (perf_thread_map__pid(trace->evlist->core.threads, 0) == -1) { |
3621 | err = trace__set_filter_loop_pids(trace); |
3622 | } |
3623 | |
3624 | return err; |
3625 | } |
3626 | |
3627 | static int __trace__deliver_event(struct trace *trace, union perf_event *event) |
3628 | { |
3629 | struct evlist *evlist = trace->evlist; |
3630 | struct perf_sample sample; |
3631 | int err = evlist__parse_sample(evlist, event, sample: &sample); |
3632 | |
3633 | if (err) |
3634 | fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n" , err); |
3635 | else |
3636 | trace__handle_event(trace, event, sample: &sample); |
3637 | |
3638 | return 0; |
3639 | } |
3640 | |
3641 | static int __trace__flush_events(struct trace *trace) |
3642 | { |
3643 | u64 first = ordered_events__first_time(oe: &trace->oe.data); |
3644 | u64 flush = trace->oe.last - NSEC_PER_SEC; |
3645 | |
3646 | /* Is there some thing to flush.. */ |
3647 | if (first && first < flush) |
3648 | return ordered_events__flush_time(oe: &trace->oe.data, timestamp: flush); |
3649 | |
3650 | return 0; |
3651 | } |
3652 | |
3653 | static int trace__flush_events(struct trace *trace) |
3654 | { |
3655 | return !trace->sort_events ? 0 : __trace__flush_events(trace); |
3656 | } |
3657 | |
3658 | static int trace__deliver_event(struct trace *trace, union perf_event *event) |
3659 | { |
3660 | int err; |
3661 | |
3662 | if (!trace->sort_events) |
3663 | return __trace__deliver_event(trace, event); |
3664 | |
3665 | err = evlist__parse_sample_timestamp(evlist: trace->evlist, event, timestamp: &trace->oe.last); |
3666 | if (err && err != -1) |
3667 | return err; |
3668 | |
3669 | err = ordered_events__queue(oe: &trace->oe.data, event, timestamp: trace->oe.last, file_offset: 0, NULL); |
3670 | if (err) |
3671 | return err; |
3672 | |
3673 | return trace__flush_events(trace); |
3674 | } |
3675 | |
3676 | static int ordered_events__deliver_event(struct ordered_events *oe, |
3677 | struct ordered_event *event) |
3678 | { |
3679 | struct trace *trace = container_of(oe, struct trace, oe.data); |
3680 | |
3681 | return __trace__deliver_event(trace, event: event->event); |
3682 | } |
3683 | |
3684 | static struct syscall_arg_fmt *evsel__find_syscall_arg_fmt_by_name(struct evsel *evsel, char *arg) |
3685 | { |
3686 | struct tep_format_field *field; |
3687 | struct syscall_arg_fmt *fmt = __evsel__syscall_arg_fmt(evsel); |
3688 | |
3689 | if (evsel->tp_format == NULL || fmt == NULL) |
3690 | return NULL; |
3691 | |
3692 | for (field = evsel->tp_format->format.fields; field; field = field->next, ++fmt) |
3693 | if (strcmp(field->name, arg) == 0) |
3694 | return fmt; |
3695 | |
3696 | return NULL; |
3697 | } |
3698 | |
3699 | static int trace__expand_filter(struct trace *trace __maybe_unused, struct evsel *evsel) |
3700 | { |
3701 | char *tok, *left = evsel->filter, *new_filter = evsel->filter; |
3702 | |
3703 | while ((tok = strpbrk(left, "=<>!" )) != NULL) { |
3704 | char *right = tok + 1, *right_end; |
3705 | |
3706 | if (*right == '=') |
3707 | ++right; |
3708 | |
3709 | while (isspace(*right)) |
3710 | ++right; |
3711 | |
3712 | if (*right == '\0') |
3713 | break; |
3714 | |
3715 | while (!isalpha(*left)) |
3716 | if (++left == tok) { |
3717 | /* |
3718 | * Bail out, can't find the name of the argument that is being |
3719 | * used in the filter, let it try to set this filter, will fail later. |
3720 | */ |
3721 | return 0; |
3722 | } |
3723 | |
3724 | right_end = right + 1; |
3725 | while (isalnum(*right_end) || *right_end == '_' || *right_end == '|') |
3726 | ++right_end; |
3727 | |
3728 | if (isalpha(*right)) { |
3729 | struct syscall_arg_fmt *fmt; |
3730 | int left_size = tok - left, |
3731 | right_size = right_end - right; |
3732 | char arg[128]; |
3733 | |
3734 | while (isspace(left[left_size - 1])) |
3735 | --left_size; |
3736 | |
3737 | scnprintf(buf: arg, size: sizeof(arg), fmt: "%.*s" , left_size, left); |
3738 | |
3739 | fmt = evsel__find_syscall_arg_fmt_by_name(evsel, arg); |
3740 | if (fmt == NULL) { |
3741 | pr_err("\"%s\" not found in \"%s\", can't set filter \"%s\"\n" , |
3742 | arg, evsel->name, evsel->filter); |
3743 | return -1; |
3744 | } |
3745 | |
3746 | pr_debug2("trying to expand \"%s\" \"%.*s\" \"%.*s\" -> " , |
3747 | arg, (int)(right - tok), tok, right_size, right); |
3748 | |
3749 | if (fmt->strtoul) { |
3750 | u64 val; |
3751 | struct syscall_arg syscall_arg = { |
3752 | .parm = fmt->parm, |
3753 | }; |
3754 | |
3755 | if (fmt->strtoul(right, right_size, &syscall_arg, &val)) { |
3756 | char *n, expansion[19]; |
3757 | int expansion_lenght = scnprintf(expansion, sizeof(expansion), "%#" PRIx64, val); |
3758 | int expansion_offset = right - new_filter; |
3759 | |
3760 | pr_debug("%s" , expansion); |
3761 | |
3762 | if (asprintf(&n, "%.*s%s%s" , expansion_offset, new_filter, expansion, right_end) < 0) { |
3763 | pr_debug(" out of memory!\n" ); |
3764 | free(new_filter); |
3765 | return -1; |
3766 | } |
3767 | if (new_filter != evsel->filter) |
3768 | free(new_filter); |
3769 | left = n + expansion_offset + expansion_lenght; |
3770 | new_filter = n; |
3771 | } else { |
3772 | pr_err("\"%.*s\" not found for \"%s\" in \"%s\", can't set filter \"%s\"\n" , |
3773 | right_size, right, arg, evsel->name, evsel->filter); |
3774 | return -1; |
3775 | } |
3776 | } else { |
3777 | pr_err("No resolver (strtoul) for \"%s\" in \"%s\", can't set filter \"%s\"\n" , |
3778 | arg, evsel->name, evsel->filter); |
3779 | return -1; |
3780 | } |
3781 | |
3782 | pr_debug("\n" ); |
3783 | } else { |
3784 | left = right_end; |
3785 | } |
3786 | } |
3787 | |
3788 | if (new_filter != evsel->filter) { |
3789 | pr_debug("New filter for %s: %s\n" , evsel->name, new_filter); |
3790 | evsel__set_filter(evsel, filter: new_filter); |
3791 | free(new_filter); |
3792 | } |
3793 | |
3794 | return 0; |
3795 | } |
3796 | |
3797 | static int trace__expand_filters(struct trace *trace, struct evsel **err_evsel) |
3798 | { |
3799 | struct evlist *evlist = trace->evlist; |
3800 | struct evsel *evsel; |
3801 | |
3802 | evlist__for_each_entry(evlist, evsel) { |
3803 | if (evsel->filter == NULL) |
3804 | continue; |
3805 | |
3806 | if (trace__expand_filter(trace, evsel)) { |
3807 | *err_evsel = evsel; |
3808 | return -1; |
3809 | } |
3810 | } |
3811 | |
3812 | return 0; |
3813 | } |
3814 | |
/*
 * Live tracing: finish setting up the events in trace->evlist, optionally
 * prepare and start the workload described by argv (when argc > 0), then
 * keep reading events from the mmap'ed ring buffers and delivering them
 * until 'interrupted' is set or, once 'done' is set, the buffers have been
 * drained. If requested, the thread summary and the tool stats are printed
 * at the end.
 *
 * On every exit path the evlist is deleted and trace->evlist is set to NULL.
 *
 * Returns err as left by the last step that set it; the error labels at the
 * bottom print a message to trace->output before going through the common
 * cleanup.
 */
3815 | static int trace__run(struct trace *trace, int argc, const char **argv) |
3816 | { |
3817 | struct evlist *evlist = trace->evlist; |
3818 | struct evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL; |
3819 | int err = -1, i; |
3820 | unsigned long before; |
3821 | const bool forks = argc > 0; |
3822 | bool draining = false; |
3823 | |
3824 | trace->live = true; |
3825 | |
/*
 * When not using the augmented raw syscalls, add the syscall events and,
 * if tracing syscalls, the vfs_getname event here.
 */
3826 | if (!trace->raw_augmented_syscalls) { |
3827 | if (trace->trace_syscalls && trace__add_syscall_newtp(trace)) |
3828 | goto out_error_raw_syscalls; |
3829 | |
3830 | if (trace->trace_syscalls) |
3831 | trace->vfs_getname = evlist__add_vfs_getname(evlist); |
3832 | } |
3833 | |
/* Optional major/minor page fault software events. */
3834 | if ((trace->trace_pgfaults & TRACE_PFMAJ)) { |
3835 | pgfault_maj = evsel__new_pgfault(config: PERF_COUNT_SW_PAGE_FAULTS_MAJ); |
3836 | if (pgfault_maj == NULL) |
3837 | goto out_error_mem; |
3838 | evsel__config_callchain(evsel: pgfault_maj, opts: &trace->opts, callchain: &callchain_param); |
3839 | evlist__add(evlist, entry: pgfault_maj); |
3840 | } |
3841 | |
3842 | if ((trace->trace_pgfaults & TRACE_PFMIN)) { |
3843 | pgfault_min = evsel__new_pgfault(config: PERF_COUNT_SW_PAGE_FAULTS_MIN); |
3844 | if (pgfault_min == NULL) |
3845 | goto out_error_mem; |
3846 | evsel__config_callchain(evsel: pgfault_min, opts: &trace->opts, callchain: &callchain_param); |
3847 | evlist__add(evlist, entry: pgfault_min); |
3848 | } |
3849 | |
3850 | /* Enable ignoring missing threads when -u/-p option is defined. */ |
3851 | trace->opts.ignore_missing_thread = trace->opts.target.uid != UINT_MAX || trace->opts.target.pid; |
3852 | |
3853 | if (trace->sched && |
3854 | evlist__add_newtp(evlist, "sched" , "sched_stat_runtime" , trace__sched_stat_runtime)) |
3855 | goto out_error_sched_stat_runtime; |
3856 | /* |
3857 | * If a global cgroup was set, apply it to all the events without an |
3858 | * explicit cgroup. I.e.: |
3859 | * |
3860 | * trace -G A -e sched:*switch |
3861 | * |
3862 | * Will set all raw_syscalls:sys_{enter,exit}, pgfault, vfs_getname, etc |
3863 | * _and_ sched:sched_switch to the 'A' cgroup, while: |
3864 | * |
3865 | * trace -e sched:*switch -G A |
3866 | * |
3867 | * will only set the sched:sched_switch event to the 'A' cgroup, all the |
3868 | * other events (raw_syscalls:sys_{enter,exit}, etc are left "without" |
3869 | * a cgroup (on the root cgroup, sys wide, etc). |
3870 | * |
3871 | * Multiple cgroups: |
3872 | * |
3873 | * trace -G A -e sched:*switch -G B |
3874 | * |
3875 | * the syscall ones go to the 'A' cgroup, the sched:sched_switch goes |
3876 | * to the 'B' cgroup. |
3877 | * |
3878 | * evlist__set_default_cgroup() grabs a reference of the passed cgroup |
3879 | * only for the evsels still without a cgroup, i.e. evsel->cgroup == NULL. |
3880 | */ |
3881 | if (trace->cgroup) |
3882 | evlist__set_default_cgroup(evlist: trace->evlist, cgroup: trace->cgroup); |
3883 | |
3884 | err = evlist__create_maps(evlist, target: &trace->opts.target); |
3885 | if (err < 0) { |
3886 | fprintf(trace->output, "Problems parsing the target to trace, check your options!\n" ); |
3887 | goto out_delete_evlist; |
3888 | } |
3889 | |
3890 | err = trace__symbols_init(trace, evlist); |
3891 | if (err < 0) { |
3892 | fprintf(trace->output, "Problems initializing symbol libraries!\n" ); |
3893 | goto out_delete_evlist; |
3894 | } |
3895 | |
3896 | evlist__config(evlist, opts: &trace->opts, callchain: &callchain_param); |
3897 | |
/* The workload is only prepared here, it is started further below. */
3898 | if (forks) { |
3899 | err = evlist__prepare_workload(evlist, target: &trace->opts.target, argv, pipe_output: false, NULL); |
3900 | if (err < 0) { |
3901 | fprintf(trace->output, "Couldn't run the workload!\n" ); |
3902 | goto out_delete_evlist; |
3903 | } |
3904 | workload_pid = evlist->workload.pid; |
3905 | } |
3906 | |
3907 | err = evlist__open(evlist); |
3908 | if (err < 0) |
3909 | goto out_error_open; |
3910 | #ifdef HAVE_BPF_SKEL |
3911 | if (trace->syscalls.events.bpf_output) { |
3912 | struct perf_cpu cpu; |
3913 | |
3914 | /* |
3915 | * Set up the __augmented_syscalls__ BPF map to hold for each |
3916 | * CPU the bpf-output event's file descriptor. |
3917 | */ |
3918 | perf_cpu_map__for_each_cpu(cpu, i, trace->syscalls.events.bpf_output->core.cpus) { |
3919 | bpf_map__update_elem(trace->skel->maps.__augmented_syscalls__, |
3920 | &cpu.cpu, sizeof(int), |
3921 | xyarray__entry(trace->syscalls.events.bpf_output->core.fd, |
3922 | cpu.cpu, 0), |
3923 | sizeof(__u32), BPF_ANY); |
3924 | } |
3925 | } |
3926 | #endif |
3927 | err = trace__set_filter_pids(trace); |
3928 | if (err < 0) |
3929 | goto out_error_mem; |
3930 | |
/* NOTE: the result of the prog array maps setup is not checked here. */
3931 | #ifdef HAVE_BPF_SKEL |
3932 | if (trace->skel && trace->skel->progs.sys_enter) |
3933 | trace__init_syscalls_bpf_prog_array_maps(trace); |
3934 | #endif |
3935 | |
3936 | if (trace->ev_qualifier_ids.nr > 0) { |
3937 | err = trace__set_ev_qualifier_filter(trace); |
3938 | if (err < 0) |
3939 | goto out_errno; |
3940 | |
3941 | if (trace->syscalls.events.sys_exit) { |
3942 | pr_debug("event qualifier tracepoint filter: %s\n" , |
3943 | trace->syscalls.events.sys_exit->filter); |
3944 | } |
3945 | } |
3946 | |
3947 | /* |
3948 | * If the "close" syscall is not traced, then we will not have the |
3949 | * opportunity to, in syscall_arg__scnprintf_close_fd() invalidate the |
3950 | * fd->pathname table and were ending up showing the last value set by |
3951 | * syscalls opening a pathname and associating it with a descriptor or |
3952 | * reading it from /proc/pid/fd/ in cases where that doesn't make |
3953 | * sense. |
3954 | * |
3955 | * So just disable this beautifier (SCA_FD, SCA_FDAT) when 'close' is |
3956 | * not in use. |
3957 | */ |
3958 | trace->fd_path_disabled = !trace__syscall_enabled(trace, id: syscalltbl__id(trace->sctbl, "close" )); |
3959 | |
/* Turn symbolic names in the filters into numbers, then apply the filters. */
3960 | err = trace__expand_filters(trace, err_evsel: &evsel); |
3961 | if (err) |
3962 | goto out_delete_evlist; |
3963 | err = evlist__apply_filters(evlist, err_evsel: &evsel); |
3964 | if (err < 0) |
3965 | goto out_error_apply_filters; |
3966 | |
3967 | err = evlist__mmap(evlist, pages: trace->opts.mmap_pages); |
3968 | if (err < 0) |
3969 | goto out_error_mmap; |
3970 | |
/*
 * With an initial delay the events are only enabled after the workload was
 * started and the delay (given in msecs, see the * 1000 for usleep) elapsed.
 */
3971 | if (!target__none(&trace->opts.target) && !trace->opts.target.initial_delay) |
3972 | evlist__enable(evlist); |
3973 | |
3974 | if (forks) |
3975 | evlist__start_workload(evlist); |
3976 | |
3977 | if (trace->opts.target.initial_delay) { |
3978 | usleep(trace->opts.target.initial_delay * 1000); |
3979 | evlist__enable(evlist); |
3980 | } |
3981 | |
3982 | trace->multiple_threads = perf_thread_map__pid(evlist->core.threads, 0) == -1 || |
3983 | perf_thread_map__nr(evlist->core.threads) > 1 || |
3984 | evlist__first(evlist)->core.attr.inherit; |
3985 | |
3986 | /* |
3987 | * Now that we already used evsel->core.attr to ask the kernel to setup the |
3988 | * events, lets reuse evsel->core.attr.sample_max_stack as the limit in |
3989 | * trace__resolve_callchain(), allowing per-event max-stack settings |
3990 | * to override an explicitly set --max-stack global setting. |
3991 | */ |
3992 | evlist__for_each_entry(evlist, evsel) { |
3993 | if (evsel__has_callchain(evsel) && |
3994 | evsel->core.attr.sample_max_stack == 0) |
3995 | evsel->core.attr.sample_max_stack = trace->max_stack; |
3996 | } |
/*
 * Main loop: one pass over all the mmap'ed ring buffers, delivering every
 * event found. 'before' is used to tell whether the pass read anything.
 */
3997 | again: |
3998 | before = trace->nr_events; |
3999 | |
4000 | for (i = 0; i < evlist->core.nr_mmaps; i++) { |
4001 | union perf_event *event; |
4002 | struct mmap *md; |
4003 | |
4004 | md = &evlist->mmap[i]; |
4005 | if (perf_mmap__read_init(&md->core) < 0) |
4006 | continue; |
4007 | |
4008 | while ((event = perf_mmap__read_event(&md->core)) != NULL) { |
4009 | ++trace->nr_events; |
4010 | |
4011 | err = trace__deliver_event(trace, event); |
4012 | if (err) |
4013 | goto out_disable; |
4014 | |
4015 | perf_mmap__consume(&md->core); |
4016 | |
4017 | if (interrupted) |
4018 | goto out_disable; |
4019 | |
/* Once done, stop producing new events but keep reading what is queued. */
4020 | if (done && !draining) { |
4021 | evlist__disable(evlist); |
4022 | draining = true; |
4023 | } |
4024 | } |
4025 | perf_mmap__read_done(&md->core); |
4026 | } |
4027 | |
/*
 * Nothing was read in this pass: unless draining, poll for more events and
 * go around again if the poll reports activity; otherwise flush what is
 * queued and fall through to the exit path. If something was read, just do
 * another pass.
 */
4028 | if (trace->nr_events == before) { |
4029 | int timeout = done ? 100 : -1; |
4030 | |
4031 | if (!draining && evlist__poll(evlist, timeout) > 0) { |
4032 | if (evlist__filter_pollfd(evlist, POLLERR | POLLHUP | POLLNVAL) == 0) |
4033 | draining = true; |
4034 | |
4035 | goto again; |
4036 | } else { |
4037 | if (trace__flush_events(trace)) |
4038 | goto out_disable; |
4039 | } |
4040 | } else { |
4041 | goto again; |
4042 | } |
4043 | |
4044 | out_disable: |
4045 | thread__zput(trace->current); |
4046 | |
4047 | evlist__disable(evlist); |
4048 | |
4049 | if (trace->sort_events) |
4050 | ordered_events__flush(oe: &trace->oe.data, how: OE_FLUSH__FINAL); |
4051 | |
4052 | if (!err) { |
4053 | if (trace->summary) |
4054 | trace__fprintf_thread_summary(trace, trace->output); |
4055 | |
4056 | if (trace->show_tool_stats) { |
4057 | fprintf(trace->output, "Stats:\n " |
4058 | " vfs_getname : %" PRIu64 "\n" |
4059 | " proc_getname: %" PRIu64 "\n" , |
4060 | trace->stats.vfs_getname, |
4061 | trace->stats.proc_getname); |
4062 | } |
4063 | } |
4064 | |
/* Common cleanup, also the target of all the error labels below. */
4065 | out_delete_evlist: |
4066 | trace__symbols__exit(trace); |
4067 | evlist__free_syscall_tp_fields(evlist); |
4068 | evlist__delete(evlist); |
4069 | cgroup__put(cgroup: trace->cgroup); |
4070 | trace->evlist = NULL; |
4071 | trace->live = false; |
4072 | return err; |
/*
 * Error reporting. Everything from here on sits after the return above and
 * is only reachable via goto; the block scopes errbuf for the labels that
 * need a message buffer.
 */
4073 | { |
4074 | char errbuf[BUFSIZ]; |
4075 | |
4076 | out_error_sched_stat_runtime: |
4077 | tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched" , "sched_stat_runtime" ); |
4078 | goto out_error; |
4079 | |
4080 | out_error_raw_syscalls: |
4081 | tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls" , "sys_(enter|exit)" ); |
4082 | goto out_error; |
4083 | |
4084 | out_error_mmap: |
4085 | evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf)); |
4086 | goto out_error; |
4087 | |
4088 | out_error_open: |
4089 | evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf)); |
4090 | |
4091 | out_error: |
4092 | fprintf(trace->output, "%s\n" , errbuf); |
4093 | goto out_delete_evlist; |
4094 | |
4095 | out_error_apply_filters: |
4096 | fprintf(trace->output, |
4097 | "Failed to set filter \"%s\" on event %s with %d (%s)\n" , |
4098 | evsel->filter, evsel__name(evsel), errno, |
4099 | str_error_r(errno, errbuf, sizeof(errbuf))); |
4100 | goto out_delete_evlist; |
4101 | } |
/* NOTE: also used when trace__set_filter_pids() fails, whatever the cause. */
4102 | out_error_mem: |
4103 | fprintf(trace->output, "Not enough memory to run!\n" ); |
4104 | goto out_delete_evlist; |
4105 | |
4106 | out_errno: |
4107 | fprintf(trace->output, "errno=%d,%s\n" , errno, strerror(errno)); |
4108 | goto out_delete_evlist; |
4109 | } |
4110 | |
4111 | static int trace__replay(struct trace *trace) |
4112 | { |
4113 | const struct evsel_str_handler handlers[] = { |
4114 | { "probe:vfs_getname" , trace__vfs_getname, }, |
4115 | }; |
4116 | struct perf_data data = { |
4117 | .path = input_name, |
4118 | .mode = PERF_DATA_MODE_READ, |
4119 | .force = trace->force, |
4120 | }; |
4121 | struct perf_session *session; |
4122 | struct evsel *evsel; |
4123 | int err = -1; |
4124 | |
4125 | trace->tool.sample = trace__process_sample; |
4126 | trace->tool.mmap = perf_event__process_mmap; |
4127 | trace->tool.mmap2 = perf_event__process_mmap2; |
4128 | trace->tool.comm = perf_event__process_comm; |
4129 | trace->tool.exit = perf_event__process_exit; |
4130 | trace->tool.fork = perf_event__process_fork; |
4131 | trace->tool.attr = perf_event__process_attr; |
4132 | trace->tool.tracing_data = perf_event__process_tracing_data; |
4133 | trace->tool.build_id = perf_event__process_build_id; |
4134 | trace->tool.namespaces = perf_event__process_namespaces; |
4135 | |
4136 | trace->tool.ordered_events = true; |
4137 | trace->tool.ordering_requires_timestamps = true; |
4138 | |
4139 | /* add tid to output */ |
4140 | trace->multiple_threads = true; |
4141 | |
4142 | session = perf_session__new(data: &data, tool: &trace->tool); |
4143 | if (IS_ERR(ptr: session)) |
4144 | return PTR_ERR(ptr: session); |
4145 | |
4146 | if (trace->opts.target.pid) |
4147 | symbol_conf.pid_list_str = strdup(trace->opts.target.pid); |
4148 | |
4149 | if (trace->opts.target.tid) |
4150 | symbol_conf.tid_list_str = strdup(trace->opts.target.tid); |
4151 | |
4152 | if (symbol__init(env: &session->header.env) < 0) |
4153 | goto out; |
4154 | |
4155 | trace->host = &session->machines.host; |
4156 | |
4157 | err = perf_session__set_tracepoints_handlers(session, handlers); |
4158 | if (err) |
4159 | goto out; |
4160 | |
4161 | evsel = evlist__find_tracepoint_by_name(evlist: session->evlist, name: "raw_syscalls:sys_enter" ); |
4162 | trace->syscalls.events.sys_enter = evsel; |
4163 | /* older kernels have syscalls tp versus raw_syscalls */ |
4164 | if (evsel == NULL) |
4165 | evsel = evlist__find_tracepoint_by_name(evlist: session->evlist, name: "syscalls:sys_enter" ); |
4166 | |
4167 | if (evsel && |
4168 | (evsel__init_raw_syscall_tp(evsel, handler: trace__sys_enter) < 0 || |
4169 | perf_evsel__init_sc_tp_ptr_field(evsel, args))) { |
4170 | pr_err("Error during initialize raw_syscalls:sys_enter event\n" ); |
4171 | goto out; |
4172 | } |
4173 | |
4174 | evsel = evlist__find_tracepoint_by_name(evlist: session->evlist, name: "raw_syscalls:sys_exit" ); |
4175 | trace->syscalls.events.sys_exit = evsel; |
4176 | if (evsel == NULL) |
4177 | evsel = evlist__find_tracepoint_by_name(evlist: session->evlist, name: "syscalls:sys_exit" ); |
4178 | if (evsel && |
4179 | (evsel__init_raw_syscall_tp(evsel, handler: trace__sys_exit) < 0 || |
4180 | perf_evsel__init_sc_tp_uint_field(evsel, ret))) { |
4181 | pr_err("Error during initialize raw_syscalls:sys_exit event\n" ); |
4182 | goto out; |
4183 | } |
4184 | |
4185 | evlist__for_each_entry(session->evlist, evsel) { |
4186 | if (evsel->core.attr.type == PERF_TYPE_SOFTWARE && |
4187 | (evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ || |
4188 | evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN || |
4189 | evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS)) |
4190 | evsel->handler = trace__pgfault; |
4191 | } |
4192 | |
4193 | setup_pager(); |
4194 | |
4195 | err = perf_session__process_events(session); |
4196 | if (err) |
4197 | pr_err("Failed to process events, error %d" , err); |
4198 | |
4199 | else if (trace->summary) |
4200 | trace__fprintf_thread_summary(trace, trace->output); |
4201 | |
4202 | out: |
4203 | perf_session__delete(session); |
4204 | |
4205 | return err; |
4206 | } |
4207 | |
/*
 * Print the title of the per-thread summary to @fp.
 *
 * Returns the value returned by fprintf(), converted to size_t.
 */
static size_t trace__fprintf_threads_header(FILE *fp)
{
	size_t printed;

	printed = fprintf(fp, "\n Summary of events:\n\n");

	return printed;
}
4216 | |
4217 | DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs, |
4218 | struct syscall_stats *stats; |
4219 | double msecs; |
4220 | int syscall; |
4221 | ) |
4222 | { |
4223 | struct int_node *source = rb_entry(nd, struct int_node, rb_node); |
4224 | struct syscall_stats *stats = source->priv; |
4225 | |
4226 | entry->syscall = source->i; |
4227 | entry->stats = stats; |
4228 | entry->msecs = stats ? (u64)stats->stats.n * (avg_stats(&stats->stats) / NSEC_PER_MSEC) : 0; |
4229 | } |
4230 | |
4231 | static size_t thread__dump_stats(struct thread_trace *ttrace, |
4232 | struct trace *trace, FILE *fp) |
4233 | { |
4234 | size_t printed = 0; |
4235 | struct syscall *sc; |
4236 | struct rb_node *nd; |
4237 | DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats); |
4238 | |
4239 | if (syscall_stats == NULL) |
4240 | return 0; |
4241 | |
4242 | printed += fprintf(fp, "\n" ); |
4243 | |
4244 | printed += fprintf(fp, " syscall calls errors total min avg max stddev\n" ); |
4245 | printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n" ); |
4246 | printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n" ); |
4247 | |
4248 | resort_rb__for_each_entry(nd, syscall_stats) { |
4249 | struct syscall_stats *stats = syscall_stats_entry->stats; |
4250 | if (stats) { |
4251 | double min = (double)(stats->stats.min) / NSEC_PER_MSEC; |
4252 | double max = (double)(stats->stats.max) / NSEC_PER_MSEC; |
4253 | double avg = avg_stats(&stats->stats); |
4254 | double pct; |
4255 | u64 n = (u64)stats->stats.n; |
4256 | |
4257 | pct = avg ? 100.0 * stddev_stats(&stats->stats) / avg : 0.0; |
4258 | avg /= NSEC_PER_MSEC; |
4259 | |
4260 | sc = &trace->syscalls.table[syscall_stats_entry->syscall]; |
4261 | printed += fprintf(fp, " %-15s" , sc->name); |
4262 | printed += fprintf(fp, " %8" PRIu64 " %6" PRIu64 " %9.3f %9.3f %9.3f" , |
4263 | n, stats->nr_failures, syscall_stats_entry->msecs, min, avg); |
4264 | printed += fprintf(fp, " %9.3f %9.2f%%\n" , max, pct); |
4265 | |
4266 | if (trace->errno_summary && stats->nr_failures) { |
4267 | const char *arch_name = perf_env__arch(trace->host->env); |
4268 | int e; |
4269 | |
4270 | for (e = 0; e < stats->max_errno; ++e) { |
4271 | if (stats->errnos[e] != 0) |
4272 | fprintf(fp, "\t\t\t\t%s: %d\n" , arch_syscalls__strerrno(arch_name, e + 1), stats->errnos[e]); |
4273 | } |
4274 | } |
4275 | } |
4276 | } |
4277 | |
4278 | resort_rb__delete(syscall_stats); |
4279 | printed += fprintf(fp, "\n\n" ); |
4280 | |
4281 | return printed; |
4282 | } |
4283 | |
4284 | static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace) |
4285 | { |
4286 | size_t printed = 0; |
4287 | struct thread_trace *ttrace = thread__priv(thread); |
4288 | double ratio; |
4289 | |
4290 | if (ttrace == NULL) |
4291 | return 0; |
4292 | |
4293 | ratio = (double)ttrace->nr_events / trace->nr_events * 100.0; |
4294 | |
4295 | printed += fprintf(fp, " %s (%d), " , thread__comm_str(thread), thread__tid(thread)); |
4296 | printed += fprintf(fp, "%lu events, " , ttrace->nr_events); |
4297 | printed += fprintf(fp, "%.1f%%" , ratio); |
4298 | if (ttrace->pfmaj) |
4299 | printed += fprintf(fp, ", %lu majfaults" , ttrace->pfmaj); |
4300 | if (ttrace->pfmin) |
4301 | printed += fprintf(fp, ", %lu minfaults" , ttrace->pfmin); |
4302 | if (trace->sched) |
4303 | printed += fprintf(fp, ", %.3f msec\n" , ttrace->runtime_ms); |
4304 | else if (fputc('\n', fp) != EOF) |
4305 | ++printed; |
4306 | |
4307 | printed += thread__dump_stats(ttrace, trace, fp); |
4308 | |
4309 | return printed; |
4310 | } |
4311 | |
4312 | static unsigned long thread__nr_events(struct thread_trace *ttrace) |
4313 | { |
4314 | return ttrace ? ttrace->nr_events : 0; |
4315 | } |
4316 | |
4317 | DEFINE_RESORT_RB(threads, |
4318 | (thread__nr_events(thread__priv(a->thread)) < |
4319 | thread__nr_events(thread__priv(b->thread))), |
4320 | struct thread *thread; |
4321 | ) |
4322 | { |
4323 | entry->thread = rb_entry(nd, struct thread_rb_node, rb_node)->thread; |
4324 | } |
4325 | |
4326 | static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp) |
4327 | { |
4328 | size_t printed = trace__fprintf_threads_header(fp); |
4329 | struct rb_node *nd; |
4330 | int i; |
4331 | |
4332 | for (i = 0; i < THREADS__TABLE_SIZE; i++) { |
4333 | DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i); |
4334 | |
4335 | if (threads == NULL) { |
4336 | fprintf(fp, "%s" , "Error sorting output by nr_events!\n" ); |
4337 | return 0; |
4338 | } |
4339 | |
4340 | resort_rb__for_each_entry(nd, threads) |
4341 | printed += trace__fprintf_thread(fp, threads_entry->thread, trace); |
4342 | |
4343 | resort_rb__delete(threads); |
4344 | } |
4345 | return printed; |
4346 | } |
4347 | |
4348 | static int trace__set_duration(const struct option *opt, const char *str, |
4349 | int unset __maybe_unused) |
4350 | { |
4351 | struct trace *trace = opt->value; |
4352 | |
4353 | trace->duration_filter = atof(str); |
4354 | return 0; |
4355 | } |
4356 | |
4357 | static int trace__set_filter_pids_from_option(const struct option *opt, const char *str, |
4358 | int unset __maybe_unused) |
4359 | { |
4360 | int ret = -1; |
4361 | size_t i; |
4362 | struct trace *trace = opt->value; |
4363 | /* |
4364 | * FIXME: introduce a intarray class, plain parse csv and create a |
4365 | * { int nr, int entries[] } struct... |
4366 | */ |
4367 | struct intlist *list = intlist__new(slist: str); |
4368 | |
4369 | if (list == NULL) |
4370 | return -1; |
4371 | |
4372 | i = trace->filter_pids.nr = intlist__nr_entries(ilist: list) + 1; |
4373 | trace->filter_pids.entries = calloc(i, sizeof(pid_t)); |
4374 | |
4375 | if (trace->filter_pids.entries == NULL) |
4376 | goto out; |
4377 | |
4378 | trace->filter_pids.entries[0] = getpid(); |
4379 | |
4380 | for (i = 1; i < trace->filter_pids.nr; ++i) |
4381 | trace->filter_pids.entries[i] = intlist__entry(ilist: list, idx: i - 1)->i; |
4382 | |
4383 | intlist__delete(ilist: list); |
4384 | ret = 0; |
4385 | out: |
4386 | return ret; |
4387 | } |
4388 | |
4389 | static int trace__open_output(struct trace *trace, const char *filename) |
4390 | { |
4391 | struct stat st; |
4392 | |
4393 | if (!stat(filename, &st) && st.st_size) { |
4394 | char oldname[PATH_MAX]; |
4395 | |
4396 | scnprintf(buf: oldname, size: sizeof(oldname), fmt: "%s.old" , filename); |
4397 | unlink(oldname); |
4398 | rename(filename, oldname); |
4399 | } |
4400 | |
4401 | trace->output = fopen(filename, "w" ); |
4402 | |
4403 | return trace->output == NULL ? -errno : 0; |
4404 | } |
4405 | |
4406 | static int parse_pagefaults(const struct option *opt, const char *str, |
4407 | int unset __maybe_unused) |
4408 | { |
4409 | int *trace_pgfaults = opt->value; |
4410 | |
4411 | if (strcmp(str, "all" ) == 0) |
4412 | *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN; |
4413 | else if (strcmp(str, "maj" ) == 0) |
4414 | *trace_pgfaults |= TRACE_PFMAJ; |
4415 | else if (strcmp(str, "min" ) == 0) |
4416 | *trace_pgfaults |= TRACE_PFMIN; |
4417 | else |
4418 | return -1; |
4419 | |
4420 | return 0; |
4421 | } |
4422 | |
4423 | static void evlist__set_default_evsel_handler(struct evlist *evlist, void *handler) |
4424 | { |
4425 | struct evsel *evsel; |
4426 | |
4427 | evlist__for_each_entry(evlist, evsel) { |
4428 | if (evsel->handler == NULL) |
4429 | evsel->handler = handler; |
4430 | } |
4431 | } |
4432 | |
4433 | static void evsel__set_syscall_arg_fmt(struct evsel *evsel, const char *name) |
4434 | { |
4435 | struct syscall_arg_fmt *fmt = evsel__syscall_arg_fmt(evsel); |
4436 | |
4437 | if (fmt) { |
4438 | const struct syscall_fmt *scfmt = syscall_fmt__find(name); |
4439 | |
4440 | if (scfmt) { |
4441 | int skip = 0; |
4442 | |
4443 | if (strcmp(evsel->tp_format->format.fields->name, "__syscall_nr" ) == 0 || |
4444 | strcmp(evsel->tp_format->format.fields->name, "nr" ) == 0) |
4445 | ++skip; |
4446 | |
4447 | memcpy(fmt + skip, scfmt->arg, (evsel->tp_format->format.nr_fields - skip) * sizeof(*fmt)); |
4448 | } |
4449 | } |
4450 | } |
4451 | |
4452 | static int evlist__set_syscall_tp_fields(struct evlist *evlist) |
4453 | { |
4454 | struct evsel *evsel; |
4455 | |
4456 | evlist__for_each_entry(evlist, evsel) { |
4457 | if (evsel->priv || !evsel->tp_format) |
4458 | continue; |
4459 | |
4460 | if (strcmp(evsel->tp_format->system, "syscalls" )) { |
4461 | evsel__init_tp_arg_scnprintf(evsel); |
4462 | continue; |
4463 | } |
4464 | |
4465 | if (evsel__init_syscall_tp(evsel)) |
4466 | return -1; |
4467 | |
4468 | if (!strncmp(evsel->tp_format->name, "sys_enter_" , 10)) { |
4469 | struct syscall_tp *sc = __evsel__syscall_tp(evsel); |
4470 | |
4471 | if (__tp_field__init_ptr(field: &sc->args, offset: sc->id.offset + sizeof(u64))) |
4472 | return -1; |
4473 | |
4474 | evsel__set_syscall_arg_fmt(evsel, name: evsel->tp_format->name + sizeof("sys_enter_" ) - 1); |
4475 | } else if (!strncmp(evsel->tp_format->name, "sys_exit_" , 9)) { |
4476 | struct syscall_tp *sc = __evsel__syscall_tp(evsel); |
4477 | |
4478 | if (__tp_field__init_uint(field: &sc->ret, size: sizeof(u64), offset: sc->id.offset + sizeof(u64), needs_swap: evsel->needs_swap)) |
4479 | return -1; |
4480 | |
4481 | evsel__set_syscall_arg_fmt(evsel, name: evsel->tp_format->name + sizeof("sys_exit_" ) - 1); |
4482 | } |
4483 | } |
4484 | |
4485 | return 0; |
4486 | } |
4487 | |
4488 | /* |
4489 | * XXX: Hackish, just splitting the combined -e+--event (syscalls |
4490 | * (raw_syscalls:{sys_{enter,exit}} + events (tracepoints, HW, SW, etc) to use |
4491 | * existing facilities unchanged (trace->ev_qualifier + parse_options()). |
4492 | * |
4493 | * It'd be better to introduce a parse_options() variant that would return a |
4494 | * list with the terms it didn't match to an event... |
4495 | */ |
4496 | static int trace__parse_events_option(const struct option *opt, const char *str, |
4497 | int unset __maybe_unused) |
4498 | { |
4499 | struct trace *trace = (struct trace *)opt->value; |
4500 | const char *s = str; |
4501 | char *sep = NULL, *lists[2] = { NULL, NULL, }; |
4502 | int len = strlen(str) + 1, err = -1, list, idx; |
4503 | char *strace_groups_dir = system_path(STRACE_GROUPS_DIR); |
4504 | char group_name[PATH_MAX]; |
4505 | const struct syscall_fmt *fmt; |
4506 | |
4507 | if (strace_groups_dir == NULL) |
4508 | return -1; |
4509 | |
4510 | if (*s == '!') { |
4511 | ++s; |
4512 | trace->not_ev_qualifier = true; |
4513 | } |
4514 | |
4515 | while (1) { |
4516 | if ((sep = strchr(s, ',')) != NULL) |
4517 | *sep = '\0'; |
4518 | |
4519 | list = 0; |
4520 | if (syscalltbl__id(trace->sctbl, s) >= 0 || |
4521 | syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) { |
4522 | list = 1; |
4523 | goto do_concat; |
4524 | } |
4525 | |
4526 | fmt = syscall_fmt__find_by_alias(alias: s); |
4527 | if (fmt != NULL) { |
4528 | list = 1; |
4529 | s = fmt->name; |
4530 | } else { |
4531 | path__join(bf: group_name, size: sizeof(group_name), path1: strace_groups_dir, path2: s); |
4532 | if (access(group_name, R_OK) == 0) |
4533 | list = 1; |
4534 | } |
4535 | do_concat: |
4536 | if (lists[list]) { |
4537 | sprintf(buf: lists[list] + strlen(lists[list]), fmt: ",%s" , s); |
4538 | } else { |
4539 | lists[list] = malloc(len); |
4540 | if (lists[list] == NULL) |
4541 | goto out; |
4542 | strcpy(p: lists[list], q: s); |
4543 | } |
4544 | |
4545 | if (!sep) |
4546 | break; |
4547 | |
4548 | *sep = ','; |
4549 | s = sep + 1; |
4550 | } |
4551 | |
4552 | if (lists[1] != NULL) { |
4553 | struct strlist_config slist_config = { |
4554 | .dirname = strace_groups_dir, |
4555 | }; |
4556 | |
4557 | trace->ev_qualifier = strlist__new(slist: lists[1], config: &slist_config); |
4558 | if (trace->ev_qualifier == NULL) { |
4559 | fputs("Not enough memory to parse event qualifier" , trace->output); |
4560 | goto out; |
4561 | } |
4562 | |
4563 | if (trace__validate_ev_qualifier(trace)) |
4564 | goto out; |
4565 | trace->trace_syscalls = true; |
4566 | } |
4567 | |
4568 | err = 0; |
4569 | |
4570 | if (lists[0]) { |
4571 | struct parse_events_option_args parse_events_option_args = { |
4572 | .evlistp = &trace->evlist, |
4573 | }; |
4574 | struct option o = { |
4575 | .value = &parse_events_option_args, |
4576 | }; |
4577 | err = parse_events_option(opt: &o, str: lists[0], unset: 0); |
4578 | } |
4579 | out: |
4580 | free(strace_groups_dir); |
4581 | free(lists[0]); |
4582 | free(lists[1]); |
4583 | if (sep) |
4584 | *sep = ','; |
4585 | |
4586 | return err; |
4587 | } |
4588 | |
4589 | static int trace__parse_cgroups(const struct option *opt, const char *str, int unset) |
4590 | { |
4591 | struct trace *trace = opt->value; |
4592 | |
4593 | if (!list_empty(head: &trace->evlist->core.entries)) { |
4594 | struct option o = { |
4595 | .value = &trace->evlist, |
4596 | }; |
4597 | return parse_cgroups(opt: &o, str, unset); |
4598 | } |
4599 | trace->cgroup = evlist__findnew_cgroup(evlist: trace->evlist, name: str); |
4600 | |
4601 | return 0; |
4602 | } |
4603 | |
4604 | static int trace__config(const char *var, const char *value, void *arg) |
4605 | { |
4606 | struct trace *trace = arg; |
4607 | int err = 0; |
4608 | |
4609 | if (!strcmp(var, "trace.add_events" )) { |
4610 | trace->perfconfig_events = strdup(value); |
4611 | if (trace->perfconfig_events == NULL) { |
4612 | pr_err("Not enough memory for %s\n" , "trace.add_events" ); |
4613 | return -1; |
4614 | } |
4615 | } else if (!strcmp(var, "trace.show_timestamp" )) { |
4616 | trace->show_tstamp = perf_config_bool(var, value); |
4617 | } else if (!strcmp(var, "trace.show_duration" )) { |
4618 | trace->show_duration = perf_config_bool(var, value); |
4619 | } else if (!strcmp(var, "trace.show_arg_names" )) { |
4620 | trace->show_arg_names = perf_config_bool(var, value); |
4621 | if (!trace->show_arg_names) |
4622 | trace->show_zeros = true; |
4623 | } else if (!strcmp(var, "trace.show_zeros" )) { |
4624 | bool new_show_zeros = perf_config_bool(var, value); |
4625 | if (!trace->show_arg_names && !new_show_zeros) { |
4626 | pr_warning("trace.show_zeros has to be set when trace.show_arg_names=no\n" ); |
4627 | goto out; |
4628 | } |
4629 | trace->show_zeros = new_show_zeros; |
4630 | } else if (!strcmp(var, "trace.show_prefix" )) { |
4631 | trace->show_string_prefix = perf_config_bool(var, value); |
4632 | } else if (!strcmp(var, "trace.no_inherit" )) { |
4633 | trace->opts.no_inherit = perf_config_bool(var, value); |
4634 | } else if (!strcmp(var, "trace.args_alignment" )) { |
4635 | int args_alignment = 0; |
4636 | if (perf_config_int(dest: &args_alignment, var, value) == 0) |
4637 | trace->args_alignment = args_alignment; |
4638 | } else if (!strcmp(var, "trace.tracepoint_beautifiers" )) { |
4639 | if (strcasecmp(s1: value, s2: "libtraceevent" ) == 0) |
4640 | trace->libtraceevent_print = true; |
4641 | else if (strcasecmp(s1: value, s2: "libbeauty" ) == 0) |
4642 | trace->libtraceevent_print = false; |
4643 | } |
4644 | out: |
4645 | return err; |
4646 | } |
4647 | |
4648 | static void trace__exit(struct trace *trace) |
4649 | { |
4650 | int i; |
4651 | |
4652 | strlist__delete(slist: trace->ev_qualifier); |
4653 | zfree(&trace->ev_qualifier_ids.entries); |
4654 | if (trace->syscalls.table) { |
4655 | for (i = 0; i <= trace->sctbl->syscalls.max_id; i++) |
4656 | syscall__exit(sc: &trace->syscalls.table[i]); |
4657 | zfree(&trace->syscalls.table); |
4658 | } |
4659 | syscalltbl__delete(trace->sctbl); |
4660 | zfree(&trace->perfconfig_events); |
4661 | } |
4662 | |
4663 | #ifdef HAVE_BPF_SKEL |
/*
 * Add the "__augmented_syscalls__" bpf-output event to @evlist.
 *
 * Returns what parse_event() returned, logging a debug message when that is
 * not zero.
 */
static int bpf__setup_bpf_output(struct evlist *evlist)
{
	int ret;

	ret = parse_event(evlist, "bpf-output/no-inherit=1,name=__augmented_syscalls__/");
	if (ret != 0)
		pr_debug("ERROR: failed to create the \"__augmented_syscalls__\" bpf-output event\n");

	return ret;
}
4673 | #endif |
4674 | |
4675 | int cmd_trace(int argc, const char **argv) |
4676 | { |
4677 | const char *trace_usage[] = { |
4678 | "perf trace [<options>] [<command>]" , |
4679 | "perf trace [<options>] -- <command> [<options>]" , |
4680 | "perf trace record [<options>] [<command>]" , |
4681 | "perf trace record [<options>] -- <command> [<options>]" , |
4682 | NULL |
4683 | }; |
4684 | struct trace trace = { |
4685 | .opts = { |
4686 | .target = { |
4687 | .uid = UINT_MAX, |
4688 | .uses_mmap = true, |
4689 | }, |
4690 | .user_freq = UINT_MAX, |
4691 | .user_interval = ULLONG_MAX, |
4692 | .no_buffering = true, |
4693 | .mmap_pages = UINT_MAX, |
4694 | }, |
4695 | .output = stderr, |
4696 | .show_comm = true, |
4697 | .show_tstamp = true, |
4698 | .show_duration = true, |
4699 | .show_arg_names = true, |
4700 | .args_alignment = 70, |
4701 | .trace_syscalls = false, |
4702 | .kernel_syscallchains = false, |
4703 | .max_stack = UINT_MAX, |
4704 | .max_events = ULONG_MAX, |
4705 | }; |
4706 | const char *output_name = NULL; |
4707 | const struct option trace_options[] = { |
4708 | OPT_CALLBACK('e', "event" , &trace, "event" , |
4709 | "event/syscall selector. use 'perf list' to list available events" , |
4710 | trace__parse_events_option), |
4711 | OPT_CALLBACK(0, "filter" , &trace.evlist, "filter" , |
4712 | "event filter" , parse_filter), |
4713 | OPT_BOOLEAN(0, "comm" , &trace.show_comm, |
4714 | "show the thread COMM next to its id" ), |
4715 | OPT_BOOLEAN(0, "tool_stats" , &trace.show_tool_stats, "show tool stats" ), |
4716 | OPT_CALLBACK(0, "expr" , &trace, "expr" , "list of syscalls/events to trace" , |
4717 | trace__parse_events_option), |
4718 | OPT_STRING('o', "output" , &output_name, "file" , "output file name" ), |
4719 | OPT_STRING('i', "input" , &input_name, "file" , "Analyze events in file" ), |
4720 | OPT_STRING('p', "pid" , &trace.opts.target.pid, "pid" , |
4721 | "trace events on existing process id" ), |
4722 | OPT_STRING('t', "tid" , &trace.opts.target.tid, "tid" , |
4723 | "trace events on existing thread id" ), |
4724 | OPT_CALLBACK(0, "filter-pids" , &trace, "CSV list of pids" , |
4725 | "pids to filter (by the kernel)" , trace__set_filter_pids_from_option), |
4726 | OPT_BOOLEAN('a', "all-cpus" , &trace.opts.target.system_wide, |
4727 | "system-wide collection from all CPUs" ), |
4728 | OPT_STRING('C', "cpu" , &trace.opts.target.cpu_list, "cpu" , |
4729 | "list of cpus to monitor" ), |
4730 | OPT_BOOLEAN(0, "no-inherit" , &trace.opts.no_inherit, |
4731 | "child tasks do not inherit counters" ), |
4732 | OPT_CALLBACK('m', "mmap-pages" , &trace.opts.mmap_pages, "pages" , |
4733 | "number of mmap data pages" , evlist__parse_mmap_pages), |
4734 | OPT_STRING('u', "uid" , &trace.opts.target.uid_str, "user" , |
4735 | "user to profile" ), |
4736 | OPT_CALLBACK(0, "duration" , &trace, "float" , |
4737 | "show only events with duration > N.M ms" , |
4738 | trace__set_duration), |
4739 | OPT_BOOLEAN(0, "sched" , &trace.sched, "show blocking scheduler events" ), |
4740 | OPT_INCR('v', "verbose" , &verbose, "be more verbose" ), |
4741 | OPT_BOOLEAN('T', "time" , &trace.full_time, |
4742 | "Show full timestamp, not time relative to first start" ), |
4743 | OPT_BOOLEAN(0, "failure" , &trace.failure_only, |
4744 | "Show only syscalls that failed" ), |
4745 | OPT_BOOLEAN('s', "summary" , &trace.summary_only, |
4746 | "Show only syscall summary with statistics" ), |
4747 | OPT_BOOLEAN('S', "with-summary" , &trace.summary, |
4748 | "Show all syscalls and summary with statistics" ), |
4749 | OPT_BOOLEAN(0, "errno-summary" , &trace.errno_summary, |
4750 | "Show errno stats per syscall, use with -s or -S" ), |
4751 | OPT_CALLBACK_DEFAULT('F', "pf" , &trace.trace_pgfaults, "all|maj|min" , |
4752 | "Trace pagefaults" , parse_pagefaults, "maj" ), |
4753 | OPT_BOOLEAN(0, "syscalls" , &trace.trace_syscalls, "Trace syscalls" ), |
4754 | OPT_BOOLEAN('f', "force" , &trace.force, "don't complain, do it" ), |
4755 | OPT_CALLBACK(0, "call-graph" , &trace.opts, |
4756 | "record_mode[,record_size]" , record_callchain_help, |
4757 | &record_parse_callchain_opt), |
4758 | OPT_BOOLEAN(0, "libtraceevent_print" , &trace.libtraceevent_print, |
4759 | "Use libtraceevent to print the tracepoint arguments." ), |
4760 | OPT_BOOLEAN(0, "kernel-syscall-graph" , &trace.kernel_syscallchains, |
4761 | "Show the kernel callchains on the syscall exit path" ), |
4762 | OPT_ULONG(0, "max-events" , &trace.max_events, |
4763 | "Set the maximum number of events to print, exit after that is reached. " ), |
4764 | OPT_UINTEGER(0, "min-stack" , &trace.min_stack, |
4765 | "Set the minimum stack depth when parsing the callchain, " |
4766 | "anything below the specified depth will be ignored." ), |
4767 | OPT_UINTEGER(0, "max-stack" , &trace.max_stack, |
4768 | "Set the maximum stack depth when parsing the callchain, " |
4769 | "anything beyond the specified depth will be ignored. " |
4770 | "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)), |
4771 | OPT_BOOLEAN(0, "sort-events" , &trace.sort_events, |
4772 | "Sort batch of events before processing, use if getting out of order events" ), |
4773 | OPT_BOOLEAN(0, "print-sample" , &trace.print_sample, |
4774 | "print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info, for debugging" ), |
4775 | OPT_UINTEGER(0, "proc-map-timeout" , &proc_map_timeout, |
4776 | "per thread proc mmap processing timeout in ms" ), |
4777 | OPT_CALLBACK('G', "cgroup" , &trace, "name" , "monitor event in cgroup name only" , |
4778 | trace__parse_cgroups), |
4779 | OPT_INTEGER('D', "delay" , &trace.opts.target.initial_delay, |
4780 | "ms to wait before starting measurement after program " |
4781 | "start" ), |
4782 | OPTS_EVSWITCH(&trace.evswitch), |
4783 | OPT_END() |
4784 | }; |
4785 | bool __maybe_unused max_stack_user_set = true; |
4786 | bool mmap_pages_user_set = true; |
4787 | struct evsel *evsel; |
4788 | const char * const trace_subcommands[] = { "record" , NULL }; |
4789 | int err = -1; |
4790 | char bf[BUFSIZ]; |
4791 | struct sigaction sigchld_act; |
4792 | |
4793 | signal(SIGSEGV, sighandler_dump_stack); |
4794 | signal(SIGFPE, sighandler_dump_stack); |
4795 | signal(SIGINT, sighandler_interrupt); |
4796 | |
4797 | memset(&sigchld_act, 0, sizeof(sigchld_act)); |
4798 | sigchld_act.sa_flags = SA_SIGINFO; |
4799 | sigchld_act.sa_sigaction = sighandler_chld; |
4800 | sigaction(SIGCHLD, &sigchld_act, NULL); |
4801 | |
4802 | trace.evlist = evlist__new(); |
4803 | trace.sctbl = syscalltbl__new(); |
4804 | |
4805 | if (trace.evlist == NULL || trace.sctbl == NULL) { |
4806 | pr_err("Not enough memory to run!\n" ); |
4807 | err = -ENOMEM; |
4808 | goto out; |
4809 | } |
4810 | |
4811 | /* |
4812 | * Parsing .perfconfig may entail creating a BPF event, that may need |
4813 | * to create BPF maps, so bump RLIM_MEMLOCK as the default 64K setting |
4814 | * is too small. This affects just this process, not touching the |
4815 | * global setting. If it fails we'll get something in 'perf trace -v' |
4816 | * to help diagnose the problem. |
4817 | */ |
4818 | rlimit__bump_memlock(); |
4819 | |
4820 | err = perf_config(fn: trace__config, &trace); |
4821 | if (err) |
4822 | goto out; |
4823 | |
4824 | argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands, |
4825 | trace_usage, PARSE_OPT_STOP_AT_NON_OPTION); |
4826 | |
4827 | /* |
4828 | * Here we already passed thru trace__parse_events_option() and it has |
4829 | * already figured out if -e syscall_name, if not but if --event |
4830 | * foo:bar was used, the user is interested _just_ in those, say, |
4831 | * tracepoint events, not in the strace-like syscall-name-based mode. |
4832 | * |
4833 | * This is important because we need to check if strace-like mode is |
4834 | * needed to decided if we should filter out the eBPF |
4835 | * __augmented_syscalls__ code, if it is in the mix, say, via |
4836 | * .perfconfig trace.add_events, and filter those out. |
4837 | */ |
4838 | if (!trace.trace_syscalls && !trace.trace_pgfaults && |
4839 | trace.evlist->core.nr_entries == 0 /* Was --events used? */) { |
4840 | trace.trace_syscalls = true; |
4841 | } |
4842 | /* |
4843 | * Now that we have --verbose figured out, lets see if we need to parse |
4844 | * events from .perfconfig, so that if those events fail parsing, say some |
4845 | * BPF program fails, then we'll be able to use --verbose to see what went |
4846 | * wrong in more detail. |
4847 | */ |
4848 | if (trace.perfconfig_events != NULL) { |
4849 | struct parse_events_error parse_err; |
4850 | |
4851 | parse_events_error__init(err: &parse_err); |
4852 | err = parse_events(evlist: trace.evlist, str: trace.perfconfig_events, err: &parse_err); |
4853 | if (err) |
4854 | parse_events_error__print(err: &parse_err, event: trace.perfconfig_events); |
4855 | parse_events_error__exit(err: &parse_err); |
4856 | if (err) |
4857 | goto out; |
4858 | } |
4859 | |
4860 | if ((nr_cgroups || trace.cgroup) && !trace.opts.target.system_wide) { |
4861 | usage_with_options_msg(trace_usage, trace_options, |
4862 | "cgroup monitoring only available in system-wide mode" ); |
4863 | } |
4864 | |
4865 | #ifdef HAVE_BPF_SKEL |
4866 | if (!trace.trace_syscalls) |
4867 | goto skip_augmentation; |
4868 | |
4869 | trace.skel = augmented_raw_syscalls_bpf__open(); |
4870 | if (!trace.skel) { |
4871 | pr_debug("Failed to open augmented syscalls BPF skeleton" ); |
4872 | } else { |
4873 | /* |
4874 | * Disable attaching the BPF programs except for sys_enter and |
4875 | * sys_exit that tail call into this as necessary. |
4876 | */ |
4877 | struct bpf_program *prog; |
4878 | |
4879 | bpf_object__for_each_program(prog, trace.skel->obj) { |
4880 | if (prog != trace.skel->progs.sys_enter && prog != trace.skel->progs.sys_exit) |
4881 | bpf_program__set_autoattach(prog, /*autoattach=*/false); |
4882 | } |
4883 | |
4884 | err = augmented_raw_syscalls_bpf__load(trace.skel); |
4885 | |
4886 | if (err < 0) { |
4887 | libbpf_strerror(err, bf, sizeof(bf)); |
4888 | pr_debug("Failed to load augmented syscalls BPF skeleton: %s\n" , bf); |
4889 | } else { |
4890 | augmented_raw_syscalls_bpf__attach(trace.skel); |
4891 | trace__add_syscall_newtp(&trace); |
4892 | } |
4893 | } |
4894 | |
4895 | err = bpf__setup_bpf_output(trace.evlist); |
4896 | if (err) { |
4897 | libbpf_strerror(err, bf, sizeof(bf)); |
4898 | pr_err("ERROR: Setup BPF output event failed: %s\n" , bf); |
4899 | goto out; |
4900 | } |
4901 | trace.syscalls.events.bpf_output = evlist__last(trace.evlist); |
4902 | assert(!strcmp(evsel__name(trace.syscalls.events.bpf_output), "__augmented_syscalls__" )); |
4903 | skip_augmentation: |
4904 | #endif |
4905 | err = -1; |
4906 | |
4907 | if (trace.trace_pgfaults) { |
4908 | trace.opts.sample_address = true; |
4909 | trace.opts.sample_time = true; |
4910 | } |
4911 | |
4912 | if (trace.opts.mmap_pages == UINT_MAX) |
4913 | mmap_pages_user_set = false; |
4914 | |
4915 | if (trace.max_stack == UINT_MAX) { |
4916 | trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl__max_stack(); |
4917 | max_stack_user_set = false; |
4918 | } |
4919 | |
4920 | #ifdef HAVE_DWARF_UNWIND_SUPPORT |
4921 | if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled) { |
4922 | record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf" , false); |
4923 | } |
4924 | #endif |
4925 | |
4926 | if (callchain_param.enabled) { |
4927 | if (!mmap_pages_user_set && geteuid() == 0) |
4928 | trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4; |
4929 | |
4930 | symbol_conf.use_callchain = true; |
4931 | } |
4932 | |
4933 | if (trace.evlist->core.nr_entries > 0) { |
4934 | evlist__set_default_evsel_handler(evlist: trace.evlist, handler: trace__event_handler); |
4935 | if (evlist__set_syscall_tp_fields(evlist: trace.evlist)) { |
4936 | perror("failed to set syscalls:* tracepoint fields" ); |
4937 | goto out; |
4938 | } |
4939 | } |
4940 | |
4941 | if (trace.sort_events) { |
4942 | ordered_events__init(oe: &trace.oe.data, deliver: ordered_events__deliver_event, data: &trace); |
4943 | ordered_events__set_copy_on_queue(oe: &trace.oe.data, copy: true); |
4944 | } |
4945 | |
4946 | /* |
4947 | * If we are augmenting syscalls, then combine what we put in the |
4948 | * __augmented_syscalls__ BPF map with what is in the |
4949 | * syscalls:sys_exit_FOO tracepoints, i.e. just like we do without BPF, |
4950 | * combining raw_syscalls:sys_enter with raw_syscalls:sys_exit. |
4951 | * |
4952 | * We'll switch to look at two BPF maps, one for sys_enter and the |
4953 | * other for sys_exit when we start augmenting the sys_exit paths with |
4954 | * buffers that are being copied from kernel to userspace, think 'read' |
4955 | * syscall. |
4956 | */ |
4957 | if (trace.syscalls.events.bpf_output) { |
4958 | evlist__for_each_entry(trace.evlist, evsel) { |
4959 | bool raw_syscalls_sys_exit = strcmp(evsel__name(evsel), "raw_syscalls:sys_exit" ) == 0; |
4960 | |
4961 | if (raw_syscalls_sys_exit) { |
4962 | trace.raw_augmented_syscalls = true; |
4963 | goto init_augmented_syscall_tp; |
4964 | } |
4965 | |
4966 | if (trace.syscalls.events.bpf_output->priv == NULL && |
4967 | strstr(evsel__name(evsel), "syscalls:sys_enter" )) { |
4968 | struct evsel *augmented = trace.syscalls.events.bpf_output; |
4969 | if (evsel__init_augmented_syscall_tp(evsel: augmented, tp: evsel) || |
4970 | evsel__init_augmented_syscall_tp_args(evsel: augmented)) |
4971 | goto out; |
4972 | /* |
4973 | * Augmented is __augmented_syscalls__ BPF_OUTPUT event |
4974 | * Above we made sure we can get from the payload the tp fields |
4975 | * that we get from syscalls:sys_enter tracefs format file. |
4976 | */ |
4977 | augmented->handler = trace__sys_enter; |
4978 | /* |
4979 | * Now we do the same for the *syscalls:sys_enter event so that |
4980 | * if we handle it directly, i.e. if the BPF prog returns 0 so |
4981 | * as not to filter it, then we'll handle it just like we would |
4982 | * for the BPF_OUTPUT one: |
4983 | */ |
4984 | if (evsel__init_augmented_syscall_tp(evsel, tp: evsel) || |
4985 | evsel__init_augmented_syscall_tp_args(evsel)) |
4986 | goto out; |
4987 | evsel->handler = trace__sys_enter; |
4988 | } |
4989 | |
4990 | if (strstarts(str: evsel__name(evsel), prefix: "syscalls:sys_exit_" )) { |
4991 | struct syscall_tp *sc; |
4992 | init_augmented_syscall_tp: |
4993 | if (evsel__init_augmented_syscall_tp(evsel, tp: evsel)) |
4994 | goto out; |
4995 | sc = __evsel__syscall_tp(evsel); |
4996 | /* |
4997 | * For now with BPF raw_augmented we hook into |
4998 | * raw_syscalls:sys_enter and there we get all |
4999 | * 6 syscall args plus the tracepoint common |
5000 | * fields and the syscall_nr (another long). |
5001 | * So we check if that is the case and if so |
5002 | * don't look after the sc->args_size but |
5003 | * always after the full raw_syscalls:sys_enter |
5004 | * payload, which is fixed. |
5005 | * |
5006 | * We'll revisit this later to pass |
5007 | * s->args_size to the BPF augmenter (now |
5008 | * tools/perf/examples/bpf/augmented_raw_syscalls.c, |
5009 | * so that it copies only what we need for each |
5010 | * syscall, like what happens when we use |
5011 | * syscalls:sys_enter_NAME, so that we reduce |
5012 | * the kernel/userspace traffic to just what is |
5013 | * needed for each syscall. |
5014 | */ |
5015 | if (trace.raw_augmented_syscalls) |
5016 | trace.raw_augmented_syscalls_args_size = (6 + 1) * sizeof(long) + sc->id.offset; |
5017 | evsel__init_augmented_syscall_tp_ret(evsel); |
5018 | evsel->handler = trace__sys_exit; |
5019 | } |
5020 | } |
5021 | } |
5022 | |
5023 | if ((argc >= 1) && (strcmp(argv[0], "record" ) == 0)) |
5024 | return trace__record(trace: &trace, argc: argc-1, argv: &argv[1]); |
5025 | |
5026 | /* Using just --errno-summary will trigger --summary */ |
5027 | if (trace.errno_summary && !trace.summary && !trace.summary_only) |
5028 | trace.summary_only = true; |
5029 | |
5030 | /* summary_only implies summary option, but don't overwrite summary if set */ |
5031 | if (trace.summary_only) |
5032 | trace.summary = trace.summary_only; |
5033 | |
5034 | if (output_name != NULL) { |
5035 | err = trace__open_output(trace: &trace, filename: output_name); |
5036 | if (err < 0) { |
5037 | perror("failed to create output file" ); |
5038 | goto out; |
5039 | } |
5040 | } |
5041 | |
5042 | err = evswitch__init(&trace.evswitch, trace.evlist, stderr); |
5043 | if (err) |
5044 | goto out_close; |
5045 | |
5046 | err = target__validate(&trace.opts.target); |
5047 | if (err) { |
5048 | target__strerror(&trace.opts.target, err, bf, sizeof(bf)); |
5049 | fprintf(trace.output, "%s" , bf); |
5050 | goto out_close; |
5051 | } |
5052 | |
5053 | err = target__parse_uid(&trace.opts.target); |
5054 | if (err) { |
5055 | target__strerror(&trace.opts.target, err, bf, sizeof(bf)); |
5056 | fprintf(trace.output, "%s" , bf); |
5057 | goto out_close; |
5058 | } |
5059 | |
5060 | if (!argc && target__none(&trace.opts.target)) |
5061 | trace.opts.target.system_wide = true; |
5062 | |
5063 | if (input_name) |
5064 | err = trace__replay(trace: &trace); |
5065 | else |
5066 | err = trace__run(trace: &trace, argc, argv); |
5067 | |
5068 | out_close: |
5069 | if (output_name != NULL) |
5070 | fclose(trace.output); |
5071 | out: |
5072 | trace__exit(trace: &trace); |
5073 | #ifdef HAVE_BPF_SKEL |
5074 | augmented_raw_syscalls_bpf__destroy(trace.skel); |
5075 | #endif |
5076 | return err; |
5077 | } |
5078 | |