1 | // SPDX-License-Identifier: GPL-2.0 |
2 | #include <trace/syscall.h> |
3 | #include <trace/events/syscalls.h> |
4 | #include <linux/syscalls.h> |
5 | #include <linux/slab.h> |
6 | #include <linux/kernel.h> |
7 | #include <linux/module.h> /* for MODULE_NAME_LEN via KSYM_SYMBOL_LEN */ |
8 | #include <linux/ftrace.h> |
9 | #include <linux/perf_event.h> |
10 | #include <linux/xarray.h> |
11 | #include <asm/syscall.h> |
12 | |
13 | #include "trace_output.h" |
14 | #include "trace.h" |
15 | |
/*
 * Serializes registration/unregistration of the sys_enter/sys_exit
 * tracepoint probes and the refcounts that track how many users each
 * probe currently has.
 */
static DEFINE_MUTEX(syscall_trace_lock);

/* Forward declarations: ->reg() callbacks for the enter/exit event classes. */
static int syscall_enter_register(struct trace_event_call *event,
				  enum trace_reg type, void *data);
static int syscall_exit_register(struct trace_event_call *event,
				  enum trace_reg type, void *data);
22 | |
23 | static struct list_head * |
24 | syscall_get_enter_fields(struct trace_event_call *call) |
25 | { |
26 | struct syscall_metadata *entry = call->data; |
27 | |
28 | return &entry->enter_fields; |
29 | } |
30 | |
/* Bounds of the build-time syscall metadata section (from the linker script). */
extern struct syscall_metadata *__start_syscalls_metadata[];
extern struct syscall_metadata *__stop_syscalls_metadata[];

/* nr -> metadata map: an xarray when syscall numbers are sparse ... */
static DEFINE_XARRAY(syscalls_metadata_sparse);
/* ... or a flat table (allocated in init_ftrace_syscalls()) otherwise. */
static struct syscall_metadata **syscalls_metadata;
36 | |
#ifndef ARCH_HAS_SYSCALL_MATCH_SYM_NAME
/*
 * Compare two syscall symbol names, skipping the first three characters
 * of each.  Archs that use syscall wrappers may alias the symbol with a
 * "SyS"-style prefix instead of "sys"; comparing only past that prefix
 * avoids a spurious mismatch.
 */
static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
{
	return strcmp(sym + 3, name + 3) == 0;
}
#endif
49 | |
#ifdef ARCH_TRACE_IGNORE_COMPAT_SYSCALLS
/*
 * Some architectures that allow for 32bit applications
 * to run on a 64bit kernel, do not map the syscalls for
 * the 32bit tasks the same as they do for 64bit tasks.
 *
 * *cough*x86*cough*
 *
 * In such a case, instead of reporting the wrong syscalls,
 * simply ignore them.
 *
 * For an arch to ignore the compat syscalls it needs to
 * define ARCH_TRACE_IGNORE_COMPAT_SYSCALLS as well as
 * define the function arch_trace_is_compat_syscall() to let
 * the tracing system know that it should ignore it.
 */

/*
 * Return the syscall number for @task/@regs, or -1 for a compat
 * (32-bit) syscall that the architecture asked us to ignore.
 */
static int
trace_get_syscall_nr(struct task_struct *task, struct pt_regs *regs)
{
	if (unlikely(arch_trace_is_compat_syscall(regs)))
		return -1;

	return syscall_get_nr(task, regs);
}
#else
/* No compat filtering on this arch: report the raw syscall number. */
static inline int
trace_get_syscall_nr(struct task_struct *task, struct pt_regs *regs)
{
	return syscall_get_nr(task, regs);
}
#endif /* ARCH_TRACE_IGNORE_COMPAT_SYSCALLS */
81 | |
82 | static __init struct syscall_metadata * |
83 | find_syscall_meta(unsigned long syscall) |
84 | { |
85 | struct syscall_metadata **start; |
86 | struct syscall_metadata **stop; |
87 | char str[KSYM_SYMBOL_LEN]; |
88 | |
89 | |
90 | start = __start_syscalls_metadata; |
91 | stop = __stop_syscalls_metadata; |
92 | kallsyms_lookup(addr: syscall, NULL, NULL, NULL, namebuf: str); |
93 | |
94 | if (arch_syscall_match_sym_name(sym: str, name: "sys_ni_syscall" )) |
95 | return NULL; |
96 | |
97 | for ( ; start < stop; start++) { |
98 | if ((*start)->name && arch_syscall_match_sym_name(sym: str, name: (*start)->name)) |
99 | return *start; |
100 | } |
101 | return NULL; |
102 | } |
103 | |
104 | static struct syscall_metadata *syscall_nr_to_meta(int nr) |
105 | { |
106 | if (IS_ENABLED(CONFIG_HAVE_SPARSE_SYSCALL_NR)) |
107 | return xa_load(&syscalls_metadata_sparse, index: (unsigned long)nr); |
108 | |
109 | if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) |
110 | return NULL; |
111 | |
112 | return syscalls_metadata[nr]; |
113 | } |
114 | |
115 | const char *get_syscall_name(int syscall) |
116 | { |
117 | struct syscall_metadata *entry; |
118 | |
119 | entry = syscall_nr_to_meta(nr: syscall); |
120 | if (!entry) |
121 | return NULL; |
122 | |
123 | return entry->name; |
124 | } |
125 | |
/*
 * ->trace callback for enter events: render a sys_enter record as
 * "name(arg: value, ...)" into the iterator's trace_seq.  Always ends
 * the line with '\n', even on the bail-out paths.
 */
static enum print_line_t
print_syscall_enter(struct trace_iterator *iter, int flags,
		    struct trace_event *event)
{
	struct trace_array *tr = iter->tr;
	struct trace_seq *s = &iter->seq;
	struct trace_entry *ent = iter->ent;
	struct syscall_trace_enter *trace;
	struct syscall_metadata *entry;
	int i, syscall;

	trace = (typeof(trace))ent;
	syscall = trace->nr;
	entry = syscall_nr_to_meta(nr: syscall);

	/* Unknown syscall nr: emit just the newline. */
	if (!entry)
		goto end;

	/* The recorded event type must match the metadata's enter event. */
	if (entry->enter_event->event.type != ent->type) {
		WARN_ON_ONCE(1);
		goto end;
	}

	trace_seq_printf(s, fmt: "%s(", entry->name);

	for (i = 0; i < entry->nb_args; i++) {

		/* Stop early once the seq buffer has overflowed. */
		if (trace_seq_has_overflowed(s))
			goto end;

		/* parameter types */
		if (tr && tr->trace_flags & TRACE_ITER_VERBOSE)
			trace_seq_printf(s, fmt: "%s ", entry->types[i]);

		/* parameter values */
		trace_seq_printf(s, fmt: "%s: %lx%s", entry->args[i],
				 trace->args[i],
				 i == entry->nb_args - 1 ? "" : ", ");
	}

	trace_seq_putc(s, c: ')');
end:
	trace_seq_putc(s, c: '\n');

	return trace_handle_return(s);
}
172 | |
/*
 * ->trace callback for exit events: render a sys_exit record as
 * "name -> 0x<ret>" into the iterator's trace_seq.
 */
static enum print_line_t
print_syscall_exit(struct trace_iterator *iter, int flags,
		   struct trace_event *event)
{
	struct trace_seq *s = &iter->seq;
	struct trace_entry *ent = iter->ent;
	struct syscall_trace_exit *trace;
	int syscall;
	struct syscall_metadata *entry;

	trace = (typeof(trace))ent;
	syscall = trace->nr;
	entry = syscall_nr_to_meta(nr: syscall);

	/* Unknown syscall nr: emit only the newline. */
	if (!entry) {
		trace_seq_putc(s, c: '\n');
		goto out;
	}

	/* The recorded event type must match the metadata's exit event. */
	if (entry->exit_event->event.type != ent->type) {
		WARN_ON_ONCE(1);
		return TRACE_TYPE_UNHANDLED;
	}

	trace_seq_printf(s, fmt: "%s -> 0x%lx\n", entry->name,
			 trace->ret);

out:
	return trace_handle_return(s);
}
203 | |
/*
 * Build a struct trace_event_fields initializer for one field of the raw
 * syscall record, deriving size/alignment/signedness from @_type.
 */
#define SYSCALL_FIELD(_type, _name) { \
	.type = #_type, .name = #_name, \
	.size = sizeof(_type), .align = __alignof__(_type), \
	.is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER }
208 | |
209 | static int __init |
210 | __set_enter_print_fmt(struct syscall_metadata *entry, char *buf, int len) |
211 | { |
212 | int i; |
213 | int pos = 0; |
214 | |
215 | /* When len=0, we just calculate the needed length */ |
216 | #define LEN_OR_ZERO (len ? len - pos : 0) |
217 | |
218 | pos += snprintf(buf: buf + pos, LEN_OR_ZERO, fmt: "\"" ); |
219 | for (i = 0; i < entry->nb_args; i++) { |
220 | pos += snprintf(buf: buf + pos, LEN_OR_ZERO, fmt: "%s: 0x%%0%zulx%s" , |
221 | entry->args[i], sizeof(unsigned long), |
222 | i == entry->nb_args - 1 ? "" : ", " ); |
223 | } |
224 | pos += snprintf(buf: buf + pos, LEN_OR_ZERO, fmt: "\"" ); |
225 | |
226 | for (i = 0; i < entry->nb_args; i++) { |
227 | pos += snprintf(buf: buf + pos, LEN_OR_ZERO, |
228 | fmt: ", ((unsigned long)(REC->%s))" , entry->args[i]); |
229 | } |
230 | |
231 | #undef LEN_OR_ZERO |
232 | |
233 | /* return the length of print_fmt */ |
234 | return pos; |
235 | } |
236 | |
237 | static int __init set_syscall_print_fmt(struct trace_event_call *call) |
238 | { |
239 | char *print_fmt; |
240 | int len; |
241 | struct syscall_metadata *entry = call->data; |
242 | |
243 | if (entry->enter_event != call) { |
244 | call->print_fmt = "\"0x%lx\", REC->ret" ; |
245 | return 0; |
246 | } |
247 | |
248 | /* First: called with 0 length to calculate the needed length */ |
249 | len = __set_enter_print_fmt(entry, NULL, len: 0); |
250 | |
251 | print_fmt = kmalloc(size: len + 1, GFP_KERNEL); |
252 | if (!print_fmt) |
253 | return -ENOMEM; |
254 | |
255 | /* Second: actually write the @print_fmt */ |
256 | __set_enter_print_fmt(entry, buf: print_fmt, len: len + 1); |
257 | call->print_fmt = print_fmt; |
258 | |
259 | return 0; |
260 | } |
261 | |
262 | static void __init free_syscall_print_fmt(struct trace_event_call *call) |
263 | { |
264 | struct syscall_metadata *entry = call->data; |
265 | |
266 | if (entry->enter_event == call) |
267 | kfree(objp: call->print_fmt); |
268 | } |
269 | |
270 | static int __init syscall_enter_define_fields(struct trace_event_call *call) |
271 | { |
272 | struct syscall_trace_enter trace; |
273 | struct syscall_metadata *meta = call->data; |
274 | int offset = offsetof(typeof(trace), args); |
275 | int ret = 0; |
276 | int i; |
277 | |
278 | for (i = 0; i < meta->nb_args; i++) { |
279 | ret = trace_define_field(call, type: meta->types[i], |
280 | name: meta->args[i], offset, |
281 | size: sizeof(unsigned long), is_signed: 0, |
282 | filter_type: FILTER_OTHER); |
283 | if (ret) |
284 | break; |
285 | offset += sizeof(unsigned long); |
286 | } |
287 | |
288 | return ret; |
289 | } |
290 | |
/*
 * sys_enter tracepoint probe for ftrace: record the syscall nr and
 * arguments into the ring buffer of the trace instance passed as @data.
 * Runs inside the tracepoint's rcu_read_lock_sched section.
 */
static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
{
	struct trace_array *tr = data;
	struct trace_event_file *trace_file;
	struct syscall_trace_enter *entry;
	struct syscall_metadata *sys_data;
	struct trace_event_buffer fbuffer;
	unsigned long args[6];
	int syscall_nr;
	int size;

	/* -1 for ignored compat syscalls; also guard the table bounds. */
	syscall_nr = trace_get_syscall_nr(current, regs);
	if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
		return;

	/* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE) */
	trace_file = rcu_dereference_sched(tr->enter_syscall_files[syscall_nr]);
	if (!trace_file)
		return;

	if (trace_trigger_soft_disabled(file: trace_file))
		return;

	sys_data = syscall_nr_to_meta(nr: syscall_nr);
	if (!sys_data)
		return;

	/* Record is the fixed header plus one unsigned long per argument. */
	size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args;

	entry = trace_event_buffer_reserve(fbuffer: &fbuffer, trace_file, len: size);
	if (!entry)
		return;

	entry = ring_buffer_event_data(event: fbuffer.event);
	entry->nr = syscall_nr;
	syscall_get_arguments(current, regs, args);
	memcpy(entry->args, args, sizeof(unsigned long) * sys_data->nb_args);

	trace_event_buffer_commit(fbuffer: &fbuffer);
}
331 | |
/*
 * sys_exit tracepoint probe for ftrace: record the syscall nr and return
 * value into the ring buffer of the trace instance passed as @data.
 * Runs inside the tracepoint's rcu_read_lock_sched section.
 */
static void ftrace_syscall_exit(void *data, struct pt_regs *regs, long ret)
{
	struct trace_array *tr = data;
	struct trace_event_file *trace_file;
	struct syscall_trace_exit *entry;
	struct syscall_metadata *sys_data;
	struct trace_event_buffer fbuffer;
	int syscall_nr;

	/* -1 for ignored compat syscalls; also guard the table bounds. */
	syscall_nr = trace_get_syscall_nr(current, regs);
	if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
		return;

	/* Here we're inside tp handler's rcu_read_lock_sched (__DO_TRACE()) */
	trace_file = rcu_dereference_sched(tr->exit_syscall_files[syscall_nr]);
	if (!trace_file)
		return;

	if (trace_trigger_soft_disabled(file: trace_file))
		return;

	sys_data = syscall_nr_to_meta(nr: syscall_nr);
	if (!sys_data)
		return;

	entry = trace_event_buffer_reserve(fbuffer: &fbuffer, trace_file, len: sizeof(*entry));
	if (!entry)
		return;

	entry = ring_buffer_event_data(event: fbuffer.event);
	entry->nr = syscall_nr;
	entry->ret = syscall_get_return_value(current, regs);

	trace_event_buffer_commit(fbuffer: &fbuffer);
}
367 | |
/*
 * Enable sys_enter tracing of one syscall for a trace instance: attach
 * the shared tracepoint probe on first use, then publish @file so
 * ftrace_syscall_enter() can see it under RCU.
 * Returns 0 on success or the probe-registration error.
 */
static int reg_event_syscall_enter(struct trace_event_file *file,
				   struct trace_event_call *call)
{
	struct trace_array *tr = file->tr;
	int ret = 0;
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;
	if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
		return -ENOSYS;
	mutex_lock(&syscall_trace_lock);
	/* First enabled syscall in this instance: attach the probe. */
	if (!tr->sys_refcount_enter)
		ret = register_trace_sys_enter(probe: ftrace_syscall_enter, data: tr);
	if (!ret) {
		/* Publish the file pointer for the RCU readers in the probe. */
		rcu_assign_pointer(tr->enter_syscall_files[num], file);
		tr->sys_refcount_enter++;
	}
	mutex_unlock(lock: &syscall_trace_lock);
	return ret;
}
388 | |
/*
 * Disable sys_enter tracing of one syscall for a trace instance: clear
 * the published file pointer, and detach the shared tracepoint probe
 * once the last user is gone.
 */
static void unreg_event_syscall_enter(struct trace_event_file *file,
				      struct trace_event_call *call)
{
	struct trace_array *tr = file->tr;
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;
	if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
		return;
	mutex_lock(&syscall_trace_lock);
	tr->sys_refcount_enter--;
	RCU_INIT_POINTER(tr->enter_syscall_files[num], NULL);
	/* Last user gone: detach the probe from the tracepoint. */
	if (!tr->sys_refcount_enter)
		unregister_trace_sys_enter(probe: ftrace_syscall_enter, data: tr);
	mutex_unlock(lock: &syscall_trace_lock);
}
405 | |
/*
 * Enable sys_exit tracing of one syscall for a trace instance; mirror
 * of reg_event_syscall_enter() for the exit tracepoint.
 * Returns 0 on success or the probe-registration error.
 */
static int reg_event_syscall_exit(struct trace_event_file *file,
				  struct trace_event_call *call)
{
	struct trace_array *tr = file->tr;
	int ret = 0;
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;
	if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
		return -ENOSYS;
	mutex_lock(&syscall_trace_lock);
	/* First enabled syscall in this instance: attach the probe. */
	if (!tr->sys_refcount_exit)
		ret = register_trace_sys_exit(probe: ftrace_syscall_exit, data: tr);
	if (!ret) {
		/* Publish the file pointer for the RCU readers in the probe. */
		rcu_assign_pointer(tr->exit_syscall_files[num], file);
		tr->sys_refcount_exit++;
	}
	mutex_unlock(lock: &syscall_trace_lock);
	return ret;
}
426 | |
/*
 * Disable sys_exit tracing of one syscall for a trace instance; mirror
 * of unreg_event_syscall_enter() for the exit tracepoint.
 */
static void unreg_event_syscall_exit(struct trace_event_file *file,
				     struct trace_event_call *call)
{
	struct trace_array *tr = file->tr;
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;
	if (WARN_ON_ONCE(num < 0 || num >= NR_syscalls))
		return;
	mutex_lock(&syscall_trace_lock);
	tr->sys_refcount_exit--;
	RCU_INIT_POINTER(tr->exit_syscall_files[num], NULL);
	/* Last user gone: detach the probe from the tracepoint. */
	if (!tr->sys_refcount_exit)
		unregister_trace_sys_exit(probe: ftrace_syscall_exit, data: tr);
	mutex_unlock(lock: &syscall_trace_lock);
}
443 | |
444 | static int __init init_syscall_trace(struct trace_event_call *call) |
445 | { |
446 | int id; |
447 | int num; |
448 | |
449 | num = ((struct syscall_metadata *)call->data)->syscall_nr; |
450 | if (num < 0 || num >= NR_syscalls) { |
451 | pr_debug("syscall %s metadata not mapped, disabling ftrace event\n" , |
452 | ((struct syscall_metadata *)call->data)->name); |
453 | return -ENOSYS; |
454 | } |
455 | |
456 | if (set_syscall_print_fmt(call) < 0) |
457 | return -ENOMEM; |
458 | |
459 | id = trace_event_raw_init(call); |
460 | |
461 | if (id < 0) { |
462 | free_syscall_print_fmt(call); |
463 | return id; |
464 | } |
465 | |
466 | return id; |
467 | } |
468 | |
/*
 * Fields of a raw sys_enter record: the fixed __syscall_nr plus
 * per-syscall argument fields defined at runtime by
 * syscall_enter_define_fields().
 */
static struct trace_event_fields __refdata syscall_enter_fields_array[] = {
	SYSCALL_FIELD(int, __syscall_nr),
	{ .type = TRACE_FUNCTION_TYPE,
	  .define_fields = syscall_enter_define_fields },
	{}
};

/* Text-output callbacks for the two event classes. */
struct trace_event_functions enter_syscall_print_funcs = {
	.trace = print_syscall_enter,
};

struct trace_event_functions exit_syscall_print_funcs = {
	.trace = print_syscall_exit,
};

/* Event class shared by every syscalls:sys_enter_* event. */
struct trace_event_class __refdata event_class_syscall_enter = {
	.system = "syscalls",
	.reg = syscall_enter_register,
	.fields_array = syscall_enter_fields_array,
	.get_fields = syscall_get_enter_fields,
	.raw_init = init_syscall_trace,
};

/* Event class shared by every syscalls:sys_exit_* event. */
struct trace_event_class __refdata event_class_syscall_exit = {
	.system = "syscalls",
	.reg = syscall_exit_register,
	.fields_array = (struct trace_event_fields[]){
		SYSCALL_FIELD(int, __syscall_nr),
		SYSCALL_FIELD(long, ret),
		{}
	},
	.fields = LIST_HEAD_INIT(event_class_syscall_exit.fields),
	.raw_init = init_syscall_trace,
};
503 | |
/*
 * Default mapping of syscall nr to handler address; archs with a
 * non-standard table layout override this __weak implementation.
 */
unsigned long __init __weak arch_syscall_addr(int nr)
{
	return (unsigned long)sys_call_table[nr];
}
508 | |
509 | void __init init_ftrace_syscalls(void) |
510 | { |
511 | struct syscall_metadata *meta; |
512 | unsigned long addr; |
513 | int i; |
514 | void *ret; |
515 | |
516 | if (!IS_ENABLED(CONFIG_HAVE_SPARSE_SYSCALL_NR)) { |
517 | syscalls_metadata = kcalloc(NR_syscalls, |
518 | size: sizeof(*syscalls_metadata), |
519 | GFP_KERNEL); |
520 | if (!syscalls_metadata) { |
521 | WARN_ON(1); |
522 | return; |
523 | } |
524 | } |
525 | |
526 | for (i = 0; i < NR_syscalls; i++) { |
527 | addr = arch_syscall_addr(nr: i); |
528 | meta = find_syscall_meta(syscall: addr); |
529 | if (!meta) |
530 | continue; |
531 | |
532 | meta->syscall_nr = i; |
533 | |
534 | if (!IS_ENABLED(CONFIG_HAVE_SPARSE_SYSCALL_NR)) { |
535 | syscalls_metadata[i] = meta; |
536 | } else { |
537 | ret = xa_store(&syscalls_metadata_sparse, index: i, entry: meta, |
538 | GFP_KERNEL); |
539 | WARN(xa_is_err(ret), |
540 | "Syscall memory allocation failed\n" ); |
541 | } |
542 | |
543 | } |
544 | } |
545 | |
#ifdef CONFIG_PERF_EVENTS

/* Per-syscall enable bits and global probe refcounts for perf tracing. */
static DECLARE_BITMAP(enabled_perf_enter_syscalls, NR_syscalls);
static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
static int sys_perf_refcount_enter;
static int sys_perf_refcount_exit;
552 | |
/*
 * Run the BPF programs attached to an enter event on a stack copy of
 * the record.  Returns the trace_call_bpf() verdict (nonzero = keep).
 */
static int perf_call_bpf_enter(struct trace_event_call *call, struct pt_regs *regs,
			       struct syscall_metadata *sys_data,
			       struct syscall_trace_enter *rec)
{
	/* Mirrors the layout BPF programs expect for sys_enter contexts. */
	struct syscall_tp_t {
		struct trace_entry ent;
		int syscall_nr;
		unsigned long args[SYSCALL_DEFINE_MAXARGS];
	} __aligned(8) param;
	int i;

	/* 'ent' must be big enough to smuggle the regs pointer below. */
	BUILD_BUG_ON(sizeof(param.ent) < sizeof(void *));

	/* bpf prog requires 'regs' to be the first member in the ctx (a.k.a. &param) */
	*(struct pt_regs **)&param = regs;
	param.syscall_nr = rec->nr;
	for (i = 0; i < sys_data->nb_args; i++)
		param.args[i] = rec->args[i];
	return trace_call_bpf(call, ctx: &param);
}
573 | |
/*
 * sys_enter tracepoint probe for perf: build a raw record of the syscall
 * nr and arguments, run any attached BPF programs on it, then submit it
 * to the perf buffers of the listening events.
 */
static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
{
	struct syscall_metadata *sys_data;
	struct syscall_trace_enter *rec;
	struct hlist_head *head;
	unsigned long args[6];
	bool valid_prog_array;
	int syscall_nr;
	int rctx;
	int size;

	/* -1 for ignored compat syscalls; also guard the bitmap bounds. */
	syscall_nr = trace_get_syscall_nr(current, regs);
	if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
		return;
	if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
		return;

	sys_data = syscall_nr_to_meta(nr: syscall_nr);
	if (!sys_data)
		return;

	/* Bail early when neither perf events nor BPF programs listen. */
	head = this_cpu_ptr(sys_data->enter_event->perf_events);
	valid_prog_array = bpf_prog_array_valid(call: sys_data->enter_event);
	if (!valid_prog_array && hlist_empty(h: head))
		return;

	/* get the size after alignment with the u32 buffer size field */
	size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
	size = ALIGN(size + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	rec = perf_trace_buf_alloc(size, NULL, rctxp: &rctx);
	if (!rec)
		return;

	rec->nr = syscall_nr;
	syscall_get_arguments(current, regs, args);
	memcpy(&rec->args, args, sizeof(unsigned long) * sys_data->nb_args);

	/* BPF filtered the event out, or nobody else listens: drop it. */
	if ((valid_prog_array &&
	     !perf_call_bpf_enter(call: sys_data->enter_event, regs, sys_data, rec)) ||
	    hlist_empty(h: head)) {
		perf_swevent_put_recursion_context(rctx);
		return;
	}

	perf_trace_buf_submit(raw_data: rec, size, rctx,
			      type: sys_data->enter_event->event.type, count: 1, regs,
			      head, NULL);
}
624 | |
/*
 * Enable perf tracing of one syscall's entry: attach the shared probe on
 * first use and set the syscall's bit in the enable bitmap.
 * Returns 0 on success or the probe-registration error.
 */
static int perf_sysenter_enable(struct trace_event_call *call)
{
	int ret = 0;
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;

	mutex_lock(&syscall_trace_lock);
	/* First perf user overall: attach the probe. */
	if (!sys_perf_refcount_enter)
		ret = register_trace_sys_enter(probe: perf_syscall_enter, NULL);
	if (ret) {
		pr_info("event trace: Could not activate syscall entry trace point");
	} else {
		set_bit(nr: num, addr: enabled_perf_enter_syscalls);
		sys_perf_refcount_enter++;
	}
	mutex_unlock(lock: &syscall_trace_lock);
	return ret;
}
644 | |
/*
 * Disable perf tracing of one syscall's entry: clear its bit and detach
 * the shared probe once the last perf user is gone.
 */
static void perf_sysenter_disable(struct trace_event_call *call)
{
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;

	mutex_lock(&syscall_trace_lock);
	sys_perf_refcount_enter--;
	clear_bit(nr: num, addr: enabled_perf_enter_syscalls);
	/* Last perf user gone: detach the probe. */
	if (!sys_perf_refcount_enter)
		unregister_trace_sys_enter(probe: perf_syscall_enter, NULL);
	mutex_unlock(lock: &syscall_trace_lock);
}
658 | |
/*
 * Run the BPF programs attached to an exit event on a stack copy of the
 * record.  Returns the trace_call_bpf() verdict (nonzero = keep).
 */
static int perf_call_bpf_exit(struct trace_event_call *call, struct pt_regs *regs,
			      struct syscall_trace_exit *rec)
{
	/* Mirrors the layout BPF programs expect for sys_exit contexts. */
	struct syscall_tp_t {
		struct trace_entry ent;
		int syscall_nr;
		unsigned long ret;
	} __aligned(8) param;

	/* bpf prog requires 'regs' to be the first member in the ctx (a.k.a. &param) */
	*(struct pt_regs **)&param = regs;
	param.syscall_nr = rec->nr;
	param.ret = rec->ret;
	return trace_call_bpf(call, ctx: &param);
}
674 | |
/*
 * sys_exit tracepoint probe for perf: build a raw record of the syscall
 * nr and return value, run any attached BPF programs on it, then submit
 * it to the perf buffers of the listening events.
 */
static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
{
	struct syscall_metadata *sys_data;
	struct syscall_trace_exit *rec;
	struct hlist_head *head;
	bool valid_prog_array;
	int syscall_nr;
	int rctx;
	int size;

	/* -1 for ignored compat syscalls; also guard the bitmap bounds. */
	syscall_nr = trace_get_syscall_nr(current, regs);
	if (syscall_nr < 0 || syscall_nr >= NR_syscalls)
		return;
	if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
		return;

	sys_data = syscall_nr_to_meta(nr: syscall_nr);
	if (!sys_data)
		return;

	/* Bail early when neither perf events nor BPF programs listen. */
	head = this_cpu_ptr(sys_data->exit_event->perf_events);
	valid_prog_array = bpf_prog_array_valid(call: sys_data->exit_event);
	if (!valid_prog_array && hlist_empty(h: head))
		return;

	/* We can probably do that at build time */
	size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
	size -= sizeof(u32);

	rec = perf_trace_buf_alloc(size, NULL, rctxp: &rctx);
	if (!rec)
		return;

	rec->nr = syscall_nr;
	rec->ret = syscall_get_return_value(current, regs);

	/* BPF filtered the event out, or nobody else listens: drop it. */
	if ((valid_prog_array &&
	     !perf_call_bpf_exit(call: sys_data->exit_event, regs, rec)) ||
	    hlist_empty(h: head)) {
		perf_swevent_put_recursion_context(rctx);
		return;
	}

	perf_trace_buf_submit(raw_data: rec, size, rctx, type: sys_data->exit_event->event.type,
			      count: 1, regs, head, NULL);
}
721 | |
/*
 * Enable perf tracing of one syscall's exit: attach the shared probe on
 * first use and set the syscall's bit in the enable bitmap.
 * Returns 0 on success or the probe-registration error.
 */
static int perf_sysexit_enable(struct trace_event_call *call)
{
	int ret = 0;
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;

	mutex_lock(&syscall_trace_lock);
	/* First perf user overall: attach the probe. */
	if (!sys_perf_refcount_exit)
		ret = register_trace_sys_exit(probe: perf_syscall_exit, NULL);
	if (ret) {
		pr_info("event trace: Could not activate syscall exit trace point");
	} else {
		set_bit(nr: num, addr: enabled_perf_exit_syscalls);
		sys_perf_refcount_exit++;
	}
	mutex_unlock(lock: &syscall_trace_lock);
	return ret;
}
741 | |
/*
 * Disable perf tracing of one syscall's exit: clear its bit and detach
 * the shared probe once the last perf user is gone.
 */
static void perf_sysexit_disable(struct trace_event_call *call)
{
	int num;

	num = ((struct syscall_metadata *)call->data)->syscall_nr;

	mutex_lock(&syscall_trace_lock);
	sys_perf_refcount_exit--;
	clear_bit(nr: num, addr: enabled_perf_exit_syscalls);
	/* Last perf user gone: detach the probe. */
	if (!sys_perf_refcount_exit)
		unregister_trace_sys_exit(probe: perf_syscall_exit, NULL);
	mutex_unlock(lock: &syscall_trace_lock);
}
755 | |
756 | #endif /* CONFIG_PERF_EVENTS */ |
757 | |
758 | static int syscall_enter_register(struct trace_event_call *event, |
759 | enum trace_reg type, void *data) |
760 | { |
761 | struct trace_event_file *file = data; |
762 | |
763 | switch (type) { |
764 | case TRACE_REG_REGISTER: |
765 | return reg_event_syscall_enter(file, call: event); |
766 | case TRACE_REG_UNREGISTER: |
767 | unreg_event_syscall_enter(file, call: event); |
768 | return 0; |
769 | |
770 | #ifdef CONFIG_PERF_EVENTS |
771 | case TRACE_REG_PERF_REGISTER: |
772 | return perf_sysenter_enable(call: event); |
773 | case TRACE_REG_PERF_UNREGISTER: |
774 | perf_sysenter_disable(call: event); |
775 | return 0; |
776 | case TRACE_REG_PERF_OPEN: |
777 | case TRACE_REG_PERF_CLOSE: |
778 | case TRACE_REG_PERF_ADD: |
779 | case TRACE_REG_PERF_DEL: |
780 | return 0; |
781 | #endif |
782 | } |
783 | return 0; |
784 | } |
785 | |
786 | static int syscall_exit_register(struct trace_event_call *event, |
787 | enum trace_reg type, void *data) |
788 | { |
789 | struct trace_event_file *file = data; |
790 | |
791 | switch (type) { |
792 | case TRACE_REG_REGISTER: |
793 | return reg_event_syscall_exit(file, call: event); |
794 | case TRACE_REG_UNREGISTER: |
795 | unreg_event_syscall_exit(file, call: event); |
796 | return 0; |
797 | |
798 | #ifdef CONFIG_PERF_EVENTS |
799 | case TRACE_REG_PERF_REGISTER: |
800 | return perf_sysexit_enable(call: event); |
801 | case TRACE_REG_PERF_UNREGISTER: |
802 | perf_sysexit_disable(call: event); |
803 | return 0; |
804 | case TRACE_REG_PERF_OPEN: |
805 | case TRACE_REG_PERF_CLOSE: |
806 | case TRACE_REG_PERF_ADD: |
807 | case TRACE_REG_PERF_DEL: |
808 | return 0; |
809 | #endif |
810 | } |
811 | return 0; |
812 | } |
813 | |