1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * event probes |
4 | * |
5 | * Part of this code was copied from kernel/trace/trace_kprobe.c written by |
6 | * Masami Hiramatsu <mhiramat@kernel.org> |
7 | * |
8 | * Copyright (C) 2021, VMware Inc, Steven Rostedt <rostedt@goodmis.org> |
 * Copyright (C) 2021, VMware Inc, Tzvetomir Stoyanov <tz.stoyanov@gmail.com>
10 | * |
11 | */ |
12 | #include <linux/module.h> |
13 | #include <linux/mutex.h> |
14 | #include <linux/ftrace.h> |
15 | |
16 | #include "trace_dynevent.h" |
17 | #include "trace_probe.h" |
18 | #include "trace_probe_tmpl.h" |
19 | #include "trace_probe_kernel.h" |
20 | |
21 | #define EPROBE_EVENT_SYSTEM "eprobes" |
22 | |
/*
 * An event probe ("eprobe"): a dynamic event that attaches to an existing
 * trace event and re-emits selected fields of that event, optionally
 * filtered by @filter_str.
 */
struct trace_eprobe {
	/* tracepoint system */
	const char *event_system;

	/* tracepoint event */
	const char *event_name;

	/* filter string for the tracepoint */
	char *filter_str;

	/* the event this probe is attached to (a reference is held on it) */
	struct trace_event_call *event;

	/* links this probe into the dynamic-event list */
	struct dyn_event devent;
	/* common probe core; must be last (flexible tp.args array) */
	struct trace_probe tp;
};
38 | |
/*
 * Private data handed to the trigger that implements an eprobe:
 * the eprobe's own event file and the eprobe itself.
 */
struct eprobe_data {
	struct trace_event_file *file;
	struct trace_eprobe *ep;
};
43 | |
44 | |
/* Iterate over all eprobes that share the same trace_probe event as @_tp */
#define for_each_trace_eprobe_tp(ep, _tp)	\
	list_for_each_entry(ep, trace_probe_probe_list(_tp), tp.list)
47 | |
48 | static int __trace_eprobe_create(int argc, const char *argv[]); |
49 | |
50 | static void trace_event_probe_cleanup(struct trace_eprobe *ep) |
51 | { |
52 | if (!ep) |
53 | return; |
54 | trace_probe_cleanup(tp: &ep->tp); |
55 | kfree(objp: ep->event_name); |
56 | kfree(objp: ep->event_system); |
57 | if (ep->event) |
58 | trace_event_put_ref(call: ep->event); |
59 | kfree(objp: ep->filter_str); |
60 | kfree(objp: ep); |
61 | } |
62 | |
/* Map a generic dyn_event back to its containing trace_eprobe. */
static struct trace_eprobe *to_trace_eprobe(struct dyn_event *ev)
{
	return container_of(ev, struct trace_eprobe, devent);
}
67 | |
68 | static int eprobe_dyn_event_create(const char *raw_command) |
69 | { |
70 | return trace_probe_create(raw_command, createfn: __trace_eprobe_create); |
71 | } |
72 | |
73 | static int eprobe_dyn_event_show(struct seq_file *m, struct dyn_event *ev) |
74 | { |
75 | struct trace_eprobe *ep = to_trace_eprobe(ev); |
76 | int i; |
77 | |
78 | seq_printf(m, fmt: "e:%s/%s" , trace_probe_group_name(tp: &ep->tp), |
79 | trace_probe_name(tp: &ep->tp)); |
80 | seq_printf(m, fmt: " %s.%s" , ep->event_system, ep->event_name); |
81 | |
82 | for (i = 0; i < ep->tp.nr_args; i++) |
83 | seq_printf(m, fmt: " %s=%s" , ep->tp.args[i].name, ep->tp.args[i].comm); |
84 | seq_putc(m, c: '\n'); |
85 | |
86 | return 0; |
87 | } |
88 | |
89 | static int unregister_trace_eprobe(struct trace_eprobe *ep) |
90 | { |
91 | /* If other probes are on the event, just unregister eprobe */ |
92 | if (trace_probe_has_sibling(tp: &ep->tp)) |
93 | goto unreg; |
94 | |
95 | /* Enabled event can not be unregistered */ |
96 | if (trace_probe_is_enabled(tp: &ep->tp)) |
97 | return -EBUSY; |
98 | |
99 | /* Will fail if probe is being used by ftrace or perf */ |
100 | if (trace_probe_unregister_event_call(tp: &ep->tp)) |
101 | return -EBUSY; |
102 | |
103 | unreg: |
104 | dyn_event_remove(ev: &ep->devent); |
105 | trace_probe_unlink(tp: &ep->tp); |
106 | |
107 | return 0; |
108 | } |
109 | |
/* dyn_event "free" callback: unregister, then free on success. */
static int eprobe_dyn_event_release(struct dyn_event *ev)
{
	struct trace_eprobe *ep = to_trace_eprobe(ev);
	int err;

	err = unregister_trace_eprobe(ep);
	if (err == 0)
		trace_event_probe_cleanup(ep);

	return err;
}
119 | |
120 | static bool eprobe_dyn_event_is_busy(struct dyn_event *ev) |
121 | { |
122 | struct trace_eprobe *ep = to_trace_eprobe(ev); |
123 | |
124 | return trace_probe_is_enabled(tp: &ep->tp); |
125 | } |
126 | |
127 | static bool eprobe_dyn_event_match(const char *system, const char *event, |
128 | int argc, const char **argv, struct dyn_event *ev) |
129 | { |
130 | struct trace_eprobe *ep = to_trace_eprobe(ev); |
131 | const char *slash; |
132 | |
133 | /* |
134 | * We match the following: |
135 | * event only - match all eprobes with event name |
136 | * system and event only - match all system/event probes |
137 | * system only - match all system probes |
138 | * |
139 | * The below has the above satisfied with more arguments: |
140 | * |
141 | * attached system/event - If the arg has the system and event |
142 | * the probe is attached to, match |
143 | * probes with the attachment. |
144 | * |
145 | * If any more args are given, then it requires a full match. |
146 | */ |
147 | |
148 | /* |
149 | * If system exists, but this probe is not part of that system |
150 | * do not match. |
151 | */ |
152 | if (system && strcmp(trace_probe_group_name(tp: &ep->tp), system) != 0) |
153 | return false; |
154 | |
155 | /* Must match the event name */ |
156 | if (event[0] != '\0' && strcmp(trace_probe_name(tp: &ep->tp), event) != 0) |
157 | return false; |
158 | |
159 | /* No arguments match all */ |
160 | if (argc < 1) |
161 | return true; |
162 | |
163 | /* First argument is the system/event the probe is attached to */ |
164 | |
165 | slash = strchr(argv[0], '/'); |
166 | if (!slash) |
167 | slash = strchr(argv[0], '.'); |
168 | if (!slash) |
169 | return false; |
170 | |
171 | if (strncmp(ep->event_system, argv[0], slash - argv[0])) |
172 | return false; |
173 | if (strcmp(ep->event_name, slash + 1)) |
174 | return false; |
175 | |
176 | argc--; |
177 | argv++; |
178 | |
179 | /* If there are no other args, then match */ |
180 | if (argc < 1) |
181 | return true; |
182 | |
183 | return trace_probe_match_command_args(tp: &ep->tp, argc, argv); |
184 | } |
185 | |
/* Operations registered with the dynamic-event core for eprobes. */
static struct dyn_event_operations eprobe_dyn_event_ops = {
	.create = eprobe_dyn_event_create,
	.show = eprobe_dyn_event_show,
	.is_busy = eprobe_dyn_event_is_busy,
	.free = eprobe_dyn_event_release,
	.match = eprobe_dyn_event_match,
};
193 | |
194 | static struct trace_eprobe *alloc_event_probe(const char *group, |
195 | const char *this_event, |
196 | struct trace_event_call *event, |
197 | int nargs) |
198 | { |
199 | struct trace_eprobe *ep; |
200 | const char *event_name; |
201 | const char *sys_name; |
202 | int ret = -ENOMEM; |
203 | |
204 | if (!event) |
205 | return ERR_PTR(error: -ENODEV); |
206 | |
207 | sys_name = event->class->system; |
208 | event_name = trace_event_name(call: event); |
209 | |
210 | ep = kzalloc(struct_size(ep, tp.args, nargs), GFP_KERNEL); |
211 | if (!ep) { |
212 | trace_event_put_ref(call: event); |
213 | goto error; |
214 | } |
215 | ep->event = event; |
216 | ep->event_name = kstrdup(s: event_name, GFP_KERNEL); |
217 | if (!ep->event_name) |
218 | goto error; |
219 | ep->event_system = kstrdup(s: sys_name, GFP_KERNEL); |
220 | if (!ep->event_system) |
221 | goto error; |
222 | |
223 | ret = trace_probe_init(tp: &ep->tp, event: this_event, group, alloc_filter: false); |
224 | if (ret < 0) |
225 | goto error; |
226 | |
227 | dyn_event_init(ev: &ep->devent, ops: &eprobe_dyn_event_ops); |
228 | return ep; |
229 | error: |
230 | trace_event_probe_cleanup(ep); |
231 | return ERR_PTR(error: ret); |
232 | } |
233 | |
234 | static int eprobe_event_define_fields(struct trace_event_call *event_call) |
235 | { |
236 | struct eprobe_trace_entry_head field; |
237 | struct trace_probe *tp; |
238 | |
239 | tp = trace_probe_primary_from_call(call: event_call); |
240 | if (WARN_ON_ONCE(!tp)) |
241 | return -ENOENT; |
242 | |
243 | return traceprobe_define_arg_fields(event_call, offset: sizeof(field), tp); |
244 | } |
245 | |
/* Fields are defined dynamically per-probe via eprobe_event_define_fields(). */
static struct trace_event_fields eprobe_fields_array[] = {
	{ .type = TRACE_FUNCTION_TYPE,
	  .define_fields = eprobe_event_define_fields },
	{}
};
251 | |
252 | /* Event entry printers */ |
253 | static enum print_line_t |
254 | print_eprobe_event(struct trace_iterator *iter, int flags, |
255 | struct trace_event *event) |
256 | { |
257 | struct eprobe_trace_entry_head *field; |
258 | struct trace_event_call *pevent; |
259 | struct trace_event *probed_event; |
260 | struct trace_seq *s = &iter->seq; |
261 | struct trace_eprobe *ep; |
262 | struct trace_probe *tp; |
263 | unsigned int type; |
264 | |
265 | field = (struct eprobe_trace_entry_head *)iter->ent; |
266 | tp = trace_probe_primary_from_call( |
267 | container_of(event, struct trace_event_call, event)); |
268 | if (WARN_ON_ONCE(!tp)) |
269 | goto out; |
270 | |
271 | ep = container_of(tp, struct trace_eprobe, tp); |
272 | type = ep->event->event.type; |
273 | |
274 | trace_seq_printf(s, fmt: "%s: (" , trace_probe_name(tp)); |
275 | |
276 | probed_event = ftrace_find_event(type); |
277 | if (probed_event) { |
278 | pevent = container_of(probed_event, struct trace_event_call, event); |
279 | trace_seq_printf(s, fmt: "%s.%s" , pevent->class->system, |
280 | trace_event_name(call: pevent)); |
281 | } else { |
282 | trace_seq_printf(s, fmt: "%u" , type); |
283 | } |
284 | |
285 | trace_seq_putc(s, c: ')'); |
286 | |
287 | if (trace_probe_print_args(s, args: tp->args, nr_args: tp->nr_args, |
288 | data: (u8 *)&field[1], field) < 0) |
289 | goto out; |
290 | |
291 | trace_seq_putc(s, c: '\n'); |
292 | out: |
293 | return trace_handle_return(s); |
294 | } |
295 | |
/*
 * Read one field of the attached event's record @rec as an unsigned long,
 * based on the ftrace_event_field stashed in @code->data.
 *
 * String fields return the address of the string data (resolved per the
 * field's filter_type); numeric fields are sign- or zero-extended
 * according to field->is_signed and field->size.
 */
static nokprobe_inline unsigned long
get_event_field(struct fetch_insn *code, void *rec)
{
	struct ftrace_event_field *field = code->data;
	unsigned long val;
	void *addr;

	addr = rec + field->offset;

	if (is_string_field(field)) {
		switch (field->filter_type) {
		case FILTER_DYN_STRING:
			/* low 16 bits of the __data_loc word are the offset from @rec */
			val = (unsigned long)(rec + (*(unsigned int *)addr & 0xffff));
			break;
		case FILTER_RDYN_STRING:
			/* __rel_loc: offset is relative to the field itself */
			val = (unsigned long)(addr + (*(unsigned int *)addr & 0xffff));
			break;
		case FILTER_STATIC_STRING:
			/* string is stored inline in the record */
			val = (unsigned long)addr;
			break;
		case FILTER_PTR_STRING:
			val = (unsigned long)(*(char *)addr);
			break;
		default:
			WARN_ON_ONCE(1);
			return 0;
		}
		return val;
	}

	/* Numeric field: widen according to declared size and signedness */
	switch (field->size) {
	case 1:
		if (field->is_signed)
			val = *(char *)addr;
		else
			val = *(unsigned char *)addr;
		break;
	case 2:
		if (field->is_signed)
			val = *(short *)addr;
		else
			val = *(unsigned short *)addr;
		break;
	case 4:
		if (field->is_signed)
			val = *(int *)addr;
		else
			val = *(unsigned int *)addr;
		break;
	default:
		if (field->is_signed)
			val = *(long *)addr;
		else
			val = *(unsigned long *)addr;
		break;
	}
	return val;
}
354 | |
355 | static int get_eprobe_size(struct trace_probe *tp, void *rec) |
356 | { |
357 | struct fetch_insn *code; |
358 | struct probe_arg *arg; |
359 | int i, len, ret = 0; |
360 | |
361 | for (i = 0; i < tp->nr_args; i++) { |
362 | arg = tp->args + i; |
363 | if (arg->dynamic) { |
364 | unsigned long val; |
365 | |
366 | code = arg->code; |
367 | retry: |
368 | switch (code->op) { |
369 | case FETCH_OP_TP_ARG: |
370 | val = get_event_field(code, rec); |
371 | break; |
372 | case FETCH_NOP_SYMBOL: /* Ignore a place holder */ |
373 | code++; |
374 | goto retry; |
375 | default: |
376 | if (process_common_fetch_insn(code, val: &val) < 0) |
377 | continue; |
378 | } |
379 | code++; |
380 | len = process_fetch_insn_bottom(code, val, NULL, NULL); |
381 | if (len > 0) |
382 | ret += len; |
383 | } |
384 | } |
385 | |
386 | return ret; |
387 | } |
388 | |
389 | /* Kprobe specific fetch functions */ |
390 | |
391 | /* Note that we don't verify it, since the code does not come from user space */ |
392 | static int |
393 | process_fetch_insn(struct fetch_insn *code, void *rec, void *dest, |
394 | void *base) |
395 | { |
396 | unsigned long val; |
397 | int ret; |
398 | |
399 | retry: |
400 | switch (code->op) { |
401 | case FETCH_OP_TP_ARG: |
402 | val = get_event_field(code, rec); |
403 | break; |
404 | case FETCH_NOP_SYMBOL: /* Ignore a place holder */ |
405 | code++; |
406 | goto retry; |
407 | default: |
408 | ret = process_common_fetch_insn(code, val: &val); |
409 | if (ret < 0) |
410 | return ret; |
411 | } |
412 | code++; |
413 | return process_fetch_insn_bottom(code, val, dest, base); |
414 | } |
415 | NOKPROBE_SYMBOL(process_fetch_insn) |
416 | |
417 | /* eprobe handler */ |
418 | static inline void |
419 | __eprobe_trace_func(struct eprobe_data *edata, void *rec) |
420 | { |
421 | struct eprobe_trace_entry_head *entry; |
422 | struct trace_event_call *call = trace_probe_event_call(tp: &edata->ep->tp); |
423 | struct trace_event_buffer fbuffer; |
424 | int dsize; |
425 | |
426 | if (WARN_ON_ONCE(call != edata->file->event_call)) |
427 | return; |
428 | |
429 | if (trace_trigger_soft_disabled(file: edata->file)) |
430 | return; |
431 | |
432 | dsize = get_eprobe_size(tp: &edata->ep->tp, rec); |
433 | |
434 | entry = trace_event_buffer_reserve(fbuffer: &fbuffer, trace_file: edata->file, |
435 | len: sizeof(*entry) + edata->ep->tp.size + dsize); |
436 | |
437 | if (!entry) |
438 | return; |
439 | |
440 | entry = fbuffer.entry = ring_buffer_event_data(event: fbuffer.event); |
441 | store_trace_args(data: &entry[1], tp: &edata->ep->tp, rec, header_size: sizeof(*entry), maxlen: dsize); |
442 | |
443 | trace_event_buffer_commit(fbuffer: &fbuffer); |
444 | } |
445 | |
446 | /* |
447 | * The event probe implementation uses event triggers to get access to |
448 | * the event it is attached to, but is not an actual trigger. The below |
449 | * functions are just stubs to fulfill what is needed to use the trigger |
450 | * infrastructure. |
451 | */ |
/* Trigger-ops stub: eprobes manage their own lifetime, nothing to init. */
static int eprobe_trigger_init(struct event_trigger_data *data)
{
	return 0;
}
456 | |
/* Trigger-ops stub: freeing is handled by disable_eprobe(), not here. */
static void eprobe_trigger_free(struct event_trigger_data *data)
{

}
461 | |
static int eprobe_trigger_print(struct seq_file *m,
				struct event_trigger_data *data)
{
	/* Do not print eprobe event triggers */
	return 0;
}
468 | |
469 | static void eprobe_trigger_func(struct event_trigger_data *data, |
470 | struct trace_buffer *buffer, void *rec, |
471 | struct ring_buffer_event *rbe) |
472 | { |
473 | struct eprobe_data *edata = data->private_data; |
474 | |
475 | if (unlikely(!rec)) |
476 | return; |
477 | |
478 | __eprobe_trace_func(edata, rec); |
479 | } |
480 | |
/* Trigger ops used by eprobes; only .trigger does real work. */
static struct event_trigger_ops eprobe_trigger_ops = {
	.trigger		= eprobe_trigger_func,
	.print			= eprobe_trigger_print,
	.init			= eprobe_trigger_init,
	.free			= eprobe_trigger_free,
};
487 | |
/* Never reached: eprobe triggers cannot be created from the trigger file. */
static int eprobe_trigger_cmd_parse(struct event_command *cmd_ops,
				    struct trace_event_file *file,
				    char *glob, char *cmd,
				    char *param_and_filter)
{
	return -1;
}
495 | |
/* Never reached: eprobe triggers are registered via enable_eprobe(). */
static int eprobe_trigger_reg_func(char *glob,
				   struct event_trigger_data *data,
				   struct trace_event_file *file)
{
	return -1;
}
502 | |
/* Never reached: eprobe triggers are unregistered via disable_eprobe(). */
static void eprobe_trigger_unreg_func(char *glob,
				      struct event_trigger_data *data,
				      struct trace_event_file *file)
{

}
509 | |
/* Trigger-ops selector: eprobes always use the one eprobe_trigger_ops. */
static struct event_trigger_ops *eprobe_trigger_get_ops(char *cmd,
							char *param)
{
	return &eprobe_trigger_ops;
}
515 | |
/*
 * Command descriptor for the eprobe pseudo-trigger. Not registered with
 * register_event_command(); only used so the trigger infrastructure has
 * valid cmd_ops (see new_eprobe_trigger()).
 */
static struct event_command event_trigger_cmd = {
	.name			= "eprobe",
	.trigger_type		= ETT_EVENT_EPROBE,
	.flags			= EVENT_CMD_FL_NEEDS_REC,
	.parse			= eprobe_trigger_cmd_parse,
	.reg			= eprobe_trigger_reg_func,
	.unreg			= eprobe_trigger_unreg_func,
	.unreg_all		= NULL,
	.get_trigger_ops	= eprobe_trigger_get_ops,
	.set_filter		= NULL,
};
527 | |
528 | static struct event_trigger_data * |
529 | new_eprobe_trigger(struct trace_eprobe *ep, struct trace_event_file *file) |
530 | { |
531 | struct event_trigger_data *trigger; |
532 | struct event_filter *filter = NULL; |
533 | struct eprobe_data *edata; |
534 | int ret; |
535 | |
536 | edata = kzalloc(size: sizeof(*edata), GFP_KERNEL); |
537 | trigger = kzalloc(size: sizeof(*trigger), GFP_KERNEL); |
538 | if (!trigger || !edata) { |
539 | ret = -ENOMEM; |
540 | goto error; |
541 | } |
542 | |
543 | trigger->flags = EVENT_TRIGGER_FL_PROBE; |
544 | trigger->count = -1; |
545 | trigger->ops = &eprobe_trigger_ops; |
546 | |
547 | /* |
548 | * EVENT PROBE triggers are not registered as commands with |
549 | * register_event_command(), as they are not controlled by the user |
550 | * from the trigger file |
551 | */ |
552 | trigger->cmd_ops = &event_trigger_cmd; |
553 | |
554 | INIT_LIST_HEAD(list: &trigger->list); |
555 | |
556 | if (ep->filter_str) { |
557 | ret = create_event_filter(tr: file->tr, call: ep->event, |
558 | filter_str: ep->filter_str, set_str: false, filterp: &filter); |
559 | if (ret) |
560 | goto error; |
561 | } |
562 | RCU_INIT_POINTER(trigger->filter, filter); |
563 | |
564 | edata->file = file; |
565 | edata->ep = ep; |
566 | trigger->private_data = edata; |
567 | |
568 | return trigger; |
569 | error: |
570 | free_event_filter(filter); |
571 | kfree(objp: edata); |
572 | kfree(objp: trigger); |
573 | return ERR_PTR(error: ret); |
574 | } |
575 | |
576 | static int enable_eprobe(struct trace_eprobe *ep, |
577 | struct trace_event_file *eprobe_file) |
578 | { |
579 | struct event_trigger_data *trigger; |
580 | struct trace_event_file *file; |
581 | struct trace_array *tr = eprobe_file->tr; |
582 | |
583 | file = find_event_file(tr, system: ep->event_system, event: ep->event_name); |
584 | if (!file) |
585 | return -ENOENT; |
586 | trigger = new_eprobe_trigger(ep, file: eprobe_file); |
587 | if (IS_ERR(ptr: trigger)) |
588 | return PTR_ERR(ptr: trigger); |
589 | |
590 | list_add_tail_rcu(new: &trigger->list, head: &file->triggers); |
591 | |
592 | trace_event_trigger_enable_disable(file, trigger_enable: 1); |
593 | update_cond_flag(file); |
594 | |
595 | return 0; |
596 | } |
597 | |
/* Output callbacks for eprobe entries (see print_eprobe_event()). */
static struct trace_event_functions eprobe_funcs = {
	.trace		= print_eprobe_event
};
601 | |
602 | static int disable_eprobe(struct trace_eprobe *ep, |
603 | struct trace_array *tr) |
604 | { |
605 | struct event_trigger_data *trigger = NULL, *iter; |
606 | struct trace_event_file *file; |
607 | struct event_filter *filter; |
608 | struct eprobe_data *edata; |
609 | |
610 | file = find_event_file(tr, system: ep->event_system, event: ep->event_name); |
611 | if (!file) |
612 | return -ENOENT; |
613 | |
614 | list_for_each_entry(iter, &file->triggers, list) { |
615 | if (!(iter->flags & EVENT_TRIGGER_FL_PROBE)) |
616 | continue; |
617 | edata = iter->private_data; |
618 | if (edata->ep == ep) { |
619 | trigger = iter; |
620 | break; |
621 | } |
622 | } |
623 | if (!trigger) |
624 | return -ENODEV; |
625 | |
626 | list_del_rcu(entry: &trigger->list); |
627 | |
628 | trace_event_trigger_enable_disable(file, trigger_enable: 0); |
629 | update_cond_flag(file); |
630 | |
631 | /* Make sure nothing is using the edata or trigger */ |
632 | tracepoint_synchronize_unregister(); |
633 | |
634 | filter = rcu_access_pointer(trigger->filter); |
635 | |
636 | if (filter) |
637 | free_event_filter(filter); |
638 | kfree(objp: edata); |
639 | kfree(objp: trigger); |
640 | |
641 | return 0; |
642 | } |
643 | |
644 | static int enable_trace_eprobe(struct trace_event_call *call, |
645 | struct trace_event_file *file) |
646 | { |
647 | struct trace_probe *tp; |
648 | struct trace_eprobe *ep; |
649 | bool enabled; |
650 | int ret = 0; |
651 | int cnt = 0; |
652 | |
653 | tp = trace_probe_primary_from_call(call); |
654 | if (WARN_ON_ONCE(!tp)) |
655 | return -ENODEV; |
656 | enabled = trace_probe_is_enabled(tp); |
657 | |
658 | /* This also changes "enabled" state */ |
659 | if (file) { |
660 | ret = trace_probe_add_file(tp, file); |
661 | if (ret) |
662 | return ret; |
663 | } else |
664 | trace_probe_set_flag(tp, TP_FLAG_PROFILE); |
665 | |
666 | if (enabled) |
667 | return 0; |
668 | |
669 | for_each_trace_eprobe_tp(ep, tp) { |
670 | ret = enable_eprobe(ep, eprobe_file: file); |
671 | if (ret) |
672 | break; |
673 | enabled = true; |
674 | cnt++; |
675 | } |
676 | |
677 | if (ret) { |
678 | /* Failed to enable one of them. Roll back all */ |
679 | if (enabled) { |
680 | /* |
681 | * It's a bug if one failed for something other than memory |
682 | * not being available but another eprobe succeeded. |
683 | */ |
684 | WARN_ON_ONCE(ret != -ENOMEM); |
685 | |
686 | for_each_trace_eprobe_tp(ep, tp) { |
687 | disable_eprobe(ep, tr: file->tr); |
688 | if (!--cnt) |
689 | break; |
690 | } |
691 | } |
692 | if (file) |
693 | trace_probe_remove_file(tp, file); |
694 | else |
695 | trace_probe_clear_flag(tp, TP_FLAG_PROFILE); |
696 | } |
697 | |
698 | return ret; |
699 | } |
700 | |
701 | static int disable_trace_eprobe(struct trace_event_call *call, |
702 | struct trace_event_file *file) |
703 | { |
704 | struct trace_probe *tp; |
705 | struct trace_eprobe *ep; |
706 | |
707 | tp = trace_probe_primary_from_call(call); |
708 | if (WARN_ON_ONCE(!tp)) |
709 | return -ENODEV; |
710 | |
711 | if (file) { |
712 | if (!trace_probe_get_file_link(tp, file)) |
713 | return -ENOENT; |
714 | if (!trace_probe_has_single_file(tp)) |
715 | goto out; |
716 | trace_probe_clear_flag(tp, TP_FLAG_TRACE); |
717 | } else |
718 | trace_probe_clear_flag(tp, TP_FLAG_PROFILE); |
719 | |
720 | if (!trace_probe_is_enabled(tp)) { |
721 | for_each_trace_eprobe_tp(ep, tp) |
722 | disable_eprobe(ep, tr: file->tr); |
723 | } |
724 | |
725 | out: |
726 | if (file) |
727 | /* |
728 | * Synchronization is done in below function. For perf event, |
729 | * file == NULL and perf_trace_event_unreg() calls |
730 | * tracepoint_synchronize_unregister() to ensure synchronize |
731 | * event. We don't need to care about it. |
732 | */ |
733 | trace_probe_remove_file(tp, file); |
734 | |
735 | return 0; |
736 | } |
737 | |
738 | static int eprobe_register(struct trace_event_call *event, |
739 | enum trace_reg type, void *data) |
740 | { |
741 | struct trace_event_file *file = data; |
742 | |
743 | switch (type) { |
744 | case TRACE_REG_REGISTER: |
745 | return enable_trace_eprobe(call: event, file); |
746 | case TRACE_REG_UNREGISTER: |
747 | return disable_trace_eprobe(call: event, file); |
748 | #ifdef CONFIG_PERF_EVENTS |
749 | case TRACE_REG_PERF_REGISTER: |
750 | case TRACE_REG_PERF_UNREGISTER: |
751 | case TRACE_REG_PERF_OPEN: |
752 | case TRACE_REG_PERF_CLOSE: |
753 | case TRACE_REG_PERF_ADD: |
754 | case TRACE_REG_PERF_DEL: |
755 | return 0; |
756 | #endif |
757 | } |
758 | return 0; |
759 | } |
760 | |
761 | static inline void init_trace_eprobe_call(struct trace_eprobe *ep) |
762 | { |
763 | struct trace_event_call *call = trace_probe_event_call(tp: &ep->tp); |
764 | |
765 | call->flags = TRACE_EVENT_FL_EPROBE; |
766 | call->event.funcs = &eprobe_funcs; |
767 | call->class->fields_array = eprobe_fields_array; |
768 | call->class->reg = eprobe_register; |
769 | } |
770 | |
771 | static struct trace_event_call * |
772 | find_and_get_event(const char *system, const char *event_name) |
773 | { |
774 | struct trace_event_call *tp_event; |
775 | const char *name; |
776 | |
777 | list_for_each_entry(tp_event, &ftrace_events, list) { |
778 | /* Skip other probes and ftrace events */ |
779 | if (tp_event->flags & |
780 | (TRACE_EVENT_FL_IGNORE_ENABLE | |
781 | TRACE_EVENT_FL_KPROBE | |
782 | TRACE_EVENT_FL_UPROBE | |
783 | TRACE_EVENT_FL_EPROBE)) |
784 | continue; |
785 | if (!tp_event->class->system || |
786 | strcmp(system, tp_event->class->system)) |
787 | continue; |
788 | name = trace_event_name(call: tp_event); |
789 | if (!name || strcmp(event_name, name)) |
790 | continue; |
791 | if (!trace_event_try_get_ref(call: tp_event)) |
792 | return NULL; |
793 | return tp_event; |
794 | } |
795 | return NULL; |
796 | } |
797 | |
798 | static int trace_eprobe_tp_update_arg(struct trace_eprobe *ep, const char *argv[], int i) |
799 | { |
800 | struct traceprobe_parse_context ctx = { |
801 | .event = ep->event, |
802 | .flags = TPARG_FL_KERNEL | TPARG_FL_TEVENT, |
803 | }; |
804 | int ret; |
805 | |
806 | ret = traceprobe_parse_probe_arg(tp: &ep->tp, i, argv: argv[i], ctx: &ctx); |
807 | /* Handle symbols "@" */ |
808 | if (!ret) |
809 | ret = traceprobe_update_arg(arg: &ep->tp.args[i]); |
810 | |
811 | traceprobe_finish_parse(ctx: &ctx); |
812 | return ret; |
813 | } |
814 | |
815 | static int trace_eprobe_parse_filter(struct trace_eprobe *ep, int argc, const char *argv[]) |
816 | { |
817 | struct event_filter *dummy = NULL; |
818 | int i, ret, len = 0; |
819 | char *p; |
820 | |
821 | if (argc == 0) { |
822 | trace_probe_log_err(0, NO_EP_FILTER); |
823 | return -EINVAL; |
824 | } |
825 | |
826 | /* Recover the filter string */ |
827 | for (i = 0; i < argc; i++) |
828 | len += strlen(argv[i]) + 1; |
829 | |
830 | ep->filter_str = kzalloc(size: len, GFP_KERNEL); |
831 | if (!ep->filter_str) |
832 | return -ENOMEM; |
833 | |
834 | p = ep->filter_str; |
835 | for (i = 0; i < argc; i++) { |
836 | if (i) |
837 | ret = snprintf(buf: p, size: len, fmt: " %s" , argv[i]); |
838 | else |
839 | ret = snprintf(buf: p, size: len, fmt: "%s" , argv[i]); |
840 | p += ret; |
841 | len -= ret; |
842 | } |
843 | |
844 | /* |
845 | * Ensure the filter string can be parsed correctly. Note, this |
846 | * filter string is for the original event, not for the eprobe. |
847 | */ |
848 | ret = create_event_filter(tr: top_trace_array(), call: ep->event, filter_str: ep->filter_str, |
849 | set_str: true, filterp: &dummy); |
850 | free_event_filter(filter: dummy); |
851 | if (ret) |
852 | goto error; |
853 | |
854 | return 0; |
855 | error: |
856 | kfree(objp: ep->filter_str); |
857 | ep->filter_str = NULL; |
858 | return ret; |
859 | } |
860 | |
861 | static int __trace_eprobe_create(int argc, const char *argv[]) |
862 | { |
863 | /* |
864 | * Argument syntax: |
865 | * e[:[GRP/][ENAME]] SYSTEM.EVENT [FETCHARGS] [if FILTER] |
866 | * Fetch args (no space): |
867 | * <name>=$<field>[:TYPE] |
868 | */ |
869 | const char *event = NULL, *group = EPROBE_EVENT_SYSTEM; |
870 | const char *sys_event = NULL, *sys_name = NULL; |
871 | struct trace_event_call *event_call; |
872 | struct trace_eprobe *ep = NULL; |
873 | char buf1[MAX_EVENT_NAME_LEN]; |
874 | char buf2[MAX_EVENT_NAME_LEN]; |
875 | char gbuf[MAX_EVENT_NAME_LEN]; |
876 | int ret = 0, filter_idx = 0; |
877 | int i, filter_cnt; |
878 | |
879 | if (argc < 2 || argv[0][0] != 'e') |
880 | return -ECANCELED; |
881 | |
882 | trace_probe_log_init(subsystem: "event_probe" , argc, argv); |
883 | |
884 | event = strchr(&argv[0][1], ':'); |
885 | if (event) { |
886 | event++; |
887 | ret = traceprobe_parse_event_name(pevent: &event, pgroup: &group, buf: gbuf, |
888 | offset: event - argv[0]); |
889 | if (ret) |
890 | goto parse_error; |
891 | } |
892 | |
893 | trace_probe_log_set_index(index: 1); |
894 | sys_event = argv[1]; |
895 | ret = traceprobe_parse_event_name(pevent: &sys_event, pgroup: &sys_name, buf: buf2, offset: 0); |
896 | if (ret || !sys_event || !sys_name) { |
897 | trace_probe_log_err(0, NO_EVENT_INFO); |
898 | goto parse_error; |
899 | } |
900 | |
901 | if (!event) { |
902 | strscpy(p: buf1, q: sys_event, MAX_EVENT_NAME_LEN); |
903 | event = buf1; |
904 | } |
905 | |
906 | for (i = 2; i < argc; i++) { |
907 | if (!strcmp(argv[i], "if" )) { |
908 | filter_idx = i + 1; |
909 | filter_cnt = argc - filter_idx; |
910 | argc = i; |
911 | break; |
912 | } |
913 | } |
914 | |
915 | mutex_lock(&event_mutex); |
916 | event_call = find_and_get_event(system: sys_name, event_name: sys_event); |
917 | ep = alloc_event_probe(group, this_event: event, event: event_call, nargs: argc - 2); |
918 | mutex_unlock(lock: &event_mutex); |
919 | |
920 | if (IS_ERR(ptr: ep)) { |
921 | ret = PTR_ERR(ptr: ep); |
922 | if (ret == -ENODEV) |
923 | trace_probe_log_err(0, BAD_ATTACH_EVENT); |
924 | /* This must return -ENOMEM or missing event, else there is a bug */ |
925 | WARN_ON_ONCE(ret != -ENOMEM && ret != -ENODEV); |
926 | ep = NULL; |
927 | goto error; |
928 | } |
929 | |
930 | if (filter_idx) { |
931 | trace_probe_log_set_index(index: filter_idx); |
932 | ret = trace_eprobe_parse_filter(ep, argc: filter_cnt, argv: argv + filter_idx); |
933 | if (ret) |
934 | goto parse_error; |
935 | } else |
936 | ep->filter_str = NULL; |
937 | |
938 | argc -= 2; argv += 2; |
939 | /* parse arguments */ |
940 | for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) { |
941 | trace_probe_log_set_index(index: i + 2); |
942 | ret = trace_eprobe_tp_update_arg(ep, argv, i); |
943 | if (ret) |
944 | goto error; |
945 | } |
946 | ret = traceprobe_set_print_fmt(tp: &ep->tp, ptype: PROBE_PRINT_EVENT); |
947 | if (ret < 0) |
948 | goto error; |
949 | init_trace_eprobe_call(ep); |
950 | mutex_lock(&event_mutex); |
951 | ret = trace_probe_register_event_call(tp: &ep->tp); |
952 | if (ret) { |
953 | if (ret == -EEXIST) { |
954 | trace_probe_log_set_index(index: 0); |
955 | trace_probe_log_err(0, EVENT_EXIST); |
956 | } |
957 | mutex_unlock(lock: &event_mutex); |
958 | goto error; |
959 | } |
960 | ret = dyn_event_add(ev: &ep->devent, call: &ep->tp.event->call); |
961 | mutex_unlock(lock: &event_mutex); |
962 | return ret; |
963 | parse_error: |
964 | ret = -EINVAL; |
965 | error: |
966 | trace_event_probe_cleanup(ep); |
967 | return ret; |
968 | } |
969 | |
970 | /* |
971 | * Register dynevent at core_initcall. This allows kernel to setup eprobe |
972 | * events in postcore_initcall without tracefs. |
973 | */ |
974 | static __init int trace_events_eprobe_init_early(void) |
975 | { |
976 | int err = 0; |
977 | |
978 | err = dyn_event_register(ops: &eprobe_dyn_event_ops); |
979 | if (err) |
980 | pr_warn("Could not register eprobe_dyn_event_ops\n" ); |
981 | |
982 | return err; |
983 | } |
984 | core_initcall(trace_events_eprobe_init_early); |
985 | |