1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * ring buffer based function tracer |
4 | * |
5 | * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com> |
6 | * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com> |
7 | * |
8 | * Originally taken from the RT patch by: |
9 | * Arnaldo Carvalho de Melo <acme@redhat.com> |
10 | * |
11 | * Based on code from the latency_tracer, that is: |
12 | * Copyright (C) 2004-2006 Ingo Molnar |
13 | * Copyright (C) 2004 Nadia Yvette Chambers |
14 | */ |
15 | #include <linux/ring_buffer.h> |
16 | #include <generated/utsrelease.h> |
17 | #include <linux/stacktrace.h> |
18 | #include <linux/writeback.h> |
19 | #include <linux/kallsyms.h> |
20 | #include <linux/seq_file.h> |
21 | #include <linux/notifier.h> |
22 | #include <linux/irqflags.h> |
23 | #include <linux/debugfs.h> |
24 | #include <linux/tracefs.h> |
25 | #include <linux/pagemap.h> |
26 | #include <linux/hardirq.h> |
27 | #include <linux/linkage.h> |
28 | #include <linux/uaccess.h> |
29 | #include <linux/vmalloc.h> |
30 | #include <linux/ftrace.h> |
31 | #include <linux/module.h> |
32 | #include <linux/percpu.h> |
33 | #include <linux/splice.h> |
34 | #include <linux/kdebug.h> |
35 | #include <linux/string.h> |
36 | #include <linux/mount.h> |
37 | #include <linux/rwsem.h> |
38 | #include <linux/slab.h> |
39 | #include <linux/ctype.h> |
40 | #include <linux/init.h> |
41 | #include <linux/poll.h> |
42 | #include <linux/nmi.h> |
43 | #include <linux/fs.h> |
44 | #include <linux/trace.h> |
45 | #include <linux/sched/clock.h> |
46 | #include <linux/sched/rt.h> |
47 | |
48 | #include "trace.h" |
49 | #include "trace_output.h" |
50 | |
51 | /* |
52 | * On boot up, the ring buffer is set to the minimum size, so that |
53 | * we do not waste memory on systems that are not using tracing. |
54 | */ |
55 | bool ring_buffer_expanded; |
56 | |
57 | /* |
58 | * We need to change this state when a selftest is running. |
59 | * A selftest will peek into the ring buffer to count the |
60 | * entries inserted during the selftest, although concurrent |
61 | * insertions into the ring buffer, such as trace_printk, could occur |
62 | * at the same time, giving false positive or negative results. |
63 | */ |
64 | static bool __read_mostly tracing_selftest_running; |
65 | |
66 | /* |
67 | * If a tracer is running, we do not want to run SELFTEST. |
68 | */ |
69 | bool __read_mostly tracing_selftest_disabled; |
70 | |
71 | /* Pipe tracepoints to printk */ |
72 | struct trace_iterator *tracepoint_print_iter; |
73 | int tracepoint_printk; |
74 | static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key); |
75 | |
76 | /* For tracers that don't implement custom flags */ |
77 | static struct tracer_opt dummy_tracer_opt[] = { |
78 | { } |
79 | }; |
80 | |
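/*
 * No-op set_flag() callback, used by register_tracer() for tracers
 * that do not provide their own.
 */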
81 | static int |
82 | dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set) |
83 | { |
84 | return 0; |
85 | } |
86 | |
87 | /* |
88 | * To prevent the comm cache from being overwritten when no |
89 | * tracing is active, only save the comm when a trace event |
90 | * occurred. |
91 | */ |
92 | static DEFINE_PER_CPU(bool, trace_taskinfo_save); |
93 | |
94 | /* |
95 | * Kill all tracing for good (never come back). |
96 | * It is initialized to 1 and is set back to zero only when the |
97 | * initialization of the tracer succeeds; that is the only place |
98 | * that clears it. |
99 | */ |
100 | static int tracing_disabled = 1; |
101 | |
102 | cpumask_var_t __read_mostly tracing_buffer_mask; |
103 | |
104 | /* |
105 | * ftrace_dump_on_oops - variable to dump ftrace buffer on oops |
106 | * |
107 | * If there is an oops (or kernel panic) and the ftrace_dump_on_oops |
108 | * is set, then ftrace_dump is called. This will output the contents |
109 | * of the ftrace buffers to the console. This is very useful for |
110 | * capturing traces that lead to crashes and outputting them to a |
111 | * serial console. |
112 | * |
113 | * It is off by default, but you can enable it either by specifying |
114 | * "ftrace_dump_on_oops" on the kernel command line, or by setting |
115 | * /proc/sys/kernel/ftrace_dump_on_oops |
116 | * Set 1 if you want to dump buffers of all CPUs |
117 | * Set 2 if you want to dump the buffer of the CPU that triggered oops |
118 | */ |
119 | |
120 | enum ftrace_dump_mode ftrace_dump_on_oops; |
121 | |
122 | /* When set, tracing will stop when a WARN*() is hit */ |
123 | int __disable_trace_on_warning; |
124 | |
125 | #ifdef CONFIG_TRACE_EVAL_MAP_FILE |
126 | /* Map of enums to their values, for "eval_map" file */ |
127 | struct trace_eval_map_head { |
128 | struct module *mod; |
129 | unsigned long length; |
130 | }; |
131 | |
132 | union trace_eval_map_item; |
133 | |
134 | struct trace_eval_map_tail { |
135 | /* |
136 | * "end" is first and points to NULL as it must be different |
137 | * from "mod" or "eval_string" |
138 | */ |
139 | union trace_eval_map_item *next; |
140 | const char *end; /* points to NULL */ |
141 | }; |
142 | |
143 | static DEFINE_MUTEX(trace_eval_mutex); |
144 | |
145 | /* |
146 | * The trace_eval_maps are saved in an array with two extra elements, |
147 | * one at the beginning, and one at the end. The beginning item contains |
148 | * the count of the saved maps (head.length), and the module they |
149 | * belong to if not built in (head.mod). The ending item contains a |
150 | * pointer to the next array of saved eval_map items. |
151 | */ |
152 | union trace_eval_map_item { |
153 | struct trace_eval_map map; |
154 | struct trace_eval_map_head head; |
155 | struct trace_eval_map_tail tail; |
156 | }; |
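/*
 * Layout of one saved array of eval map items, as described above:
 *
 *   [0]            head  (count of saved maps, owning module if any)
 *   [1..length]    map   (the saved trace_eval_map entries)
 *   [length + 1]   tail  (pointer to the next saved array, or NULL)
 */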
157 | |
158 | static union trace_eval_map_item *trace_eval_maps; |
159 | #endif /* CONFIG_TRACE_EVAL_MAP_FILE */ |
160 | |
161 | static int tracing_set_tracer(struct trace_array *tr, const char *buf); |
162 | |
163 | #define MAX_TRACER_SIZE 100 |
164 | static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata; |
165 | static char *default_bootup_tracer; |
166 | |
167 | static bool allocate_snapshot; |
168 | |
169 | static int __init set_cmdline_ftrace(char *str) |
170 | { |
171 | strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE); |
172 | default_bootup_tracer = bootup_tracer_buf; |
173 | /* We are using ftrace early, expand it */ |
174 | ring_buffer_expanded = true; |
175 | return 1; |
176 | } |
177 | __setup("ftrace=" , set_cmdline_ftrace); |
178 | |
179 | static int __init set_ftrace_dump_on_oops(char *str) |
180 | { |
181 | if (*str++ != '=' || !*str) { |
182 | ftrace_dump_on_oops = DUMP_ALL; |
183 | return 1; |
184 | } |
185 | |
186 | if (!strcmp("orig_cpu" , str)) { |
187 | ftrace_dump_on_oops = DUMP_ORIG; |
188 | return 1; |
189 | } |
190 | |
191 | return 0; |
192 | } |
193 | __setup("ftrace_dump_on_oops" , set_ftrace_dump_on_oops); |
194 | |
195 | static int __init stop_trace_on_warning(char *str) |
196 | { |
197 | if ((strcmp(str, "=0" ) != 0 && strcmp(str, "=off" ) != 0)) |
198 | __disable_trace_on_warning = 1; |
199 | return 1; |
200 | } |
201 | __setup("traceoff_on_warning" , stop_trace_on_warning); |
202 | |
203 | static int __init boot_alloc_snapshot(char *str) |
204 | { |
205 | allocate_snapshot = true; |
206 | /* We also need the main ring buffer expanded */ |
207 | ring_buffer_expanded = true; |
208 | return 1; |
209 | } |
210 | __setup("alloc_snapshot" , boot_alloc_snapshot); |
211 | |
212 | |
213 | static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata; |
214 | |
215 | static int __init set_trace_boot_options(char *str) |
216 | { |
217 | strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE); |
218 | return 0; |
219 | } |
220 | __setup("trace_options=" , set_trace_boot_options); |
221 | |
222 | static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata; |
223 | static char *trace_boot_clock __initdata; |
224 | |
225 | static int __init set_trace_boot_clock(char *str) |
226 | { |
227 | strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE); |
228 | trace_boot_clock = trace_boot_clock_buf; |
229 | return 0; |
230 | } |
231 | __setup("trace_clock=" , set_trace_boot_clock); |
232 | |
233 | static int __init set_tracepoint_printk(char *str) |
234 | { |
235 | if ((strcmp(str, "=0" ) != 0 && strcmp(str, "=off" ) != 0)) |
236 | tracepoint_printk = 1; |
237 | return 1; |
238 | } |
239 | __setup("tp_printk" , set_tracepoint_printk); |
240 | |
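/* Convert nanoseconds to microseconds, rounding to the nearest microsecond. */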
241 | unsigned long long ns2usecs(u64 nsec) |
242 | { |
243 | nsec += 500; |
244 | do_div(nsec, 1000); |
245 | return nsec; |
246 | } |
247 | |
248 | /* trace_flags holds trace_options default values */ |
249 | #define TRACE_DEFAULT_FLAGS \ |
250 | (FUNCTION_DEFAULT_FLAGS | \ |
251 | TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \ |
252 | TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \ |
253 | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \ |
254 | TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS) |
255 | |
256 | /* trace_options that are only supported by global_trace */ |
257 | #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \ |
258 | TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD) |
259 | |
260 | /* trace_flags that are default zero for instances */ |
261 | #define ZEROED_TRACE_FLAGS \ |
262 | (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK) |
263 | |
264 | /* |
265 | * The global_trace is the descriptor that holds the top-level tracing |
266 | * buffers for the live tracing. |
267 | */ |
268 | static struct trace_array global_trace = { |
269 | .trace_flags = TRACE_DEFAULT_FLAGS, |
270 | }; |
271 | |
272 | LIST_HEAD(ftrace_trace_arrays); |
273 | |
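/*
 * Take a reference on a trace array, but only if it is still on the
 * ftrace_trace_arrays list. Returns 0 on success, -ENODEV otherwise.
 */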
274 | int trace_array_get(struct trace_array *this_tr) |
275 | { |
276 | struct trace_array *tr; |
277 | int ret = -ENODEV; |
278 | |
279 | mutex_lock(&trace_types_lock); |
280 | list_for_each_entry(tr, &ftrace_trace_arrays, list) { |
281 | if (tr == this_tr) { |
282 | tr->ref++; |
283 | ret = 0; |
284 | break; |
285 | } |
286 | } |
287 | mutex_unlock(&trace_types_lock); |
288 | |
289 | return ret; |
290 | } |
291 | |
292 | static void __trace_array_put(struct trace_array *this_tr) |
293 | { |
294 | WARN_ON(!this_tr->ref); |
295 | this_tr->ref--; |
296 | } |
297 | |
298 | void trace_array_put(struct trace_array *this_tr) |
299 | { |
300 | mutex_lock(&trace_types_lock); |
301 | __trace_array_put(this_tr); |
302 | mutex_unlock(&trace_types_lock); |
303 | } |
304 | |
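/*
 * If the event has a filter attached and the record does not match it,
 * discard the reserved ring buffer event and return 1. Otherwise return 0
 * so that the caller commits the event as usual.
 */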
305 | int call_filter_check_discard(struct trace_event_call *call, void *rec, |
306 | struct ring_buffer *buffer, |
307 | struct ring_buffer_event *event) |
308 | { |
309 | if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) && |
310 | !filter_match_preds(call->filter, rec)) { |
311 | __trace_event_discard_commit(buffer, event); |
312 | return 1; |
313 | } |
314 | |
315 | return 0; |
316 | } |
317 | |
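/* Free the PID bitmap and the pid_list structure itself. */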
318 | void trace_free_pid_list(struct trace_pid_list *pid_list) |
319 | { |
320 | vfree(pid_list->pids); |
321 | kfree(pid_list); |
322 | } |
323 | |
324 | /** |
325 | * trace_find_filtered_pid - check if a pid exists in a filtered_pid list |
326 | * @filtered_pids: The list of pids to check |
327 | * @search_pid: The PID to find in @filtered_pids |
328 | * |
329 | * Returns true if @search_pid is found in @filtered_pids, and false otherwise. |
330 | */ |
331 | bool |
332 | trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid) |
333 | { |
334 | /* |
335 | * If pid_max changed after filtered_pids was created, we |
336 | * by default ignore all pids greater than the previous pid_max. |
337 | */ |
338 | if (search_pid >= filtered_pids->pid_max) |
339 | return false; |
340 | |
341 | return test_bit(search_pid, filtered_pids->pids); |
342 | } |
343 | |
344 | /** |
345 | * trace_ignore_this_task - should a task be ignored for tracing |
346 | * @filtered_pids: The list of pids to check |
347 | * @task: The task that should be ignored if not filtered |
348 | * |
349 | * Checks if @task should be traced or not from @filtered_pids. |
350 | * Returns true if @task should *NOT* be traced. |
351 | * Returns false if @task should be traced. |
352 | */ |
353 | bool |
354 | trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task) |
355 | { |
356 | /* |
357 | * Return false, because if filtered_pids does not exist, |
358 | * all pids are good to trace. |
359 | */ |
360 | if (!filtered_pids) |
361 | return false; |
362 | |
363 | return !trace_find_filtered_pid(filtered_pids, task->pid); |
364 | } |
365 | |
366 | /** |
367 | * trace_filter_add_remove_task - Add or remove a task from a pid_list |
368 | * @pid_list: The list to modify |
369 | * @self: The current task for fork or NULL for exit |
370 | * @task: The task to add or remove |
371 | * |
372 | * When adding a task and @self is defined, the task is only added if @self |
373 | * is also included in @pid_list. This happens on fork, and tasks should |
374 | * only be added when the parent is listed. If @self is NULL, then the |
375 | * @task pid will be removed from the list, which would happen on exit |
376 | * of a task. |
377 | */ |
378 | void trace_filter_add_remove_task(struct trace_pid_list *pid_list, |
379 | struct task_struct *self, |
380 | struct task_struct *task) |
381 | { |
382 | if (!pid_list) |
383 | return; |
384 | |
385 | /* For forks, we only add if the forking task is listed */ |
386 | if (self) { |
387 | if (!trace_find_filtered_pid(pid_list, self->pid)) |
388 | return; |
389 | } |
390 | |
391 | /* Sorry, but we don't support pid_max changing after setting */ |
392 | if (task->pid >= pid_list->pid_max) |
393 | return; |
394 | |
395 | /* "self" is set for forks, and NULL for exits */ |
396 | if (self) |
397 | set_bit(task->pid, pid_list->pids); |
398 | else |
399 | clear_bit(task->pid, pid_list->pids); |
400 | } |
401 | |
402 | /** |
403 | * trace_pid_next - Used for seq_file to get to the next pid of a pid_list |
404 | * @pid_list: The pid list to show |
405 | * @v: The last pid that was shown (+1 of the actual pid, so that zero can be displayed) |
406 | * @pos: The position of the file |
407 | * |
408 | * This is used by the seq_file "next" operation to iterate the pids |
409 | * listed in a trace_pid_list structure. |
410 | * |
411 | * Returns the pid+1 as we want to display pid of zero, but NULL would |
412 | * stop the iteration. |
413 | */ |
414 | void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos) |
415 | { |
416 | unsigned long pid = (unsigned long)v; |
417 | |
418 | (*pos)++; |
419 | |
420 | /* pid is already +1 of the actual previous bit */ |
421 | pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid); |
422 | |
423 | /* Return pid + 1 to allow zero to be represented */ |
424 | if (pid < pid_list->pid_max) |
425 | return (void *)(pid + 1); |
426 | |
427 | return NULL; |
428 | } |
429 | |
430 | /** |
431 | * trace_pid_start - Used for seq_file to start reading pid lists |
432 | * @pid_list: The pid list to show |
433 | * @pos: The position of the file |
434 | * |
435 | * This is used by seq_file "start" operation to start the iteration |
436 | * of listing pids. |
437 | * |
438 | * Returns the pid+1 as we want to display pid of zero, but NULL would |
439 | * stop the iteration. |
440 | */ |
441 | void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos) |
442 | { |
443 | unsigned long pid; |
444 | loff_t l = 0; |
445 | |
446 | pid = find_first_bit(pid_list->pids, pid_list->pid_max); |
447 | if (pid >= pid_list->pid_max) |
448 | return NULL; |
449 | |
450 | /* Return pid + 1 so that zero can be the exit value */ |
451 | for (pid++; pid && l < *pos; |
452 | pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l)) |
453 | ; |
454 | return (void *)pid; |
455 | } |
456 | |
457 | /** |
458 | * trace_pid_show - show the current pid in seq_file processing |
459 | * @m: The seq_file structure to write into |
460 | * @v: A void pointer of the pid (+1) value to display |
461 | * |
462 | * Can be directly used by seq_file operations to display the current |
463 | * pid value. |
464 | */ |
465 | int trace_pid_show(struct seq_file *m, void *v) |
466 | { |
467 | unsigned long pid = (unsigned long)v - 1; |
468 | |
469 | seq_printf(m, "%lu\n" , pid); |
470 | return 0; |
471 | } |
472 | |
473 | /* 128 should be much more than enough */ |
474 | #define PID_BUF_SIZE 127 |
475 | |
476 | int trace_pid_write(struct trace_pid_list *filtered_pids, |
477 | struct trace_pid_list **new_pid_list, |
478 | const char __user *ubuf, size_t cnt) |
479 | { |
480 | struct trace_pid_list *pid_list; |
481 | struct trace_parser parser; |
482 | unsigned long val; |
483 | int nr_pids = 0; |
484 | ssize_t read = 0; |
485 | ssize_t ret = 0; |
486 | loff_t pos; |
487 | pid_t pid; |
488 | |
489 | if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1)) |
490 | return -ENOMEM; |
491 | |
492 | /* |
493 | * Always recreate the array. The write is an all-or-nothing |
494 | * operation: a new array is always built when the user adds |
495 | * pids, and if the operation fails, the current list is |
496 | * not modified. |
497 | */ |
498 | pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL); |
499 | if (!pid_list) |
500 | return -ENOMEM; |
501 | |
502 | pid_list->pid_max = READ_ONCE(pid_max); |
503 | |
504 | /* Only truncating will shrink pid_max */ |
505 | if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max) |
506 | pid_list->pid_max = filtered_pids->pid_max; |
507 | |
508 | pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3); |
509 | if (!pid_list->pids) { |
510 | kfree(pid_list); |
511 | return -ENOMEM; |
512 | } |
513 | |
514 | if (filtered_pids) { |
515 | /* copy the current bits to the new max */ |
516 | for_each_set_bit(pid, filtered_pids->pids, |
517 | filtered_pids->pid_max) { |
518 | set_bit(pid, pid_list->pids); |
519 | nr_pids++; |
520 | } |
521 | } |
522 | |
523 | while (cnt > 0) { |
524 | |
525 | pos = 0; |
526 | |
527 | ret = trace_get_user(&parser, ubuf, cnt, &pos); |
528 | if (ret < 0 || !trace_parser_loaded(&parser)) |
529 | break; |
530 | |
531 | read += ret; |
532 | ubuf += ret; |
533 | cnt -= ret; |
534 | |
535 | ret = -EINVAL; |
536 | if (kstrtoul(parser.buffer, 0, &val)) |
537 | break; |
538 | if (val >= pid_list->pid_max) |
539 | break; |
540 | |
541 | pid = (pid_t)val; |
542 | |
543 | set_bit(pid, pid_list->pids); |
544 | nr_pids++; |
545 | |
546 | trace_parser_clear(&parser); |
547 | ret = 0; |
548 | } |
549 | trace_parser_put(&parser); |
550 | |
551 | if (ret < 0) { |
552 | trace_free_pid_list(pid_list); |
553 | return ret; |
554 | } |
555 | |
556 | if (!nr_pids) { |
557 | /* Cleared the list of pids */ |
558 | trace_free_pid_list(pid_list); |
559 | read = ret; |
560 | pid_list = NULL; |
561 | } |
562 | |
563 | *new_pid_list = pid_list; |
564 | |
565 | return read; |
566 | } |
567 | |
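/*
 * Return the current, normalized time stamp of @buf for @cpu. Early in
 * boot, before the ring buffer is allocated, fall back to trace_clock_local().
 */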
568 | static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu) |
569 | { |
570 | u64 ts; |
571 | |
572 | /* Early boot up does not have a buffer yet */ |
573 | if (!buf->buffer) |
574 | return trace_clock_local(); |
575 | |
576 | ts = ring_buffer_time_stamp(buf->buffer, cpu); |
577 | ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts); |
578 | |
579 | return ts; |
580 | } |
581 | |
582 | u64 ftrace_now(int cpu) |
583 | { |
584 | return buffer_ftrace_now(&global_trace.trace_buffer, cpu); |
585 | } |
586 | |
587 | /** |
588 | * tracing_is_enabled - Show if global_trace is enabled |
589 | * |
590 | * Shows if the global trace has been enabled or not. It uses the |
591 | * mirror flag "buffer_disabled" to be used in fast paths such as for |
592 | * the irqsoff tracer. But it may be inaccurate due to races. If you |
593 | * need to know the accurate state, use tracing_is_on() which is a little |
594 | * slower, but accurate. |
595 | */ |
596 | int tracing_is_enabled(void) |
597 | { |
598 | /* |
599 | * For quick access (irqsoff uses this in fast path), just |
600 | * return the mirror variable of the state of the ring buffer. |
601 | * It's a little racy, but we don't really care. |
602 | */ |
603 | smp_rmb(); |
604 | return !global_trace.buffer_disabled; |
605 | } |
606 | |
607 | /* |
608 | * trace_buf_size is the size in bytes that is allocated |
609 | * for a buffer. Note, the number of bytes is always rounded |
610 | * to page size. |
611 | * |
612 | * This number is purposely set to a low value of 16384: |
613 | * if a dump on oops happens, it is much appreciated not to |
614 | * have to wait for all that output. In any case, this is |
615 | * configurable at both boot time and run time. |
616 | */ |
617 | #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */ |
618 | |
619 | static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT; |
620 | |
621 | /* trace_types holds a link list of available tracers. */ |
622 | static struct tracer *trace_types __read_mostly; |
623 | |
624 | /* |
625 | * trace_types_lock is used to protect the trace_types list. |
626 | */ |
627 | DEFINE_MUTEX(trace_types_lock); |
628 | |
629 | /* |
630 | * Serialize access to the ring buffer. |
631 | * |
632 | * The ring buffer serializes readers, but that is only low-level protection. |
633 | * The validity of the events (returned by ring_buffer_peek() etc.) |
634 | * is not protected by the ring buffer. |
635 | * |
636 | * The content of events may become garbage if we allow other processes to |
637 | * consume these events concurrently: |
638 | * A) the page of the consumed events may become a normal page |
639 | * (not a reader page) in the ring buffer, and this page will be rewritten |
640 | * by the event producer. |
641 | * B) The page of the consumed events may become a page for splice_read, |
642 | * and this page will be returned to the system. |
643 | * |
644 | * These primitives allow multiple processes to access different per-CPU ring |
645 | * buffers concurrently. |
646 | * |
647 | * These primitives don't distinguish read-only and read-consume access. |
648 | * Multiple read-only accesses are also serialized. |
649 | */ |
650 | |
651 | #ifdef CONFIG_SMP |
652 | static DECLARE_RWSEM(all_cpu_access_lock); |
653 | static DEFINE_PER_CPU(struct mutex, cpu_access_lock); |
654 | |
655 | static inline void trace_access_lock(int cpu) |
656 | { |
657 | if (cpu == RING_BUFFER_ALL_CPUS) { |
658 | /* gain it for accessing the whole ring buffer. */ |
659 | down_write(&all_cpu_access_lock); |
660 | } else { |
661 | /* gain it for accessing a cpu ring buffer. */ |
662 | |
663 | /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */ |
664 | down_read(&all_cpu_access_lock); |
665 | |
666 | /* Secondly block other access to this @cpu ring buffer. */ |
667 | mutex_lock(&per_cpu(cpu_access_lock, cpu)); |
668 | } |
669 | } |
670 | |
671 | static inline void trace_access_unlock(int cpu) |
672 | { |
673 | if (cpu == RING_BUFFER_ALL_CPUS) { |
674 | up_write(&all_cpu_access_lock); |
675 | } else { |
676 | mutex_unlock(&per_cpu(cpu_access_lock, cpu)); |
677 | up_read(&all_cpu_access_lock); |
678 | } |
679 | } |
680 | |
681 | static inline void trace_access_lock_init(void) |
682 | { |
683 | int cpu; |
684 | |
685 | for_each_possible_cpu(cpu) |
686 | mutex_init(&per_cpu(cpu_access_lock, cpu)); |
687 | } |
688 | |
689 | #else |
690 | |
691 | static DEFINE_MUTEX(access_lock); |
692 | |
693 | static inline void trace_access_lock(int cpu) |
694 | { |
695 | (void)cpu; |
696 | mutex_lock(&access_lock); |
697 | } |
698 | |
699 | static inline void trace_access_unlock(int cpu) |
700 | { |
701 | (void)cpu; |
702 | mutex_unlock(&access_lock); |
703 | } |
704 | |
705 | static inline void trace_access_lock_init(void) |
706 | { |
707 | } |
708 | |
709 | #endif |
710 | |
711 | #ifdef CONFIG_STACKTRACE |
712 | static void __ftrace_trace_stack(struct ring_buffer *buffer, |
713 | unsigned long flags, |
714 | int skip, int pc, struct pt_regs *regs); |
715 | static inline void ftrace_trace_stack(struct trace_array *tr, |
716 | struct ring_buffer *buffer, |
717 | unsigned long flags, |
718 | int skip, int pc, struct pt_regs *regs); |
719 | |
720 | #else |
721 | static inline void __ftrace_trace_stack(struct ring_buffer *buffer, |
722 | unsigned long flags, |
723 | int skip, int pc, struct pt_regs *regs) |
724 | { |
725 | } |
726 | static inline void ftrace_trace_stack(struct trace_array *tr, |
727 | struct ring_buffer *buffer, |
728 | unsigned long flags, |
729 | int skip, int pc, struct pt_regs *regs) |
730 | { |
731 | } |
732 | |
733 | #endif |
734 | |
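/* Initialize the common trace_entry fields of a freshly reserved event. */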
735 | static __always_inline void |
736 | trace_event_setup(struct ring_buffer_event *event, |
737 | int type, unsigned long flags, int pc) |
738 | { |
739 | struct trace_entry *ent = ring_buffer_event_data(event); |
740 | |
741 | tracing_generic_entry_update(ent, flags, pc); |
742 | ent->type = type; |
743 | } |
744 | |
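/*
 * Reserve @len bytes on the ring buffer and, if the reservation succeeds,
 * set up the common fields of the new event.
 */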
745 | static __always_inline struct ring_buffer_event * |
746 | __trace_buffer_lock_reserve(struct ring_buffer *buffer, |
747 | int type, |
748 | unsigned long len, |
749 | unsigned long flags, int pc) |
750 | { |
751 | struct ring_buffer_event *event; |
752 | |
753 | event = ring_buffer_lock_reserve(buffer, len); |
754 | if (event != NULL) |
755 | trace_event_setup(event, type, flags, pc); |
756 | |
757 | return event; |
758 | } |
759 | |
760 | void tracer_tracing_on(struct trace_array *tr) |
761 | { |
762 | if (tr->trace_buffer.buffer) |
763 | ring_buffer_record_on(tr->trace_buffer.buffer); |
764 | /* |
765 | * This flag is looked at when buffers haven't been allocated |
766 | * yet, or by some tracers (like irqsoff) that just want to |
767 | * know if the ring buffer has been disabled, but can handle |
768 | * races where it gets disabled while we still do a record. |
769 | * As the check is in the fast path of the tracers, it is more |
770 | * important to be fast than accurate. |
771 | */ |
772 | tr->buffer_disabled = 0; |
773 | /* Make the flag seen by readers */ |
774 | smp_wmb(); |
775 | } |
776 | |
777 | /** |
778 | * tracing_on - enable tracing buffers |
779 | * |
780 | * This function enables tracing buffers that may have been |
781 | * disabled with tracing_off. |
782 | */ |
783 | void tracing_on(void) |
784 | { |
785 | tracer_tracing_on(&global_trace); |
786 | } |
787 | EXPORT_SYMBOL_GPL(tracing_on); |
788 | |
789 | |
790 | static __always_inline void |
791 | __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event) |
792 | { |
793 | __this_cpu_write(trace_taskinfo_save, true); |
794 | |
795 | /* If this is the temp buffer, we need to commit fully */ |
796 | if (this_cpu_read(trace_buffered_event) == event) { |
797 | /* Length is in event->array[0] */ |
798 | ring_buffer_write(buffer, event->array[0], &event->array[1]); |
799 | /* Release the temp buffer */ |
800 | this_cpu_dec(trace_buffered_event_cnt); |
801 | } else |
802 | ring_buffer_unlock_commit(buffer, event); |
803 | } |
804 | |
805 | /** |
806 | * __trace_puts - write a constant string into the trace buffer. |
807 | * @ip: The address of the caller |
808 | * @str: The constant string to write |
809 | * @size: The size of the string. |
810 | */ |
811 | int __trace_puts(unsigned long ip, const char *str, int size) |
812 | { |
813 | struct ring_buffer_event *event; |
814 | struct ring_buffer *buffer; |
815 | struct print_entry *entry; |
816 | unsigned long irq_flags; |
817 | int alloc; |
818 | int pc; |
819 | |
820 | if (!(global_trace.trace_flags & TRACE_ITER_PRINTK)) |
821 | return 0; |
822 | |
823 | pc = preempt_count(); |
824 | |
825 | if (unlikely(tracing_selftest_running || tracing_disabled)) |
826 | return 0; |
827 | |
828 | alloc = sizeof(*entry) + size + 2; /* possible \n added */ |
829 | |
830 | local_save_flags(irq_flags); |
831 | buffer = global_trace.trace_buffer.buffer; |
832 | event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, |
833 | irq_flags, pc); |
834 | if (!event) |
835 | return 0; |
836 | |
837 | entry = ring_buffer_event_data(event); |
838 | entry->ip = ip; |
839 | |
840 | memcpy(&entry->buf, str, size); |
841 | |
842 | /* Add a newline if necessary */ |
843 | if (entry->buf[size - 1] != '\n') { |
844 | entry->buf[size] = '\n'; |
845 | entry->buf[size + 1] = '\0'; |
846 | } else |
847 | entry->buf[size] = '\0'; |
848 | |
849 | __buffer_unlock_commit(buffer, event); |
850 | ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL); |
851 | |
852 | return size; |
853 | } |
854 | EXPORT_SYMBOL_GPL(__trace_puts); |
855 | |
856 | /** |
857 | * __trace_bputs - write the pointer to a constant string into trace buffer |
858 | * @ip: The address of the caller |
859 | * @str: The constant string to write to the buffer |
860 | */ |
861 | int __trace_bputs(unsigned long ip, const char *str) |
862 | { |
863 | struct ring_buffer_event *event; |
864 | struct ring_buffer *buffer; |
865 | struct bputs_entry *entry; |
866 | unsigned long irq_flags; |
867 | int size = sizeof(struct bputs_entry); |
868 | int pc; |
869 | |
870 | if (!(global_trace.trace_flags & TRACE_ITER_PRINTK)) |
871 | return 0; |
872 | |
873 | pc = preempt_count(); |
874 | |
875 | if (unlikely(tracing_selftest_running || tracing_disabled)) |
876 | return 0; |
877 | |
878 | local_save_flags(irq_flags); |
879 | buffer = global_trace.trace_buffer.buffer; |
880 | event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size, |
881 | irq_flags, pc); |
882 | if (!event) |
883 | return 0; |
884 | |
885 | entry = ring_buffer_event_data(event); |
886 | entry->ip = ip; |
887 | entry->str = str; |
888 | |
889 | __buffer_unlock_commit(buffer, event); |
890 | ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL); |
891 | |
892 | return 1; |
893 | } |
894 | EXPORT_SYMBOL_GPL(__trace_bputs); |
895 | |
896 | #ifdef CONFIG_TRACER_SNAPSHOT |
897 | void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data) |
898 | { |
899 | struct tracer *tracer = tr->current_trace; |
900 | unsigned long flags; |
901 | |
902 | if (in_nmi()) { |
903 | internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n" ); |
904 | internal_trace_puts("*** snapshot is being ignored ***\n" ); |
905 | return; |
906 | } |
907 | |
908 | if (!tr->allocated_snapshot) { |
909 | internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n" ); |
910 | internal_trace_puts("*** stopping trace here! ***\n" ); |
911 | tracing_off(); |
912 | return; |
913 | } |
914 | |
915 | /* Note, snapshot can not be used when the tracer uses it */ |
916 | if (tracer->use_max_tr) { |
917 | internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n" ); |
918 | internal_trace_puts("*** Can not use snapshot (sorry) ***\n" ); |
919 | return; |
920 | } |
921 | |
922 | local_irq_save(flags); |
923 | update_max_tr(tr, current, smp_processor_id(), cond_data); |
924 | local_irq_restore(flags); |
925 | } |
926 | |
927 | void tracing_snapshot_instance(struct trace_array *tr) |
928 | { |
929 | tracing_snapshot_instance_cond(tr, NULL); |
930 | } |
931 | |
932 | /** |
933 | * tracing_snapshot - take a snapshot of the current buffer. |
934 | * |
935 | * This causes a swap between the snapshot buffer and the current live |
936 | * tracing buffer. You can use this to take snapshots of the live |
937 | * trace when some condition is triggered, but continue to trace. |
938 | * |
939 | * Note, make sure to allocate the snapshot either with |
940 | * tracing_snapshot_alloc(), or by doing it manually |
941 | * with: echo 1 > /sys/kernel/debug/tracing/snapshot |
942 | * |
943 | * If the snapshot buffer is not allocated, it will stop tracing. |
944 | * Basically making a permanent snapshot. |
945 | */ |
946 | void tracing_snapshot(void) |
947 | { |
948 | struct trace_array *tr = &global_trace; |
949 | |
950 | tracing_snapshot_instance(tr); |
951 | } |
952 | EXPORT_SYMBOL_GPL(tracing_snapshot); |
953 | |
954 | /** |
955 | * tracing_snapshot_cond - conditionally take a snapshot of the current buffer. |
956 | * @tr: The tracing instance to snapshot |
957 | * @cond_data: The data to be tested conditionally, and possibly saved |
958 | * |
959 | * This is the same as tracing_snapshot() except that the snapshot is |
960 | * conditional - the snapshot will only happen if the |
961 | * cond_snapshot.update() implementation receiving the cond_data |
962 | * returns true, which means that the trace array's cond_snapshot |
963 | * update() operation used the cond_data to determine whether the |
964 | * snapshot should be taken, and if it was, presumably saved it along |
965 | * with the snapshot. |
966 | */ |
967 | void tracing_snapshot_cond(struct trace_array *tr, void *cond_data) |
968 | { |
969 | tracing_snapshot_instance_cond(tr, cond_data); |
970 | } |
971 | EXPORT_SYMBOL_GPL(tracing_snapshot_cond); |
972 | |
973 | /** |
974 | * tracing_snapshot_cond_data - get the user data associated with a snapshot |
975 | * @tr: The tracing instance |
976 | * |
977 | * When the user enables a conditional snapshot using |
978 | * tracing_snapshot_cond_enable(), the user-defined cond_data is saved |
979 | * with the snapshot. This accessor is used to retrieve it. |
980 | * |
981 | * Should not be called from cond_snapshot.update(), since it takes |
982 | * the tr->max_lock lock, which the code calling |
983 | * cond_snapshot.update() has already taken. |
984 | * |
985 | * Returns the cond_data associated with the trace array's snapshot. |
986 | */ |
987 | void *tracing_cond_snapshot_data(struct trace_array *tr) |
988 | { |
989 | void *cond_data = NULL; |
990 | |
991 | arch_spin_lock(&tr->max_lock); |
992 | |
993 | if (tr->cond_snapshot) |
994 | cond_data = tr->cond_snapshot->cond_data; |
995 | |
996 | arch_spin_unlock(&tr->max_lock); |
997 | |
998 | return cond_data; |
999 | } |
1000 | EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data); |
1001 | |
1002 | static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf, |
1003 | struct trace_buffer *size_buf, int cpu_id); |
1004 | static void set_buffer_entries(struct trace_buffer *buf, unsigned long val); |
1005 | |
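/*
 * Allocate the snapshot (max) buffer for @tr if it has not been allocated
 * yet, sizing it to match the main trace buffer.
 */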
1006 | int tracing_alloc_snapshot_instance(struct trace_array *tr) |
1007 | { |
1008 | int ret; |
1009 | |
1010 | if (!tr->allocated_snapshot) { |
1011 | |
1012 | /* allocate spare buffer */ |
1013 | ret = resize_buffer_duplicate_size(&tr->max_buffer, |
1014 | &tr->trace_buffer, RING_BUFFER_ALL_CPUS); |
1015 | if (ret < 0) |
1016 | return ret; |
1017 | |
1018 | tr->allocated_snapshot = true; |
1019 | } |
1020 | |
1021 | return 0; |
1022 | } |
1023 | |
1024 | static void free_snapshot(struct trace_array *tr) |
1025 | { |
1026 | /* |
1027 | * We don't free the ring buffer; instead, we resize it because |
1028 | * the max_tr ring buffer has some state (e.g. ring->clock) and |
1029 | * we want to preserve it. |
1030 | */ |
1031 | ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS); |
1032 | set_buffer_entries(&tr->max_buffer, 1); |
1033 | tracing_reset_online_cpus(&tr->max_buffer); |
1034 | tr->allocated_snapshot = false; |
1035 | } |
1036 | |
1037 | /** |
1038 | * tracing_alloc_snapshot - allocate snapshot buffer. |
1039 | * |
1040 | * This only allocates the snapshot buffer if it isn't already |
1041 | * allocated - it doesn't also take a snapshot. |
1042 | * |
1043 | * This is meant to be used in cases where the snapshot buffer needs |
1044 | * to be set up for events that can't sleep but need to be able to |
1045 | * trigger a snapshot. |
1046 | */ |
1047 | int tracing_alloc_snapshot(void) |
1048 | { |
1049 | struct trace_array *tr = &global_trace; |
1050 | int ret; |
1051 | |
1052 | ret = tracing_alloc_snapshot_instance(tr); |
1053 | WARN_ON(ret < 0); |
1054 | |
1055 | return ret; |
1056 | } |
1057 | EXPORT_SYMBOL_GPL(tracing_alloc_snapshot); |
1058 | |
1059 | /** |
1060 | * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer. |
1061 | * |
1062 | * This is similar to tracing_snapshot(), but it will allocate the |
1063 | * snapshot buffer if it isn't already allocated. Use this only |
1064 | * where it is safe to sleep, as the allocation may sleep. |
1065 | * |
1066 | * This causes a swap between the snapshot buffer and the current live |
1067 | * tracing buffer. You can use this to take snapshots of the live |
1068 | * trace when some condition is triggered, but continue to trace. |
1069 | */ |
1070 | void tracing_snapshot_alloc(void) |
1071 | { |
1072 | int ret; |
1073 | |
1074 | ret = tracing_alloc_snapshot(); |
1075 | if (ret < 0) |
1076 | return; |
1077 | |
1078 | tracing_snapshot(); |
1079 | } |
1080 | EXPORT_SYMBOL_GPL(tracing_snapshot_alloc); |
1081 | |
1082 | /** |
1083 | * tracing_snapshot_cond_enable - enable conditional snapshot for an instance |
1084 | * @tr: The tracing instance |
1085 | * @cond_data: User data to associate with the snapshot |
1086 | * @update: Implementation of the cond_snapshot update function |
1087 | * |
1088 | * Check whether the conditional snapshot for the given instance has |
1089 | * already been enabled, or if the current tracer is already using a |
1090 | * snapshot; if so, return -EBUSY, else create a cond_snapshot and |
1091 | * save the cond_data and update function inside. |
1092 | * |
1093 | * Returns 0 if successful, error otherwise. |
1094 | */ |
1095 | int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, |
1096 | cond_update_fn_t update) |
1097 | { |
1098 | struct cond_snapshot *cond_snapshot; |
1099 | int ret = 0; |
1100 | |
1101 | cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL); |
1102 | if (!cond_snapshot) |
1103 | return -ENOMEM; |
1104 | |
1105 | cond_snapshot->cond_data = cond_data; |
1106 | cond_snapshot->update = update; |
1107 | |
1108 | mutex_lock(&trace_types_lock); |
1109 | |
1110 | ret = tracing_alloc_snapshot_instance(tr); |
1111 | if (ret) |
1112 | goto fail_unlock; |
1113 | |
1114 | if (tr->current_trace->use_max_tr) { |
1115 | ret = -EBUSY; |
1116 | goto fail_unlock; |
1117 | } |
1118 | |
1119 | /* |
1120 | * The cond_snapshot can only change to NULL without the |
1121 | * trace_types_lock. We don't care if we race with it going |
1122 | * to NULL, but we want to make sure that it's not set to |
1123 | * something other than NULL when we get here, which we can |
1124 | * do safely with only holding the trace_types_lock and not |
1125 | * having to take the max_lock. |
1126 | */ |
1127 | if (tr->cond_snapshot) { |
1128 | ret = -EBUSY; |
1129 | goto fail_unlock; |
1130 | } |
1131 | |
1132 | arch_spin_lock(&tr->max_lock); |
1133 | tr->cond_snapshot = cond_snapshot; |
1134 | arch_spin_unlock(&tr->max_lock); |
1135 | |
1136 | mutex_unlock(&trace_types_lock); |
1137 | |
1138 | return ret; |
1139 | |
1140 | fail_unlock: |
1141 | mutex_unlock(&trace_types_lock); |
1142 | kfree(cond_snapshot); |
1143 | return ret; |
1144 | } |
1145 | EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable); |
1146 | |
1147 | /** |
1148 | * tracing_snapshot_cond_disable - disable conditional snapshot for an instance |
1149 | * @tr: The tracing instance |
1150 | * |
1151 | * Check whether the conditional snapshot for the given instance is |
1152 | * enabled; if so, free the cond_snapshot associated with it, |
1153 | * otherwise return -EINVAL. |
1154 | * |
1155 | * Returns 0 if successful, error otherwise. |
1156 | */ |
1157 | int tracing_snapshot_cond_disable(struct trace_array *tr) |
1158 | { |
1159 | int ret = 0; |
1160 | |
1161 | arch_spin_lock(&tr->max_lock); |
1162 | |
1163 | if (!tr->cond_snapshot) |
1164 | ret = -EINVAL; |
1165 | else { |
1166 | kfree(tr->cond_snapshot); |
1167 | tr->cond_snapshot = NULL; |
1168 | } |
1169 | |
1170 | arch_spin_unlock(&tr->max_lock); |
1171 | |
1172 | return ret; |
1173 | } |
1174 | EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable); |
1175 | #else |
1176 | void tracing_snapshot(void) |
1177 | { |
1178 | WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used" ); |
1179 | } |
1180 | EXPORT_SYMBOL_GPL(tracing_snapshot); |
1181 | void tracing_snapshot_cond(struct trace_array *tr, void *cond_data) |
1182 | { |
1183 | WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used" ); |
1184 | } |
1185 | EXPORT_SYMBOL_GPL(tracing_snapshot_cond); |
1186 | int tracing_alloc_snapshot(void) |
1187 | { |
1188 | WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used" ); |
1189 | return -ENODEV; |
1190 | } |
1191 | EXPORT_SYMBOL_GPL(tracing_alloc_snapshot); |
1192 | void tracing_snapshot_alloc(void) |
1193 | { |
1194 | /* Give warning */ |
1195 | tracing_snapshot(); |
1196 | } |
1197 | EXPORT_SYMBOL_GPL(tracing_snapshot_alloc); |
1198 | void *tracing_cond_snapshot_data(struct trace_array *tr) |
1199 | { |
1200 | return NULL; |
1201 | } |
1202 | EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data); |
1203 | int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update) |
1204 | { |
1205 | return -ENODEV; |
1206 | } |
1207 | EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable); |
1208 | int tracing_snapshot_cond_disable(struct trace_array *tr) |
1209 | { |
1210 | return false; |
1211 | } |
1212 | EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable); |
1213 | #endif /* CONFIG_TRACER_SNAPSHOT */ |
1214 | |
1215 | void tracer_tracing_off(struct trace_array *tr) |
1216 | { |
1217 | if (tr->trace_buffer.buffer) |
1218 | ring_buffer_record_off(tr->trace_buffer.buffer); |
1219 | /* |
1220 | * This flag is looked at when buffers haven't been allocated |
1221 | * yet, or by some tracers (like irqsoff) that just want to |
1222 | * know if the ring buffer has been disabled, but can handle |
1223 | * races where it gets disabled while we still do a record. |
1224 | * As the check is in the fast path of the tracers, it is more |
1225 | * important to be fast than accurate. |
1226 | */ |
1227 | tr->buffer_disabled = 1; |
1228 | /* Make the flag seen by readers */ |
1229 | smp_wmb(); |
1230 | } |
1231 | |
1232 | /** |
1233 | * tracing_off - turn off tracing buffers |
1234 | * |
1235 | * This function stops the tracing buffers from recording data. |
1236 | * It does not disable any overhead the tracers themselves may |
1237 | * be causing. This function simply causes all recording to |
1238 | * the ring buffers to fail. |
1239 | */ |
1240 | void tracing_off(void) |
1241 | { |
1242 | tracer_tracing_off(&global_trace); |
1243 | } |
1244 | EXPORT_SYMBOL_GPL(tracing_off); |
1245 | |
1246 | void disable_trace_on_warning(void) |
1247 | { |
1248 | if (__disable_trace_on_warning) |
1249 | tracing_off(); |
1250 | } |
1251 | |
1252 | /** |
1253 | * tracer_tracing_is_on - show real state of ring buffer enabled |
1254 | * @tr: the trace array to check whether its ring buffer is enabled |
1255 | * |
1256 | * Shows real state of the ring buffer if it is enabled or not. |
1257 | */ |
1258 | bool tracer_tracing_is_on(struct trace_array *tr) |
1259 | { |
1260 | if (tr->trace_buffer.buffer) |
1261 | return ring_buffer_record_is_on(tr->trace_buffer.buffer); |
1262 | return !tr->buffer_disabled; |
1263 | } |
1264 | |
1265 | /** |
1266 | * tracing_is_on - show state of ring buffers enabled |
1267 | */ |
1268 | int tracing_is_on(void) |
1269 | { |
1270 | return tracer_tracing_is_on(&global_trace); |
1271 | } |
1272 | EXPORT_SYMBOL_GPL(tracing_is_on); |
1273 | |
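/*
 * "trace_buf_size=" boot parameter: per-CPU ring buffer size in bytes
 * (memparse() accepts K, M and G suffixes).
 */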
1274 | static int __init set_buf_size(char *str) |
1275 | { |
1276 | unsigned long buf_size; |
1277 | |
1278 | if (!str) |
1279 | return 0; |
1280 | buf_size = memparse(str, &str); |
1281 | /* nr_entries can not be zero */ |
1282 | if (buf_size == 0) |
1283 | return 0; |
1284 | trace_buf_size = buf_size; |
1285 | return 1; |
1286 | } |
1287 | __setup("trace_buf_size=" , set_buf_size); |
1288 | |
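/*
 * "tracing_thresh=" boot parameter: latency threshold in microseconds,
 * stored internally in nanoseconds.
 */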
1289 | static int __init set_tracing_thresh(char *str) |
1290 | { |
1291 | unsigned long threshold; |
1292 | int ret; |
1293 | |
1294 | if (!str) |
1295 | return 0; |
1296 | ret = kstrtoul(str, 0, &threshold); |
1297 | if (ret < 0) |
1298 | return 0; |
1299 | tracing_thresh = threshold * 1000; |
1300 | return 1; |
1301 | } |
1302 | __setup("tracing_thresh=" , set_tracing_thresh); |
1303 | |
1304 | unsigned long nsecs_to_usecs(unsigned long nsecs) |
1305 | { |
1306 | return nsecs / 1000; |
1307 | } |
1308 | |
1309 | /* |
1310 | * TRACE_FLAGS is defined as a tuple matching bit masks with strings. |
1311 | * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that |
1312 | * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list |
1313 | * of strings in the order that the evals (enum) were defined. |
1314 | */ |
1315 | #undef C |
1316 | #define C(a, b) b |
1317 | |
1318 | /* These must match the bit positions in trace_iterator_flags */ |
1319 | static const char *trace_options[] = { |
1320 | TRACE_FLAGS |
1321 | NULL |
1322 | }; |
1323 | |
1324 | static struct { |
1325 | u64 (*func)(void); |
1326 | const char *name; |
1327 | int in_ns; /* is this clock in nanoseconds? */ |
1328 | } trace_clocks[] = { |
1329 | { trace_clock_local, "local" , 1 }, |
1330 | { trace_clock_global, "global" , 1 }, |
1331 | { trace_clock_counter, "counter" , 0 }, |
1332 | { trace_clock_jiffies, "uptime" , 0 }, |
1333 | { trace_clock, "perf" , 1 }, |
1334 | { ktime_get_mono_fast_ns, "mono" , 1 }, |
1335 | { ktime_get_raw_fast_ns, "mono_raw" , 1 }, |
1336 | { ktime_get_boot_fast_ns, "boot" , 1 }, |
1337 | ARCH_TRACE_CLOCKS |
1338 | }; |
1339 | |
1340 | bool trace_clock_in_ns(struct trace_array *tr) |
1341 | { |
1342 | if (trace_clocks[tr->clock_id].in_ns) |
1343 | return true; |
1344 | |
1345 | return false; |
1346 | } |
1347 | |
1348 | /* |
1349 | * trace_parser_get_init - gets the buffer for trace parser |
1350 | */ |
1351 | int trace_parser_get_init(struct trace_parser *parser, int size) |
1352 | { |
1353 | memset(parser, 0, sizeof(*parser)); |
1354 | |
1355 | parser->buffer = kmalloc(size, GFP_KERNEL); |
1356 | if (!parser->buffer) |
1357 | return 1; |
1358 | |
1359 | parser->size = size; |
1360 | return 0; |
1361 | } |
1362 | |
1363 | /* |
1364 | * trace_parser_put - frees the buffer for trace parser |
1365 | */ |
1366 | void trace_parser_put(struct trace_parser *parser) |
1367 | { |
1368 | kfree(parser->buffer); |
1369 | parser->buffer = NULL; |
1370 | } |
1371 | |
1372 | /* |
1373 | * trace_get_user - reads the user input string separated by space |
1374 | * (matched by isspace(ch)) |
1375 | * |
1376 | * For each string found the 'struct trace_parser' is updated, |
1377 | * and the function returns. |
1378 | * |
1379 | * Returns number of bytes read. |
1380 | * |
1381 | * See kernel/trace/trace.h for 'struct trace_parser' details. |
1382 | */ |
1383 | int trace_get_user(struct trace_parser *parser, const char __user *ubuf, |
1384 | size_t cnt, loff_t *ppos) |
1385 | { |
1386 | char ch; |
1387 | size_t read = 0; |
1388 | ssize_t ret; |
1389 | |
1390 | if (!*ppos) |
1391 | trace_parser_clear(parser); |
1392 | |
1393 | ret = get_user(ch, ubuf++); |
1394 | if (ret) |
1395 | goto out; |
1396 | |
1397 | read++; |
1398 | cnt--; |
1399 | |
1400 | /* |
1401 | * The parser is not finished with the last write, |
1402 | * continue reading the user input without skipping spaces. |
1403 | */ |
1404 | if (!parser->cont) { |
1405 | /* skip white space */ |
1406 | while (cnt && isspace(ch)) { |
1407 | ret = get_user(ch, ubuf++); |
1408 | if (ret) |
1409 | goto out; |
1410 | read++; |
1411 | cnt--; |
1412 | } |
1413 | |
1414 | parser->idx = 0; |
1415 | |
1416 | /* only spaces were written */ |
1417 | if (isspace(ch) || !ch) { |
1418 | *ppos += read; |
1419 | ret = read; |
1420 | goto out; |
1421 | } |
1422 | } |
1423 | |
1424 | /* read the non-space input */ |
1425 | while (cnt && !isspace(ch) && ch) { |
1426 | if (parser->idx < parser->size - 1) |
1427 | parser->buffer[parser->idx++] = ch; |
1428 | else { |
1429 | ret = -EINVAL; |
1430 | goto out; |
1431 | } |
1432 | ret = get_user(ch, ubuf++); |
1433 | if (ret) |
1434 | goto out; |
1435 | read++; |
1436 | cnt--; |
1437 | } |
1438 | |
1439 | /* We either got finished input or we have to wait for another call. */ |
1440 | if (isspace(ch) || !ch) { |
1441 | parser->buffer[parser->idx] = 0; |
1442 | parser->cont = false; |
1443 | } else if (parser->idx < parser->size - 1) { |
1444 | parser->cont = true; |
1445 | parser->buffer[parser->idx++] = ch; |
1446 | /* Make sure the parsed string always terminates with '\0'. */ |
1447 | parser->buffer[parser->idx] = 0; |
1448 | } else { |
1449 | ret = -EINVAL; |
1450 | goto out; |
1451 | } |
1452 | |
1453 | *ppos += read; |
1454 | ret = read; |
1455 | |
1456 | out: |
1457 | return ret; |
1458 | } |
1459 | |
1460 | /* TODO add a seq_buf_to_buffer() */ |
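/*
 * Copy up to @cnt unread bytes from the trace_seq into @buf and advance
 * the read position. Returns the number of bytes copied, or -EBUSY if
 * there is nothing left to read.
 */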
1461 | static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) |
1462 | { |
1463 | int len; |
1464 | |
1465 | if (trace_seq_used(s) <= s->seq.readpos) |
1466 | return -EBUSY; |
1467 | |
1468 | len = trace_seq_used(s) - s->seq.readpos; |
1469 | if (cnt > len) |
1470 | cnt = len; |
1471 | memcpy(buf, s->buffer + s->seq.readpos, cnt); |
1472 | |
1473 | s->seq.readpos += cnt; |
1474 | return cnt; |
1475 | } |
1476 | |
1477 | unsigned long __read_mostly tracing_thresh; |
1478 | |
1479 | #ifdef CONFIG_TRACER_MAX_TRACE |
1480 | /* |
1481 | * Copy the new maximum trace into the separate maximum-trace |
1482 | * structure. (this way the maximum trace is permanently saved, |
1483 | * for later retrieval via /sys/kernel/tracing/tracing_max_latency) |
1484 | */ |
1485 | static void |
1486 | __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) |
1487 | { |
1488 | struct trace_buffer *trace_buf = &tr->trace_buffer; |
1489 | struct trace_buffer *max_buf = &tr->max_buffer; |
1490 | struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu); |
1491 | struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu); |
1492 | |
1493 | max_buf->cpu = cpu; |
1494 | max_buf->time_start = data->preempt_timestamp; |
1495 | |
1496 | max_data->saved_latency = tr->max_latency; |
1497 | max_data->critical_start = data->critical_start; |
1498 | max_data->critical_end = data->critical_end; |
1499 | |
1500 | strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN); |
1501 | max_data->pid = tsk->pid; |
1502 | /* |
1503 | * If tsk == current, then use current_uid(), as that does not use |
1504 | * RCU. The irq tracer can be called out of RCU scope. |
1505 | */ |
1506 | if (tsk == current) |
1507 | max_data->uid = current_uid(); |
1508 | else |
1509 | max_data->uid = task_uid(tsk); |
1510 | |
1511 | max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO; |
1512 | max_data->policy = tsk->policy; |
1513 | max_data->rt_priority = tsk->rt_priority; |
1514 | |
1515 | /* record this task's comm */ |
1516 | tracing_record_cmdline(tsk); |
1517 | } |
1518 | |
1519 | /** |
1520 | * update_max_tr - snapshot all trace buffers from global_trace to max_tr |
1521 | * @tr: tracer |
1522 | * @tsk: the task with the latency |
1523 | * @cpu: The cpu that initiated the trace. |
1524 | * @cond_data: User data associated with a conditional snapshot |
1525 | * |
1526 | * Flip the buffers between the @tr and the max_tr and record information |
1527 | * about which task was the cause of this latency. |
1528 | */ |
1529 | void |
1530 | update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu, |
1531 | void *cond_data) |
1532 | { |
1533 | if (tr->stop_count) |
1534 | return; |
1535 | |
1536 | WARN_ON_ONCE(!irqs_disabled()); |
1537 | |
1538 | if (!tr->allocated_snapshot) { |
1539 | /* Only the nop tracer should hit this when disabling */ |
1540 | WARN_ON_ONCE(tr->current_trace != &nop_trace); |
1541 | return; |
1542 | } |
1543 | |
1544 | arch_spin_lock(&tr->max_lock); |
1545 | |
1546 | /* Inherit the recordable setting from trace_buffer */ |
1547 | if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer)) |
1548 | ring_buffer_record_on(tr->max_buffer.buffer); |
1549 | else |
1550 | ring_buffer_record_off(tr->max_buffer.buffer); |
1551 | |
1552 | #ifdef CONFIG_TRACER_SNAPSHOT |
1553 | if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) |
1554 | goto out_unlock; |
1555 | #endif |
1556 | swap(tr->trace_buffer.buffer, tr->max_buffer.buffer); |
1557 | |
1558 | __update_max_tr(tr, tsk, cpu); |
1559 | |
1560 | out_unlock: |
1561 | arch_spin_unlock(&tr->max_lock); |
1562 | } |
1563 | |
1564 | /** |
1565 | * update_max_tr_single - only copy one trace over, and reset the rest |
1566 | * @tr: tracer |
1567 | * @tsk: task with the latency |
1568 | * @cpu: the cpu of the buffer to copy |
1569 | * |
1570 | * Flip the trace of a single CPU buffer between the @tr and the max_tr. |
1571 | */ |
1572 | void |
1573 | update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) |
1574 | { |
1575 | int ret; |
1576 | |
1577 | if (tr->stop_count) |
1578 | return; |
1579 | |
1580 | WARN_ON_ONCE(!irqs_disabled()); |
1581 | if (!tr->allocated_snapshot) { |
1582 | /* Only the nop tracer should hit this when disabling */ |
1583 | WARN_ON_ONCE(tr->current_trace != &nop_trace); |
1584 | return; |
1585 | } |
1586 | |
1587 | arch_spin_lock(&tr->max_lock); |
1588 | |
1589 | ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu); |
1590 | |
1591 | if (ret == -EBUSY) { |
1592 | /* |
1593 | * We failed to swap the buffer due to a commit taking |
1594 | * place on this CPU. We fail to record, but we reset |
1595 | * the max trace buffer (no one writes directly to it) |
1596 | * and flag that it failed. |
1597 | */ |
1598 | trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_, |
1599 | "Failed to swap buffers due to commit in progress\n" ); |
1600 | } |
1601 | |
1602 | WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY); |
1603 | |
1604 | __update_max_tr(tr, tsk, cpu); |
1605 | arch_spin_unlock(&tr->max_lock); |
1606 | } |
1607 | #endif /* CONFIG_TRACER_MAX_TRACE */ |
1608 | |
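/*
 * Block until data is available on the iterator's per-CPU buffer. Buffer
 * iterators are static (already filled or empty), so they never wait.
 */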
1609 | static int wait_on_pipe(struct trace_iterator *iter, int full) |
1610 | { |
1611 | /* Iterators are static, they should be filled or empty */ |
1612 | if (trace_buffer_iter(iter, iter->cpu_file)) |
1613 | return 0; |
1614 | |
1615 | return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file, |
1616 | full); |
1617 | } |
1618 | |
1619 | #ifdef CONFIG_FTRACE_STARTUP_TEST |
1620 | static bool selftests_can_run; |
1621 | |
1622 | struct trace_selftests { |
1623 | struct list_head list; |
1624 | struct tracer *type; |
1625 | }; |
1626 | |
1627 | static LIST_HEAD(postponed_selftests); |
1628 | |
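/*
 * Remember a tracer whose selftest must be postponed until the system
 * is far enough into boot to actually run it.
 */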
1629 | static int save_selftest(struct tracer *type) |
1630 | { |
1631 | struct trace_selftests *selftest; |
1632 | |
1633 | selftest = kmalloc(sizeof(*selftest), GFP_KERNEL); |
1634 | if (!selftest) |
1635 | return -ENOMEM; |
1636 | |
1637 | selftest->type = type; |
1638 | list_add(&selftest->list, &postponed_selftests); |
1639 | return 0; |
1640 | } |
1641 | |
1642 | static int run_tracer_selftest(struct tracer *type) |
1643 | { |
1644 | struct trace_array *tr = &global_trace; |
1645 | struct tracer *saved_tracer = tr->current_trace; |
1646 | int ret; |
1647 | |
1648 | if (!type->selftest || tracing_selftest_disabled) |
1649 | return 0; |
1650 | |
1651 | /* |
1652 | * If a tracer registers early in boot up (before scheduling is |
1653 | * initialized and such), then do not run its selftests yet. |
1654 | * Instead, run it a little later in the boot process. |
1655 | */ |
1656 | if (!selftests_can_run) |
1657 | return save_selftest(type); |
1658 | |
1659 | /* |
1660 | * Run a selftest on this tracer. |
1661 | * Here we reset the trace buffer, and set the current |
1662 | * tracer to be this tracer. The tracer can then run some |
1663 | * internal tracing to verify that everything is in order. |
1664 | * If we fail, we do not register this tracer. |
1665 | */ |
1666 | tracing_reset_online_cpus(&tr->trace_buffer); |
1667 | |
1668 | tr->current_trace = type; |
1669 | |
1670 | #ifdef CONFIG_TRACER_MAX_TRACE |
1671 | if (type->use_max_tr) { |
1672 | /* If we expanded the buffers, make sure the max is expanded too */ |
1673 | if (ring_buffer_expanded) |
1674 | ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size, |
1675 | RING_BUFFER_ALL_CPUS); |
1676 | tr->allocated_snapshot = true; |
1677 | } |
1678 | #endif |
1679 | |
1680 | /* the test is responsible for initializing and enabling */ |
1681 | pr_info("Testing tracer %s: " , type->name); |
1682 | ret = type->selftest(type, tr); |
1683 | /* the test is responsible for resetting too */ |
1684 | tr->current_trace = saved_tracer; |
1685 | if (ret) { |
1686 | printk(KERN_CONT "FAILED!\n" ); |
1687 | /* Add the warning after printing 'FAILED' */ |
1688 | WARN_ON(1); |
1689 | return -1; |
1690 | } |
1691 | /* Only reset on passing, to avoid touching corrupted buffers */ |
1692 | tracing_reset_online_cpus(&tr->trace_buffer); |
1693 | |
1694 | #ifdef CONFIG_TRACER_MAX_TRACE |
1695 | if (type->use_max_tr) { |
1696 | tr->allocated_snapshot = false; |
1697 | |
1698 | /* Shrink the max buffer again */ |
1699 | if (ring_buffer_expanded) |
1700 | ring_buffer_resize(tr->max_buffer.buffer, 1, |
1701 | RING_BUFFER_ALL_CPUS); |
1702 | } |
1703 | #endif |
1704 | |
1705 | printk(KERN_CONT "PASSED\n" ); |
1706 | return 0; |
1707 | } |
1708 | |
1709 | static __init int init_trace_selftests(void) |
1710 | { |
1711 | struct trace_selftests *p, *n; |
1712 | struct tracer *t, **last; |
1713 | int ret; |
1714 | |
1715 | selftests_can_run = true; |
1716 | |
1717 | mutex_lock(&trace_types_lock); |
1718 | |
1719 | if (list_empty(&postponed_selftests)) |
1720 | goto out; |
1721 | |
1722 | pr_info("Running postponed tracer tests:\n" ); |
1723 | |
1724 | list_for_each_entry_safe(p, n, &postponed_selftests, list) { |
1725 | ret = run_tracer_selftest(p->type); |
1726 | /* If the test fails, then warn and remove from available_tracers */ |
1727 | if (ret < 0) { |
1728 | WARN(1, "tracer: %s failed selftest, disabling\n" , |
1729 | p->type->name); |
1730 | last = &trace_types; |
1731 | for (t = trace_types; t; t = t->next) { |
1732 | if (t == p->type) { |
1733 | *last = t->next; |
1734 | break; |
1735 | } |
1736 | last = &t->next; |
1737 | } |
1738 | } |
1739 | list_del(&p->list); |
1740 | kfree(p); |
1741 | } |
1742 | |
1743 | out: |
1744 | mutex_unlock(&trace_types_lock); |
1745 | |
1746 | return 0; |
1747 | } |
1748 | core_initcall(init_trace_selftests); |
1749 | #else |
1750 | static inline int run_tracer_selftest(struct tracer *type) |
1751 | { |
1752 | return 0; |
1753 | } |
1754 | #endif /* CONFIG_FTRACE_STARTUP_TEST */ |
1755 | |
1756 | static void add_tracer_options(struct trace_array *tr, struct tracer *t); |
1757 | |
1758 | static void __init apply_trace_boot_options(void); |
1759 | |
1760 | /** |
1761 | * register_tracer - register a tracer with the ftrace system. |
 * @type: the plugin for the tracer
1763 | * |
1764 | * Register a new plugin tracer. |
1765 | */ |
1766 | int __init register_tracer(struct tracer *type) |
1767 | { |
1768 | struct tracer *t; |
1769 | int ret = 0; |
1770 | |
1771 | if (!type->name) { |
1772 | pr_info("Tracer must have a name\n" ); |
1773 | return -1; |
1774 | } |
1775 | |
1776 | if (strlen(type->name) >= MAX_TRACER_SIZE) { |
1777 | pr_info("Tracer has a name longer than %d\n" , MAX_TRACER_SIZE); |
1778 | return -1; |
1779 | } |
1780 | |
1781 | mutex_lock(&trace_types_lock); |
1782 | |
1783 | tracing_selftest_running = true; |
1784 | |
1785 | for (t = trace_types; t; t = t->next) { |
1786 | if (strcmp(type->name, t->name) == 0) { |
1787 | /* already found */ |
1788 | pr_info("Tracer %s already registered\n" , |
1789 | type->name); |
1790 | ret = -1; |
1791 | goto out; |
1792 | } |
1793 | } |
1794 | |
1795 | if (!type->set_flag) |
1796 | type->set_flag = &dummy_set_flag; |
1797 | if (!type->flags) { |
		/* allocate a dummy tracer_flags */
1799 | type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL); |
1800 | if (!type->flags) { |
1801 | ret = -ENOMEM; |
1802 | goto out; |
1803 | } |
1804 | type->flags->val = 0; |
1805 | type->flags->opts = dummy_tracer_opt; |
1806 | } else |
1807 | if (!type->flags->opts) |
1808 | type->flags->opts = dummy_tracer_opt; |
1809 | |
1810 | /* store the tracer for __set_tracer_option */ |
1811 | type->flags->trace = type; |
1812 | |
1813 | ret = run_tracer_selftest(type); |
1814 | if (ret < 0) |
1815 | goto out; |
1816 | |
1817 | type->next = trace_types; |
1818 | trace_types = type; |
1819 | add_tracer_options(&global_trace, type); |
1820 | |
1821 | out: |
1822 | tracing_selftest_running = false; |
1823 | mutex_unlock(&trace_types_lock); |
1824 | |
1825 | if (ret || !default_bootup_tracer) |
1826 | goto out_unlock; |
1827 | |
1828 | if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE)) |
1829 | goto out_unlock; |
1830 | |
1831 | printk(KERN_INFO "Starting tracer '%s'\n" , type->name); |
1832 | /* Do we want this tracer to start on bootup? */ |
1833 | tracing_set_tracer(&global_trace, type->name); |
1834 | default_bootup_tracer = NULL; |
1835 | |
1836 | apply_trace_boot_options(); |
1837 | |
	/* Disable other selftests, since running this tracer will break them. */
1839 | tracing_selftest_disabled = true; |
1840 | #ifdef CONFIG_FTRACE_STARTUP_TEST |
1841 | printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n" , |
1842 | type->name); |
1843 | #endif |
1844 | |
1845 | out_unlock: |
1846 | return ret; |
1847 | } |
1848 | |
1849 | void tracing_reset(struct trace_buffer *buf, int cpu) |
1850 | { |
1851 | struct ring_buffer *buffer = buf->buffer; |
1852 | |
1853 | if (!buffer) |
1854 | return; |
1855 | |
1856 | ring_buffer_record_disable(buffer); |
1857 | |
1858 | /* Make sure all commits have finished */ |
1859 | synchronize_rcu(); |
1860 | ring_buffer_reset_cpu(buffer, cpu); |
1861 | |
1862 | ring_buffer_record_enable(buffer); |
1863 | } |
1864 | |
1865 | void tracing_reset_online_cpus(struct trace_buffer *buf) |
1866 | { |
1867 | struct ring_buffer *buffer = buf->buffer; |
1868 | int cpu; |
1869 | |
1870 | if (!buffer) |
1871 | return; |
1872 | |
1873 | ring_buffer_record_disable(buffer); |
1874 | |
1875 | /* Make sure all commits have finished */ |
1876 | synchronize_rcu(); |
1877 | |
1878 | buf->time_start = buffer_ftrace_now(buf, buf->cpu); |
1879 | |
1880 | for_each_online_cpu(cpu) |
1881 | ring_buffer_reset_cpu(buffer, cpu); |
1882 | |
1883 | ring_buffer_record_enable(buffer); |
1884 | } |
1885 | |
1886 | /* Must have trace_types_lock held */ |
1887 | void tracing_reset_all_online_cpus(void) |
1888 | { |
1889 | struct trace_array *tr; |
1890 | |
1891 | list_for_each_entry(tr, &ftrace_trace_arrays, list) { |
1892 | if (!tr->clear_trace) |
1893 | continue; |
1894 | tr->clear_trace = false; |
1895 | tracing_reset_online_cpus(&tr->trace_buffer); |
1896 | #ifdef CONFIG_TRACER_MAX_TRACE |
1897 | tracing_reset_online_cpus(&tr->max_buffer); |
1898 | #endif |
1899 | } |
1900 | } |
1901 | |
1902 | static int *tgid_map; |
1903 | |
1904 | #define SAVED_CMDLINES_DEFAULT 128 |
1905 | #define NO_CMDLINE_MAP UINT_MAX |
1906 | static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED; |
1907 | struct saved_cmdlines_buffer { |
1908 | unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1]; |
1909 | unsigned *map_cmdline_to_pid; |
1910 | unsigned cmdline_num; |
1911 | int cmdline_idx; |
1912 | char *saved_cmdlines; |
1913 | }; |
1914 | static struct saved_cmdlines_buffer *savedcmd; |
1915 | |
1916 | /* temporary disable recording */ |
1917 | static atomic_t trace_record_taskinfo_disabled __read_mostly; |
1918 | |
1919 | static inline char *get_saved_cmdlines(int idx) |
1920 | { |
1921 | return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN]; |
1922 | } |
1923 | |
1924 | static inline void set_cmdline(int idx, const char *cmdline) |
1925 | { |
1926 | strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN); |
1927 | } |
1928 | |
1929 | static int allocate_cmdlines_buffer(unsigned int val, |
1930 | struct saved_cmdlines_buffer *s) |
1931 | { |
1932 | s->map_cmdline_to_pid = kmalloc_array(val, |
1933 | sizeof(*s->map_cmdline_to_pid), |
1934 | GFP_KERNEL); |
1935 | if (!s->map_cmdline_to_pid) |
1936 | return -ENOMEM; |
1937 | |
1938 | s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL); |
1939 | if (!s->saved_cmdlines) { |
1940 | kfree(s->map_cmdline_to_pid); |
1941 | return -ENOMEM; |
1942 | } |
1943 | |
1944 | s->cmdline_idx = 0; |
1945 | s->cmdline_num = val; |
1946 | memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP, |
1947 | sizeof(s->map_pid_to_cmdline)); |
1948 | memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP, |
1949 | val * sizeof(*s->map_cmdline_to_pid)); |
1950 | |
1951 | return 0; |
1952 | } |
1953 | |
1954 | static int trace_create_savedcmd(void) |
1955 | { |
1956 | int ret; |
1957 | |
1958 | savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL); |
1959 | if (!savedcmd) |
1960 | return -ENOMEM; |
1961 | |
1962 | ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd); |
1963 | if (ret < 0) { |
1964 | kfree(savedcmd); |
1965 | savedcmd = NULL; |
1966 | return -ENOMEM; |
1967 | } |
1968 | |
1969 | return 0; |
1970 | } |
1971 | |
1972 | int is_tracing_stopped(void) |
1973 | { |
1974 | return global_trace.stop_count; |
1975 | } |
1976 | |
1977 | /** |
1978 | * tracing_start - quick start of the tracer |
1979 | * |
1980 | * If tracing is enabled but was stopped by tracing_stop, |
1981 | * this will start the tracer back up. |
1982 | */ |
1983 | void tracing_start(void) |
1984 | { |
1985 | struct ring_buffer *buffer; |
1986 | unsigned long flags; |
1987 | |
1988 | if (tracing_disabled) |
1989 | return; |
1990 | |
1991 | raw_spin_lock_irqsave(&global_trace.start_lock, flags); |
1992 | if (--global_trace.stop_count) { |
1993 | if (global_trace.stop_count < 0) { |
1994 | /* Someone screwed up their debugging */ |
1995 | WARN_ON_ONCE(1); |
1996 | global_trace.stop_count = 0; |
1997 | } |
1998 | goto out; |
1999 | } |
2000 | |
2001 | /* Prevent the buffers from switching */ |
2002 | arch_spin_lock(&global_trace.max_lock); |
2003 | |
2004 | buffer = global_trace.trace_buffer.buffer; |
2005 | if (buffer) |
2006 | ring_buffer_record_enable(buffer); |
2007 | |
2008 | #ifdef CONFIG_TRACER_MAX_TRACE |
2009 | buffer = global_trace.max_buffer.buffer; |
2010 | if (buffer) |
2011 | ring_buffer_record_enable(buffer); |
2012 | #endif |
2013 | |
2014 | arch_spin_unlock(&global_trace.max_lock); |
2015 | |
2016 | out: |
2017 | raw_spin_unlock_irqrestore(&global_trace.start_lock, flags); |
2018 | } |
2019 | |
2020 | static void tracing_start_tr(struct trace_array *tr) |
2021 | { |
2022 | struct ring_buffer *buffer; |
2023 | unsigned long flags; |
2024 | |
2025 | if (tracing_disabled) |
2026 | return; |
2027 | |
2028 | /* If global, we need to also start the max tracer */ |
2029 | if (tr->flags & TRACE_ARRAY_FL_GLOBAL) |
2030 | return tracing_start(); |
2031 | |
2032 | raw_spin_lock_irqsave(&tr->start_lock, flags); |
2033 | |
2034 | if (--tr->stop_count) { |
2035 | if (tr->stop_count < 0) { |
2036 | /* Someone screwed up their debugging */ |
2037 | WARN_ON_ONCE(1); |
2038 | tr->stop_count = 0; |
2039 | } |
2040 | goto out; |
2041 | } |
2042 | |
2043 | buffer = tr->trace_buffer.buffer; |
2044 | if (buffer) |
2045 | ring_buffer_record_enable(buffer); |
2046 | |
2047 | out: |
2048 | raw_spin_unlock_irqrestore(&tr->start_lock, flags); |
2049 | } |
2050 | |
2051 | /** |
2052 | * tracing_stop - quick stop of the tracer |
2053 | * |
2054 | * Light weight way to stop tracing. Use in conjunction with |
2055 | * tracing_start. |
2056 | */ |
2057 | void tracing_stop(void) |
2058 | { |
2059 | struct ring_buffer *buffer; |
2060 | unsigned long flags; |
2061 | |
2062 | raw_spin_lock_irqsave(&global_trace.start_lock, flags); |
2063 | if (global_trace.stop_count++) |
2064 | goto out; |
2065 | |
2066 | /* Prevent the buffers from switching */ |
2067 | arch_spin_lock(&global_trace.max_lock); |
2068 | |
2069 | buffer = global_trace.trace_buffer.buffer; |
2070 | if (buffer) |
2071 | ring_buffer_record_disable(buffer); |
2072 | |
2073 | #ifdef CONFIG_TRACER_MAX_TRACE |
2074 | buffer = global_trace.max_buffer.buffer; |
2075 | if (buffer) |
2076 | ring_buffer_record_disable(buffer); |
2077 | #endif |
2078 | |
2079 | arch_spin_unlock(&global_trace.max_lock); |
2080 | |
2081 | out: |
2082 | raw_spin_unlock_irqrestore(&global_trace.start_lock, flags); |
2083 | } |
2084 | |
2085 | static void tracing_stop_tr(struct trace_array *tr) |
2086 | { |
2087 | struct ring_buffer *buffer; |
2088 | unsigned long flags; |
2089 | |
2090 | /* If global, we need to also stop the max tracer */ |
2091 | if (tr->flags & TRACE_ARRAY_FL_GLOBAL) |
2092 | return tracing_stop(); |
2093 | |
2094 | raw_spin_lock_irqsave(&tr->start_lock, flags); |
2095 | if (tr->stop_count++) |
2096 | goto out; |
2097 | |
2098 | buffer = tr->trace_buffer.buffer; |
2099 | if (buffer) |
2100 | ring_buffer_record_disable(buffer); |
2101 | |
2102 | out: |
2103 | raw_spin_unlock_irqrestore(&tr->start_lock, flags); |
2104 | } |
2105 | |
2106 | static int trace_save_cmdline(struct task_struct *tsk) |
2107 | { |
2108 | unsigned pid, idx; |
2109 | |
2110 | /* treat recording of idle task as a success */ |
2111 | if (!tsk->pid) |
2112 | return 1; |
2113 | |
2114 | if (unlikely(tsk->pid > PID_MAX_DEFAULT)) |
2115 | return 0; |
2116 | |
2117 | /* |
2118 | * It's not the end of the world if we don't get |
2119 | * the lock, but we also don't want to spin |
2120 | * nor do we want to disable interrupts, |
2121 | * so if we miss here, then better luck next time. |
2122 | */ |
2123 | if (!arch_spin_trylock(&trace_cmdline_lock)) |
2124 | return 0; |
2125 | |
2126 | idx = savedcmd->map_pid_to_cmdline[tsk->pid]; |
2127 | if (idx == NO_CMDLINE_MAP) { |
2128 | idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num; |
2129 | |
2130 | /* |
2131 | * Check whether the cmdline buffer at idx has a pid |
2132 | * mapped. We are going to overwrite that entry so we |
2133 | * need to clear the map_pid_to_cmdline. Otherwise we |
2134 | * would read the new comm for the old pid. |
2135 | */ |
2136 | pid = savedcmd->map_cmdline_to_pid[idx]; |
2137 | if (pid != NO_CMDLINE_MAP) |
2138 | savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP; |
2139 | |
2140 | savedcmd->map_cmdline_to_pid[idx] = tsk->pid; |
2141 | savedcmd->map_pid_to_cmdline[tsk->pid] = idx; |
2142 | |
2143 | savedcmd->cmdline_idx = idx; |
2144 | } |
2145 | |
2146 | set_cmdline(idx, tsk->comm); |
2147 | |
2148 | arch_spin_unlock(&trace_cmdline_lock); |
2149 | |
2150 | return 1; |
2151 | } |
2152 | |
2153 | static void __trace_find_cmdline(int pid, char comm[]) |
2154 | { |
2155 | unsigned map; |
2156 | |
2157 | if (!pid) { |
2158 | strcpy(comm, "<idle>" ); |
2159 | return; |
2160 | } |
2161 | |
2162 | if (WARN_ON_ONCE(pid < 0)) { |
2163 | strcpy(comm, "<XXX>" ); |
2164 | return; |
2165 | } |
2166 | |
2167 | if (pid > PID_MAX_DEFAULT) { |
2168 | strcpy(comm, "<...>" ); |
2169 | return; |
2170 | } |
2171 | |
2172 | map = savedcmd->map_pid_to_cmdline[pid]; |
2173 | if (map != NO_CMDLINE_MAP) |
2174 | strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN); |
2175 | else |
2176 | strcpy(comm, "<...>" ); |
2177 | } |
2178 | |
2179 | void trace_find_cmdline(int pid, char comm[]) |
2180 | { |
2181 | preempt_disable(); |
2182 | arch_spin_lock(&trace_cmdline_lock); |
2183 | |
2184 | __trace_find_cmdline(pid, comm); |
2185 | |
2186 | arch_spin_unlock(&trace_cmdline_lock); |
2187 | preempt_enable(); |
2188 | } |
2189 | |
2190 | int trace_find_tgid(int pid) |
2191 | { |
2192 | if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT)) |
2193 | return 0; |
2194 | |
2195 | return tgid_map[pid]; |
2196 | } |
2197 | |
2198 | static int trace_save_tgid(struct task_struct *tsk) |
2199 | { |
2200 | /* treat recording of idle task as a success */ |
2201 | if (!tsk->pid) |
2202 | return 1; |
2203 | |
2204 | if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT)) |
2205 | return 0; |
2206 | |
2207 | tgid_map[tsk->pid] = tsk->tgid; |
2208 | return 1; |
2209 | } |
2210 | |
2211 | static bool tracing_record_taskinfo_skip(int flags) |
2212 | { |
2213 | if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID)))) |
2214 | return true; |
2215 | if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on()) |
2216 | return true; |
2217 | if (!__this_cpu_read(trace_taskinfo_save)) |
2218 | return true; |
2219 | return false; |
2220 | } |
2221 | |
2222 | /** |
2223 | * tracing_record_taskinfo - record the task info of a task |
2224 | * |
 * @task: task to record
 * @flags: TRACE_RECORD_CMDLINE for recording comm
 *         TRACE_RECORD_TGID for recording tgid
2228 | */ |
2229 | void tracing_record_taskinfo(struct task_struct *task, int flags) |
2230 | { |
2231 | bool done; |
2232 | |
2233 | if (tracing_record_taskinfo_skip(flags)) |
2234 | return; |
2235 | |
2236 | /* |
2237 | * Record as much task information as possible. If some fail, continue |
2238 | * to try to record the others. |
2239 | */ |
2240 | done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task); |
2241 | done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task); |
2242 | |
2243 | /* If recording any information failed, retry again soon. */ |
2244 | if (!done) |
2245 | return; |
2246 | |
2247 | __this_cpu_write(trace_taskinfo_save, false); |
2248 | } |
2249 | |
2250 | /** |
2251 | * tracing_record_taskinfo_sched_switch - record task info for sched_switch |
2252 | * |
 * @prev: previous task during sched_switch
 * @next: next task during sched_switch
 * @flags: TRACE_RECORD_CMDLINE for recording comm
 *         TRACE_RECORD_TGID for recording tgid
2257 | */ |
2258 | void tracing_record_taskinfo_sched_switch(struct task_struct *prev, |
2259 | struct task_struct *next, int flags) |
2260 | { |
2261 | bool done; |
2262 | |
2263 | if (tracing_record_taskinfo_skip(flags)) |
2264 | return; |
2265 | |
2266 | /* |
2267 | * Record as much task information as possible. If some fail, continue |
2268 | * to try to record the others. |
2269 | */ |
2270 | done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev); |
2271 | done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next); |
2272 | done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev); |
2273 | done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next); |
2274 | |
2275 | /* If recording any information failed, retry again soon. */ |
2276 | if (!done) |
2277 | return; |
2278 | |
2279 | __this_cpu_write(trace_taskinfo_save, false); |
2280 | } |
2281 | |
2282 | /* Helpers to record a specific task information */ |
2283 | void tracing_record_cmdline(struct task_struct *task) |
2284 | { |
2285 | tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE); |
2286 | } |
2287 | |
2288 | void tracing_record_tgid(struct task_struct *task) |
2289 | { |
2290 | tracing_record_taskinfo(task, TRACE_RECORD_TGID); |
2291 | } |
2292 | |
2293 | /* |
2294 | * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq |
2295 | * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function |
2296 | * simplifies those functions and keeps them in sync. |
2297 | */ |
2298 | enum print_line_t trace_handle_return(struct trace_seq *s) |
2299 | { |
2300 | return trace_seq_has_overflowed(s) ? |
2301 | TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED; |
2302 | } |
2303 | EXPORT_SYMBOL_GPL(trace_handle_return); |
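
/*
 * Illustrative sketch (not from the original file): the usual way an event
 * output callback uses trace_handle_return() to collapse the overflow check
 * into its return value. The event text printed here is hypothetical.
 */
#if 0
static enum print_line_t example_event_output(struct trace_iterator *iter,
					      int flags,
					      struct trace_event *event)
{
	struct trace_seq *s = &iter->seq;

	trace_seq_printf(s, "example: cpu=%d\n", iter->cpu);

	/* PARTIAL_LINE if the seq overflowed, HANDLED otherwise */
	return trace_handle_return(s);
}
#endif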
2304 | |
2305 | void |
2306 | tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, |
2307 | int pc) |
2308 | { |
2309 | struct task_struct *tsk = current; |
2310 | |
2311 | entry->preempt_count = pc & 0xff; |
2312 | entry->pid = (tsk) ? tsk->pid : 0; |
2313 | entry->flags = |
2314 | #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT |
2315 | (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | |
2316 | #else |
2317 | TRACE_FLAG_IRQS_NOSUPPORT | |
2318 | #endif |
2319 | ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) | |
2320 | ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) | |
2321 | ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) | |
2322 | (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) | |
2323 | (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0); |
2324 | } |
2325 | EXPORT_SYMBOL_GPL(tracing_generic_entry_update); |
2326 | |
2327 | struct ring_buffer_event * |
2328 | trace_buffer_lock_reserve(struct ring_buffer *buffer, |
2329 | int type, |
2330 | unsigned long len, |
2331 | unsigned long flags, int pc) |
2332 | { |
2333 | return __trace_buffer_lock_reserve(buffer, type, len, flags, pc); |
2334 | } |
2335 | |
2336 | DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event); |
2337 | DEFINE_PER_CPU(int, trace_buffered_event_cnt); |
2338 | static int trace_buffered_event_ref; |
2339 | |
2340 | /** |
2341 | * trace_buffered_event_enable - enable buffering events |
2342 | * |
2343 | * When events are being filtered, it is quicker to use a temporary |
2344 | * buffer to write the event data into if there's a likely chance |
2345 | * that it will not be committed. The discard of the ring buffer |
2346 | * is not as fast as committing, and is much slower than copying |
2347 | * a commit. |
2348 | * |
2349 | * When an event is to be filtered, allocate per cpu buffers to |
2350 | * write the event data into, and if the event is filtered and discarded |
2351 | * it is simply dropped, otherwise, the entire data is to be committed |
2352 | * in one shot. |
2353 | */ |
2354 | void trace_buffered_event_enable(void) |
2355 | { |
2356 | struct ring_buffer_event *event; |
2357 | struct page *page; |
2358 | int cpu; |
2359 | |
2360 | WARN_ON_ONCE(!mutex_is_locked(&event_mutex)); |
2361 | |
2362 | if (trace_buffered_event_ref++) |
2363 | return; |
2364 | |
2365 | for_each_tracing_cpu(cpu) { |
2366 | page = alloc_pages_node(cpu_to_node(cpu), |
2367 | GFP_KERNEL | __GFP_NORETRY, 0); |
2368 | if (!page) |
2369 | goto failed; |
2370 | |
2371 | event = page_address(page); |
2372 | memset(event, 0, sizeof(*event)); |
2373 | |
2374 | per_cpu(trace_buffered_event, cpu) = event; |
2375 | |
2376 | preempt_disable(); |
2377 | if (cpu == smp_processor_id() && |
2378 | this_cpu_read(trace_buffered_event) != |
2379 | per_cpu(trace_buffered_event, cpu)) |
2380 | WARN_ON_ONCE(1); |
2381 | preempt_enable(); |
2382 | } |
2383 | |
2384 | return; |
2385 | failed: |
2386 | trace_buffered_event_disable(); |
2387 | } |
2388 | |
2389 | static void enable_trace_buffered_event(void *data) |
2390 | { |
2391 | /* Probably not needed, but do it anyway */ |
2392 | smp_rmb(); |
2393 | this_cpu_dec(trace_buffered_event_cnt); |
2394 | } |
2395 | |
2396 | static void disable_trace_buffered_event(void *data) |
2397 | { |
2398 | this_cpu_inc(trace_buffered_event_cnt); |
2399 | } |
2400 | |
2401 | /** |
2402 | * trace_buffered_event_disable - disable buffering events |
2403 | * |
2404 | * When a filter is removed, it is faster to not use the buffered |
2405 | * events, and to commit directly into the ring buffer. Free up |
2406 | * the temp buffers when there are no more users. This requires |
2407 | * special synchronization with current events. |
2408 | */ |
2409 | void trace_buffered_event_disable(void) |
2410 | { |
2411 | int cpu; |
2412 | |
2413 | WARN_ON_ONCE(!mutex_is_locked(&event_mutex)); |
2414 | |
2415 | if (WARN_ON_ONCE(!trace_buffered_event_ref)) |
2416 | return; |
2417 | |
2418 | if (--trace_buffered_event_ref) |
2419 | return; |
2420 | |
2421 | preempt_disable(); |
2422 | /* For each CPU, set the buffer as used. */ |
2423 | smp_call_function_many(tracing_buffer_mask, |
2424 | disable_trace_buffered_event, NULL, 1); |
2425 | preempt_enable(); |
2426 | |
2427 | /* Wait for all current users to finish */ |
2428 | synchronize_rcu(); |
2429 | |
2430 | for_each_tracing_cpu(cpu) { |
2431 | free_page((unsigned long)per_cpu(trace_buffered_event, cpu)); |
2432 | per_cpu(trace_buffered_event, cpu) = NULL; |
2433 | } |
2434 | /* |
2435 | * Make sure trace_buffered_event is NULL before clearing |
2436 | * trace_buffered_event_cnt. |
2437 | */ |
2438 | smp_wmb(); |
2439 | |
2440 | preempt_disable(); |
2441 | /* Do the work on each cpu */ |
2442 | smp_call_function_many(tracing_buffer_mask, |
2443 | enable_trace_buffered_event, NULL, 1); |
2444 | preempt_enable(); |
2445 | } |
2446 | |
2447 | static struct ring_buffer *temp_buffer; |
2448 | |
2449 | struct ring_buffer_event * |
2450 | trace_event_buffer_lock_reserve(struct ring_buffer **current_rb, |
2451 | struct trace_event_file *trace_file, |
2452 | int type, unsigned long len, |
2453 | unsigned long flags, int pc) |
2454 | { |
2455 | struct ring_buffer_event *entry; |
2456 | int val; |
2457 | |
2458 | *current_rb = trace_file->tr->trace_buffer.buffer; |
2459 | |
2460 | if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags & |
2461 | (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) && |
2462 | (entry = this_cpu_read(trace_buffered_event))) { |
2463 | /* Try to use the per cpu buffer first */ |
2464 | val = this_cpu_inc_return(trace_buffered_event_cnt); |
2465 | if (val == 1) { |
2466 | trace_event_setup(entry, type, flags, pc); |
2467 | entry->array[0] = len; |
2468 | return entry; |
2469 | } |
2470 | this_cpu_dec(trace_buffered_event_cnt); |
2471 | } |
2472 | |
2473 | entry = __trace_buffer_lock_reserve(*current_rb, |
2474 | type, len, flags, pc); |
2475 | /* |
2476 | * If tracing is off, but we have triggers enabled |
2477 | * we still need to look at the event data. Use the temp_buffer |
	 * to store the trace event for the trigger to use. It is recursion
	 * safe and will not be recorded anywhere.
2480 | */ |
2481 | if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) { |
2482 | *current_rb = temp_buffer; |
2483 | entry = __trace_buffer_lock_reserve(*current_rb, |
2484 | type, len, flags, pc); |
2485 | } |
2486 | return entry; |
2487 | } |
2488 | EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve); |
2489 | |
2490 | static DEFINE_SPINLOCK(tracepoint_iter_lock); |
2491 | static DEFINE_MUTEX(tracepoint_printk_mutex); |
2492 | |
2493 | static void output_printk(struct trace_event_buffer *fbuffer) |
2494 | { |
2495 | struct trace_event_call *event_call; |
2496 | struct trace_event *event; |
2497 | unsigned long flags; |
2498 | struct trace_iterator *iter = tracepoint_print_iter; |
2499 | |
2500 | /* We should never get here if iter is NULL */ |
2501 | if (WARN_ON_ONCE(!iter)) |
2502 | return; |
2503 | |
2504 | event_call = fbuffer->trace_file->event_call; |
2505 | if (!event_call || !event_call->event.funcs || |
2506 | !event_call->event.funcs->trace) |
2507 | return; |
2508 | |
2509 | event = &fbuffer->trace_file->event_call->event; |
2510 | |
2511 | spin_lock_irqsave(&tracepoint_iter_lock, flags); |
2512 | trace_seq_init(&iter->seq); |
2513 | iter->ent = fbuffer->entry; |
2514 | event_call->event.funcs->trace(iter, 0, event); |
2515 | trace_seq_putc(&iter->seq, 0); |
2516 | printk("%s" , iter->seq.buffer); |
2517 | |
2518 | spin_unlock_irqrestore(&tracepoint_iter_lock, flags); |
2519 | } |
2520 | |
2521 | int tracepoint_printk_sysctl(struct ctl_table *table, int write, |
2522 | void __user *buffer, size_t *lenp, |
2523 | loff_t *ppos) |
2524 | { |
2525 | int save_tracepoint_printk; |
2526 | int ret; |
2527 | |
2528 | mutex_lock(&tracepoint_printk_mutex); |
2529 | save_tracepoint_printk = tracepoint_printk; |
2530 | |
2531 | ret = proc_dointvec(table, write, buffer, lenp, ppos); |
2532 | |
2533 | /* |
2534 | * This will force exiting early, as tracepoint_printk |
	 * is always zero when tracepoint_print_iter is not allocated.
2536 | */ |
2537 | if (!tracepoint_print_iter) |
2538 | tracepoint_printk = 0; |
2539 | |
2540 | if (save_tracepoint_printk == tracepoint_printk) |
2541 | goto out; |
2542 | |
2543 | if (tracepoint_printk) |
2544 | static_key_enable(&tracepoint_printk_key.key); |
2545 | else |
2546 | static_key_disable(&tracepoint_printk_key.key); |
2547 | |
2548 | out: |
2549 | mutex_unlock(&tracepoint_printk_mutex); |
2550 | |
2551 | return ret; |
2552 | } |
2553 | |
2554 | void trace_event_buffer_commit(struct trace_event_buffer *fbuffer) |
2555 | { |
2556 | if (static_key_false(&tracepoint_printk_key.key)) |
2557 | output_printk(fbuffer); |
2558 | |
2559 | event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer, |
2560 | fbuffer->event, fbuffer->entry, |
2561 | fbuffer->flags, fbuffer->pc); |
2562 | } |
2563 | EXPORT_SYMBOL_GPL(trace_event_buffer_commit); |
2564 | |
2565 | /* |
2566 | * Skip 3: |
2567 | * |
2568 | * trace_buffer_unlock_commit_regs() |
2569 | * trace_event_buffer_commit() |
2570 | * trace_event_raw_event_xxx() |
2571 | */ |
2572 | # define STACK_SKIP 3 |
2573 | |
2574 | void trace_buffer_unlock_commit_regs(struct trace_array *tr, |
2575 | struct ring_buffer *buffer, |
2576 | struct ring_buffer_event *event, |
2577 | unsigned long flags, int pc, |
2578 | struct pt_regs *regs) |
2579 | { |
2580 | __buffer_unlock_commit(buffer, event); |
2581 | |
2582 | /* |
2583 | * If regs is not set, then skip the necessary functions. |
2584 | * Note, we can still get here via blktrace, wakeup tracer |
2585 | * and mmiotrace, but that's ok if they lose a function or |
2586 | * two. They are not that meaningful. |
2587 | */ |
2588 | ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs); |
2589 | ftrace_trace_userstack(buffer, flags, pc); |
2590 | } |
2591 | |
2592 | /* |
2593 | * Similar to trace_buffer_unlock_commit_regs() but do not dump stack. |
2594 | */ |
2595 | void |
2596 | trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer, |
2597 | struct ring_buffer_event *event) |
2598 | { |
2599 | __buffer_unlock_commit(buffer, event); |
2600 | } |
2601 | |
2602 | static void |
2603 | trace_process_export(struct trace_export *export, |
2604 | struct ring_buffer_event *event) |
2605 | { |
2606 | struct trace_entry *entry; |
2607 | unsigned int size = 0; |
2608 | |
2609 | entry = ring_buffer_event_data(event); |
2610 | size = ring_buffer_event_length(event); |
2611 | export->write(export, entry, size); |
2612 | } |
2613 | |
2614 | static DEFINE_MUTEX(ftrace_export_lock); |
2615 | |
2616 | static struct trace_export __rcu *ftrace_exports_list __read_mostly; |
2617 | |
2618 | static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled); |
2619 | |
2620 | static inline void ftrace_exports_enable(void) |
2621 | { |
2622 | static_branch_enable(&ftrace_exports_enabled); |
2623 | } |
2624 | |
2625 | static inline void ftrace_exports_disable(void) |
2626 | { |
2627 | static_branch_disable(&ftrace_exports_enabled); |
2628 | } |
2629 | |
2630 | static void ftrace_exports(struct ring_buffer_event *event) |
2631 | { |
2632 | struct trace_export *export; |
2633 | |
2634 | preempt_disable_notrace(); |
2635 | |
2636 | export = rcu_dereference_raw_notrace(ftrace_exports_list); |
2637 | while (export) { |
2638 | trace_process_export(export, event); |
2639 | export = rcu_dereference_raw_notrace(export->next); |
2640 | } |
2641 | |
2642 | preempt_enable_notrace(); |
2643 | } |
2644 | |
2645 | static inline void |
2646 | add_trace_export(struct trace_export **list, struct trace_export *export) |
2647 | { |
2648 | rcu_assign_pointer(export->next, *list); |
2649 | /* |
	 * We are inserting export into the list but another
	 * CPU might be walking that list. We need to make sure
	 * the export->next pointer is valid before another CPU sees
	 * the export pointer included in the list.
2654 | */ |
2655 | rcu_assign_pointer(*list, export); |
2656 | } |
2657 | |
2658 | static inline int |
2659 | rm_trace_export(struct trace_export **list, struct trace_export *export) |
2660 | { |
2661 | struct trace_export **p; |
2662 | |
2663 | for (p = list; *p != NULL; p = &(*p)->next) |
2664 | if (*p == export) |
2665 | break; |
2666 | |
2667 | if (*p != export) |
2668 | return -1; |
2669 | |
2670 | rcu_assign_pointer(*p, (*p)->next); |
2671 | |
2672 | return 0; |
2673 | } |
2674 | |
2675 | static inline void |
2676 | add_ftrace_export(struct trace_export **list, struct trace_export *export) |
2677 | { |
2678 | if (*list == NULL) |
2679 | ftrace_exports_enable(); |
2680 | |
2681 | add_trace_export(list, export); |
2682 | } |
2683 | |
2684 | static inline int |
2685 | rm_ftrace_export(struct trace_export **list, struct trace_export *export) |
2686 | { |
2687 | int ret; |
2688 | |
2689 | ret = rm_trace_export(list, export); |
2690 | if (*list == NULL) |
2691 | ftrace_exports_disable(); |
2692 | |
2693 | return ret; |
2694 | } |
2695 | |
2696 | int register_ftrace_export(struct trace_export *export) |
2697 | { |
2698 | if (WARN_ON_ONCE(!export->write)) |
2699 | return -1; |
2700 | |
2701 | mutex_lock(&ftrace_export_lock); |
2702 | |
2703 | add_ftrace_export(&ftrace_exports_list, export); |
2704 | |
2705 | mutex_unlock(&ftrace_export_lock); |
2706 | |
2707 | return 0; |
2708 | } |
2709 | EXPORT_SYMBOL_GPL(register_ftrace_export); |
2710 | |
2711 | int unregister_ftrace_export(struct trace_export *export) |
2712 | { |
2713 | int ret; |
2714 | |
2715 | mutex_lock(&ftrace_export_lock); |
2716 | |
2717 | ret = rm_ftrace_export(&ftrace_exports_list, export); |
2718 | |
2719 | mutex_unlock(&ftrace_export_lock); |
2720 | |
2721 | return ret; |
2722 | } |
2723 | EXPORT_SYMBOL_GPL(unregister_ftrace_export); |
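
/*
 * Illustrative sketch (not from the original file): a minimal trace_export
 * that only reports the size of each exported function-trace entry. A real
 * export would forward the payload to some other sink.
 */
#if 0
static void example_export_write(struct trace_export *export,
				 const void *entry, unsigned int size)
{
	pr_debug("ftrace export: %u byte entry\n", size);
}

static struct trace_export example_export = {
	.write	= example_export_write,
};

/* Paired calls, typically from a module's init/exit paths:
 *	register_ftrace_export(&example_export);
 *	unregister_ftrace_export(&example_export);
 */
#endif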
2724 | |
2725 | void |
2726 | trace_function(struct trace_array *tr, |
2727 | unsigned long ip, unsigned long parent_ip, unsigned long flags, |
2728 | int pc) |
2729 | { |
2730 | struct trace_event_call *call = &event_function; |
2731 | struct ring_buffer *buffer = tr->trace_buffer.buffer; |
2732 | struct ring_buffer_event *event; |
2733 | struct ftrace_entry *entry; |
2734 | |
2735 | event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry), |
2736 | flags, pc); |
2737 | if (!event) |
2738 | return; |
2739 | entry = ring_buffer_event_data(event); |
2740 | entry->ip = ip; |
2741 | entry->parent_ip = parent_ip; |
2742 | |
2743 | if (!call_filter_check_discard(call, entry, buffer, event)) { |
2744 | if (static_branch_unlikely(&ftrace_exports_enabled)) |
2745 | ftrace_exports(event); |
2746 | __buffer_unlock_commit(buffer, event); |
2747 | } |
2748 | } |
2749 | |
2750 | #ifdef CONFIG_STACKTRACE |
2751 | |
2752 | #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long)) |
2753 | struct ftrace_stack { |
2754 | unsigned long calls[FTRACE_STACK_MAX_ENTRIES]; |
2755 | }; |
2756 | |
2757 | static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack); |
2758 | static DEFINE_PER_CPU(int, ftrace_stack_reserve); |
2759 | |
2760 | static void __ftrace_trace_stack(struct ring_buffer *buffer, |
2761 | unsigned long flags, |
2762 | int skip, int pc, struct pt_regs *regs) |
2763 | { |
2764 | struct trace_event_call *call = &event_kernel_stack; |
2765 | struct ring_buffer_event *event; |
2766 | struct stack_entry *entry; |
2767 | struct stack_trace trace; |
2768 | int use_stack; |
2769 | int size = FTRACE_STACK_ENTRIES; |
2770 | |
2771 | trace.nr_entries = 0; |
2772 | trace.skip = skip; |
2773 | |
2774 | /* |
	 * Add one, for this function and the call to save_stack_trace().
	 * If regs is set, then these functions will not be in the way.
2777 | */ |
2778 | #ifndef CONFIG_UNWINDER_ORC |
2779 | if (!regs) |
2780 | trace.skip++; |
2781 | #endif |
2782 | |
2783 | /* |
2784 | * Since events can happen in NMIs there's no safe way to |
2785 | * use the per cpu ftrace_stacks. We reserve it and if an interrupt |
2786 | * or NMI comes in, it will just have to use the default |
2787 | * FTRACE_STACK_SIZE. |
2788 | */ |
2789 | preempt_disable_notrace(); |
2790 | |
2791 | use_stack = __this_cpu_inc_return(ftrace_stack_reserve); |
2792 | /* |
2793 | * We don't need any atomic variables, just a barrier. |
2794 | * If an interrupt comes in, we don't care, because it would |
2795 | * have exited and put the counter back to what we want. |
2796 | * We just need a barrier to keep gcc from moving things |
2797 | * around. |
2798 | */ |
2799 | barrier(); |
2800 | if (use_stack == 1) { |
2801 | trace.entries = this_cpu_ptr(ftrace_stack.calls); |
2802 | trace.max_entries = FTRACE_STACK_MAX_ENTRIES; |
2803 | |
2804 | if (regs) |
2805 | save_stack_trace_regs(regs, &trace); |
2806 | else |
2807 | save_stack_trace(&trace); |
2808 | |
2809 | if (trace.nr_entries > size) |
2810 | size = trace.nr_entries; |
2811 | } else |
2812 | /* From now on, use_stack is a boolean */ |
2813 | use_stack = 0; |
2814 | |
2815 | size *= sizeof(unsigned long); |
2816 | |
2817 | event = __trace_buffer_lock_reserve(buffer, TRACE_STACK, |
2818 | sizeof(*entry) + size, flags, pc); |
2819 | if (!event) |
2820 | goto out; |
2821 | entry = ring_buffer_event_data(event); |
2822 | |
2823 | memset(&entry->caller, 0, size); |
2824 | |
2825 | if (use_stack) |
2826 | memcpy(&entry->caller, trace.entries, |
2827 | trace.nr_entries * sizeof(unsigned long)); |
2828 | else { |
2829 | trace.max_entries = FTRACE_STACK_ENTRIES; |
2830 | trace.entries = entry->caller; |
2831 | if (regs) |
2832 | save_stack_trace_regs(regs, &trace); |
2833 | else |
2834 | save_stack_trace(&trace); |
2835 | } |
2836 | |
2837 | entry->size = trace.nr_entries; |
2838 | |
2839 | if (!call_filter_check_discard(call, entry, buffer, event)) |
2840 | __buffer_unlock_commit(buffer, event); |
2841 | |
2842 | out: |
2843 | /* Again, don't let gcc optimize things here */ |
2844 | barrier(); |
2845 | __this_cpu_dec(ftrace_stack_reserve); |
2846 | preempt_enable_notrace(); |
2847 | |
2848 | } |
2849 | |
2850 | static inline void ftrace_trace_stack(struct trace_array *tr, |
2851 | struct ring_buffer *buffer, |
2852 | unsigned long flags, |
2853 | int skip, int pc, struct pt_regs *regs) |
2854 | { |
2855 | if (!(tr->trace_flags & TRACE_ITER_STACKTRACE)) |
2856 | return; |
2857 | |
2858 | __ftrace_trace_stack(buffer, flags, skip, pc, regs); |
2859 | } |
2860 | |
2861 | void __trace_stack(struct trace_array *tr, unsigned long flags, int skip, |
2862 | int pc) |
2863 | { |
2864 | struct ring_buffer *buffer = tr->trace_buffer.buffer; |
2865 | |
2866 | if (rcu_is_watching()) { |
2867 | __ftrace_trace_stack(buffer, flags, skip, pc, NULL); |
2868 | return; |
2869 | } |
2870 | |
2871 | /* |
2872 | * When an NMI triggers, RCU is enabled via rcu_nmi_enter(), |
2873 | * but if the above rcu_is_watching() failed, then the NMI |
2874 | * triggered someplace critical, and rcu_irq_enter() should |
2875 | * not be called from NMI. |
2876 | */ |
2877 | if (unlikely(in_nmi())) |
2878 | return; |
2879 | |
2880 | rcu_irq_enter_irqson(); |
2881 | __ftrace_trace_stack(buffer, flags, skip, pc, NULL); |
2882 | rcu_irq_exit_irqson(); |
2883 | } |
2884 | |
2885 | /** |
2886 | * trace_dump_stack - record a stack back trace in the trace buffer |
2887 | * @skip: Number of functions to skip (helper handlers) |
2888 | */ |
2889 | void trace_dump_stack(int skip) |
2890 | { |
2891 | unsigned long flags; |
2892 | |
2893 | if (tracing_disabled || tracing_selftest_running) |
2894 | return; |
2895 | |
2896 | local_save_flags(flags); |
2897 | |
2898 | #ifndef CONFIG_UNWINDER_ORC |
2899 | /* Skip 1 to skip this function. */ |
2900 | skip++; |
2901 | #endif |
2902 | __ftrace_trace_stack(global_trace.trace_buffer.buffer, |
2903 | flags, skip, preempt_count(), NULL); |
2904 | } |
2905 | EXPORT_SYMBOL_GPL(trace_dump_stack); |
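
/*
 * Illustrative sketch (not from the original file): recording the current
 * call chain into the trace buffer from an ad-hoc debugging site. A skip
 * of 0 keeps every caller above trace_dump_stack() itself.
 */
#if 0
static void example_debug_site(void)
{
	trace_dump_stack(0);
}
#endif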
2906 | |
2907 | static DEFINE_PER_CPU(int, user_stack_count); |
2908 | |
2909 | void |
2910 | ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc) |
2911 | { |
2912 | struct trace_event_call *call = &event_user_stack; |
2913 | struct ring_buffer_event *event; |
2914 | struct userstack_entry *entry; |
2915 | struct stack_trace trace; |
2916 | |
2917 | if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE)) |
2918 | return; |
2919 | |
2920 | /* |
2921 | * NMIs can not handle page faults, even with fix ups. |
	 * Saving the user stack can (and often does) fault.
2923 | */ |
2924 | if (unlikely(in_nmi())) |
2925 | return; |
2926 | |
2927 | /* |
2928 | * prevent recursion, since the user stack tracing may |
2929 | * trigger other kernel events. |
2930 | */ |
2931 | preempt_disable(); |
2932 | if (__this_cpu_read(user_stack_count)) |
2933 | goto out; |
2934 | |
2935 | __this_cpu_inc(user_stack_count); |
2936 | |
2937 | event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK, |
2938 | sizeof(*entry), flags, pc); |
2939 | if (!event) |
2940 | goto out_drop_count; |
2941 | entry = ring_buffer_event_data(event); |
2942 | |
2943 | entry->tgid = current->tgid; |
2944 | memset(&entry->caller, 0, sizeof(entry->caller)); |
2945 | |
2946 | trace.nr_entries = 0; |
2947 | trace.max_entries = FTRACE_STACK_ENTRIES; |
2948 | trace.skip = 0; |
2949 | trace.entries = entry->caller; |
2950 | |
2951 | save_stack_trace_user(&trace); |
2952 | if (!call_filter_check_discard(call, entry, buffer, event)) |
2953 | __buffer_unlock_commit(buffer, event); |
2954 | |
2955 | out_drop_count: |
2956 | __this_cpu_dec(user_stack_count); |
2957 | out: |
2958 | preempt_enable(); |
2959 | } |
2960 | |
2961 | #ifdef UNUSED |
2962 | static void __trace_userstack(struct trace_array *tr, unsigned long flags) |
2963 | { |
2964 | ftrace_trace_userstack(tr, flags, preempt_count()); |
2965 | } |
2966 | #endif /* UNUSED */ |
2967 | |
2968 | #endif /* CONFIG_STACKTRACE */ |
2969 | |
2970 | /* created for use with alloc_percpu */ |
2971 | struct trace_buffer_struct { |
2972 | int nesting; |
2973 | char buffer[4][TRACE_BUF_SIZE]; |
2974 | }; |
2975 | |
2976 | static struct trace_buffer_struct *trace_percpu_buffer; |
2977 | |
2978 | /* |
 * This allows for lockless recording. If we're nested too deeply, then
2980 | * this returns NULL. |
2981 | */ |
2982 | static char *get_trace_buf(void) |
2983 | { |
2984 | struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer); |
2985 | |
2986 | if (!buffer || buffer->nesting >= 4) |
2987 | return NULL; |
2988 | |
2989 | buffer->nesting++; |
2990 | |
2991 | /* Interrupts must see nesting incremented before we use the buffer */ |
2992 | barrier(); |
2993 | return &buffer->buffer[buffer->nesting][0]; |
2994 | } |
2995 | |
2996 | static void put_trace_buf(void) |
2997 | { |
2998 | /* Don't let the decrement of nesting leak before this */ |
2999 | barrier(); |
3000 | this_cpu_dec(trace_percpu_buffer->nesting); |
3001 | } |
3002 | |
3003 | static int alloc_percpu_trace_buffer(void) |
3004 | { |
3005 | struct trace_buffer_struct *buffers; |
3006 | |
3007 | buffers = alloc_percpu(struct trace_buffer_struct); |
	if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
3009 | return -ENOMEM; |
3010 | |
3011 | trace_percpu_buffer = buffers; |
3012 | return 0; |
3013 | } |
3014 | |
3015 | static int buffers_allocated; |
3016 | |
3017 | void trace_printk_init_buffers(void) |
3018 | { |
3019 | if (buffers_allocated) |
3020 | return; |
3021 | |
3022 | if (alloc_percpu_trace_buffer()) |
3023 | return; |
3024 | |
3025 | /* trace_printk() is for debug use only. Don't use it in production. */ |
3026 | |
3027 | pr_warn("\n" ); |
3028 | pr_warn("**********************************************************\n" ); |
3029 | pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n" ); |
3030 | pr_warn("** **\n" ); |
3031 | pr_warn("** trace_printk() being used. Allocating extra memory. **\n" ); |
3032 | pr_warn("** **\n" ); |
3033 | pr_warn("** This means that this is a DEBUG kernel and it is **\n" ); |
3034 | pr_warn("** unsafe for production use. **\n" ); |
3035 | pr_warn("** **\n" ); |
3036 | pr_warn("** If you see this message and you are not debugging **\n" ); |
3037 | pr_warn("** the kernel, report this immediately to your vendor! **\n" ); |
3038 | pr_warn("** **\n" ); |
3039 | pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n" ); |
3040 | pr_warn("**********************************************************\n" ); |
3041 | |
3042 | /* Expand the buffers to set size */ |
3043 | tracing_update_buffers(); |
3044 | |
3045 | buffers_allocated = 1; |
3046 | |
3047 | /* |
3048 | * trace_printk_init_buffers() can be called by modules. |
3049 | * If that happens, then we need to start cmdline recording |
3050 | * directly here. If the global_trace.buffer is already |
3051 | * allocated here, then this was called by module code. |
3052 | */ |
3053 | if (global_trace.trace_buffer.buffer) |
3054 | tracing_start_cmdline_record(); |
3055 | } |
3056 | |
3057 | void trace_printk_start_comm(void) |
3058 | { |
3059 | /* Start tracing comms if trace printk is set */ |
3060 | if (!buffers_allocated) |
3061 | return; |
3062 | tracing_start_cmdline_record(); |
3063 | } |
3064 | |
3065 | static void trace_printk_start_stop_comm(int enabled) |
3066 | { |
3067 | if (!buffers_allocated) |
3068 | return; |
3069 | |
3070 | if (enabled) |
3071 | tracing_start_cmdline_record(); |
3072 | else |
3073 | tracing_stop_cmdline_record(); |
3074 | } |
3075 | |
3076 | /** |
3077 | * trace_vbprintk - write binary msg to tracing buffer |
3078 | * |
3079 | */ |
3080 | int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) |
3081 | { |
3082 | struct trace_event_call *call = &event_bprint; |
3083 | struct ring_buffer_event *event; |
3084 | struct ring_buffer *buffer; |
3085 | struct trace_array *tr = &global_trace; |
3086 | struct bprint_entry *entry; |
3087 | unsigned long flags; |
3088 | char *tbuffer; |
3089 | int len = 0, size, pc; |
3090 | |
3091 | if (unlikely(tracing_selftest_running || tracing_disabled)) |
3092 | return 0; |
3093 | |
3094 | /* Don't pollute graph traces with trace_vprintk internals */ |
3095 | pause_graph_tracing(); |
3096 | |
3097 | pc = preempt_count(); |
3098 | preempt_disable_notrace(); |
3099 | |
3100 | tbuffer = get_trace_buf(); |
3101 | if (!tbuffer) { |
3102 | len = 0; |
3103 | goto out_nobuffer; |
3104 | } |
3105 | |
3106 | len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args); |
3107 | |
3108 | if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0) |
3109 | goto out; |
3110 | |
3111 | local_save_flags(flags); |
3112 | size = sizeof(*entry) + sizeof(u32) * len; |
3113 | buffer = tr->trace_buffer.buffer; |
3114 | event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size, |
3115 | flags, pc); |
3116 | if (!event) |
3117 | goto out; |
3118 | entry = ring_buffer_event_data(event); |
3119 | entry->ip = ip; |
3120 | entry->fmt = fmt; |
3121 | |
3122 | memcpy(entry->buf, tbuffer, sizeof(u32) * len); |
3123 | if (!call_filter_check_discard(call, entry, buffer, event)) { |
3124 | __buffer_unlock_commit(buffer, event); |
3125 | ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL); |
3126 | } |
3127 | |
3128 | out: |
3129 | put_trace_buf(); |
3130 | |
3131 | out_nobuffer: |
3132 | preempt_enable_notrace(); |
3133 | unpause_graph_tracing(); |
3134 | |
3135 | return len; |
3136 | } |
3137 | EXPORT_SYMBOL_GPL(trace_vbprintk); |
3138 | |
3139 | __printf(3, 0) |
3140 | static int |
3141 | __trace_array_vprintk(struct ring_buffer *buffer, |
3142 | unsigned long ip, const char *fmt, va_list args) |
3143 | { |
3144 | struct trace_event_call *call = &event_print; |
3145 | struct ring_buffer_event *event; |
3146 | int len = 0, size, pc; |
3147 | struct print_entry *entry; |
3148 | unsigned long flags; |
3149 | char *tbuffer; |
3150 | |
3151 | if (tracing_disabled || tracing_selftest_running) |
3152 | return 0; |
3153 | |
3154 | /* Don't pollute graph traces with trace_vprintk internals */ |
3155 | pause_graph_tracing(); |
3156 | |
3157 | pc = preempt_count(); |
3158 | preempt_disable_notrace(); |
3159 | |
3160 | |
3161 | tbuffer = get_trace_buf(); |
3162 | if (!tbuffer) { |
3163 | len = 0; |
3164 | goto out_nobuffer; |
3165 | } |
3166 | |
3167 | len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args); |
3168 | |
3169 | local_save_flags(flags); |
3170 | size = sizeof(*entry) + len + 1; |
3171 | event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, |
3172 | flags, pc); |
3173 | if (!event) |
3174 | goto out; |
3175 | entry = ring_buffer_event_data(event); |
3176 | entry->ip = ip; |
3177 | |
3178 | memcpy(&entry->buf, tbuffer, len + 1); |
3179 | if (!call_filter_check_discard(call, entry, buffer, event)) { |
3180 | __buffer_unlock_commit(buffer, event); |
3181 | ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL); |
3182 | } |
3183 | |
3184 | out: |
3185 | put_trace_buf(); |
3186 | |
3187 | out_nobuffer: |
3188 | preempt_enable_notrace(); |
3189 | unpause_graph_tracing(); |
3190 | |
3191 | return len; |
3192 | } |
3193 | |
3194 | __printf(3, 0) |
3195 | int trace_array_vprintk(struct trace_array *tr, |
3196 | unsigned long ip, const char *fmt, va_list args) |
3197 | { |
3198 | return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args); |
3199 | } |
3200 | |
3201 | __printf(3, 0) |
3202 | int trace_array_printk(struct trace_array *tr, |
3203 | unsigned long ip, const char *fmt, ...) |
3204 | { |
3205 | int ret; |
3206 | va_list ap; |
3207 | |
3208 | if (!(global_trace.trace_flags & TRACE_ITER_PRINTK)) |
3209 | return 0; |
3210 | |
3211 | va_start(ap, fmt); |
3212 | ret = trace_array_vprintk(tr, ip, fmt, ap); |
3213 | va_end(ap); |
3214 | return ret; |
3215 | } |
3216 | |
3217 | __printf(3, 4) |
3218 | int trace_array_printk_buf(struct ring_buffer *buffer, |
3219 | unsigned long ip, const char *fmt, ...) |
3220 | { |
3221 | int ret; |
3222 | va_list ap; |
3223 | |
3224 | if (!(global_trace.trace_flags & TRACE_ITER_PRINTK)) |
3225 | return 0; |
3226 | |
3227 | va_start(ap, fmt); |
3228 | ret = __trace_array_vprintk(buffer, ip, fmt, ap); |
3229 | va_end(ap); |
3230 | return ret; |
3231 | } |
3232 | |
3233 | __printf(2, 0) |
3234 | int trace_vprintk(unsigned long ip, const char *fmt, va_list args) |
3235 | { |
3236 | return trace_array_vprintk(&global_trace, ip, fmt, args); |
3237 | } |
3238 | EXPORT_SYMBOL_GPL(trace_vprintk); |
3239 | |
3240 | static void trace_iterator_increment(struct trace_iterator *iter) |
3241 | { |
3242 | struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu); |
3243 | |
3244 | iter->idx++; |
3245 | if (buf_iter) |
3246 | ring_buffer_read(buf_iter, NULL); |
3247 | } |
3248 | |
3249 | static struct trace_entry * |
3250 | peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts, |
3251 | unsigned long *lost_events) |
3252 | { |
3253 | struct ring_buffer_event *event; |
3254 | struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu); |
3255 | |
3256 | if (buf_iter) |
3257 | event = ring_buffer_iter_peek(buf_iter, ts); |
3258 | else |
3259 | event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts, |
3260 | lost_events); |
3261 | |
3262 | if (event) { |
3263 | iter->ent_size = ring_buffer_event_length(event); |
3264 | return ring_buffer_event_data(event); |
3265 | } |
3266 | iter->ent_size = 0; |
3267 | return NULL; |
3268 | } |
3269 | |
3270 | static struct trace_entry * |
3271 | __find_next_entry(struct trace_iterator *iter, int *ent_cpu, |
3272 | unsigned long *missing_events, u64 *ent_ts) |
3273 | { |
3274 | struct ring_buffer *buffer = iter->trace_buffer->buffer; |
3275 | struct trace_entry *ent, *next = NULL; |
3276 | unsigned long lost_events = 0, next_lost = 0; |
3277 | int cpu_file = iter->cpu_file; |
3278 | u64 next_ts = 0, ts; |
3279 | int next_cpu = -1; |
3280 | int next_size = 0; |
3281 | int cpu; |
3282 | |
3283 | /* |
	 * If we are in a per_cpu trace file, don't bother iterating over
	 * all CPUs; peek at that cpu's buffer directly.
3286 | */ |
3287 | if (cpu_file > RING_BUFFER_ALL_CPUS) { |
3288 | if (ring_buffer_empty_cpu(buffer, cpu_file)) |
3289 | return NULL; |
3290 | ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events); |
3291 | if (ent_cpu) |
3292 | *ent_cpu = cpu_file; |
3293 | |
3294 | return ent; |
3295 | } |
3296 | |
3297 | for_each_tracing_cpu(cpu) { |
3298 | |
3299 | if (ring_buffer_empty_cpu(buffer, cpu)) |
3300 | continue; |
3301 | |
3302 | ent = peek_next_entry(iter, cpu, &ts, &lost_events); |
3303 | |
3304 | /* |
3305 | * Pick the entry with the smallest timestamp: |
3306 | */ |
3307 | if (ent && (!next || ts < next_ts)) { |
3308 | next = ent; |
3309 | next_cpu = cpu; |
3310 | next_ts = ts; |
3311 | next_lost = lost_events; |
3312 | next_size = iter->ent_size; |
3313 | } |
3314 | } |
3315 | |
3316 | iter->ent_size = next_size; |
3317 | |
3318 | if (ent_cpu) |
3319 | *ent_cpu = next_cpu; |
3320 | |
3321 | if (ent_ts) |
3322 | *ent_ts = next_ts; |
3323 | |
3324 | if (missing_events) |
3325 | *missing_events = next_lost; |
3326 | |
3327 | return next; |
3328 | } |
3329 | |
3330 | /* Find the next real entry, without updating the iterator itself */ |
3331 | struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, |
3332 | int *ent_cpu, u64 *ent_ts) |
3333 | { |
3334 | return __find_next_entry(iter, ent_cpu, NULL, ent_ts); |
3335 | } |
3336 | |
3337 | /* Find the next real entry, and increment the iterator to the next entry */ |
3338 | void *trace_find_next_entry_inc(struct trace_iterator *iter) |
3339 | { |
3340 | iter->ent = __find_next_entry(iter, &iter->cpu, |
3341 | &iter->lost_events, &iter->ts); |
3342 | |
3343 | if (iter->ent) |
3344 | trace_iterator_increment(iter); |
3345 | |
3346 | return iter->ent ? iter : NULL; |
3347 | } |
3348 | |
3349 | static void trace_consume(struct trace_iterator *iter) |
3350 | { |
3351 | ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts, |
3352 | &iter->lost_events); |
3353 | } |
3354 | |
3355 | static void *s_next(struct seq_file *m, void *v, loff_t *pos) |
3356 | { |
3357 | struct trace_iterator *iter = m->private; |
3358 | int i = (int)*pos; |
3359 | void *ent; |
3360 | |
3361 | WARN_ON_ONCE(iter->leftover); |
3362 | |
3363 | (*pos)++; |
3364 | |
3365 | /* can't go backwards */ |
3366 | if (iter->idx > i) |
3367 | return NULL; |
3368 | |
3369 | if (iter->idx < 0) |
3370 | ent = trace_find_next_entry_inc(iter); |
3371 | else |
3372 | ent = iter; |
3373 | |
3374 | while (ent && iter->idx < i) |
3375 | ent = trace_find_next_entry_inc(iter); |
3376 | |
3377 | iter->pos = *pos; |
3378 | |
3379 | return ent; |
3380 | } |
3381 | |
3382 | void tracing_iter_reset(struct trace_iterator *iter, int cpu) |
3383 | { |
3384 | struct ring_buffer_event *event; |
3385 | struct ring_buffer_iter *buf_iter; |
3386 | unsigned long entries = 0; |
3387 | u64 ts; |
3388 | |
3389 | per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0; |
3390 | |
3391 | buf_iter = trace_buffer_iter(iter, cpu); |
3392 | if (!buf_iter) |
3393 | return; |
3394 | |
3395 | ring_buffer_iter_reset(buf_iter); |
3396 | |
3397 | /* |
3398 | * We could have the case with the max latency tracers |
3399 | * that a reset never took place on a cpu. This is evident |
3400 | * by the timestamp being before the start of the buffer. |
3401 | */ |
3402 | while ((event = ring_buffer_iter_peek(buf_iter, &ts))) { |
3403 | if (ts >= iter->trace_buffer->time_start) |
3404 | break; |
3405 | entries++; |
3406 | ring_buffer_read(buf_iter, NULL); |
3407 | } |
3408 | |
3409 | per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries; |
3410 | } |
3411 | |
3412 | /* |
 * The current tracer is copied to avoid holding a global
 * lock all around.
3415 | */ |
3416 | static void *s_start(struct seq_file *m, loff_t *pos) |
3417 | { |
3418 | struct trace_iterator *iter = m->private; |
3419 | struct trace_array *tr = iter->tr; |
3420 | int cpu_file = iter->cpu_file; |
3421 | void *p = NULL; |
3422 | loff_t l = 0; |
3423 | int cpu; |
3424 | |
3425 | /* |
3426 | * copy the tracer to avoid using a global lock all around. |
3427 | * iter->trace is a copy of current_trace, the pointer to the |
3428 | * name may be used instead of a strcmp(), as iter->trace->name |
3429 | * will point to the same string as current_trace->name. |
3430 | */ |
3431 | mutex_lock(&trace_types_lock); |
3432 | if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name)) |
3433 | *iter->trace = *tr->current_trace; |
3434 | mutex_unlock(&trace_types_lock); |
3435 | |
3436 | #ifdef CONFIG_TRACER_MAX_TRACE |
3437 | if (iter->snapshot && iter->trace->use_max_tr) |
3438 | return ERR_PTR(-EBUSY); |
3439 | #endif |
3440 | |
3441 | if (!iter->snapshot) |
3442 | atomic_inc(&trace_record_taskinfo_disabled); |
3443 | |
3444 | if (*pos != iter->pos) { |
3445 | iter->ent = NULL; |
3446 | iter->cpu = 0; |
3447 | iter->idx = -1; |
3448 | |
3449 | if (cpu_file == RING_BUFFER_ALL_CPUS) { |
3450 | for_each_tracing_cpu(cpu) |
3451 | tracing_iter_reset(iter, cpu); |
3452 | } else |
3453 | tracing_iter_reset(iter, cpu_file); |
3454 | |
3455 | iter->leftover = 0; |
3456 | for (p = iter; p && l < *pos; p = s_next(m, p, &l)) |
3457 | ; |
3458 | |
3459 | } else { |
3460 | /* |
3461 | * If we overflowed the seq_file before, then we want |
3462 | * to just reuse the trace_seq buffer again. |
3463 | */ |
3464 | if (iter->leftover) |
3465 | p = iter; |
3466 | else { |
3467 | l = *pos - 1; |
3468 | p = s_next(m, p, &l); |
3469 | } |
3470 | } |
3471 | |
3472 | trace_event_read_lock(); |
3473 | trace_access_lock(cpu_file); |
3474 | return p; |
3475 | } |
3476 | |
3477 | static void s_stop(struct seq_file *m, void *p) |
3478 | { |
3479 | struct trace_iterator *iter = m->private; |
3480 | |
3481 | #ifdef CONFIG_TRACER_MAX_TRACE |
3482 | if (iter->snapshot && iter->trace->use_max_tr) |
3483 | return; |
3484 | #endif |
3485 | |
3486 | if (!iter->snapshot) |
3487 | atomic_dec(&trace_record_taskinfo_disabled); |
3488 | |
3489 | trace_access_unlock(iter->cpu_file); |
3490 | trace_event_read_unlock(); |
3491 | } |
3492 | |
3493 | static void |
3494 | get_total_entries(struct trace_buffer *buf, |
3495 | unsigned long *total, unsigned long *entries) |
3496 | { |
3497 | unsigned long count; |
3498 | int cpu; |
3499 | |
3500 | *total = 0; |
3501 | *entries = 0; |
3502 | |
3503 | for_each_tracing_cpu(cpu) { |
3504 | count = ring_buffer_entries_cpu(buf->buffer, cpu); |
3505 | /* |
3506 | * If this buffer has skipped entries, then we hold all |
3507 | * entries for the trace and we need to ignore the |
3508 | * ones before the time stamp. |
3509 | */ |
3510 | if (per_cpu_ptr(buf->data, cpu)->skipped_entries) { |
3511 | count -= per_cpu_ptr(buf->data, cpu)->skipped_entries; |
3512 | /* total is the same as the entries */ |
3513 | *total += count; |
3514 | } else |
3515 | *total += count + |
3516 | ring_buffer_overrun_cpu(buf->buffer, cpu); |
3517 | *entries += count; |
3518 | } |
3519 | } |
3520 | |
static void print_lat_help_header(struct seq_file *m)
3522 | { |
3523 | seq_puts(m, "# _------=> CPU# \n" |
3524 | "# / _-----=> irqs-off \n" |
3525 | "# | / _----=> need-resched \n" |
3526 | "# || / _---=> hardirq/softirq \n" |
3527 | "# ||| / _--=> preempt-depth \n" |
3528 | "# |||| / delay \n" |
3529 | "# cmd pid ||||| time | caller \n" |
3530 | "# \\ / ||||| \\ | / \n" ); |
3531 | } |
3532 | |
3533 | static void print_event_info(struct |
---|