// SPDX-License-Identifier: GPL-2.0
/*
 * ring buffer based function tracer
 *
 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
 *
 * Originally taken from the RT patch by:
 *    Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Based on code from the latency_tracer, that is:
 *  Copyright (C) 2004-2006 Ingo Molnar
 *  Copyright (C) 2004 Nadia Yvette Chambers
 */
15#include <linux/ring_buffer.h>
16#include <generated/utsrelease.h>
17#include <linux/stacktrace.h>
18#include <linux/writeback.h>
19#include <linux/kallsyms.h>
20#include <linux/security.h>
21#include <linux/seq_file.h>
22#include <linux/irqflags.h>
23#include <linux/debugfs.h>
24#include <linux/tracefs.h>
25#include <linux/pagemap.h>
26#include <linux/hardirq.h>
27#include <linux/linkage.h>
28#include <linux/uaccess.h>
29#include <linux/vmalloc.h>
30#include <linux/ftrace.h>
31#include <linux/module.h>
32#include <linux/percpu.h>
33#include <linux/splice.h>
34#include <linux/kdebug.h>
35#include <linux/string.h>
36#include <linux/mount.h>
37#include <linux/rwsem.h>
38#include <linux/slab.h>
39#include <linux/ctype.h>
40#include <linux/init.h>
41#include <linux/panic_notifier.h>
42#include <linux/poll.h>
43#include <linux/nmi.h>
44#include <linux/fs.h>
45#include <linux/trace.h>
46#include <linux/sched/clock.h>
47#include <linux/sched/rt.h>
48#include <linux/fsnotify.h>
49#include <linux/irq_work.h>
50#include <linux/workqueue.h>
51
52#include <asm/setup.h> /* COMMAND_LINE_SIZE */
53
54#include "trace.h"
55#include "trace_output.h"
56
57#ifdef CONFIG_FTRACE_STARTUP_TEST
/*
 * We need to change this state when a selftest is running.
 * A selftest will look into the ring buffer to count the
 * entries inserted during the selftest. Concurrent insertions
 * into the ring buffer, such as trace_printk(), could occur
 * at the same time and give false positive or negative results.
 */
65static bool __read_mostly tracing_selftest_running;
66
/*
 * If boot-time tracing (tracers/events set up via the kernel
 * command line) is running, we do not want to run the selftests.
 */
71bool __read_mostly tracing_selftest_disabled;
72
73void __init disable_tracing_selftest(const char *reason)
74{
75 if (!tracing_selftest_disabled) {
76 tracing_selftest_disabled = true;
77 pr_info("Ftrace startup test is disabled due to %s\n", reason);
78 }
79}
80#else
81#define tracing_selftest_running 0
82#define tracing_selftest_disabled 0
83#endif
84
85/* Pipe tracepoints to printk */
86static struct trace_iterator *tracepoint_print_iter;
87int tracepoint_printk;
88static bool tracepoint_printk_stop_on_boot __initdata;
89static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
90
91/* For tracers that don't implement custom flags */
92static struct tracer_opt dummy_tracer_opt[] = {
93 { }
94};
95
96static int
97dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
98{
99 return 0;
100}
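
/*
 * Illustrative sketch (not part of this file's logic): a tracer that
 * does implement custom flags provides its own tracer_opt array and a
 * real set_flag() callback instead of the dummies above. The option
 * name and bit below are invented for illustration; the struct layout
 * and the TRACER_OPT() helper are the ones declared in
 * kernel/trace/trace.h.
 *
 *	static struct tracer_opt my_tracer_opts[] = {
 *		{ TRACER_OPT(my-verbose, 0x1) },
 *		{ }	// must be NULL terminated
 *	};
 *
 *	static struct tracer_flags my_tracer_flags = {
 *		.val  = 0,
 *		.opts = my_tracer_opts,
 *	};
 *
 *	static int my_set_flag(struct trace_array *tr, u32 old_flags,
 *			       u32 bit, int set)
 *	{
 *		if (bit == 0x1)
 *			return 0;	// react to "options/my-verbose"
 *		return -EINVAL;
 *	}
 */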
101
/*
 * To prevent the comm cache from being overwritten when no
 * tracing is active, only save the comm when a trace event
 * occurs.
 */
107static DEFINE_PER_CPU(bool, trace_taskinfo_save);
108
/*
 * Kill all tracing for good (never come back).
 * It is initialized to 1 and is set to zero only if tracer
 * initialization succeeds; nothing else ever clears it.
 */
115static int tracing_disabled = 1;
116
117cpumask_var_t __read_mostly tracing_buffer_mask;
118
/*
 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
 *
 * If there is an oops (or kernel panic) and ftrace_dump_on_oops
 * is set, then ftrace_dump() is called. This will output the contents
 * of the ftrace buffers to the console. This is very useful for
 * capturing traces that lead to crashes and outputting them to a
 * serial console.
 *
 * It is off by default, but you can enable it either by specifying
 * "ftrace_dump_on_oops" on the kernel command line, or by setting
 * /proc/sys/kernel/ftrace_dump_on_oops:
 *   Set to 1 to dump the buffers of all CPUs.
 *   Set to 2 to dump only the buffer of the CPU that triggered the oops.
 */
134
135enum ftrace_dump_mode ftrace_dump_on_oops;
136
137/* When set, tracing will stop when a WARN*() is hit */
138int __disable_trace_on_warning;
139
140#ifdef CONFIG_TRACE_EVAL_MAP_FILE
141/* Map of enums to their values, for "eval_map" file */
142struct trace_eval_map_head {
143 struct module *mod;
144 unsigned long length;
145};
146
147union trace_eval_map_item;
148
149struct trace_eval_map_tail {
	/*
	 * "end" is first and points to NULL as it must be different
	 * from "mod" or "eval_string"
	 */
154 union trace_eval_map_item *next;
155 const char *end; /* points to NULL */
156};
157
158static DEFINE_MUTEX(trace_eval_mutex);
159
160/*
161 * The trace_eval_maps are saved in an array with two extra elements,
162 * one at the beginning, and one at the end. The beginning item contains
163 * the count of the saved maps (head.length), and the module they
164 * belong to if not built in (head.mod). The ending item contains a
165 * pointer to the next array of saved eval_map items.
166 */
167union trace_eval_map_item {
168 struct trace_eval_map map;
169 struct trace_eval_map_head head;
170 struct trace_eval_map_tail tail;
171};
172
173static union trace_eval_map_item *trace_eval_maps;
174#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
175
176int tracing_set_tracer(struct trace_array *tr, const char *buf);
177static void ftrace_trace_userstack(struct trace_array *tr,
178 struct trace_buffer *buffer,
179 unsigned int trace_ctx);
180
181#define MAX_TRACER_SIZE 100
182static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
183static char *default_bootup_tracer;
184
185static bool allocate_snapshot;
186static bool snapshot_at_boot;
187
188static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
189static int boot_instance_index;
190
191static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
192static int boot_snapshot_index;
193
static int __init set_cmdline_ftrace(char *str)
{
	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
	default_bootup_tracer = bootup_tracer_buf;
	/* We are using ftrace early, expand it */
	trace_set_ring_buffer_expanded(NULL);
	return 1;
}
202__setup("ftrace=", set_cmdline_ftrace);
203
204static int __init set_ftrace_dump_on_oops(char *str)
205{
206 if (*str++ != '=' || !*str || !strcmp("1", str)) {
207 ftrace_dump_on_oops = DUMP_ALL;
208 return 1;
209 }
210
211 if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
212 ftrace_dump_on_oops = DUMP_ORIG;
213 return 1;
214 }
215
216 return 0;
217}
218__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
219
220static int __init stop_trace_on_warning(char *str)
221{
222 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
223 __disable_trace_on_warning = 1;
224 return 1;
225}
226__setup("traceoff_on_warning", stop_trace_on_warning);
227
228static int __init boot_alloc_snapshot(char *str)
229{
230 char *slot = boot_snapshot_info + boot_snapshot_index;
231 int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
232 int ret;
233
234 if (str[0] == '=') {
235 str++;
236 if (strlen(str) >= left)
237 return -1;
238
		ret = snprintf(slot, left, "%s\t", str);
240 boot_snapshot_index += ret;
241 } else {
242 allocate_snapshot = true;
243 /* We also need the main ring buffer expanded */
244 trace_set_ring_buffer_expanded(NULL);
245 }
246 return 1;
247}
248__setup("alloc_snapshot", boot_alloc_snapshot);
249
250
251static int __init boot_snapshot(char *str)
252{
253 snapshot_at_boot = true;
254 boot_alloc_snapshot(str);
255 return 1;
256}
257__setup("ftrace_boot_snapshot", boot_snapshot);
258
259
260static int __init boot_instance(char *str)
261{
262 char *slot = boot_instance_info + boot_instance_index;
263 int left = sizeof(boot_instance_info) - boot_instance_index;
264 int ret;
265
266 if (strlen(str) >= left)
267 return -1;
268
	ret = snprintf(slot, left, "%s\t", str);
270 boot_instance_index += ret;
271
272 return 1;
273}
274__setup("trace_instance=", boot_instance);
275
276
277static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
278
279static int __init set_trace_boot_options(char *str)
280{
	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
282 return 1;
283}
284__setup("trace_options=", set_trace_boot_options);
285
286static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
287static char *trace_boot_clock __initdata;
288
289static int __init set_trace_boot_clock(char *str)
290{
	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
292 trace_boot_clock = trace_boot_clock_buf;
293 return 1;
294}
295__setup("trace_clock=", set_trace_boot_clock);
296
297static int __init set_tracepoint_printk(char *str)
298{
299 /* Ignore the "tp_printk_stop_on_boot" param */
300 if (*str == '_')
301 return 0;
302
303 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
304 tracepoint_printk = 1;
305 return 1;
306}
307__setup("tp_printk", set_tracepoint_printk);
308
309static int __init set_tracepoint_printk_stop(char *str)
310{
311 tracepoint_printk_stop_on_boot = true;
312 return 1;
313}
314__setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
315
316unsigned long long ns2usecs(u64 nsec)
317{
318 nsec += 500;
319 do_div(nsec, 1000);
320 return nsec;
321}
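
/*
 * For example, ns2usecs(1499) returns 1 and ns2usecs(1500) returns 2:
 * the +500 rounds to the nearest microsecond before dividing by 1000.
 */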
322
323static void
324trace_process_export(struct trace_export *export,
325 struct ring_buffer_event *event, int flag)
326{
327 struct trace_entry *entry;
328 unsigned int size = 0;
329
330 if (export->flags & flag) {
331 entry = ring_buffer_event_data(event);
332 size = ring_buffer_event_length(event);
333 export->write(export, entry, size);
334 }
335}
336
337static DEFINE_MUTEX(ftrace_export_lock);
338
339static struct trace_export __rcu *ftrace_exports_list __read_mostly;
340
341static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
342static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
343static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
344
345static inline void ftrace_exports_enable(struct trace_export *export)
346{
347 if (export->flags & TRACE_EXPORT_FUNCTION)
348 static_branch_inc(&trace_function_exports_enabled);
349
350 if (export->flags & TRACE_EXPORT_EVENT)
351 static_branch_inc(&trace_event_exports_enabled);
352
353 if (export->flags & TRACE_EXPORT_MARKER)
354 static_branch_inc(&trace_marker_exports_enabled);
355}
356
357static inline void ftrace_exports_disable(struct trace_export *export)
358{
359 if (export->flags & TRACE_EXPORT_FUNCTION)
360 static_branch_dec(&trace_function_exports_enabled);
361
362 if (export->flags & TRACE_EXPORT_EVENT)
363 static_branch_dec(&trace_event_exports_enabled);
364
365 if (export->flags & TRACE_EXPORT_MARKER)
366 static_branch_dec(&trace_marker_exports_enabled);
367}
368
369static void ftrace_exports(struct ring_buffer_event *event, int flag)
370{
371 struct trace_export *export;
372
373 preempt_disable_notrace();
374
375 export = rcu_dereference_raw_check(ftrace_exports_list);
376 while (export) {
377 trace_process_export(export, event, flag);
378 export = rcu_dereference_raw_check(export->next);
379 }
380
381 preempt_enable_notrace();
382}
383
384static inline void
385add_trace_export(struct trace_export **list, struct trace_export *export)
386{
387 rcu_assign_pointer(export->next, *list);
388 /*
389 * We are entering export into the list but another
390 * CPU might be walking that list. We need to make sure
391 * the export->next pointer is valid before another CPU sees
392 * the export pointer included into the list.
393 */
394 rcu_assign_pointer(*list, export);
395}
396
397static inline int
398rm_trace_export(struct trace_export **list, struct trace_export *export)
399{
400 struct trace_export **p;
401
402 for (p = list; *p != NULL; p = &(*p)->next)
403 if (*p == export)
404 break;
405
406 if (*p != export)
407 return -1;
408
409 rcu_assign_pointer(*p, (*p)->next);
410
411 return 0;
412}
413
414static inline void
415add_ftrace_export(struct trace_export **list, struct trace_export *export)
416{
417 ftrace_exports_enable(export);
418
419 add_trace_export(list, export);
420}
421
422static inline int
423rm_ftrace_export(struct trace_export **list, struct trace_export *export)
424{
425 int ret;
426
427 ret = rm_trace_export(list, export);
428 ftrace_exports_disable(export);
429
430 return ret;
431}
432
433int register_ftrace_export(struct trace_export *export)
434{
435 if (WARN_ON_ONCE(!export->write))
436 return -1;
437
438 mutex_lock(&ftrace_export_lock);
439
	add_ftrace_export(&ftrace_exports_list, export);

	mutex_unlock(&ftrace_export_lock);
443
444 return 0;
445}
446EXPORT_SYMBOL_GPL(register_ftrace_export);
447
448int unregister_ftrace_export(struct trace_export *export)
449{
450 int ret;
451
452 mutex_lock(&ftrace_export_lock);
453
	ret = rm_ftrace_export(&ftrace_exports_list, export);

	mutex_unlock(&ftrace_export_lock);
457
458 return ret;
459}
460EXPORT_SYMBOL_GPL(unregister_ftrace_export);
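
/*
 * Illustrative sketch of how a module might use the export API above;
 * the names are invented and error handling is elided. The callback
 * runs in tracing context, so it must not sleep. The struct layout is
 * the one declared in include/linux/trace.h.
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		// copy @size bytes at @entry to an external sink
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write = my_export_write,
 *		.flags = TRACE_EXPORT_EVENT,
 *	};
 *
 *	// register_ftrace_export(&my_export);
 *	// ...
 *	// unregister_ftrace_export(&my_export);
 */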
461
462/* trace_flags holds trace_options default values */
463#define TRACE_DEFAULT_FLAGS \
464 (FUNCTION_DEFAULT_FLAGS | \
465 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
466 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
467 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
468 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | \
469 TRACE_ITER_HASH_PTR)
470
471/* trace_options that are only supported by global_trace */
472#define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
473 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
474
475/* trace_flags that are default zero for instances */
476#define ZEROED_TRACE_FLAGS \
477 (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
478
479/*
480 * The global_trace is the descriptor that holds the top-level tracing
481 * buffers for the live tracing.
482 */
483static struct trace_array global_trace = {
484 .trace_flags = TRACE_DEFAULT_FLAGS,
485};
486
487void trace_set_ring_buffer_expanded(struct trace_array *tr)
488{
489 if (!tr)
490 tr = &global_trace;
491 tr->ring_buffer_expanded = true;
492}
493
494LIST_HEAD(ftrace_trace_arrays);
495
496int trace_array_get(struct trace_array *this_tr)
497{
498 struct trace_array *tr;
499 int ret = -ENODEV;
500
501 mutex_lock(&trace_types_lock);
502 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
503 if (tr == this_tr) {
504 tr->ref++;
505 ret = 0;
506 break;
507 }
508 }
	mutex_unlock(&trace_types_lock);
510
511 return ret;
512}
513
514static void __trace_array_put(struct trace_array *this_tr)
515{
516 WARN_ON(!this_tr->ref);
517 this_tr->ref--;
518}
519
520/**
521 * trace_array_put - Decrement the reference counter for this trace array.
522 * @this_tr : pointer to the trace array
523 *
524 * NOTE: Use this when we no longer need the trace array returned by
525 * trace_array_get_by_name(). This ensures the trace array can be later
526 * destroyed.
527 *
528 */
529void trace_array_put(struct trace_array *this_tr)
530{
531 if (!this_tr)
532 return;
533
534 mutex_lock(&trace_types_lock);
535 __trace_array_put(this_tr);
	mutex_unlock(&trace_types_lock);
537}
538EXPORT_SYMBOL_GPL(trace_array_put);
539
540int tracing_check_open_get_tr(struct trace_array *tr)
541{
542 int ret;
543
	ret = security_locked_down(LOCKDOWN_TRACEFS);
545 if (ret)
546 return ret;
547
548 if (tracing_disabled)
549 return -ENODEV;
550
	if (tr && trace_array_get(tr) < 0)
552 return -ENODEV;
553
554 return 0;
555}
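
/*
 * Typical use (sketch, names invented): tracefs open callbacks in this
 * file call tracing_check_open_get_tr() and the matching release
 * callback drops the reference with trace_array_put(), e.g.:
 *
 *	static int example_open(struct inode *inode, struct file *filp)
 *	{
 *		struct trace_array *tr = inode->i_private;
 *		int ret;
 *
 *		ret = tracing_check_open_get_tr(tr);
 *		if (ret)
 *			return ret;
 *		// ... set up filp->private_data ...
 *		return 0;
 *	}
 *
 *	// and in the corresponding ->release(): trace_array_put(tr);
 */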
556
557int call_filter_check_discard(struct trace_event_call *call, void *rec,
558 struct trace_buffer *buffer,
559 struct ring_buffer_event *event)
560{
561 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
	    !filter_match_preds(call->filter, rec)) {
563 __trace_event_discard_commit(buffer, event);
564 return 1;
565 }
566
567 return 0;
568}
569
570/**
571 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
572 * @filtered_pids: The list of pids to check
573 * @search_pid: The PID to find in @filtered_pids
574 *
575 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
576 */
577bool
578trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
579{
	return trace_pid_list_is_set(filtered_pids, search_pid);
581}
582
583/**
584 * trace_ignore_this_task - should a task be ignored for tracing
585 * @filtered_pids: The list of pids to check
586 * @filtered_no_pids: The list of pids not to be traced
587 * @task: The task that should be ignored if not filtered
588 *
589 * Checks if @task should be traced or not from @filtered_pids.
590 * Returns true if @task should *NOT* be traced.
591 * Returns false if @task should be traced.
592 */
593bool
594trace_ignore_this_task(struct trace_pid_list *filtered_pids,
595 struct trace_pid_list *filtered_no_pids,
596 struct task_struct *task)
597{
598 /*
599 * If filtered_no_pids is not empty, and the task's pid is listed
600 * in filtered_no_pids, then return true.
601 * Otherwise, if filtered_pids is empty, that means we can
602 * trace all tasks. If it has content, then only trace pids
603 * within filtered_pids.
604 */
605
	return (filtered_pids &&
		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
		(filtered_no_pids &&
		 trace_find_filtered_pid(filtered_no_pids, task->pid));
610}
611
612/**
613 * trace_filter_add_remove_task - Add or remove a task from a pid_list
614 * @pid_list: The list to modify
615 * @self: The current task for fork or NULL for exit
616 * @task: The task to add or remove
617 *
618 * If adding a task, if @self is defined, the task is only added if @self
619 * is also included in @pid_list. This happens on fork and tasks should
620 * only be added when the parent is listed. If @self is NULL, then the
621 * @task pid will be removed from the list, which would happen on exit
622 * of a task.
623 */
624void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
625 struct task_struct *self,
626 struct task_struct *task)
627{
628 if (!pid_list)
629 return;
630
631 /* For forks, we only add if the forking task is listed */
632 if (self) {
		if (!trace_find_filtered_pid(pid_list, self->pid))
634 return;
635 }
636
637 /* "self" is set for forks, and NULL for exits */
	if (self)
		trace_pid_list_set(pid_list, task->pid);
	else
		trace_pid_list_clear(pid_list, task->pid);
642}
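
/*
 * Sketch of how callers typically use this (see the event pid
 * filtering code): a probe attached to the sched_process_fork
 * tracepoint passes the parent as @self, so the child is only added
 * when the parent is already traced, while the exit path passes NULL
 * so the pid is cleared. The probe below follows the fork tracepoint
 * prototype but is illustrative only; the real probes look the
 * pid_list up from the trace_array under RCU.
 *
 *	static void my_fork_probe(void *data, struct task_struct *parent,
 *				  struct task_struct *child)
 *	{
 *		struct trace_pid_list *pid_list = data;
 *
 *		trace_filter_add_remove_task(pid_list, parent, child);
 *	}
 */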
643
644/**
645 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
646 * @pid_list: The pid list to show
647 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
648 * @pos: The position of the file
649 *
650 * This is used by the seq_file "next" operation to iterate the pids
651 * listed in a trace_pid_list structure.
652 *
653 * Returns the pid+1 as we want to display pid of zero, but NULL would
654 * stop the iteration.
655 */
656void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
657{
658 long pid = (unsigned long)v;
659 unsigned int next;
660
661 (*pos)++;
662
663 /* pid already is +1 of the actual previous bit */
	if (trace_pid_list_next(pid_list, pid, &next) < 0)
665 return NULL;
666
667 pid = next;
668
669 /* Return pid + 1 to allow zero to be represented */
670 return (void *)(pid + 1);
671}
672
673/**
674 * trace_pid_start - Used for seq_file to start reading pid lists
675 * @pid_list: The pid list to show
676 * @pos: The position of the file
677 *
678 * This is used by seq_file "start" operation to start the iteration
679 * of listing pids.
680 *
681 * Returns the pid+1 as we want to display pid of zero, but NULL would
682 * stop the iteration.
683 */
684void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
685{
686 unsigned long pid;
687 unsigned int first;
688 loff_t l = 0;
689
	if (trace_pid_list_first(pid_list, &first) < 0)
691 return NULL;
692
693 pid = first;
694
695 /* Return pid + 1 so that zero can be the exit value */
696 for (pid++; pid && l < *pos;
	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
698 ;
699 return (void *)pid;
700}
701
702/**
703 * trace_pid_show - show the current pid in seq_file processing
704 * @m: The seq_file structure to write into
705 * @v: A void pointer of the pid (+1) value to display
706 *
707 * Can be directly used by seq_file operations to display the current
708 * pid value.
709 */
710int trace_pid_show(struct seq_file *m, void *v)
711{
712 unsigned long pid = (unsigned long)v - 1;
713
	seq_printf(m, "%lu\n", pid);
715 return 0;
716}
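
/*
 * These three helpers are meant to be plugged into a seq_operations
 * for a "set_*_pid" style tracefs file, roughly as below (a sketch
 * with placeholder names; the real implementations look the pid_list
 * up from the trace_array under RCU and take the needed locks in
 * start/stop):
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		return trace_pid_start(pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(pid_list, v, pos);
 *	}
 *
 *	static const struct seq_operations example_pid_seq_ops = {
 *		.start = p_start,
 *		.next  = p_next,
 *		.stop  = p_stop,	// typically just unlocks
 *		.show  = trace_pid_show,
 *	};
 */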
717
718/* 128 should be much more than enough */
719#define PID_BUF_SIZE 127
720
721int trace_pid_write(struct trace_pid_list *filtered_pids,
722 struct trace_pid_list **new_pid_list,
723 const char __user *ubuf, size_t cnt)
724{
725 struct trace_pid_list *pid_list;
726 struct trace_parser parser;
727 unsigned long val;
728 int nr_pids = 0;
729 ssize_t read = 0;
730 ssize_t ret;
731 loff_t pos;
732 pid_t pid;
733
	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
735 return -ENOMEM;
736
	/*
	 * Always recreate the pid list from scratch: the write is an
	 * all-or-nothing operation. A new list is built for every write
	 * from the user, and if any part of it fails, the current list
	 * is left unmodified.
	 */
743 pid_list = trace_pid_list_alloc();
744 if (!pid_list) {
		trace_parser_put(&parser);
746 return -ENOMEM;
747 }
748
749 if (filtered_pids) {
750 /* copy the current bits to the new max */
		ret = trace_pid_list_first(filtered_pids, &pid);
		while (!ret) {
			trace_pid_list_set(pid_list, pid);
			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
755 nr_pids++;
756 }
757 }
758
759 ret = 0;
760 while (cnt > 0) {
761
762 pos = 0;
763
		ret = trace_get_user(&parser, ubuf, cnt, &pos);
765 if (ret < 0)
766 break;
767
768 read += ret;
769 ubuf += ret;
770 cnt -= ret;
771
		if (!trace_parser_loaded(&parser))
773 break;
774
775 ret = -EINVAL;
		if (kstrtoul(parser.buffer, 0, &val))
777 break;
778
779 pid = (pid_t)val;
780
781 if (trace_pid_list_set(pid_list, pid) < 0) {
782 ret = -1;
783 break;
784 }
785 nr_pids++;
786
		trace_parser_clear(&parser);
		ret = 0;
	}
	trace_parser_put(&parser);
791
792 if (ret < 0) {
793 trace_pid_list_free(pid_list);
794 return ret;
795 }
796
797 if (!nr_pids) {
798 /* Cleared the list of pids */
799 trace_pid_list_free(pid_list);
800 pid_list = NULL;
801 }
802
803 *new_pid_list = pid_list;
804
805 return read;
806}
807
808static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
809{
810 u64 ts;
811
812 /* Early boot up does not have a buffer yet */
813 if (!buf->buffer)
814 return trace_clock_local();
815
	ts = ring_buffer_time_stamp(buf->buffer);
	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
818
819 return ts;
820}
821
822u64 ftrace_now(int cpu)
823{
	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
825}
826
/**
 * tracing_is_enabled - Show if global_trace has been enabled
 *
 * Shows if the global trace has been enabled or not. It uses the
 * mirror flag "buffer_disabled", which is meant for fast paths such
 * as the irqsoff tracer, but it may be inaccurate due to races. If you
 * need to know the accurate state, use tracing_is_on(), which is a
 * little slower, but accurate.
 */
836int tracing_is_enabled(void)
837{
838 /*
839 * For quick access (irqsoff uses this in fast path), just
840 * return the mirror variable of the state of the ring buffer.
841 * It's a little racy, but we don't really care.
842 */
843 smp_rmb();
844 return !global_trace.buffer_disabled;
845}
846
/*
 * trace_buf_size is the size in bytes that is allocated
 * for a buffer. Note, the number of bytes is always rounded
 * to page size.
 *
 * This is purposely set to a low value (16384 entries).
 * If a dump on oops happens, it is much appreciated not to have
 * to wait for all that output. In any case, it is configurable
 * both at boot time and at run time.
 */
857#define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
858
859static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
860
861/* trace_types holds a link list of available tracers. */
862static struct tracer *trace_types __read_mostly;
863
864/*
865 * trace_types_lock is used to protect the trace_types list.
866 */
867DEFINE_MUTEX(trace_types_lock);
868
/*
 * Serialize access to the ring buffer.
 *
 * The ring buffer serializes readers, but that is only low-level
 * protection. The validity of the events (returned by
 * ring_buffer_peek() etc.) is not protected by the ring buffer.
 *
 * The content of events may become garbage if we allow another process
 * to consume these events concurrently:
 * A) the page of the consumed events may become a normal page
 *    (not a reader page) in the ring buffer, and this page will be
 *    rewritten by the event producer.
 * B) the page of the consumed events may become a page for splice_read,
 *    and this page will be returned to the system.
 *
 * These primitives allow multiple processes to access different CPU
 * ring buffers concurrently.
 *
 * They do not distinguish read-only from read-consume access;
 * multiple read-only accesses are also serialized.
 */
890
891#ifdef CONFIG_SMP
892static DECLARE_RWSEM(all_cpu_access_lock);
893static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
894
895static inline void trace_access_lock(int cpu)
896{
897 if (cpu == RING_BUFFER_ALL_CPUS) {
898 /* gain it for accessing the whole ring buffer. */
		down_write(&all_cpu_access_lock);
900 } else {
901 /* gain it for accessing a cpu ring buffer. */
902
903 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
		down_read(&all_cpu_access_lock);
905
906 /* Secondly block other access to this @cpu ring buffer. */
907 mutex_lock(&per_cpu(cpu_access_lock, cpu));
908 }
909}
910
911static inline void trace_access_unlock(int cpu)
912{
913 if (cpu == RING_BUFFER_ALL_CPUS) {
		up_write(&all_cpu_access_lock);
	} else {
		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
		up_read(&all_cpu_access_lock);
918 }
919}
920
921static inline void trace_access_lock_init(void)
922{
923 int cpu;
924
925 for_each_possible_cpu(cpu)
926 mutex_init(&per_cpu(cpu_access_lock, cpu));
927}
928
929#else
930
931static DEFINE_MUTEX(access_lock);
932
933static inline void trace_access_lock(int cpu)
934{
935 (void)cpu;
936 mutex_lock(&access_lock);
937}
938
939static inline void trace_access_unlock(int cpu)
940{
941 (void)cpu;
942 mutex_unlock(&access_lock);
943}
944
945static inline void trace_access_lock_init(void)
946{
947}
948
949#endif
950
951#ifdef CONFIG_STACKTRACE
952static void __ftrace_trace_stack(struct trace_buffer *buffer,
953 unsigned int trace_ctx,
954 int skip, struct pt_regs *regs);
955static inline void ftrace_trace_stack(struct trace_array *tr,
956 struct trace_buffer *buffer,
957 unsigned int trace_ctx,
958 int skip, struct pt_regs *regs);
959
960#else
961static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
962 unsigned int trace_ctx,
963 int skip, struct pt_regs *regs)
964{
965}
966static inline void ftrace_trace_stack(struct trace_array *tr,
967 struct trace_buffer *buffer,
968 unsigned long trace_ctx,
969 int skip, struct pt_regs *regs)
970{
971}
972
973#endif
974
975static __always_inline void
976trace_event_setup(struct ring_buffer_event *event,
977 int type, unsigned int trace_ctx)
978{
979 struct trace_entry *ent = ring_buffer_event_data(event);
980
	tracing_generic_entry_update(ent, type, trace_ctx);
982}
983
984static __always_inline struct ring_buffer_event *
985__trace_buffer_lock_reserve(struct trace_buffer *buffer,
986 int type,
987 unsigned long len,
988 unsigned int trace_ctx)
989{
990 struct ring_buffer_event *event;
991
	event = ring_buffer_lock_reserve(buffer, len);
993 if (event != NULL)
994 trace_event_setup(event, type, trace_ctx);
995
996 return event;
997}
998
999void tracer_tracing_on(struct trace_array *tr)
1000{
1001 if (tr->array_buffer.buffer)
		ring_buffer_record_on(tr->array_buffer.buffer);
1003 /*
1004 * This flag is looked at when buffers haven't been allocated
1005 * yet, or by some tracers (like irqsoff), that just want to
1006 * know if the ring buffer has been disabled, but it can handle
1007 * races of where it gets disabled but we still do a record.
1008 * As the check is in the fast path of the tracers, it is more
1009 * important to be fast than accurate.
1010 */
1011 tr->buffer_disabled = 0;
1012 /* Make the flag seen by readers */
1013 smp_wmb();
1014}
1015
1016/**
1017 * tracing_on - enable tracing buffers
1018 *
1019 * This function enables tracing buffers that may have been
1020 * disabled with tracing_off.
1021 */
1022void tracing_on(void)
1023{
	tracer_tracing_on(&global_trace);
1025}
1026EXPORT_SYMBOL_GPL(tracing_on);
1027
1028
1029static __always_inline void
1030__buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1031{
1032 __this_cpu_write(trace_taskinfo_save, true);
1033
1034 /* If this is the temp buffer, we need to commit fully */
1035 if (this_cpu_read(trace_buffered_event) == event) {
1036 /* Length is in event->array[0] */
		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1038 /* Release the temp buffer */
1039 this_cpu_dec(trace_buffered_event_cnt);
1040 /* ring_buffer_unlock_commit() enables preemption */
1041 preempt_enable_notrace();
1042 } else
1043 ring_buffer_unlock_commit(buffer);
1044}
1045
1046int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1047 const char *str, int size)
1048{
1049 struct ring_buffer_event *event;
1050 struct trace_buffer *buffer;
1051 struct print_entry *entry;
1052 unsigned int trace_ctx;
1053 int alloc;
1054
1055 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1056 return 0;
1057
1058 if (unlikely(tracing_selftest_running && tr == &global_trace))
1059 return 0;
1060
1061 if (unlikely(tracing_disabled))
1062 return 0;
1063
1064 alloc = sizeof(*entry) + size + 2; /* possible \n added */
1065
1066 trace_ctx = tracing_gen_ctx();
1067 buffer = tr->array_buffer.buffer;
1068 ring_buffer_nest_start(buffer);
	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
					    trace_ctx);
1071 if (!event) {
1072 size = 0;
1073 goto out;
1074 }
1075
1076 entry = ring_buffer_event_data(event);
1077 entry->ip = ip;
1078
1079 memcpy(&entry->buf, str, size);
1080
1081 /* Add a newline if necessary */
1082 if (entry->buf[size - 1] != '\n') {
1083 entry->buf[size] = '\n';
1084 entry->buf[size + 1] = '\0';
1085 } else
1086 entry->buf[size] = '\0';
1087
1088 __buffer_unlock_commit(buffer, event);
	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1090 out:
1091 ring_buffer_nest_end(buffer);
1092 return size;
1093}
1094EXPORT_SYMBOL_GPL(__trace_array_puts);
1095
1096/**
1097 * __trace_puts - write a constant string into the trace buffer.
1098 * @ip: The address of the caller
1099 * @str: The constant string to write
1100 * @size: The size of the string.
1101 */
1102int __trace_puts(unsigned long ip, const char *str, int size)
1103{
1104 return __trace_array_puts(&global_trace, ip, str, size);
1105}
1106EXPORT_SYMBOL_GPL(__trace_puts);
1107
1108/**
1109 * __trace_bputs - write the pointer to a constant string into trace buffer
1110 * @ip: The address of the caller
1111 * @str: The constant string to write to the buffer to
1112 */
1113int __trace_bputs(unsigned long ip, const char *str)
1114{
1115 struct ring_buffer_event *event;
1116 struct trace_buffer *buffer;
1117 struct bputs_entry *entry;
1118 unsigned int trace_ctx;
1119 int size = sizeof(struct bputs_entry);
1120 int ret = 0;
1121
1122 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1123 return 0;
1124
1125 if (unlikely(tracing_selftest_running || tracing_disabled))
1126 return 0;
1127
1128 trace_ctx = tracing_gen_ctx();
1129 buffer = global_trace.array_buffer.buffer;
1130
1131 ring_buffer_nest_start(buffer);
	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
					    trace_ctx);
1134 if (!event)
1135 goto out;
1136
1137 entry = ring_buffer_event_data(event);
1138 entry->ip = ip;
1139 entry->str = str;
1140
1141 __buffer_unlock_commit(buffer, event);
	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1143
1144 ret = 1;
1145 out:
1146 ring_buffer_nest_end(buffer);
1147 return ret;
1148}
1149EXPORT_SYMBOL_GPL(__trace_bputs);
1150
1151#ifdef CONFIG_TRACER_SNAPSHOT
1152static void tracing_snapshot_instance_cond(struct trace_array *tr,
1153 void *cond_data)
1154{
1155 struct tracer *tracer = tr->current_trace;
1156 unsigned long flags;
1157
1158 if (in_nmi()) {
1159 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1160 trace_array_puts(tr, "*** snapshot is being ignored ***\n");
1161 return;
1162 }
1163
1164 if (!tr->allocated_snapshot) {
1165 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1166 trace_array_puts(tr, "*** stopping trace here! ***\n");
1167 tracer_tracing_off(tr);
1168 return;
1169 }
1170
1171 /* Note, snapshot can not be used when the tracer uses it */
1172 if (tracer->use_max_tr) {
1173 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1174 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1175 return;
1176 }
1177
1178 local_irq_save(flags);
1179 update_max_tr(tr, current, smp_processor_id(), cond_data);
1180 local_irq_restore(flags);
1181}
1182
1183void tracing_snapshot_instance(struct trace_array *tr)
1184{
1185 tracing_snapshot_instance_cond(tr, NULL);
1186}
1187
1188/**
1189 * tracing_snapshot - take a snapshot of the current buffer.
1190 *
1191 * This causes a swap between the snapshot buffer and the current live
1192 * tracing buffer. You can use this to take snapshots of the live
1193 * trace when some condition is triggered, but continue to trace.
1194 *
1195 * Note, make sure to allocate the snapshot with either
1196 * a tracing_snapshot_alloc(), or by doing it manually
1197 * with: echo 1 > /sys/kernel/tracing/snapshot
1198 *
1199 * If the snapshot buffer is not allocated, it will stop tracing.
1200 * Basically making a permanent snapshot.
1201 */
1202void tracing_snapshot(void)
1203{
1204 struct trace_array *tr = &global_trace;
1205
1206 tracing_snapshot_instance(tr);
1207}
1208EXPORT_SYMBOL_GPL(tracing_snapshot);
1209
1210/**
1211 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1212 * @tr: The tracing instance to snapshot
1213 * @cond_data: The data to be tested conditionally, and possibly saved
1214 *
1215 * This is the same as tracing_snapshot() except that the snapshot is
1216 * conditional - the snapshot will only happen if the
1217 * cond_snapshot.update() implementation receiving the cond_data
1218 * returns true, which means that the trace array's cond_snapshot
1219 * update() operation used the cond_data to determine whether the
1220 * snapshot should be taken, and if it was, presumably saved it along
1221 * with the snapshot.
1222 */
1223void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1224{
1225 tracing_snapshot_instance_cond(tr, cond_data);
1226}
1227EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1228
1229/**
1230 * tracing_cond_snapshot_data - get the user data associated with a snapshot
1231 * @tr: The tracing instance
1232 *
1233 * When the user enables a conditional snapshot using
1234 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1235 * with the snapshot. This accessor is used to retrieve it.
1236 *
1237 * Should not be called from cond_snapshot.update(), since it takes
1238 * the tr->max_lock lock, which the code calling
1239 * cond_snapshot.update() has already done.
1240 *
1241 * Returns the cond_data associated with the trace array's snapshot.
1242 */
1243void *tracing_cond_snapshot_data(struct trace_array *tr)
1244{
1245 void *cond_data = NULL;
1246
1247 local_irq_disable();
1248 arch_spin_lock(&tr->max_lock);
1249
1250 if (tr->cond_snapshot)
1251 cond_data = tr->cond_snapshot->cond_data;
1252
1253 arch_spin_unlock(&tr->max_lock);
1254 local_irq_enable();
1255
1256 return cond_data;
1257}
1258EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1259
1260static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1261 struct array_buffer *size_buf, int cpu_id);
1262static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1263
1264int tracing_alloc_snapshot_instance(struct trace_array *tr)
1265{
1266 int ret;
1267
1268 if (!tr->allocated_snapshot) {
1269
1270 /* allocate spare buffer */
		ret = resize_buffer_duplicate_size(&tr->max_buffer,
				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1273 if (ret < 0)
1274 return ret;
1275
1276 tr->allocated_snapshot = true;
1277 }
1278
1279 return 0;
1280}
1281
1282static void free_snapshot(struct trace_array *tr)
1283{
	/*
	 * We don't free the ring buffer. Instead, resize it, because
	 * the max_tr ring buffer has some state (e.g. ring->clock) that
	 * we want to preserve.
	 */
	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
	set_buffer_entries(&tr->max_buffer, 1);
	tracing_reset_online_cpus(&tr->max_buffer);
	tr->allocated_snapshot = false;
1293}
1294
1295/**
1296 * tracing_alloc_snapshot - allocate snapshot buffer.
1297 *
1298 * This only allocates the snapshot buffer if it isn't already
1299 * allocated - it doesn't also take a snapshot.
1300 *
1301 * This is meant to be used in cases where the snapshot buffer needs
1302 * to be set up for events that can't sleep but need to be able to
1303 * trigger a snapshot.
1304 */
1305int tracing_alloc_snapshot(void)
1306{
1307 struct trace_array *tr = &global_trace;
1308 int ret;
1309
1310 ret = tracing_alloc_snapshot_instance(tr);
1311 WARN_ON(ret < 0);
1312
1313 return ret;
1314}
1315EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1316
1317/**
1318 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1319 *
1320 * This is similar to tracing_snapshot(), but it will allocate the
1321 * snapshot buffer if it isn't already allocated. Use this only
1322 * where it is safe to sleep, as the allocation may sleep.
1323 *
1324 * This causes a swap between the snapshot buffer and the current live
1325 * tracing buffer. You can use this to take snapshots of the live
1326 * trace when some condition is triggered, but continue to trace.
1327 */
1328void tracing_snapshot_alloc(void)
1329{
1330 int ret;
1331
1332 ret = tracing_alloc_snapshot();
1333 if (ret < 0)
1334 return;
1335
1336 tracing_snapshot();
1337}
1338EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1339
1340/**
1341 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1342 * @tr: The tracing instance
1343 * @cond_data: User data to associate with the snapshot
1344 * @update: Implementation of the cond_snapshot update function
1345 *
1346 * Check whether the conditional snapshot for the given instance has
1347 * already been enabled, or if the current tracer is already using a
1348 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1349 * save the cond_data and update function inside.
1350 *
1351 * Returns 0 if successful, error otherwise.
1352 */
1353int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1354 cond_update_fn_t update)
1355{
1356 struct cond_snapshot *cond_snapshot;
1357 int ret = 0;
1358
	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1360 if (!cond_snapshot)
1361 return -ENOMEM;
1362
1363 cond_snapshot->cond_data = cond_data;
1364 cond_snapshot->update = update;
1365
1366 mutex_lock(&trace_types_lock);
1367
1368 ret = tracing_alloc_snapshot_instance(tr);
1369 if (ret)
1370 goto fail_unlock;
1371
1372 if (tr->current_trace->use_max_tr) {
1373 ret = -EBUSY;
1374 goto fail_unlock;
1375 }
1376
1377 /*
1378 * The cond_snapshot can only change to NULL without the
1379 * trace_types_lock. We don't care if we race with it going
1380 * to NULL, but we want to make sure that it's not set to
1381 * something other than NULL when we get here, which we can
1382 * do safely with only holding the trace_types_lock and not
1383 * having to take the max_lock.
1384 */
1385 if (tr->cond_snapshot) {
1386 ret = -EBUSY;
1387 goto fail_unlock;
1388 }
1389
1390 local_irq_disable();
1391 arch_spin_lock(&tr->max_lock);
1392 tr->cond_snapshot = cond_snapshot;
1393 arch_spin_unlock(&tr->max_lock);
1394 local_irq_enable();
1395
	mutex_unlock(&trace_types_lock);
1397
1398 return ret;
1399
1400 fail_unlock:
	mutex_unlock(&trace_types_lock);
	kfree(cond_snapshot);
1403 return ret;
1404}
1405EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1406
1407/**
1408 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1409 * @tr: The tracing instance
1410 *
1411 * Check whether the conditional snapshot for the given instance is
1412 * enabled; if so, free the cond_snapshot associated with it,
1413 * otherwise return -EINVAL.
1414 *
1415 * Returns 0 if successful, error otherwise.
1416 */
1417int tracing_snapshot_cond_disable(struct trace_array *tr)
1418{
1419 int ret = 0;
1420
1421 local_irq_disable();
1422 arch_spin_lock(&tr->max_lock);
1423
1424 if (!tr->cond_snapshot)
1425 ret = -EINVAL;
1426 else {
		kfree(tr->cond_snapshot);
1428 tr->cond_snapshot = NULL;
1429 }
1430
1431 arch_spin_unlock(&tr->max_lock);
1432 local_irq_enable();
1433
1434 return ret;
1435}
1436EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
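
/*
 * Illustrative flow for the conditional snapshot API (names invented,
 * error handling elided): the caller supplies an update() callback
 * that decides, per call, whether the buffer swap should happen.
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		// return true to let tracing_snapshot_cond() take the snapshot
 *		return true;
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, my_data, my_update);
 *	...
 *	tracing_snapshot_cond(tr, my_data);	// snapshots only if my_update() agreed
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */
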
1437#else
1438void tracing_snapshot(void)
1439{
1440 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1441}
1442EXPORT_SYMBOL_GPL(tracing_snapshot);
1443void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1444{
1445 WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1446}
1447EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1448int tracing_alloc_snapshot(void)
1449{
1450 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1451 return -ENODEV;
1452}
1453EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1454void tracing_snapshot_alloc(void)
1455{
1456 /* Give warning */
1457 tracing_snapshot();
1458}
1459EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1460void *tracing_cond_snapshot_data(struct trace_array *tr)
1461{
1462 return NULL;
1463}
1464EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1465int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1466{
1467 return -ENODEV;
1468}
1469EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1470int tracing_snapshot_cond_disable(struct trace_array *tr)
1471{
1472 return false;
1473}
1474EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1475#define free_snapshot(tr) do { } while (0)
1476#endif /* CONFIG_TRACER_SNAPSHOT */
1477
1478void tracer_tracing_off(struct trace_array *tr)
1479{
1480 if (tr->array_buffer.buffer)
		ring_buffer_record_off(tr->array_buffer.buffer);
1482 /*
1483 * This flag is looked at when buffers haven't been allocated
1484 * yet, or by some tracers (like irqsoff), that just want to
1485 * know if the ring buffer has been disabled, but it can handle
1486 * races of where it gets disabled but we still do a record.
1487 * As the check is in the fast path of the tracers, it is more
1488 * important to be fast than accurate.
1489 */
1490 tr->buffer_disabled = 1;
1491 /* Make the flag seen by readers */
1492 smp_wmb();
1493}
1494
1495/**
1496 * tracing_off - turn off tracing buffers
1497 *
1498 * This function stops the tracing buffers from recording data.
1499 * It does not disable any overhead the tracers themselves may
1500 * be causing. This function simply causes all recording to
1501 * the ring buffers to fail.
1502 */
1503void tracing_off(void)
1504{
	tracer_tracing_off(&global_trace);
1506}
1507EXPORT_SYMBOL_GPL(tracing_off);
1508
1509void disable_trace_on_warning(void)
1510{
1511 if (__disable_trace_on_warning) {
		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
				       "Disabling tracing due to warning\n");
1514 tracing_off();
1515 }
1516}
1517
1518/**
1519 * tracer_tracing_is_on - show real state of ring buffer enabled
1520 * @tr : the trace array to know if ring buffer is enabled
1521 *
1522 * Shows real state of the ring buffer if it is enabled or not.
1523 */
1524bool tracer_tracing_is_on(struct trace_array *tr)
1525{
1526 if (tr->array_buffer.buffer)
		return ring_buffer_record_is_on(tr->array_buffer.buffer);
1528 return !tr->buffer_disabled;
1529}
1530
1531/**
1532 * tracing_is_on - show state of ring buffers enabled
1533 */
1534int tracing_is_on(void)
1535{
	return tracer_tracing_is_on(&global_trace);
1537}
1538EXPORT_SYMBOL_GPL(tracing_is_on);
1539
1540static int __init set_buf_size(char *str)
1541{
1542 unsigned long buf_size;
1543
1544 if (!str)
1545 return 0;
	buf_size = memparse(str, &str);
1547 /*
1548 * nr_entries can not be zero and the startup
1549 * tests require some buffer space. Therefore
1550 * ensure we have at least 4096 bytes of buffer.
1551 */
1552 trace_buf_size = max(4096UL, buf_size);
1553 return 1;
1554}
1555__setup("trace_buf_size=", set_buf_size);
1556
1557static int __init set_tracing_thresh(char *str)
1558{
1559 unsigned long threshold;
1560 int ret;
1561
1562 if (!str)
1563 return 0;
	ret = kstrtoul(str, 0, &threshold);
1565 if (ret < 0)
1566 return 0;
1567 tracing_thresh = threshold * 1000;
1568 return 1;
1569}
1570__setup("tracing_thresh=", set_tracing_thresh);
1571
1572unsigned long nsecs_to_usecs(unsigned long nsecs)
1573{
1574 return nsecs / 1000;
1575}
1576
1577/*
1578 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1579 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1580 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1581 * of strings in the order that the evals (enum) were defined.
1582 */
1583#undef C
1584#define C(a, b) b
1585
1586/* These must match the bit positions in trace_iterator_flags */
1587static const char *trace_options[] = {
1588 TRACE_FLAGS
1589 NULL
1590};
1591
1592static struct {
1593 u64 (*func)(void);
1594 const char *name;
1595 int in_ns; /* is this clock in nanoseconds? */
1596} trace_clocks[] = {
1597 { trace_clock_local, "local", 1 },
1598 { trace_clock_global, "global", 1 },
1599 { trace_clock_counter, "counter", 0 },
1600 { trace_clock_jiffies, "uptime", 0 },
1601 { trace_clock, "perf", 1 },
1602 { ktime_get_mono_fast_ns, "mono", 1 },
1603 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1604 { ktime_get_boot_fast_ns, "boot", 1 },
1605 { ktime_get_tai_fast_ns, "tai", 1 },
1606 ARCH_TRACE_CLOCKS
1607};
1608
1609bool trace_clock_in_ns(struct trace_array *tr)
1610{
1611 if (trace_clocks[tr->clock_id].in_ns)
1612 return true;
1613
1614 return false;
1615}
1616
1617/*
1618 * trace_parser_get_init - gets the buffer for trace parser
1619 */
1620int trace_parser_get_init(struct trace_parser *parser, int size)
1621{
1622 memset(parser, 0, sizeof(*parser));
1623
1624 parser->buffer = kmalloc(size, GFP_KERNEL);
1625 if (!parser->buffer)
1626 return 1;
1627
1628 parser->size = size;
1629 return 0;
1630}
1631
1632/*
1633 * trace_parser_put - frees the buffer for trace parser
1634 */
1635void trace_parser_put(struct trace_parser *parser)
1636{
	kfree(parser->buffer);
1638 parser->buffer = NULL;
1639}
1640
1641/*
1642 * trace_get_user - reads the user input string separated by space
1643 * (matched by isspace(ch))
1644 *
1645 * For each string found the 'struct trace_parser' is updated,
1646 * and the function returns.
1647 *
1648 * Returns number of bytes read.
1649 *
1650 * See kernel/trace/trace.h for 'struct trace_parser' details.
1651 */
1652int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1653 size_t cnt, loff_t *ppos)
1654{
1655 char ch;
1656 size_t read = 0;
1657 ssize_t ret;
1658
1659 if (!*ppos)
1660 trace_parser_clear(parser);
1661
1662 ret = get_user(ch, ubuf++);
1663 if (ret)
1664 goto out;
1665
1666 read++;
1667 cnt--;
1668
1669 /*
1670 * The parser is not finished with the last write,
1671 * continue reading the user input without skipping spaces.
1672 */
1673 if (!parser->cont) {
1674 /* skip white space */
1675 while (cnt && isspace(ch)) {
1676 ret = get_user(ch, ubuf++);
1677 if (ret)
1678 goto out;
1679 read++;
1680 cnt--;
1681 }
1682
1683 parser->idx = 0;
1684
1685 /* only spaces were written */
1686 if (isspace(ch) || !ch) {
1687 *ppos += read;
1688 ret = read;
1689 goto out;
1690 }
1691 }
1692
1693 /* read the non-space input */
1694 while (cnt && !isspace(ch) && ch) {
1695 if (parser->idx < parser->size - 1)
1696 parser->buffer[parser->idx++] = ch;
1697 else {
1698 ret = -EINVAL;
1699 goto out;
1700 }
1701 ret = get_user(ch, ubuf++);
1702 if (ret)
1703 goto out;
1704 read++;
1705 cnt--;
1706 }
1707
1708 /* We either got finished input or we have to wait for another call. */
1709 if (isspace(ch) || !ch) {
1710 parser->buffer[parser->idx] = 0;
1711 parser->cont = false;
1712 } else if (parser->idx < parser->size - 1) {
1713 parser->cont = true;
1714 parser->buffer[parser->idx++] = ch;
1715 /* Make sure the parsed string always terminates with '\0'. */
1716 parser->buffer[parser->idx] = 0;
1717 } else {
1718 ret = -EINVAL;
1719 goto out;
1720 }
1721
1722 *ppos += read;
1723 ret = read;
1724
1725out:
1726 return ret;
1727}
1728
1729/* TODO add a seq_buf_to_buffer() */
1730static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1731{
1732 int len;
1733
1734 if (trace_seq_used(s) <= s->readpos)
1735 return -EBUSY;
1736
1737 len = trace_seq_used(s) - s->readpos;
1738 if (cnt > len)
1739 cnt = len;
1740 memcpy(buf, s->buffer + s->readpos, cnt);
1741
1742 s->readpos += cnt;
1743 return cnt;
1744}
1745
1746unsigned long __read_mostly tracing_thresh;
1747
1748#ifdef CONFIG_TRACER_MAX_TRACE
1749static const struct file_operations tracing_max_lat_fops;
1750
1751#ifdef LATENCY_FS_NOTIFY
1752
1753static struct workqueue_struct *fsnotify_wq;
1754
1755static void latency_fsnotify_workfn(struct work_struct *work)
1756{
1757 struct trace_array *tr = container_of(work, struct trace_array,
1758 fsnotify_work);
	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1760}
1761
1762static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1763{
1764 struct trace_array *tr = container_of(iwork, struct trace_array,
1765 fsnotify_irqwork);
	queue_work(fsnotify_wq, &tr->fsnotify_work);
1767}
1768
1769static void trace_create_maxlat_file(struct trace_array *tr,
1770 struct dentry *d_tracer)
1771{
1772 INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
	tr->d_max_latency = trace_create_file("tracing_max_latency",
					      TRACE_MODE_WRITE,
					      d_tracer, tr,
					      &tracing_max_lat_fops);
1778}
1779
1780__init static int latency_fsnotify_init(void)
1781{
	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1784 if (!fsnotify_wq) {
1785 pr_err("Unable to allocate tr_max_lat_wq\n");
1786 return -ENOMEM;
1787 }
1788 return 0;
1789}
1790
1791late_initcall_sync(latency_fsnotify_init);
1792
1793void latency_fsnotify(struct trace_array *tr)
1794{
1795 if (!fsnotify_wq)
1796 return;
1797 /*
1798 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1799 * possible that we are called from __schedule() or do_idle(), which
1800 * could cause a deadlock.
1801 */
	irq_work_queue(&tr->fsnotify_irqwork);
1803}
1804
1805#else /* !LATENCY_FS_NOTIFY */
1806
1807#define trace_create_maxlat_file(tr, d_tracer) \
1808 trace_create_file("tracing_max_latency", TRACE_MODE_WRITE, \
1809 d_tracer, tr, &tracing_max_lat_fops)
1810
1811#endif
1812
1813/*
1814 * Copy the new maximum trace into the separate maximum-trace
1815 * structure. (this way the maximum trace is permanently saved,
1816 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1817 */
1818static void
1819__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1820{
1821 struct array_buffer *trace_buf = &tr->array_buffer;
1822 struct array_buffer *max_buf = &tr->max_buffer;
1823 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1824 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1825
1826 max_buf->cpu = cpu;
1827 max_buf->time_start = data->preempt_timestamp;
1828
1829 max_data->saved_latency = tr->max_latency;
1830 max_data->critical_start = data->critical_start;
1831 max_data->critical_end = data->critical_end;
1832
	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1834 max_data->pid = tsk->pid;
1835 /*
1836 * If tsk == current, then use current_uid(), as that does not use
1837 * RCU. The irq tracer can be called out of RCU scope.
1838 */
1839 if (tsk == current)
1840 max_data->uid = current_uid();
1841 else
1842 max_data->uid = task_uid(tsk);
1843
1844 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1845 max_data->policy = tsk->policy;
1846 max_data->rt_priority = tsk->rt_priority;
1847
1848 /* record this tasks comm */
	tracing_record_cmdline(tsk);
1850 latency_fsnotify(tr);
1851}
1852
1853/**
1854 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1855 * @tr: tracer
1856 * @tsk: the task with the latency
1857 * @cpu: The cpu that initiated the trace.
1858 * @cond_data: User data associated with a conditional snapshot
1859 *
1860 * Flip the buffers between the @tr and the max_tr and record information
1861 * about which task was the cause of this latency.
1862 */
1863void
1864update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1865 void *cond_data)
1866{
1867 if (tr->stop_count)
1868 return;
1869
1870 WARN_ON_ONCE(!irqs_disabled());
1871
1872 if (!tr->allocated_snapshot) {
1873 /* Only the nop tracer should hit this when disabling */
1874 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1875 return;
1876 }
1877
1878 arch_spin_lock(&tr->max_lock);
1879
1880 /* Inherit the recordable setting from array_buffer */
	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
		ring_buffer_record_on(tr->max_buffer.buffer);
	else
		ring_buffer_record_off(tr->max_buffer.buffer);
1885
1886#ifdef CONFIG_TRACER_SNAPSHOT
1887 if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1888 arch_spin_unlock(&tr->max_lock);
1889 return;
1890 }
1891#endif
1892 swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1893
1894 __update_max_tr(tr, tsk, cpu);
1895
1896 arch_spin_unlock(&tr->max_lock);
1897}
1898
1899/**
1900 * update_max_tr_single - only copy one trace over, and reset the rest
1901 * @tr: tracer
1902 * @tsk: task with the latency
1903 * @cpu: the cpu of the buffer to copy.
1904 *
1905 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1906 */
1907void
1908update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1909{
1910 int ret;
1911
1912 if (tr->stop_count)
1913 return;
1914
1915 WARN_ON_ONCE(!irqs_disabled());
1916 if (!tr->allocated_snapshot) {
1917 /* Only the nop tracer should hit this when disabling */
1918 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1919 return;
1920 }
1921
1922 arch_spin_lock(&tr->max_lock);
1923
	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1925
1926 if (ret == -EBUSY) {
		/*
		 * We failed to swap the buffer, either because a commit
		 * was taking place on this CPU or because a resize is in
		 * progress. We fail to record, but we reset the max trace
		 * buffer (no one writes directly to it) and flag that it
		 * failed.
		 */
		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
			"Failed to swap buffers due to commit or resize in progress\n");
1936 }
1937
1938 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1939
1940 __update_max_tr(tr, tsk, cpu);
1941 arch_spin_unlock(&tr->max_lock);
1942}
1943
1944#endif /* CONFIG_TRACER_MAX_TRACE */
1945
1946static int wait_on_pipe(struct trace_iterator *iter, int full)
1947{
1948 /* Iterators are static, they should be filled or empty */
	if (trace_buffer_iter(iter, iter->cpu_file))
1950 return 0;
1951
1952 return ring_buffer_wait(buffer: iter->array_buffer->buffer, cpu: iter->cpu_file,
1953 full);
1954}
1955
1956#ifdef CONFIG_FTRACE_STARTUP_TEST
1957static bool selftests_can_run;
1958
1959struct trace_selftests {
1960 struct list_head list;
1961 struct tracer *type;
1962};
1963
1964static LIST_HEAD(postponed_selftests);
1965
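/*
 * Selftests requested before the tracing infrastructure is fully up are
 * queued on postponed_selftests here and run later from
 * init_trace_selftests().
 */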
1966static int save_selftest(struct tracer *type)
1967{
1968 struct trace_selftests *selftest;
1969
	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
	if (!selftest)
		return -ENOMEM;

	selftest->type = type;
	list_add(&selftest->list, &postponed_selftests);
1976 return 0;
1977}
1978
1979static int run_tracer_selftest(struct tracer *type)
1980{
1981 struct trace_array *tr = &global_trace;
1982 struct tracer *saved_tracer = tr->current_trace;
1983 int ret;
1984
1985 if (!type->selftest || tracing_selftest_disabled)
1986 return 0;
1987
1988 /*
1989 * If a tracer registers early in boot up (before scheduling is
1990 * initialized and such), then do not run its selftests yet.
1991 * Instead, run it a little later in the boot process.
1992 */
1993 if (!selftests_can_run)
1994 return save_selftest(type);
1995
1996 if (!tracing_is_on()) {
1997 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1998 type->name);
1999 return 0;
2000 }
2001
2002 /*
2003 * Run a selftest on this tracer.
2004 * Here we reset the trace buffer, and set the current
2005 * tracer to be this tracer. The tracer can then run some
2006 * internal tracing to verify that everything is in order.
2007 * If we fail, we do not register this tracer.
2008 */
	tracing_reset_online_cpus(&tr->array_buffer);
2010
2011 tr->current_trace = type;
2012
2013#ifdef CONFIG_TRACER_MAX_TRACE
2014 if (type->use_max_tr) {
2015 /* If we expanded the buffers, make sure the max is expanded too */
		if (tr->ring_buffer_expanded)
			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
					   RING_BUFFER_ALL_CPUS);
2019 tr->allocated_snapshot = true;
2020 }
2021#endif
2022
2023 /* the test is responsible for initializing and enabling */
2024 pr_info("Testing tracer %s: ", type->name);
2025 ret = type->selftest(type, tr);
2026 /* the test is responsible for resetting too */
2027 tr->current_trace = saved_tracer;
2028 if (ret) {
2029 printk(KERN_CONT "FAILED!\n");
2030 /* Add the warning after printing 'FAILED' */
2031 WARN_ON(1);
2032 return -1;
2033 }
2034 /* Only reset on passing, to avoid touching corrupted buffers */
	tracing_reset_online_cpus(&tr->array_buffer);
2036
2037#ifdef CONFIG_TRACER_MAX_TRACE
2038 if (type->use_max_tr) {
2039 tr->allocated_snapshot = false;
2040
2041 /* Shrink the max buffer again */
		if (tr->ring_buffer_expanded)
			ring_buffer_resize(tr->max_buffer.buffer, 1,
					   RING_BUFFER_ALL_CPUS);
2045 }
2046#endif
2047
2048 printk(KERN_CONT "PASSED\n");
2049 return 0;
2050}
2051
2052static int do_run_tracer_selftest(struct tracer *type)
2053{
2054 int ret;
2055
2056 /*
2057 * Tests can take a long time, especially if they are run one after the
2058 * other, as does happen during bootup when all the tracers are
2059 * registered. This could cause the soft lockup watchdog to trigger.
2060 */
2061 cond_resched();
2062
2063 tracing_selftest_running = true;
2064 ret = run_tracer_selftest(type);
2065 tracing_selftest_running = false;
2066
2067 return ret;
2068}
2069
2070static __init int init_trace_selftests(void)
2071{
2072 struct trace_selftests *p, *n;
2073 struct tracer *t, **last;
2074 int ret;
2075
2076 selftests_can_run = true;
2077
2078 mutex_lock(&trace_types_lock);
2079
	if (list_empty(&postponed_selftests))
2081 goto out;
2082
2083 pr_info("Running postponed tracer tests:\n");
2084
2085 tracing_selftest_running = true;
2086 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
		/*
		 * This loop can take minutes when sanitizers are enabled, so
		 * let's make sure we allow RCU processing.
		 */
		cond_resched();
		ret = run_tracer_selftest(p->type);
2092 /* If the test fails, then warn and remove from available_tracers */
2093 if (ret < 0) {
2094 WARN(1, "tracer: %s failed selftest, disabling\n",
2095 p->type->name);
2096 last = &trace_types;
2097 for (t = trace_types; t; t = t->next) {
2098 if (t == p->type) {
2099 *last = t->next;
2100 break;
2101 }
2102 last = &t->next;
2103 }
2104 }
		list_del(&p->list);
		kfree(p);
2107 }
2108 tracing_selftest_running = false;
2109
2110 out:
	mutex_unlock(&trace_types_lock);
2112
2113 return 0;
2114}
2115core_initcall(init_trace_selftests);
2116#else
2117static inline int run_tracer_selftest(struct tracer *type)
2118{
2119 return 0;
2120}
2121static inline int do_run_tracer_selftest(struct tracer *type)
2122{
2123 return 0;
2124}
2125#endif /* CONFIG_FTRACE_STARTUP_TEST */
2126
2127static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2128
2129static void __init apply_trace_boot_options(void);
2130
2131/**
2132 * register_tracer - register a tracer with the ftrace system.
2133 * @type: the plugin for the tracer
2134 *
2135 * Register a new plugin tracer.
2136 */
2137int __init register_tracer(struct tracer *type)
2138{
2139 struct tracer *t;
2140 int ret = 0;
2141
2142 if (!type->name) {
2143 pr_info("Tracer must have a name\n");
2144 return -1;
2145 }
2146
2147 if (strlen(type->name) >= MAX_TRACER_SIZE) {
2148 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2149 return -1;
2150 }
2151
	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2153 pr_warn("Can not register tracer %s due to lockdown\n",
2154 type->name);
2155 return -EPERM;
2156 }
2157
2158 mutex_lock(&trace_types_lock);
2159
2160 for (t = trace_types; t; t = t->next) {
2161 if (strcmp(type->name, t->name) == 0) {
2162 /* already found */
2163 pr_info("Tracer %s already registered\n",
2164 type->name);
2165 ret = -1;
2166 goto out;
2167 }
2168 }
2169
2170 if (!type->set_flag)
2171 type->set_flag = &dummy_set_flag;
2172 if (!type->flags) {
		/* allocate a dummy tracer_flags */
		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2175 if (!type->flags) {
2176 ret = -ENOMEM;
2177 goto out;
2178 }
2179 type->flags->val = 0;
2180 type->flags->opts = dummy_tracer_opt;
2181 } else
2182 if (!type->flags->opts)
2183 type->flags->opts = dummy_tracer_opt;
2184
2185 /* store the tracer for __set_tracer_option */
2186 type->flags->trace = type;
2187
2188 ret = do_run_tracer_selftest(type);
2189 if (ret < 0)
2190 goto out;
2191
2192 type->next = trace_types;
2193 trace_types = type;
	add_tracer_options(&global_trace, type);
2195
2196 out:
	mutex_unlock(&trace_types_lock);
2198
2199 if (ret || !default_bootup_tracer)
2200 goto out_unlock;
2201
2202 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2203 goto out_unlock;
2204
2205 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2206 /* Do we want this tracer to start on bootup? */
	tracing_set_tracer(&global_trace, type->name);
2208 default_bootup_tracer = NULL;
2209
2210 apply_trace_boot_options();
2211
2212 /* disable other selftests, since this will break it. */
	disable_tracing_selftest("running a tracer");
2214
2215 out_unlock:
2216 return ret;
2217}
2218
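/*
 * Clear out a single per-CPU buffer.  Recording is disabled around the
 * reset and synchronize_rcu() makes sure any in-flight commits have
 * finished before the buffer is cleared.
 */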
2219static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2220{
2221 struct trace_buffer *buffer = buf->buffer;
2222
2223 if (!buffer)
2224 return;
2225
2226 ring_buffer_record_disable(buffer);
2227
2228 /* Make sure all commits have finished */
2229 synchronize_rcu();
2230 ring_buffer_reset_cpu(buffer, cpu);
2231
2232 ring_buffer_record_enable(buffer);
2233}
2234
2235void tracing_reset_online_cpus(struct array_buffer *buf)
2236{
2237 struct trace_buffer *buffer = buf->buffer;
2238
2239 if (!buffer)
2240 return;
2241
2242 ring_buffer_record_disable(buffer);
2243
2244 /* Make sure all commits have finished */
2245 synchronize_rcu();
2246
	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2248
2249 ring_buffer_reset_online_cpus(buffer);
2250
2251 ring_buffer_record_enable(buffer);
2252}
2253
2254/* Must have trace_types_lock held */
2255void tracing_reset_all_online_cpus_unlocked(void)
2256{
2257 struct trace_array *tr;
2258
2259 lockdep_assert_held(&trace_types_lock);
2260
2261 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2262 if (!tr->clear_trace)
2263 continue;
2264 tr->clear_trace = false;
		tracing_reset_online_cpus(&tr->array_buffer);
#ifdef CONFIG_TRACER_MAX_TRACE
		tracing_reset_online_cpus(&tr->max_buffer);
2268#endif
2269 }
2270}
2271
2272void tracing_reset_all_online_cpus(void)
2273{
2274 mutex_lock(&trace_types_lock);
2275 tracing_reset_all_online_cpus_unlocked();
	mutex_unlock(&trace_types_lock);
2277}
2278
2279/*
2280 * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2281 * is the tgid last observed corresponding to pid=i.
2282 */
2283static int *tgid_map;
2284
2285/* The maximum valid index into tgid_map. */
2286static size_t tgid_map_max;
2287
2288#define SAVED_CMDLINES_DEFAULT 128
2289#define NO_CMDLINE_MAP UINT_MAX
2290/*
2291 * Preemption must be disabled before acquiring trace_cmdline_lock.
2292 * The various trace_arrays' max_lock must be acquired in a context
2293 * where interrupt is disabled.
2294 */
2295static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2296struct saved_cmdlines_buffer {
2297 unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2298 unsigned *map_cmdline_to_pid;
2299 unsigned cmdline_num;
2300 int cmdline_idx;
2301 char *saved_cmdlines;
2302};
2303static struct saved_cmdlines_buffer *savedcmd;
2304
2305static inline char *get_saved_cmdlines(int idx)
2306{
2307 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2308}
2309
2310static inline void set_cmdline(int idx, const char *cmdline)
2311{
	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2313}
2314
2315static int allocate_cmdlines_buffer(unsigned int val,
2316 struct saved_cmdlines_buffer *s)
2317{
	s->map_cmdline_to_pid = kmalloc_array(val,
					      sizeof(*s->map_cmdline_to_pid),
					      GFP_KERNEL);
	if (!s->map_cmdline_to_pid)
		return -ENOMEM;

	s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
	if (!s->saved_cmdlines) {
		kfree(s->map_cmdline_to_pid);
2327 return -ENOMEM;
2328 }
2329
2330 s->cmdline_idx = 0;
2331 s->cmdline_num = val;
2332 memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2333 sizeof(s->map_pid_to_cmdline));
2334 memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2335 val * sizeof(*s->map_cmdline_to_pid));
2336
2337 return 0;
2338}
2339
2340static int trace_create_savedcmd(void)
2341{
2342 int ret;
2343
	savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
	if (!savedcmd)
		return -ENOMEM;

	ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
	if (ret < 0) {
		kfree(savedcmd);
2351 savedcmd = NULL;
2352 return -ENOMEM;
2353 }
2354
2355 return 0;
2356}
2357
2358int is_tracing_stopped(void)
2359{
2360 return global_trace.stop_count;
2361}
2362
2363/**
2364 * tracing_start - quick start of the tracer
2365 *
2366 * If tracing is enabled but was stopped by tracing_stop,
2367 * this will start the tracer back up.
2368 */
2369void tracing_start(void)
2370{
2371 struct trace_buffer *buffer;
2372 unsigned long flags;
2373
2374 if (tracing_disabled)
2375 return;
2376
2377 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2378 if (--global_trace.stop_count) {
2379 if (global_trace.stop_count < 0) {
2380 /* Someone screwed up their debugging */
2381 WARN_ON_ONCE(1);
2382 global_trace.stop_count = 0;
2383 }
2384 goto out;
2385 }
2386
2387 /* Prevent the buffers from switching */
2388 arch_spin_lock(&global_trace.max_lock);
2389
2390 buffer = global_trace.array_buffer.buffer;
2391 if (buffer)
2392 ring_buffer_record_enable(buffer);
2393
2394#ifdef CONFIG_TRACER_MAX_TRACE
2395 buffer = global_trace.max_buffer.buffer;
2396 if (buffer)
2397 ring_buffer_record_enable(buffer);
2398#endif
2399
2400 arch_spin_unlock(&global_trace.max_lock);
2401
2402 out:
2403 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2404}
2405
2406static void tracing_start_tr(struct trace_array *tr)
2407{
2408 struct trace_buffer *buffer;
2409 unsigned long flags;
2410
2411 if (tracing_disabled)
2412 return;
2413
2414 /* If global, we need to also start the max tracer */
2415 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2416 return tracing_start();
2417
2418 raw_spin_lock_irqsave(&tr->start_lock, flags);
2419
2420 if (--tr->stop_count) {
2421 if (tr->stop_count < 0) {
2422 /* Someone screwed up their debugging */
2423 WARN_ON_ONCE(1);
2424 tr->stop_count = 0;
2425 }
2426 goto out;
2427 }
2428
2429 buffer = tr->array_buffer.buffer;
2430 if (buffer)
2431 ring_buffer_record_enable(buffer);
2432
2433 out:
2434 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2435}
2436
2437/**
2438 * tracing_stop - quick stop of the tracer
2439 *
2440 * Light weight way to stop tracing. Use in conjunction with
2441 * tracing_start.
2442 */
2443void tracing_stop(void)
2444{
2445 struct trace_buffer *buffer;
2446 unsigned long flags;
2447
2448 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2449 if (global_trace.stop_count++)
2450 goto out;
2451
2452 /* Prevent the buffers from switching */
2453 arch_spin_lock(&global_trace.max_lock);
2454
2455 buffer = global_trace.array_buffer.buffer;
2456 if (buffer)
2457 ring_buffer_record_disable(buffer);
2458
2459#ifdef CONFIG_TRACER_MAX_TRACE
2460 buffer = global_trace.max_buffer.buffer;
2461 if (buffer)
2462 ring_buffer_record_disable(buffer);
2463#endif
2464
2465 arch_spin_unlock(&global_trace.max_lock);
2466
2467 out:
2468 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2469}
2470
2471static void tracing_stop_tr(struct trace_array *tr)
2472{
2473 struct trace_buffer *buffer;
2474 unsigned long flags;
2475
2476 /* If global, we need to also stop the max tracer */
2477 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2478 return tracing_stop();
2479
2480 raw_spin_lock_irqsave(&tr->start_lock, flags);
2481 if (tr->stop_count++)
2482 goto out;
2483
2484 buffer = tr->array_buffer.buffer;
2485 if (buffer)
2486 ring_buffer_record_disable(buffer);
2487
2488 out:
2489 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2490}
2491
2492static int trace_save_cmdline(struct task_struct *tsk)
2493{
2494 unsigned tpid, idx;
2495
2496 /* treat recording of idle task as a success */
2497 if (!tsk->pid)
2498 return 1;
2499
2500 tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2501
2502 /*
2503 * It's not the end of the world if we don't get
2504 * the lock, but we also don't want to spin
2505 * nor do we want to disable interrupts,
2506 * so if we miss here, then better luck next time.
2507 *
	 * This is called from within the scheduler and from wakeups, so
	 * interrupts had better be disabled and the run queue lock held.
2510 */
2511 lockdep_assert_preemption_disabled();
2512 if (!arch_spin_trylock(&trace_cmdline_lock))
2513 return 0;
2514
2515 idx = savedcmd->map_pid_to_cmdline[tpid];
2516 if (idx == NO_CMDLINE_MAP) {
2517 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2518
2519 savedcmd->map_pid_to_cmdline[tpid] = idx;
2520 savedcmd->cmdline_idx = idx;
2521 }
2522
2523 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
	set_cmdline(idx, tsk->comm);
2525
2526 arch_spin_unlock(&trace_cmdline_lock);
2527
2528 return 1;
2529}
2530
2531static void __trace_find_cmdline(int pid, char comm[])
2532{
2533 unsigned map;
2534 int tpid;
2535
2536 if (!pid) {
		strcpy(comm, "<idle>");
2538 return;
2539 }
2540
2541 if (WARN_ON_ONCE(pid < 0)) {
		strcpy(comm, "<XXX>");
2543 return;
2544 }
2545
2546 tpid = pid & (PID_MAX_DEFAULT - 1);
2547 map = savedcmd->map_pid_to_cmdline[tpid];
2548 if (map != NO_CMDLINE_MAP) {
2549 tpid = savedcmd->map_cmdline_to_pid[map];
2550 if (tpid == pid) {
			strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2552 return;
2553 }
2554 }
	strcpy(comm, "<...>");
2556}
2557
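/*
 * Look up the comm last saved for @pid.  Takes trace_cmdline_lock with
 * preemption disabled, matching the locking rules documented above
 * trace_cmdline_lock.
 */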
2558void trace_find_cmdline(int pid, char comm[])
2559{
2560 preempt_disable();
2561 arch_spin_lock(&trace_cmdline_lock);
2562
2563 __trace_find_cmdline(pid, comm);
2564
2565 arch_spin_unlock(&trace_cmdline_lock);
2566 preempt_enable();
2567}
2568
2569static int *trace_find_tgid_ptr(int pid)
2570{
2571 /*
2572 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2573 * if we observe a non-NULL tgid_map then we also observe the correct
2574 * tgid_map_max.
2575 */
2576 int *map = smp_load_acquire(&tgid_map);
2577
2578 if (unlikely(!map || pid > tgid_map_max))
2579 return NULL;
2580
2581 return &map[pid];
2582}
2583
2584int trace_find_tgid(int pid)
2585{
2586 int *ptr = trace_find_tgid_ptr(pid);
2587
2588 return ptr ? *ptr : 0;
2589}
2590
2591static int trace_save_tgid(struct task_struct *tsk)
2592{
2593 int *ptr;
2594
2595 /* treat recording of idle task as a success */
2596 if (!tsk->pid)
2597 return 1;
2598
	ptr = trace_find_tgid_ptr(tsk->pid);
2600 if (!ptr)
2601 return 0;
2602
2603 *ptr = tsk->tgid;
2604 return 1;
2605}
2606
2607static bool tracing_record_taskinfo_skip(int flags)
2608{
2609 if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2610 return true;
2611 if (!__this_cpu_read(trace_taskinfo_save))
2612 return true;
2613 return false;
2614}
2615
2616/**
2617 * tracing_record_taskinfo - record the task info of a task
2618 *
2619 * @task: task to record
2620 * @flags: TRACE_RECORD_CMDLINE for recording comm
2621 * TRACE_RECORD_TGID for recording tgid
2622 */
2623void tracing_record_taskinfo(struct task_struct *task, int flags)
2624{
2625 bool done;
2626
2627 if (tracing_record_taskinfo_skip(flags))
2628 return;
2629
2630 /*
2631 * Record as much task information as possible. If some fail, continue
2632 * to try to record the others.
2633 */
	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2636
2637 /* If recording any information failed, retry again soon. */
2638 if (!done)
2639 return;
2640
2641 __this_cpu_write(trace_taskinfo_save, false);
2642}
2643
2644/**
2645 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2646 *
2647 * @prev: previous task during sched_switch
2648 * @next: next task during sched_switch
2649 * @flags: TRACE_RECORD_CMDLINE for recording comm
2650 * TRACE_RECORD_TGID for recording tgid
2651 */
2652void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2653 struct task_struct *next, int flags)
2654{
2655 bool done;
2656
2657 if (tracing_record_taskinfo_skip(flags))
2658 return;
2659
2660 /*
2661 * Record as much task information as possible. If some fail, continue
2662 * to try to record the others.
2663 */
	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2668
2669 /* If recording any information failed, retry again soon. */
2670 if (!done)
2671 return;
2672
2673 __this_cpu_write(trace_taskinfo_save, false);
2674}
2675
2676/* Helpers to record a specific task information */
2677void tracing_record_cmdline(struct task_struct *task)
2678{
2679 tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2680}
2681
2682void tracing_record_tgid(struct task_struct *task)
2683{
2684 tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2685}
2686
2687/*
2688 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2689 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2690 * simplifies those functions and keeps them in sync.
2691 */
2692enum print_line_t trace_handle_return(struct trace_seq *s)
2693{
2694 return trace_seq_has_overflowed(s) ?
2695 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2696}
2697EXPORT_SYMBOL_GPL(trace_handle_return);
2698
2699static unsigned short migration_disable_value(void)
2700{
2701#if defined(CONFIG_SMP)
2702 return current->migration_disabled;
2703#else
2704 return 0;
2705#endif
2706}
2707
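/*
 * Build the packed trace_ctx word recorded with each event: bits 0-3 hold
 * the preemption count (clamped to 15), bits 4-7 the migration-disable
 * depth (clamped to 15), and bits 16 and up the TRACE_FLAG_* bits for the
 * current context.
 */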
2708unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2709{
2710 unsigned int trace_flags = irqs_status;
2711 unsigned int pc;
2712
2713 pc = preempt_count();
2714
2715 if (pc & NMI_MASK)
2716 trace_flags |= TRACE_FLAG_NMI;
2717 if (pc & HARDIRQ_MASK)
2718 trace_flags |= TRACE_FLAG_HARDIRQ;
2719 if (in_serving_softirq())
2720 trace_flags |= TRACE_FLAG_SOFTIRQ;
2721 if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2722 trace_flags |= TRACE_FLAG_BH_OFF;
2723
2724 if (tif_need_resched())
2725 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2726 if (test_preempt_need_resched())
2727 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2728 return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2729 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2730}
2731
2732struct ring_buffer_event *
2733trace_buffer_lock_reserve(struct trace_buffer *buffer,
2734 int type,
2735 unsigned long len,
2736 unsigned int trace_ctx)
2737{
2738 return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2739}
2740
2741DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2742DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2743static int trace_buffered_event_ref;
2744
2745/**
2746 * trace_buffered_event_enable - enable buffering events
2747 *
 * When events are being filtered, it is quicker to write the event data
 * into a temporary buffer if there is a good chance that it will not be
 * committed.  Discarding an event from the ring buffer is not as fast as
 * committing one, and is much slower than copying it on a filter match.
2753 *
2754 * When an event is to be filtered, allocate per cpu buffers to
2755 * write the event data into, and if the event is filtered and discarded
2756 * it is simply dropped, otherwise, the entire data is to be committed
2757 * in one shot.
2758 */
2759void trace_buffered_event_enable(void)
2760{
2761 struct ring_buffer_event *event;
2762 struct page *page;
2763 int cpu;
2764
2765 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2766
2767 if (trace_buffered_event_ref++)
2768 return;
2769
2770 for_each_tracing_cpu(cpu) {
		page = alloc_pages_node(cpu_to_node(cpu),
					GFP_KERNEL | __GFP_NORETRY, 0);
2773 if (!page)
2774 goto failed;
2775
2776 event = page_address(page);
2777 memset(event, 0, sizeof(*event));
2778
2779 per_cpu(trace_buffered_event, cpu) = event;
2780
2781 preempt_disable();
2782 if (cpu == smp_processor_id() &&
2783 __this_cpu_read(trace_buffered_event) !=
2784 per_cpu(trace_buffered_event, cpu))
2785 WARN_ON_ONCE(1);
2786 preempt_enable();
2787 }
2788
2789 return;
2790 failed:
2791 trace_buffered_event_disable();
2792}
2793
2794static void enable_trace_buffered_event(void *data)
2795{
2796 /* Probably not needed, but do it anyway */
2797 smp_rmb();
2798 this_cpu_dec(trace_buffered_event_cnt);
2799}
2800
2801static void disable_trace_buffered_event(void *data)
2802{
2803 this_cpu_inc(trace_buffered_event_cnt);
2804}
2805
2806/**
2807 * trace_buffered_event_disable - disable buffering events
2808 *
2809 * When a filter is removed, it is faster to not use the buffered
2810 * events, and to commit directly into the ring buffer. Free up
2811 * the temp buffers when there are no more users. This requires
2812 * special synchronization with current events.
2813 */
2814void trace_buffered_event_disable(void)
2815{
2816 int cpu;
2817
2818 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2819
2820 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2821 return;
2822
2823 if (--trace_buffered_event_ref)
2824 return;
2825
2826 preempt_disable();
2827 /* For each CPU, set the buffer as used. */
	smp_call_function_many(tracing_buffer_mask,
			       disable_trace_buffered_event, NULL, 1);
2830 preempt_enable();
2831
2832 /* Wait for all current users to finish */
2833 synchronize_rcu();
2834
2835 for_each_tracing_cpu(cpu) {
2836 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2837 per_cpu(trace_buffered_event, cpu) = NULL;
2838 }
2839 /*
2840 * Make sure trace_buffered_event is NULL before clearing
2841 * trace_buffered_event_cnt.
2842 */
2843 smp_wmb();
2844
2845 preempt_disable();
2846 /* Do the work on each cpu */
	smp_call_function_many(tracing_buffer_mask,
			       enable_trace_buffered_event, NULL, 1);
2849 preempt_enable();
2850}
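
/*
 * Sketch of the expected usage (illustrative only, not code from this
 * file): a caller that installs an event filter pairs the two helpers
 * under event_mutex, roughly as
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	...			// install the filter
 *	mutex_unlock(&event_mutex);
 *
 * and calls trace_buffered_event_disable() from the matching teardown
 * path once the filter is removed.
 */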
2851
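/*
 * temp_buffer is a fallback ring buffer used by
 * trace_event_buffer_lock_reserve() when tracing is off but a trigger
 * still needs to look at the event data; entries written to it are never
 * read back as trace output.
 */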
2852static struct trace_buffer *temp_buffer;
2853
2854struct ring_buffer_event *
2855trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2856 struct trace_event_file *trace_file,
2857 int type, unsigned long len,
2858 unsigned int trace_ctx)
2859{
2860 struct ring_buffer_event *entry;
2861 struct trace_array *tr = trace_file->tr;
2862 int val;
2863
2864 *current_rb = tr->array_buffer.buffer;
2865
2866 if (!tr->no_filter_buffering_ref &&
2867 (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2868 preempt_disable_notrace();
2869 /*
2870 * Filtering is on, so try to use the per cpu buffer first.
2871 * This buffer will simulate a ring_buffer_event,
2872 * where the type_len is zero and the array[0] will
2873 * hold the full length.
		 * (see include/linux/ring_buffer.h for details on
2875 * how the ring_buffer_event is structured).
2876 *
2877 * Using a temp buffer during filtering and copying it
2878 * on a matched filter is quicker than writing directly
2879 * into the ring buffer and then discarding it when
2880 * it doesn't match. That is because the discard
2881 * requires several atomic operations to get right.
		 * Copying on a match and doing nothing on a failed match
		 * is still quicker than writing directly and then having
		 * to discard from the ring buffer when the filter fails.
2885 */
2886 if ((entry = __this_cpu_read(trace_buffered_event))) {
2887 int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2888
2889 val = this_cpu_inc_return(trace_buffered_event_cnt);
2890
2891 /*
2892 * Preemption is disabled, but interrupts and NMIs
2893 * can still come in now. If that happens after
2894 * the above increment, then it will have to go
2895 * back to the old method of allocating the event
2896 * on the ring buffer, and if the filter fails, it
2897 * will have to call ring_buffer_discard_commit()
2898 * to remove it.
2899 *
2900 * Need to also check the unlikely case that the
2901 * length is bigger than the temp buffer size.
2902 * If that happens, then the reserve is pretty much
2903 * guaranteed to fail, as the ring buffer currently
2904 * only allows events less than a page. But that may
2905 * change in the future, so let the ring buffer reserve
2906 * handle the failure in that case.
2907 */
2908 if (val == 1 && likely(len <= max_len)) {
				trace_event_setup(entry, type, trace_ctx);
2910 entry->array[0] = len;
2911 /* Return with preemption disabled */
2912 return entry;
2913 }
2914 this_cpu_dec(trace_buffered_event_cnt);
2915 }
2916 /* __trace_buffer_lock_reserve() disables preemption */
2917 preempt_enable_notrace();
2918 }
2919
	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
					    trace_ctx);
2922 /*
2923 * If tracing is off, but we have triggers enabled
2924 * we still need to look at the event data. Use the temp_buffer
	 * to store the trace event for the trigger to use. It's recursion
	 * safe and will not be recorded anywhere.
2927 */
2928 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2929 *current_rb = temp_buffer;
		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
						    trace_ctx);
2932 }
2933 return entry;
2934}
2935EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2936
2937static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2938static DEFINE_MUTEX(tracepoint_printk_mutex);
2939
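/*
 * Print a single trace event straight to printk().  Used when tracepoints
 * are being piped to the console (see tracepoint_printk_key and the
 * tracepoint_printk sysctl handler below).
 */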
2940static void output_printk(struct trace_event_buffer *fbuffer)
2941{
2942 struct trace_event_call *event_call;
2943 struct trace_event_file *file;
2944 struct trace_event *event;
2945 unsigned long flags;
2946 struct trace_iterator *iter = tracepoint_print_iter;
2947
2948 /* We should never get here if iter is NULL */
2949 if (WARN_ON_ONCE(!iter))
2950 return;
2951
2952 event_call = fbuffer->trace_file->event_call;
2953 if (!event_call || !event_call->event.funcs ||
2954 !event_call->event.funcs->trace)
2955 return;
2956
2957 file = fbuffer->trace_file;
2958 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2959 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
	     !filter_match_preds(file->filter, fbuffer->entry)))
2961 return;
2962
2963 event = &fbuffer->trace_file->event_call->event;
2964
2965 raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
	trace_seq_init(&iter->seq);
2967 iter->ent = fbuffer->entry;
2968 event_call->event.funcs->trace(iter, 0, event);
	trace_seq_putc(&iter->seq, 0);
2970 printk("%s", iter->seq.buffer);
2971
2972 raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2973}
2974
2975int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2976 void *buffer, size_t *lenp,
2977 loff_t *ppos)
2978{
2979 int save_tracepoint_printk;
2980 int ret;
2981
2982 mutex_lock(&tracepoint_printk_mutex);
2983 save_tracepoint_printk = tracepoint_printk;
2984
2985 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2986
2987 /*
2988 * This will force exiting early, as tracepoint_printk
	 * is always zero when tracepoint_print_iter is not allocated
2990 */
2991 if (!tracepoint_print_iter)
2992 tracepoint_printk = 0;
2993
2994 if (save_tracepoint_printk == tracepoint_printk)
2995 goto out;
2996
	if (tracepoint_printk)
		static_key_enable(&tracepoint_printk_key.key);
	else
		static_key_disable(&tracepoint_printk_key.key);
3001
3002 out:
3003 mutex_unlock(lock: &tracepoint_printk_mutex);
3004
3005 return ret;
3006}
3007
3008void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
3009{
3010 enum event_trigger_type tt = ETT_NONE;
3011 struct trace_event_file *file = fbuffer->trace_file;
3012
	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
					 fbuffer->entry, &tt))
3015 goto discard;
3016
	if (static_key_false(&tracepoint_printk_key.key))
3018 output_printk(fbuffer);
3019
3020 if (static_branch_unlikely(&trace_event_exports_enabled))
		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
3022
	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
					fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
3025
3026discard:
3027 if (tt)
3028 event_triggers_post_call(file, tt);
3029
3030}
3031EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
3032
3033/*
3034 * Skip 3:
3035 *
3036 * trace_buffer_unlock_commit_regs()
3037 * trace_event_buffer_commit()
3038 * trace_event_raw_event_xxx()
3039 */
3040# define STACK_SKIP 3
3041
3042void trace_buffer_unlock_commit_regs(struct trace_array *tr,
3043 struct trace_buffer *buffer,
3044 struct ring_buffer_event *event,
3045 unsigned int trace_ctx,
3046 struct pt_regs *regs)
3047{
3048 __buffer_unlock_commit(buffer, event);
3049
3050 /*
3051 * If regs is not set, then skip the necessary functions.
3052 * Note, we can still get here via blktrace, wakeup tracer
3053 * and mmiotrace, but that's ok if they lose a function or
3054 * two. They are not that meaningful.
3055 */
	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
3057 ftrace_trace_userstack(tr, buffer, trace_ctx);
3058}
3059
3060/*
3061 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
3062 */
3063void
3064trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
3065 struct ring_buffer_event *event)
3066{
3067 __buffer_unlock_commit(buffer, event);
3068}
3069
3070void
3071trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3072 parent_ip, unsigned int trace_ctx)
3073{
3074 struct trace_event_call *call = &event_function;
3075 struct trace_buffer *buffer = tr->array_buffer.buffer;
3076 struct ring_buffer_event *event;
3077 struct ftrace_entry *entry;
3078
	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
					    trace_ctx);
3081 if (!event)
3082 return;
3083 entry = ring_buffer_event_data(event);
3084 entry->ip = ip;
3085 entry->parent_ip = parent_ip;
3086
	if (!call_filter_check_discard(call, entry, buffer, event)) {
3088 if (static_branch_unlikely(&trace_function_exports_enabled))
3089 ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3090 __buffer_unlock_commit(buffer, event);
3091 }
3092}
3093
3094#ifdef CONFIG_STACKTRACE
3095
3096/* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3097#define FTRACE_KSTACK_NESTING 4
3098
3099#define FTRACE_KSTACK_ENTRIES (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3100
3101struct ftrace_stack {
3102 unsigned long calls[FTRACE_KSTACK_ENTRIES];
3103};
3104
3105
3106struct ftrace_stacks {
3107 struct ftrace_stack stacks[FTRACE_KSTACK_NESTING];
3108};
3109
3110static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3111static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3112
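/*
 * Record a kernel stack trace into the ring buffer.  A small per-CPU
 * array of stacks is used so that normal, softirq, irq and NMI context
 * can each take their own slot without corrupting one another.
 */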
3113static void __ftrace_trace_stack(struct trace_buffer *buffer,
3114 unsigned int trace_ctx,
3115 int skip, struct pt_regs *regs)
3116{
3117 struct trace_event_call *call = &event_kernel_stack;
3118 struct ring_buffer_event *event;
3119 unsigned int size, nr_entries;
3120 struct ftrace_stack *fstack;
3121 struct stack_entry *entry;
3122 int stackidx;
3123
	/*
	 * Add one, for this function and the call to stack_trace_save().
	 * If regs is set, then these functions will not be in the way.
3127 */
3128#ifndef CONFIG_UNWINDER_ORC
3129 if (!regs)
3130 skip++;
3131#endif
3132
3133 preempt_disable_notrace();
3134
3135 stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3136
3137 /* This should never happen. If it does, yell once and skip */
3138 if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3139 goto out;
3140
3141 /*
3142 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3143 * interrupt will either see the value pre increment or post
3144 * increment. If the interrupt happens pre increment it will have
3145 * restored the counter when it returns. We just need a barrier to
3146 * keep gcc from moving things around.
3147 */
3148 barrier();
3149
3150 fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3151 size = ARRAY_SIZE(fstack->calls);
3152
	if (regs) {
		nr_entries = stack_trace_save_regs(regs, fstack->calls,
						   size, skip);
	} else {
		nr_entries = stack_trace_save(fstack->calls, size, skip);
	}
3159
	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
					    struct_size(entry, caller, nr_entries),
					    trace_ctx);
3163 if (!event)
3164 goto out;
3165 entry = ring_buffer_event_data(event);
3166
3167 entry->size = nr_entries;
3168 memcpy(&entry->caller, fstack->calls,
3169 flex_array_size(entry, caller, nr_entries));
3170
	if (!call_filter_check_discard(call, entry, buffer, event))
3172 __buffer_unlock_commit(buffer, event);
3173
3174 out:
3175 /* Again, don't let gcc optimize things here */
3176 barrier();
3177 __this_cpu_dec(ftrace_stack_reserve);
3178 preempt_enable_notrace();
3179
3180}
3181
3182static inline void ftrace_trace_stack(struct trace_array *tr,
3183 struct trace_buffer *buffer,
3184 unsigned int trace_ctx,
3185 int skip, struct pt_regs *regs)
3186{
3187 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3188 return;
3189
3190 __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3191}
3192
3193void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3194 int skip)
3195{
3196 struct trace_buffer *buffer = tr->array_buffer.buffer;
3197
3198 if (rcu_is_watching()) {
3199 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3200 return;
3201 }
3202
3203 if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3204 return;
3205
3206 /*
3207 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3208 * but if the above rcu_is_watching() failed, then the NMI
3209 * triggered someplace critical, and ct_irq_enter() should
3210 * not be called from NMI.
3211 */
3212 if (unlikely(in_nmi()))
3213 return;
3214
3215 ct_irq_enter_irqson();
3216 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3217 ct_irq_exit_irqson();
3218}
3219
3220/**
3221 * trace_dump_stack - record a stack back trace in the trace buffer
3222 * @skip: Number of functions to skip (helper handlers)
3223 */
3224void trace_dump_stack(int skip)
3225{
3226 if (tracing_disabled || tracing_selftest_running)
3227 return;
3228
3229#ifndef CONFIG_UNWINDER_ORC
3230 /* Skip 1 to skip this function. */
3231 skip++;
3232#endif
	__ftrace_trace_stack(global_trace.array_buffer.buffer,
			     tracing_gen_ctx(), skip, NULL);
3235}
3236EXPORT_SYMBOL_GPL(trace_dump_stack);
3237
3238#ifdef CONFIG_USER_STACKTRACE_SUPPORT
3239static DEFINE_PER_CPU(int, user_stack_count);
3240
3241static void
3242ftrace_trace_userstack(struct trace_array *tr,
3243 struct trace_buffer *buffer, unsigned int trace_ctx)
3244{
3245 struct trace_event_call *call = &event_user_stack;
3246 struct ring_buffer_event *event;
3247 struct userstack_entry *entry;
3248
3249 if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3250 return;
3251
3252 /*
3253 * NMIs can not handle page faults, even with fix ups.
3254 * The save user stack can (and often does) fault.
3255 */
3256 if (unlikely(in_nmi()))
3257 return;
3258
3259 /*
3260 * prevent recursion, since the user stack tracing may
3261 * trigger other kernel events.
3262 */
3263 preempt_disable();
3264 if (__this_cpu_read(user_stack_count))
3265 goto out;
3266
3267 __this_cpu_inc(user_stack_count);
3268
	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
					    sizeof(*entry), trace_ctx);
3271 if (!event)
3272 goto out_drop_count;
3273 entry = ring_buffer_event_data(event);
3274
3275 entry->tgid = current->tgid;
3276 memset(&entry->caller, 0, sizeof(entry->caller));
3277
	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
	if (!call_filter_check_discard(call, entry, buffer, event))
3280 __buffer_unlock_commit(buffer, event);
3281
3282 out_drop_count:
3283 __this_cpu_dec(user_stack_count);
3284 out:
3285 preempt_enable();
3286}
3287#else /* CONFIG_USER_STACKTRACE_SUPPORT */
3288static void ftrace_trace_userstack(struct trace_array *tr,
3289 struct trace_buffer *buffer,
3290 unsigned int trace_ctx)
3291{
3292}
3293#endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3294
3295#endif /* CONFIG_STACKTRACE */
3296
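/*
 * A FUNC_REPEATS entry stores the time since the last recorded call as a
 * 64-bit delta split into two 32-bit halves (top/bottom), which is what
 * func_repeats_set_delta_ts() below packs up.
 */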
3297static inline void
3298func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3299 unsigned long long delta)
3300{
3301 entry->bottom_delta_ts = delta & U32_MAX;
3302 entry->top_delta_ts = (delta >> 32);
3303}
3304
3305void trace_last_func_repeats(struct trace_array *tr,
3306 struct trace_func_repeats *last_info,
3307 unsigned int trace_ctx)
3308{
3309 struct trace_buffer *buffer = tr->array_buffer.buffer;
3310 struct func_repeats_entry *entry;
3311 struct ring_buffer_event *event;
3312 u64 delta;
3313
	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
					    sizeof(*entry), trace_ctx);
3316 if (!event)
3317 return;
3318
3319 delta = ring_buffer_event_time_stamp(buffer, event) -
3320 last_info->ts_last_call;
3321
3322 entry = ring_buffer_event_data(event);
3323 entry->ip = last_info->ip;
3324 entry->parent_ip = last_info->parent_ip;
3325 entry->count = last_info->count;
3326 func_repeats_set_delta_ts(entry, delta);
3327
3328 __buffer_unlock_commit(buffer, event);
3329}
3330
3331/* created for use with alloc_percpu */
3332struct trace_buffer_struct {
3333 int nesting;
3334 char buffer[4][TRACE_BUF_SIZE];
3335};
3336
3337static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3338
3339/*
3340 * This allows for lockless recording. If we're nested too deeply, then
3341 * this returns NULL.
3342 */
3343static char *get_trace_buf(void)
3344{
3345 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3346
3347 if (!trace_percpu_buffer || buffer->nesting >= 4)
3348 return NULL;
3349
3350 buffer->nesting++;
3351
3352 /* Interrupts must see nesting incremented before we use the buffer */
3353 barrier();
3354 return &buffer->buffer[buffer->nesting - 1][0];
3355}
3356
3357static void put_trace_buf(void)
3358{
3359 /* Don't let the decrement of nesting leak before this */
3360 barrier();
3361 this_cpu_dec(trace_percpu_buffer->nesting);
3362}
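
/*
 * get_trace_buf() and put_trace_buf() must be used in pairs with
 * preemption disabled, as trace_vbprintk() and __trace_array_vprintk()
 * below do via preempt_disable_notrace().
 */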
3363
3364static int alloc_percpu_trace_buffer(void)
3365{
3366 struct trace_buffer_struct __percpu *buffers;
3367
3368 if (trace_percpu_buffer)
3369 return 0;
3370
3371 buffers = alloc_percpu(struct trace_buffer_struct);
3372 if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3373 return -ENOMEM;
3374
3375 trace_percpu_buffer = buffers;
3376 return 0;
3377}
3378
3379static int buffers_allocated;
3380
3381void trace_printk_init_buffers(void)
3382{
3383 if (buffers_allocated)
3384 return;
3385
3386 if (alloc_percpu_trace_buffer())
3387 return;
3388
3389 /* trace_printk() is for debug use only. Don't use it in production. */
3390
3391 pr_warn("\n");
3392 pr_warn("**********************************************************\n");
3393 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3394 pr_warn("** **\n");
3395 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
3396 pr_warn("** **\n");
3397 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
3398 pr_warn("** unsafe for production use. **\n");
3399 pr_warn("** **\n");
3400 pr_warn("** If you see this message and you are not debugging **\n");
3401 pr_warn("** the kernel, report this immediately to your vendor! **\n");
3402 pr_warn("** **\n");
3403 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3404 pr_warn("**********************************************************\n");
3405
3406 /* Expand the buffers to set size */
	tracing_update_buffers(&global_trace);
3408
3409 buffers_allocated = 1;
3410
3411 /*
3412 * trace_printk_init_buffers() can be called by modules.
3413 * If that happens, then we need to start cmdline recording
3414 * directly here. If the global_trace.buffer is already
3415 * allocated here, then this was called by module code.
3416 */
3417 if (global_trace.array_buffer.buffer)
3418 tracing_start_cmdline_record();
3419}
3420EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3421
3422void trace_printk_start_comm(void)
3423{
3424 /* Start tracing comms if trace printk is set */
3425 if (!buffers_allocated)
3426 return;
3427 tracing_start_cmdline_record();
3428}
3429
3430static void trace_printk_start_stop_comm(int enabled)
3431{
3432 if (!buffers_allocated)
3433 return;
3434
3435 if (enabled)
3436 tracing_start_cmdline_record();
3437 else
3438 tracing_stop_cmdline_record();
3439}
3440
3441/**
3442 * trace_vbprintk - write binary msg to tracing buffer
3443 * @ip: The address of the caller
3444 * @fmt: The string format to write to the buffer
3445 * @args: Arguments for @fmt
3446 */
3447int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3448{
3449 struct trace_event_call *call = &event_bprint;
3450 struct ring_buffer_event *event;
3451 struct trace_buffer *buffer;
3452 struct trace_array *tr = &global_trace;
3453 struct bprint_entry *entry;
3454 unsigned int trace_ctx;
3455 char *tbuffer;
3456 int len = 0, size;
3457
3458 if (unlikely(tracing_selftest_running || tracing_disabled))
3459 return 0;
3460
3461 /* Don't pollute graph traces with trace_vprintk internals */
3462 pause_graph_tracing();
3463
3464 trace_ctx = tracing_gen_ctx();
3465 preempt_disable_notrace();
3466
3467 tbuffer = get_trace_buf();
3468 if (!tbuffer) {
3469 len = 0;
3470 goto out_nobuffer;
3471 }
3472
	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3474
3475 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3476 goto out_put;
3477
3478 size = sizeof(*entry) + sizeof(u32) * len;
3479 buffer = tr->array_buffer.buffer;
3480 ring_buffer_nest_start(buffer);
	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
					    trace_ctx);
3483 if (!event)
3484 goto out;
3485 entry = ring_buffer_event_data(event);
3486 entry->ip = ip;
3487 entry->fmt = fmt;
3488
3489 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
	if (!call_filter_check_discard(call, entry, buffer, event)) {
		__buffer_unlock_commit(buffer, event);
		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3493 }
3494
3495out:
3496 ring_buffer_nest_end(buffer);
3497out_put:
3498 put_trace_buf();
3499
3500out_nobuffer:
3501 preempt_enable_notrace();
3502 unpause_graph_tracing();
3503
3504 return len;
3505}
3506EXPORT_SYMBOL_GPL(trace_vbprintk);
3507
3508__printf(3, 0)
3509static int
3510__trace_array_vprintk(struct trace_buffer *buffer,
3511 unsigned long ip, const char *fmt, va_list args)
3512{
3513 struct trace_event_call *call = &event_print;
3514 struct ring_buffer_event *event;
3515 int len = 0, size;
3516 struct print_entry *entry;
3517 unsigned int trace_ctx;
3518 char *tbuffer;
3519
3520 if (tracing_disabled)
3521 return 0;
3522
3523 /* Don't pollute graph traces with trace_vprintk internals */
3524 pause_graph_tracing();
3525
3526 trace_ctx = tracing_gen_ctx();
3527 preempt_disable_notrace();
3528
3529
3530 tbuffer = get_trace_buf();
3531 if (!tbuffer) {
3532 len = 0;
3533 goto out_nobuffer;
3534 }
3535
	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3537
3538 size = sizeof(*entry) + len + 1;
3539 ring_buffer_nest_start(buffer);
	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
					    trace_ctx);
3542 if (!event)
3543 goto out;
3544 entry = ring_buffer_event_data(event);
3545 entry->ip = ip;
3546
3547 memcpy(&entry->buf, tbuffer, len + 1);
	if (!call_filter_check_discard(call, entry, buffer, event)) {
		__buffer_unlock_commit(buffer, event);
		ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3551 }
3552
3553out:
3554 ring_buffer_nest_end(buffer);
3555 put_trace_buf();
3556
3557out_nobuffer:
3558 preempt_enable_notrace();
3559 unpause_graph_tracing();
3560
3561 return len;
3562}
3563
3564__printf(3, 0)
3565int trace_array_vprintk(struct trace_array *tr,
3566 unsigned long ip, const char *fmt, va_list args)
3567{
3568 if (tracing_selftest_running && tr == &global_trace)
3569 return 0;
3570
	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3572}
3573
3574/**
3575 * trace_array_printk - Print a message to a specific instance
3576 * @tr: The instance trace_array descriptor
3577 * @ip: The instruction pointer that this is called from.
3578 * @fmt: The format to print (printf format)
3579 *
3580 * If a subsystem sets up its own instance, they have the right to
3581 * printk strings into their tracing instance buffer using this
3582 * function. Note, this function will not write into the top level
3583 * buffer (use trace_printk() for that), as writing into the top level
3584 * buffer should only have events that can be individually disabled.
3585 * trace_printk() is only used for debugging a kernel, and should not
3586 * be ever incorporated in normal use.
3587 *
3588 * trace_array_printk() can be used, as it will not add noise to the
3589 * top level tracing buffer.
3590 *
3591 * Note, trace_array_init_printk() must be called on @tr before this
3592 * can be used.
3593 */
3594__printf(3, 0)
3595int trace_array_printk(struct trace_array *tr,
3596 unsigned long ip, const char *fmt, ...)
3597{
3598 int ret;
3599 va_list ap;
3600
3601 if (!tr)
3602 return -ENOENT;
3603
3604 /* This is only allowed for created instances */
3605 if (tr == &global_trace)
3606 return 0;
3607
3608 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3609 return 0;
3610
3611 va_start(ap, fmt);
	ret = trace_array_vprintk(tr, ip, fmt, ap);
3613 va_end(ap);
3614 return ret;
3615}
3616EXPORT_SYMBOL_GPL(trace_array_printk);
3617
3618/**
3619 * trace_array_init_printk - Initialize buffers for trace_array_printk()
3620 * @tr: The trace array to initialize the buffers for
3621 *
3622 * As trace_array_printk() only writes into instances, they are OK to
3623 * have in the kernel (unlike trace_printk()). This needs to be called
3624 * before trace_array_printk() can be used on a trace_array.
3625 */
3626int trace_array_init_printk(struct trace_array *tr)
3627{
3628 if (!tr)
3629 return -ENOENT;
3630
3631 /* This is only allowed for created instances */
3632 if (tr == &global_trace)
3633 return -EINVAL;
3634
3635 return alloc_percpu_trace_buffer();
3636}
3637EXPORT_SYMBOL_GPL(trace_array_init_printk);
3638
3639__printf(3, 4)
3640int trace_array_printk_buf(struct trace_buffer *buffer,
3641 unsigned long ip, const char *fmt, ...)
3642{
3643 int ret;
3644 va_list ap;
3645
3646 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3647 return 0;
3648
3649 va_start(ap, fmt);
	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3651 va_end(ap);
3652 return ret;
3653}
3654
3655__printf(2, 0)
3656int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3657{
	return trace_array_vprintk(&global_trace, ip, fmt, args);
3659}
3660EXPORT_SYMBOL_GPL(trace_vprintk);
3661
3662static void trace_iterator_increment(struct trace_iterator *iter)
3663{
	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);

	iter->idx++;
	if (buf_iter)
		ring_buffer_iter_advance(buf_iter);
3669}
3670
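/*
 * Peek at the next entry for @cpu without consuming it, either through
 * the buffer iterator when one exists or directly from the live ring
 * buffer otherwise.
 */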
3671static struct trace_entry *
3672peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3673 unsigned long *lost_events)
3674{
3675 struct ring_buffer_event *event;
3676 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3677
3678 if (buf_iter) {
		event = ring_buffer_iter_peek(buf_iter, ts);
		if (lost_events)
			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
				(unsigned long)-1 : 0;
	} else {
		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
					 lost_events);
3686 }
3687
3688 if (event) {
3689 iter->ent_size = ring_buffer_event_length(event);
3690 return ring_buffer_event_data(event);
3691 }
3692 iter->ent_size = 0;
3693 return NULL;
3694}
3695
3696static struct trace_entry *
3697__find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3698 unsigned long *missing_events, u64 *ent_ts)
3699{
3700 struct trace_buffer *buffer = iter->array_buffer->buffer;
3701 struct trace_entry *ent, *next = NULL;
3702 unsigned long lost_events = 0, next_lost = 0;
3703 int cpu_file = iter->cpu_file;
3704 u64 next_ts = 0, ts;
3705 int next_cpu = -1;
3706 int next_size = 0;
3707 int cpu;
3708
3709 /*
3710 * If we are in a per_cpu trace file, don't bother by iterating over
3711 * all cpu and peek directly.
3712 */
3713 if (cpu_file > RING_BUFFER_ALL_CPUS) {
		if (ring_buffer_empty_cpu(buffer, cpu_file))
			return NULL;
		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3717 if (ent_cpu)
3718 *ent_cpu = cpu_file;
3719
3720 return ent;
3721 }
3722
3723 for_each_tracing_cpu(cpu) {
3724
3725 if (ring_buffer_empty_cpu(buffer, cpu))
3726 continue;
3727
		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3729
3730 /*
3731 * Pick the entry with the smallest timestamp:
3732 */
3733 if (ent && (!next || ts < next_ts)) {
3734 next = ent;
3735 next_cpu = cpu;
3736 next_ts = ts;
3737 next_lost = lost_events;
3738 next_size = iter->ent_size;
3739 }
3740 }
3741
3742 iter->ent_size = next_size;
3743
3744 if (ent_cpu)
3745 *ent_cpu = next_cpu;
3746
3747 if (ent_ts)
3748 *ent_ts = next_ts;
3749
3750 if (missing_events)
3751 *missing_events = next_lost;
3752
3753 return next;
3754}
3755
3756#define STATIC_FMT_BUF_SIZE 128
3757static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3758
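/*
 * Grow iter->fmt in STATIC_FMT_BUF_SIZE steps.  Returns NULL when the
 * static fallback buffer is in use (tp_printk or ftrace_dump), as it is
 * not safe to call krealloc() there.
 */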
3759char *trace_iter_expand_format(struct trace_iterator *iter)
3760{
3761 char *tmp;
3762
3763 /*
3764 * iter->tr is NULL when used with tp_printk, which makes
3765 * this get called where it is not safe to call krealloc().
3766 */
3767 if (!iter->tr || iter->fmt == static_fmt_buf)
3768 return NULL;
3769
	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
		       GFP_KERNEL);
3772 if (tmp) {
3773 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3774 iter->fmt = tmp;
3775 }
3776
3777 return tmp;
3778}
3779
3780/* Returns true if the string is safe to dereference from an event */
3781static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3782 bool star, int len)
3783{
3784 unsigned long addr = (unsigned long)str;
3785 struct trace_event *trace_event;
3786 struct trace_event_call *event;
3787
3788 /* Ignore strings with no length */
3789 if (star && !len)
3790 return true;
3791
3792 /* OK if part of the event data */
3793 if ((addr >= (unsigned long)iter->ent) &&
3794 (addr < (unsigned long)iter->ent + iter->ent_size))
3795 return true;
3796
3797 /* OK if part of the temp seq buffer */
3798 if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3799 (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3800 return true;
3801
3802 /* Core rodata can not be freed */
3803 if (is_kernel_rodata(addr))
3804 return true;
3805
3806 if (trace_is_tracepoint_string(str))
3807 return true;
3808
3809 /*
3810 * Now this could be a module event, referencing core module
3811 * data, which is OK.
3812 */
3813 if (!iter->ent)
3814 return false;
3815
	trace_event = ftrace_find_event(iter->ent->type);
3817 if (!trace_event)
3818 return false;
3819
3820 event = container_of(trace_event, struct trace_event_call, event);
3821 if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3822 return false;
3823
3824 /* Would rather have rodata, but this will suffice */
	if (within_module_core(addr, event->module))
3826 return true;
3827
3828 return false;
3829}
3830
3831static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3832
3833static int test_can_verify_check(const char *fmt, ...)
3834{
3835 char buf[16];
3836 va_list ap;
3837 int ret;
3838
	/*
	 * The verifier depends on vsnprintf() modifying the va_list passed
	 * to it, i.e. on the va_list being passed by reference. Some
	 * architectures (like x86_32) pass it by value, in which case
	 * vsnprintf() does not advance the caller's va_list, and the
	 * verifier would then need to understand every conversion that
	 * vsnprintf() can consume. If the va_list is passed by value,
	 * the verifier is disabled.
	 */
3848 va_start(ap, fmt);
	vsnprintf(buf, 16, "%d", ap);
3850 ret = va_arg(ap, int);
3851 va_end(ap);
3852
3853 return ret;
3854}
3855
3856static void test_can_verify(void)
3857{
	if (!test_can_verify_check("%d %d", 0, 1)) {
3859 pr_info("trace event string verifier disabled\n");
3860 static_branch_inc(&trace_no_verify);
3861 }
3862}
3863
3864/**
3865 * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3866 * @iter: The iterator that holds the seq buffer and the event being printed
3867 * @fmt: The format used to print the event
3868 * @ap: The va_list holding the data to print from @fmt.
3869 *
3870 * This writes the data into the @iter->seq buffer using the data from
3871 * @fmt and @ap. If the format has a %s, then the source of the string
3872 * is examined to make sure it is safe to print, otherwise it will
3873 * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3874 * pointer.
3875 */
3876void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3877 va_list ap)
3878{
3879 const char *p = fmt;
3880 const char *str;
3881 int i, j;
3882
3883 if (WARN_ON_ONCE(!fmt))
3884 return;
3885
3886 if (static_branch_unlikely(&trace_no_verify))
3887 goto print;
3888
3889 /* Don't bother checking when doing a ftrace_dump() */
3890 if (iter->fmt == static_fmt_buf)
3891 goto print;
3892
3893 while (*p) {
3894 bool star = false;
3895 int len = 0;
3896
3897 j = 0;
3898
3899 /* We only care about %s and variants */
3900 for (i = 0; p[i]; i++) {
3901 if (i + 1 >= iter->fmt_size) {
3902 /*
3903 * If we can't expand the copy buffer,
3904 * just print it.
3905 */
3906 if (!trace_iter_expand_format(iter))
3907 goto print;
3908 }
3909
3910 if (p[i] == '\\' && p[i+1]) {
3911 i++;
3912 continue;
3913 }
3914 if (p[i] == '%') {
3915 /* Need to test cases like %08.*s */
3916 for (j = 1; p[i+j]; j++) {
				if (isdigit(p[i+j]) ||
				    p[i+j] == '.')
3919 continue;
3920 if (p[i+j] == '*') {
3921 star = true;
3922 continue;
3923 }
3924 break;
3925 }
3926 if (p[i+j] == 's')
3927 break;
3928 star = false;
3929 }
3930 j = 0;
3931 }
3932 /* If no %s found then just print normally */
3933 if (!p[i])
3934 break;
3935
3936 /* Copy up to the %s, and print that */
3937 strncpy(p: iter->fmt, q: p, size: i);
3938 iter->fmt[i] = '\0';
3939 trace_seq_vprintf(s: &iter->seq, fmt: iter->fmt, args: ap);
3940
3941 /*
3942 * If iter->seq is full, the above call no longer guarantees
3943 * that ap is in sync with fmt processing, and further calls
3944 * to va_arg() can return wrong positional arguments.
3945 *
3946 * Ensure that ap is no longer used in this case.
3947 */
3948 if (iter->seq.full) {
3949 p = "";
3950 break;
3951 }
3952
3953 if (star)
3954 len = va_arg(ap, int);
3955
3956 /* The ap now points to the string data of the %s */
3957 str = va_arg(ap, const char *);
3958
3959 /*
3960 * If you hit this warning, it is likely that the
3961 * trace event in question used %s on a string that
3962 * was saved at the time of the event, but may not be
3963 * around when the trace is read. Use __string(),
3964 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3965 * instead. See samples/trace_events/trace-events-sample.h
3966 * for reference.
3967 */
3968 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3969 "fmt: '%s' current_buffer: '%s'",
3970 fmt, seq_buf_str(&iter->seq.seq))) {
3971 int ret;
3972
3973 /* Try to safely read the string */
3974 if (star) {
3975 if (len + 1 > iter->fmt_size)
3976 len = iter->fmt_size - 1;
3977 if (len < 0)
3978 len = 0;
3979 ret = copy_from_kernel_nofault(dst: iter->fmt, src: str, size: len);
3980 iter->fmt[len] = 0;
3981 star = false;
3982 } else {
3983 ret = strncpy_from_kernel_nofault(dst: iter->fmt, unsafe_addr: str,
3984 count: iter->fmt_size);
3985 }
3986 if (ret < 0)
3987 trace_seq_printf(s: &iter->seq, fmt: "(0x%px)", str);
3988 else
3989 trace_seq_printf(s: &iter->seq, fmt: "(0x%px:%s)",
3990 str, iter->fmt);
3991 str = "[UNSAFE-MEMORY]";
3992 strcpy(p: iter->fmt, q: "%s");
3993 } else {
3994 strncpy(p: iter->fmt, q: p + i, size: j + 1);
3995 iter->fmt[j+1] = '\0';
3996 }
3997 if (star)
3998 trace_seq_printf(s: &iter->seq, fmt: iter->fmt, len, str);
3999 else
4000 trace_seq_printf(s: &iter->seq, fmt: iter->fmt, str);
4001
4002 p += i + j + 1;
4003 }
4004 print:
4005 if (*p)
4006 trace_seq_vprintf(s: &iter->seq, fmt: p, args: ap);
4007}
4008
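/*
 * Rewrite an event format string so that pointers are not hashed: when the
 * instance has the hash-ptr option cleared, the format is copied into
 * iter->fmt with every bare "%p" expanded to "%px", e.g. (illustrative
 * format only):
 *
 *	"comm=%s ptr=%p flags=%x"  ->  "comm=%s ptr=%px flags=%x"
 *
 * "%%" escapes are preserved, and "%ps"-style extensions are left alone
 * because the character following the 'p' is alphanumeric.
 */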
4009const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
4010{
4011 const char *p, *new_fmt;
4012 char *q;
4013
4014 if (WARN_ON_ONCE(!fmt))
4015 return fmt;
4016
4017 if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
4018 return fmt;
4019
4020 p = fmt;
4021 new_fmt = q = iter->fmt;
4022 while (*p) {
4023 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
4024 if (!trace_iter_expand_format(iter))
4025 return fmt;
4026
4027 q += iter->fmt - new_fmt;
4028 new_fmt = iter->fmt;
4029 }
4030
4031 *q++ = *p++;
4032
4033 /* Replace %p with %px */
4034 if (p[-1] == '%') {
4035 if (p[0] == '%') {
4036 *q++ = *p++;
4037 } else if (p[0] == 'p' && !isalnum(p[1])) {
4038 *q++ = *p++;
4039 *q++ = 'x';
4040 }
4041 }
4042 }
4043 *q = '\0';
4044
4045 return new_fmt;
4046}
4047
4048#define STATIC_TEMP_BUF_SIZE 128
4049static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
4050
4051/* Find the next real entry, without updating the iterator itself */
4052struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
4053 int *ent_cpu, u64 *ent_ts)
4054{
4055 /* __find_next_entry will reset ent_size */
4056 int ent_size = iter->ent_size;
4057 struct trace_entry *entry;
4058
	/*
	 * If called from ftrace_dump(), then the iter->temp buffer
	 * will be the static_temp_buf and not created from kmalloc.
	 * If the entry size is greater than the buffer, we cannot
	 * save it. Just return NULL in that case. This is only
	 * used to add markers when two consecutive events' time
	 * stamps have a large delta. See trace_print_lat_context().
	 */
4067 if (iter->temp == static_temp_buf &&
4068 STATIC_TEMP_BUF_SIZE < ent_size)
4069 return NULL;
4070
4071 /*
4072 * The __find_next_entry() may call peek_next_entry(), which may
4073 * call ring_buffer_peek() that may make the contents of iter->ent
4074 * undefined. Need to copy iter->ent now.
4075 */
4076 if (iter->ent && iter->ent != iter->temp) {
4077 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4078 !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4079 void *temp;
4080 temp = kmalloc(size: iter->ent_size, GFP_KERNEL);
4081 if (!temp)
4082 return NULL;
4083 kfree(objp: iter->temp);
4084 iter->temp = temp;
4085 iter->temp_size = iter->ent_size;
4086 }
4087 memcpy(iter->temp, iter->ent, iter->ent_size);
4088 iter->ent = iter->temp;
4089 }
4090 entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4091 /* Put back the original ent_size */
4092 iter->ent_size = ent_size;
4093
4094 return entry;
4095}
4096
4097/* Find the next real entry, and increment the iterator to the next entry */
4098void *trace_find_next_entry_inc(struct trace_iterator *iter)
4099{
4100 iter->ent = __find_next_entry(iter, ent_cpu: &iter->cpu,
4101 missing_events: &iter->lost_events, ent_ts: &iter->ts);
4102
4103 if (iter->ent)
4104 trace_iterator_increment(iter);
4105
4106 return iter->ent ? iter : NULL;
4107}
4108
4109static void trace_consume(struct trace_iterator *iter)
4110{
4111 ring_buffer_consume(buffer: iter->array_buffer->buffer, cpu: iter->cpu, ts: &iter->ts,
4112 lost_events: &iter->lost_events);
4113}
4114
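/*
 * seq_file ->next() callback for the trace output files. @pos is the entry
 * index that seq_file wants next; the iterator is advanced with
 * trace_find_next_entry_inc() until iter->idx catches up with it, and
 * returning NULL ends the read.
 */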
4115static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4116{
4117 struct trace_iterator *iter = m->private;
4118 int i = (int)*pos;
4119 void *ent;
4120
4121 WARN_ON_ONCE(iter->leftover);
4122
4123 (*pos)++;
4124
4125 /* can't go backwards */
4126 if (iter->idx > i)
4127 return NULL;
4128
4129 if (iter->idx < 0)
4130 ent = trace_find_next_entry_inc(iter);
4131 else
4132 ent = iter;
4133
4134 while (ent && iter->idx < i)
4135 ent = trace_find_next_entry_inc(iter);
4136
4137 iter->pos = *pos;
4138
4139 return ent;
4140}
4141
4142void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4143{
4144 struct ring_buffer_iter *buf_iter;
4145 unsigned long entries = 0;
4146 u64 ts;
4147
4148 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4149
4150 buf_iter = trace_buffer_iter(iter, cpu);
4151 if (!buf_iter)
4152 return;
4153
4154 ring_buffer_iter_reset(iter: buf_iter);
4155
	/*
	 * With the max latency tracers, we could have the case that
	 * a reset never took place on a CPU. This is evident from
	 * the timestamp being before the start of the buffer.
	 */
4161 while (ring_buffer_iter_peek(iter: buf_iter, ts: &ts)) {
4162 if (ts >= iter->array_buffer->time_start)
4163 break;
4164 entries++;
4165 ring_buffer_iter_advance(iter: buf_iter);
4166 }
4167
4168 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4169}
4170
/*
 * The current tracer is copied to avoid global locking
 * all around.
 */
4175static void *s_start(struct seq_file *m, loff_t *pos)
4176{
4177 struct trace_iterator *iter = m->private;
4178 struct trace_array *tr = iter->tr;
4179 int cpu_file = iter->cpu_file;
4180 void *p = NULL;
4181 loff_t l = 0;
4182 int cpu;
4183
4184 mutex_lock(&trace_types_lock);
4185 if (unlikely(tr->current_trace != iter->trace)) {
4186 /* Close iter->trace before switching to the new current tracer */
4187 if (iter->trace->close)
4188 iter->trace->close(iter);
4189 iter->trace = tr->current_trace;
4190 /* Reopen the new current tracer */
4191 if (iter->trace->open)
4192 iter->trace->open(iter);
4193 }
4194 mutex_unlock(lock: &trace_types_lock);
4195
4196#ifdef CONFIG_TRACER_MAX_TRACE
4197 if (iter->snapshot && iter->trace->use_max_tr)
4198 return ERR_PTR(error: -EBUSY);
4199#endif
4200
4201 if (*pos != iter->pos) {
4202 iter->ent = NULL;
4203 iter->cpu = 0;
4204 iter->idx = -1;
4205
4206 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4207 for_each_tracing_cpu(cpu)
4208 tracing_iter_reset(iter, cpu);
4209 } else
4210 tracing_iter_reset(iter, cpu: cpu_file);
4211
4212 iter->leftover = 0;
4213 for (p = iter; p && l < *pos; p = s_next(m, v: p, pos: &l))
4214 ;
4215
4216 } else {
4217 /*
4218 * If we overflowed the seq_file before, then we want
4219 * to just reuse the trace_seq buffer again.
4220 */
4221 if (iter->leftover)
4222 p = iter;
4223 else {
4224 l = *pos - 1;
4225 p = s_next(m, v: p, pos: &l);
4226 }
4227 }
4228
4229 trace_event_read_lock();
4230 trace_access_lock(cpu: cpu_file);
4231 return p;
4232}
4233
4234static void s_stop(struct seq_file *m, void *p)
4235{
4236 struct trace_iterator *iter = m->private;
4237
4238#ifdef CONFIG_TRACER_MAX_TRACE
4239 if (iter->snapshot && iter->trace->use_max_tr)
4240 return;
4241#endif
4242
4243 trace_access_unlock(cpu: iter->cpu_file);
4244 trace_event_read_unlock();
4245}
4246
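/*
 * Per-CPU entry accounting: "entries" is what is currently readable, while
 * "total" also includes entries lost to overruns. When a latency tracer
 * marked early entries as skipped, both counts exclude the skipped ones and
 * the overrun count is not added in.
 */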
4247static void
4248get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4249 unsigned long *entries, int cpu)
4250{
4251 unsigned long count;
4252
4253 count = ring_buffer_entries_cpu(buffer: buf->buffer, cpu);
4254 /*
4255 * If this buffer has skipped entries, then we hold all
4256 * entries for the trace and we need to ignore the
4257 * ones before the time stamp.
4258 */
4259 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4260 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4261 /* total is the same as the entries */
4262 *total = count;
4263 } else
4264 *total = count +
4265 ring_buffer_overrun_cpu(buffer: buf->buffer, cpu);
4266 *entries = count;
4267}
4268
4269static void
4270get_total_entries(struct array_buffer *buf,
4271 unsigned long *total, unsigned long *entries)
4272{
4273 unsigned long t, e;
4274 int cpu;
4275
4276 *total = 0;
4277 *entries = 0;
4278
4279 for_each_tracing_cpu(cpu) {
4280 get_total_entries_cpu(buf, total: &t, entries: &e, cpu);
4281 *total += t;
4282 *entries += e;
4283 }
4284}
4285
4286unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4287{
4288 unsigned long total, entries;
4289
4290 if (!tr)
4291 tr = &global_trace;
4292
4293 get_total_entries_cpu(buf: &tr->array_buffer, total: &total, entries: &entries, cpu);
4294
4295 return entries;
4296}
4297
4298unsigned long trace_total_entries(struct trace_array *tr)
4299{
4300 unsigned long total, entries;
4301
4302 if (!tr)
4303 tr = &global_trace;
4304
4305 get_total_entries(buf: &tr->array_buffer, total: &total, entries: &entries);
4306
4307 return entries;
4308}
4309
4310static void print_lat_help_header(struct seq_file *m)
4311{
4312 seq_puts(m, s: "# _------=> CPU# \n"
4313 "# / _-----=> irqs-off/BH-disabled\n"
4314 "# | / _----=> need-resched \n"
4315 "# || / _---=> hardirq/softirq \n"
4316 "# ||| / _--=> preempt-depth \n"
4317 "# |||| / _-=> migrate-disable \n"
4318 "# ||||| / delay \n"
4319 "# cmd pid |||||| time | caller \n"
4320 "# \\ / |||||| \\ | / \n");
4321}
4322
4323static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4324{
4325 unsigned long total;
4326 unsigned long entries;
4327
4328 get_total_entries(buf, total: &total, entries: &entries);
4329 seq_printf(m, fmt: "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
4330 entries, total, num_online_cpus());
4331 seq_puts(m, s: "#\n");
4332}
4333
4334static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4335 unsigned int flags)
4336{
4337 bool tgid = flags & TRACE_ITER_RECORD_TGID;
4338
4339 print_event_info(buf, m);
4340
4341 seq_printf(m, fmt: "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
4342 seq_printf(m, fmt: "# | | %s | | |\n", tgid ? " | " : "");
4343}
4344
4345static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4346 unsigned int flags)
4347{
4348 bool tgid = flags & TRACE_ITER_RECORD_TGID;
4349 static const char space[] = " ";
4350 int prec = tgid ? 12 : 2;
4351
4352 print_event_info(buf, m);
4353
4354 seq_printf(m, fmt: "# %.*s _-----=> irqs-off/BH-disabled\n", prec, space);
4355 seq_printf(m, fmt: "# %.*s / _----=> need-resched\n", prec, space);
4356 seq_printf(m, fmt: "# %.*s| / _---=> hardirq/softirq\n", prec, space);
4357 seq_printf(m, fmt: "# %.*s|| / _--=> preempt-depth\n", prec, space);
4358 seq_printf(m, fmt: "# %.*s||| / _-=> migrate-disable\n", prec, space);
4359 seq_printf(m, fmt: "# %.*s|||| / delay\n", prec, space);
4360 seq_printf(m, fmt: "# TASK-PID %.*s CPU# ||||| TIMESTAMP FUNCTION\n", prec, " TGID ");
4361 seq_printf(m, fmt: "# | | %.*s | ||||| | |\n", prec, " | ");
4362}
4363
4364void
4365print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4366{
4367 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4368 struct array_buffer *buf = iter->array_buffer;
4369 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4370 struct tracer *type = iter->trace;
4371 unsigned long entries;
4372 unsigned long total;
4373 const char *name = type->name;
4374
4375 get_total_entries(buf, total: &total, entries: &entries);
4376
4377 seq_printf(m, fmt: "# %s latency trace v1.1.5 on %s\n",
4378 name, UTS_RELEASE);
4379 seq_puts(m, s: "# -----------------------------------"
4380 "---------------------------------\n");
4381 seq_printf(m, fmt: "# latency: %lu us, #%lu/%lu, CPU#%d |"
4382 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4383 nsecs_to_usecs(nsecs: data->saved_latency),
4384 entries,
4385 total,
4386 buf->cpu,
4387 preempt_model_none() ? "server" :
4388 preempt_model_voluntary() ? "desktop" :
4389 preempt_model_full() ? "preempt" :
4390 preempt_model_rt() ? "preempt_rt" :
4391 "unknown",
4392 /* These are reserved for later use */
4393 0, 0, 0, 0);
4394#ifdef CONFIG_SMP
4395 seq_printf(m, fmt: " #P:%d)\n", num_online_cpus());
4396#else
4397 seq_puts(m, ")\n");
4398#endif
4399 seq_puts(m, s: "# -----------------\n");
4400 seq_printf(m, fmt: "# | task: %.16s-%d "
4401 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4402 data->comm, data->pid,
4403 from_kuid_munged(to: seq_user_ns(seq: m), uid: data->uid), data->nice,
4404 data->policy, data->rt_priority);
4405 seq_puts(m, s: "# -----------------\n");
4406
4407 if (data->critical_start) {
4408 seq_puts(m, s: "# => started at: ");
4409 seq_print_ip_sym(s: &iter->seq, ip: data->critical_start, sym_flags);
4410 trace_print_seq(m, s: &iter->seq);
4411 seq_puts(m, s: "\n# => ended at: ");
4412 seq_print_ip_sym(s: &iter->seq, ip: data->critical_end, sym_flags);
4413 trace_print_seq(m, s: &iter->seq);
4414 seq_puts(m, s: "\n#\n");
4415 }
4416
4417 seq_puts(m, s: "#\n");
4418}
4419
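/*
 * Emit a "##### CPU N buffer started ####" annotation the first time output
 * comes from a CPU whose buffer start may have been clipped by overruns, so
 * the reader knows earlier events from that CPU are gone. Only done when the
 * annotate option and the iterator's annotate flag are both set.
 */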
4420static void test_cpu_buff_start(struct trace_iterator *iter)
4421{
4422 struct trace_seq *s = &iter->seq;
4423 struct trace_array *tr = iter->tr;
4424
4425 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4426 return;
4427
4428 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4429 return;
4430
4431 if (cpumask_available(mask: iter->started) &&
4432 cpumask_test_cpu(cpu: iter->cpu, cpumask: iter->started))
4433 return;
4434
4435 if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4436 return;
4437
4438 if (cpumask_available(mask: iter->started))
4439 cpumask_set_cpu(cpu: iter->cpu, dstp: iter->started);
4440
4441 /* Don't print started cpu buffer for the first entry of the trace */
4442 if (iter->idx > 1)
4443 trace_seq_printf(s, fmt: "##### CPU %u buffer started ####\n",
4444 iter->cpu);
4445}
4446
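/*
 * Default human-readable output: print the context columns (comm, pid, CPU,
 * flags, timestamp) and then hand the entry to the registered trace_event's
 * ->trace() callback. The raw/hex/binary variants below do the same through
 * their respective callbacks.
 */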
4447static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4448{
4449 struct trace_array *tr = iter->tr;
4450 struct trace_seq *s = &iter->seq;
4451 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4452 struct trace_entry *entry;
4453 struct trace_event *event;
4454
4455 entry = iter->ent;
4456
4457 test_cpu_buff_start(iter);
4458
4459 event = ftrace_find_event(type: entry->type);
4460
4461 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4462 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4463 trace_print_lat_context(iter);
4464 else
4465 trace_print_context(iter);
4466 }
4467
4468 if (trace_seq_has_overflowed(s))
4469 return TRACE_TYPE_PARTIAL_LINE;
4470
4471 if (event) {
4472 if (tr->trace_flags & TRACE_ITER_FIELDS)
4473 return print_event_fields(iter, event);
4474 return event->funcs->trace(iter, sym_flags, event);
4475 }
4476
4477 trace_seq_printf(s, fmt: "Unknown type %d\n", entry->type);
4478
4479 return trace_handle_return(s);
4480}
4481
4482static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4483{
4484 struct trace_array *tr = iter->tr;
4485 struct trace_seq *s = &iter->seq;
4486 struct trace_entry *entry;
4487 struct trace_event *event;
4488
4489 entry = iter->ent;
4490
4491 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4492 trace_seq_printf(s, fmt: "%d %d %llu ",
4493 entry->pid, iter->cpu, iter->ts);
4494
4495 if (trace_seq_has_overflowed(s))
4496 return TRACE_TYPE_PARTIAL_LINE;
4497
4498 event = ftrace_find_event(type: entry->type);
4499 if (event)
4500 return event->funcs->raw(iter, 0, event);
4501
4502 trace_seq_printf(s, fmt: "%d ?\n", entry->type);
4503
4504 return trace_handle_return(s);
4505}
4506
4507static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4508{
4509 struct trace_array *tr = iter->tr;
4510 struct trace_seq *s = &iter->seq;
4511 unsigned char newline = '\n';
4512 struct trace_entry *entry;
4513 struct trace_event *event;
4514
4515 entry = iter->ent;
4516
4517 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4518 SEQ_PUT_HEX_FIELD(s, entry->pid);
4519 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4520 SEQ_PUT_HEX_FIELD(s, iter->ts);
4521 if (trace_seq_has_overflowed(s))
4522 return TRACE_TYPE_PARTIAL_LINE;
4523 }
4524
4525 event = ftrace_find_event(type: entry->type);
4526 if (event) {
4527 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4528 if (ret != TRACE_TYPE_HANDLED)
4529 return ret;
4530 }
4531
4532 SEQ_PUT_FIELD(s, newline);
4533
4534 return trace_handle_return(s);
4535}
4536
4537static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4538{
4539 struct trace_array *tr = iter->tr;
4540 struct trace_seq *s = &iter->seq;
4541 struct trace_entry *entry;
4542 struct trace_event *event;
4543
4544 entry = iter->ent;
4545
4546 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4547 SEQ_PUT_FIELD(s, entry->pid);
4548 SEQ_PUT_FIELD(s, iter->cpu);
4549 SEQ_PUT_FIELD(s, iter->ts);
4550 if (trace_seq_has_overflowed(s))
4551 return TRACE_TYPE_PARTIAL_LINE;
4552 }
4553
4554 event = ftrace_find_event(type: entry->type);
4555 return event ? event->funcs->binary(iter, 0, event) :
4556 TRACE_TYPE_HANDLED;
4557}
4558
4559int trace_empty(struct trace_iterator *iter)
4560{
4561 struct ring_buffer_iter *buf_iter;
4562 int cpu;
4563
4564 /* If we are looking at one CPU buffer, only check that one */
4565 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4566 cpu = iter->cpu_file;
4567 buf_iter = trace_buffer_iter(iter, cpu);
4568 if (buf_iter) {
4569 if (!ring_buffer_iter_empty(iter: buf_iter))
4570 return 0;
4571 } else {
4572 if (!ring_buffer_empty_cpu(buffer: iter->array_buffer->buffer, cpu))
4573 return 0;
4574 }
4575 return 1;
4576 }
4577
4578 for_each_tracing_cpu(cpu) {
4579 buf_iter = trace_buffer_iter(iter, cpu);
4580 if (buf_iter) {
4581 if (!ring_buffer_iter_empty(iter: buf_iter))
4582 return 0;
4583 } else {
4584 if (!ring_buffer_empty_cpu(buffer: iter->array_buffer->buffer, cpu))
4585 return 0;
4586 }
4587 }
4588
4589 return 1;
4590}
4591
4592/* Called with trace_event_read_lock() held. */
4593enum print_line_t print_trace_line(struct trace_iterator *iter)
4594{
4595 struct trace_array *tr = iter->tr;
4596 unsigned long trace_flags = tr->trace_flags;
4597 enum print_line_t ret;
4598
4599 if (iter->lost_events) {
4600 if (iter->lost_events == (unsigned long)-1)
4601 trace_seq_printf(s: &iter->seq, fmt: "CPU:%d [LOST EVENTS]\n",
4602 iter->cpu);
4603 else
4604 trace_seq_printf(s: &iter->seq, fmt: "CPU:%d [LOST %lu EVENTS]\n",
4605 iter->cpu, iter->lost_events);
4606 if (trace_seq_has_overflowed(s: &iter->seq))
4607 return TRACE_TYPE_PARTIAL_LINE;
4608 }
4609
4610 if (iter->trace && iter->trace->print_line) {
4611 ret = iter->trace->print_line(iter);
4612 if (ret != TRACE_TYPE_UNHANDLED)
4613 return ret;
4614 }
4615
4616 if (iter->ent->type == TRACE_BPUTS &&
4617 trace_flags & TRACE_ITER_PRINTK &&
4618 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4619 return trace_print_bputs_msg_only(iter);
4620
4621 if (iter->ent->type == TRACE_BPRINT &&
4622 trace_flags & TRACE_ITER_PRINTK &&
4623 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4624 return trace_print_bprintk_msg_only(iter);
4625
4626 if (iter->ent->type == TRACE_PRINT &&
4627 trace_flags & TRACE_ITER_PRINTK &&
4628 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4629 return trace_print_printk_msg_only(iter);
4630
4631 if (trace_flags & TRACE_ITER_BIN)
4632 return print_bin_fmt(iter);
4633
4634 if (trace_flags & TRACE_ITER_HEX)
4635 return print_hex_fmt(iter);
4636
4637 if (trace_flags & TRACE_ITER_RAW)
4638 return print_raw_fmt(iter);
4639
4640 return print_trace_fmt(iter);
4641}
4642
4643void trace_latency_header(struct seq_file *m)
4644{
4645 struct trace_iterator *iter = m->private;
4646 struct trace_array *tr = iter->tr;
4647
4648 /* print nothing if the buffers are empty */
4649 if (trace_empty(iter))
4650 return;
4651
4652 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4653 print_trace_header(m, iter);
4654
4655 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4656 print_lat_help_header(m);
4657}
4658
4659void trace_default_header(struct seq_file *m)
4660{
4661 struct trace_iterator *iter = m->private;
4662 struct trace_array *tr = iter->tr;
4663 unsigned long trace_flags = tr->trace_flags;
4664
4665 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4666 return;
4667
4668 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4669 /* print nothing if the buffers are empty */
4670 if (trace_empty(iter))
4671 return;
4672 print_trace_header(m, iter);
4673 if (!(trace_flags & TRACE_ITER_VERBOSE))
4674 print_lat_help_header(m);
4675 } else {
4676 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4677 if (trace_flags & TRACE_ITER_IRQ_INFO)
4678 print_func_help_header_irq(buf: iter->array_buffer,
4679 m, flags: trace_flags);
4680 else
4681 print_func_help_header(buf: iter->array_buffer, m,
4682 flags: trace_flags);
4683 }
4684 }
4685}
4686
4687static void test_ftrace_alive(struct seq_file *m)
4688{
4689 if (!ftrace_is_dead())
4690 return;
4691 seq_puts(m, s: "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4692 "# MAY BE MISSING FUNCTION EVENTS\n");
4693}
4694
4695#ifdef CONFIG_TRACER_MAX_TRACE
4696static void show_snapshot_main_help(struct seq_file *m)
4697{
4698 seq_puts(m, s: "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4699 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4700 "# Takes a snapshot of the main buffer.\n"
4701 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
		    "#                      (Doesn't have to be '2'; works with any number that\n"
4703 "# is not a '0' or '1')\n");
4704}
4705
4706static void show_snapshot_percpu_help(struct seq_file *m)
4707{
4708 seq_puts(m, s: "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4709#ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4710 seq_puts(m, s: "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4711 "# Takes a snapshot of the main buffer for this cpu.\n");
4712#else
4713 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4714 "# Must use main snapshot file to allocate.\n");
4715#endif
4716 seq_puts(m, s: "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
		    "#                      (Doesn't have to be '2'; works with any number that\n"
4718 "# is not a '0' or '1')\n");
4719}
4720
4721static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4722{
4723 if (iter->tr->allocated_snapshot)
4724 seq_puts(m, s: "#\n# * Snapshot is allocated *\n#\n");
4725 else
4726 seq_puts(m, s: "#\n# * Snapshot is freed *\n#\n");
4727
4728 seq_puts(m, s: "# Snapshot commands:\n");
4729 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4730 show_snapshot_main_help(m);
4731 else
4732 show_snapshot_percpu_help(m);
4733}
4734#else
4735/* Should never be called */
4736static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4737#endif
4738
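/*
 * seq_file ->show() callback: with no entry yet, print the appropriate
 * header (or the snapshot help); if a previous line overflowed the seq_file
 * buffer, flush the leftover; otherwise format the current entry into
 * iter->seq and copy it out.
 */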
4739static int s_show(struct seq_file *m, void *v)
4740{
4741 struct trace_iterator *iter = v;
4742 int ret;
4743
4744 if (iter->ent == NULL) {
4745 if (iter->tr) {
4746 seq_printf(m, fmt: "# tracer: %s\n", iter->trace->name);
4747 seq_puts(m, s: "#\n");
4748 test_ftrace_alive(m);
4749 }
4750 if (iter->snapshot && trace_empty(iter))
4751 print_snapshot_help(m, iter);
4752 else if (iter->trace && iter->trace->print_header)
4753 iter->trace->print_header(m);
4754 else
4755 trace_default_header(m);
4756
4757 } else if (iter->leftover) {
4758 /*
4759 * If we filled the seq_file buffer earlier, we
4760 * want to just show it now.
4761 */
4762 ret = trace_print_seq(m, s: &iter->seq);
4763
4764 /* ret should this time be zero, but you never know */
4765 iter->leftover = ret;
4766
4767 } else {
4768 print_trace_line(iter);
4769 ret = trace_print_seq(m, s: &iter->seq);
4770 /*
4771 * If we overflow the seq_file buffer, then it will
4772 * ask us for this data again at start up.
4773 * Use that instead.
4774 * ret is 0 if seq_file write succeeded.
4775 * -1 otherwise.
4776 */
4777 iter->leftover = ret;
4778 }
4779
4780 return 0;
4781}
4782
4783/*
4784 * Should be used after trace_array_get(), trace_types_lock
4785 * ensures that i_cdev was already initialized.
4786 */
4787static inline int tracing_get_cpu(struct inode *inode)
4788{
4789 if (inode->i_cdev) /* See trace_create_cpu_file() */
4790 return (long)inode->i_cdev - 1;
4791 return RING_BUFFER_ALL_CPUS;
4792}
4793
4794static const struct seq_operations tracer_seq_ops = {
4795 .start = s_start,
4796 .next = s_next,
4797 .stop = s_stop,
4798 .show = s_show,
4799};
4800
/*
 * Note, as iter itself can be allocated and freed in different
 * ways, this function is only used to free its content, and not
 * the iterator itself. The only requirement on all the allocations
 * is that they must zero all fields (kzalloc), as freeing works with
 * either allocated content or NULL.
 */
4808static void free_trace_iter_content(struct trace_iterator *iter)
4809{
4810 /* The fmt is either NULL, allocated or points to static_fmt_buf */
4811 if (iter->fmt != static_fmt_buf)
4812 kfree(objp: iter->fmt);
4813
4814 kfree(objp: iter->temp);
4815 kfree(objp: iter->buffer_iter);
4816 mutex_destroy(lock: &iter->mutex);
4817 free_cpumask_var(mask: iter->started);
4818}
4819
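/*
 * Build the iterator behind the "trace" file: allocate the per-CPU
 * ring-buffer iterators and scratch buffers, pick the main or the max
 * (snapshot) buffer, and, if pause-on-trace is set and this is not the
 * snapshot file, stop tracing for the duration of the read.
 */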
4820static struct trace_iterator *
4821__tracing_open(struct inode *inode, struct file *file, bool snapshot)
4822{
4823 struct trace_array *tr = inode->i_private;
4824 struct trace_iterator *iter;
4825 int cpu;
4826
4827 if (tracing_disabled)
4828 return ERR_PTR(error: -ENODEV);
4829
4830 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4831 if (!iter)
4832 return ERR_PTR(error: -ENOMEM);
4833
4834 iter->buffer_iter = kcalloc(n: nr_cpu_ids, size: sizeof(*iter->buffer_iter),
4835 GFP_KERNEL);
4836 if (!iter->buffer_iter)
4837 goto release;
4838
	/*
	 * trace_find_next_entry() may need to save off iter->ent.
	 * It will place it into the iter->temp buffer. As most
	 * events are less than 128 bytes, allocate a buffer of that size.
	 * If one is greater, then trace_find_next_entry() will
	 * allocate a new buffer to adjust for the bigger iter->ent.
	 * It's not critical if it fails to get allocated here.
	 */
4847 iter->temp = kmalloc(size: 128, GFP_KERNEL);
4848 if (iter->temp)
4849 iter->temp_size = 128;
4850
	/*
	 * trace_event_printf() may need to modify the given format
	 * string to replace %p with %px so that it shows the real address
	 * instead of a hash value. However, that is only needed for event
	 * tracing; other tracers may not need it. Defer the allocation
	 * until it is needed.
	 */
4858 iter->fmt = NULL;
4859 iter->fmt_size = 0;
4860
4861 mutex_lock(&trace_types_lock);
4862 iter->trace = tr->current_trace;
4863
4864 if (!zalloc_cpumask_var(mask: &iter->started, GFP_KERNEL))
4865 goto fail;
4866
4867 iter->tr = tr;
4868
4869#ifdef CONFIG_TRACER_MAX_TRACE
4870 /* Currently only the top directory has a snapshot */
4871 if (tr->current_trace->print_max || snapshot)
4872 iter->array_buffer = &tr->max_buffer;
4873 else
4874#endif
4875 iter->array_buffer = &tr->array_buffer;
4876 iter->snapshot = snapshot;
4877 iter->pos = -1;
4878 iter->cpu_file = tracing_get_cpu(inode);
4879 mutex_init(&iter->mutex);
4880
4881 /* Notify the tracer early; before we stop tracing. */
4882 if (iter->trace->open)
4883 iter->trace->open(iter);
4884
4885 /* Annotate start of buffers if we had overruns */
4886 if (ring_buffer_overruns(buffer: iter->array_buffer->buffer))
4887 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4888
4889 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4890 if (trace_clocks[tr->clock_id].in_ns)
4891 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4892
4893 /*
4894 * If pause-on-trace is enabled, then stop the trace while
4895 * dumping, unless this is the "snapshot" file
4896 */
4897 if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4898 tracing_stop_tr(tr);
4899
4900 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4901 for_each_tracing_cpu(cpu) {
4902 iter->buffer_iter[cpu] =
4903 ring_buffer_read_prepare(buffer: iter->array_buffer->buffer,
4904 cpu, GFP_KERNEL);
4905 }
4906 ring_buffer_read_prepare_sync();
4907 for_each_tracing_cpu(cpu) {
4908 ring_buffer_read_start(iter: iter->buffer_iter[cpu]);
4909 tracing_iter_reset(iter, cpu);
4910 }
4911 } else {
4912 cpu = iter->cpu_file;
4913 iter->buffer_iter[cpu] =
4914 ring_buffer_read_prepare(buffer: iter->array_buffer->buffer,
4915 cpu, GFP_KERNEL);
4916 ring_buffer_read_prepare_sync();
4917 ring_buffer_read_start(iter: iter->buffer_iter[cpu]);
4918 tracing_iter_reset(iter, cpu);
4919 }
4920
4921 mutex_unlock(lock: &trace_types_lock);
4922
4923 return iter;
4924
4925 fail:
4926 mutex_unlock(lock: &trace_types_lock);
4927 free_trace_iter_content(iter);
4928release:
4929 seq_release_private(inode, file);
4930 return ERR_PTR(error: -ENOMEM);
4931}
4932
4933int tracing_open_generic(struct inode *inode, struct file *filp)
4934{
4935 int ret;
4936
4937 ret = tracing_check_open_get_tr(NULL);
4938 if (ret)
4939 return ret;
4940
4941 filp->private_data = inode->i_private;
4942 return 0;
4943}
4944
4945bool tracing_is_disabled(void)
4946{
4947 return (tracing_disabled) ? true: false;
4948}
4949
4950/*
4951 * Open and update trace_array ref count.
4952 * Must have the current trace_array passed to it.
4953 */
4954int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4955{
4956 struct trace_array *tr = inode->i_private;
4957 int ret;
4958
4959 ret = tracing_check_open_get_tr(tr);
4960 if (ret)
4961 return ret;
4962
4963 filp->private_data = inode->i_private;
4964
4965 return 0;
4966}
4967
/*
 * The private pointer of the inode is the trace_event_file.
 * Update the tr ref count associated with it.
 */
4972int tracing_open_file_tr(struct inode *inode, struct file *filp)
4973{
4974 struct trace_event_file *file = inode->i_private;
4975 int ret;
4976
4977 ret = tracing_check_open_get_tr(tr: file->tr);
4978 if (ret)
4979 return ret;
4980
4981 mutex_lock(&event_mutex);
4982
4983 /* Fail if the file is marked for removal */
4984 if (file->flags & EVENT_FILE_FL_FREED) {
4985 trace_array_put(file->tr);
4986 ret = -ENODEV;
4987 } else {
4988 event_file_get(file);
4989 }
4990
4991 mutex_unlock(lock: &event_mutex);
4992 if (ret)
4993 return ret;
4994
4995 filp->private_data = inode->i_private;
4996
4997 return 0;
4998}
4999
5000int tracing_release_file_tr(struct inode *inode, struct file *filp)
5001{
5002 struct trace_event_file *file = inode->i_private;
5003
5004 trace_array_put(file->tr);
5005 event_file_put(file);
5006
5007 return 0;
5008}
5009
5010static int tracing_mark_open(struct inode *inode, struct file *filp)
5011{
5012 stream_open(inode, filp);
5013 return tracing_open_generic_tr(inode, filp);
5014}
5015
5016static int tracing_release(struct inode *inode, struct file *file)
5017{
5018 struct trace_array *tr = inode->i_private;
5019 struct seq_file *m = file->private_data;
5020 struct trace_iterator *iter;
5021 int cpu;
5022
5023 if (!(file->f_mode & FMODE_READ)) {
5024 trace_array_put(tr);
5025 return 0;
5026 }
5027
5028 /* Writes do not use seq_file */
5029 iter = m->private;
5030 mutex_lock(&trace_types_lock);
5031
5032 for_each_tracing_cpu(cpu) {
5033 if (iter->buffer_iter[cpu])
5034 ring_buffer_read_finish(iter: iter->buffer_iter[cpu]);
5035 }
5036
5037 if (iter->trace && iter->trace->close)
5038 iter->trace->close(iter);
5039
5040 if (!iter->snapshot && tr->stop_count)
5041 /* reenable tracing if it was previously enabled */
5042 tracing_start_tr(tr);
5043
5044 __trace_array_put(this_tr: tr);
5045
5046 mutex_unlock(lock: &trace_types_lock);
5047
5048 free_trace_iter_content(iter);
5049 seq_release_private(inode, file);
5050
5051 return 0;
5052}
5053
5054static int tracing_release_generic_tr(struct inode *inode, struct file *file)
5055{
5056 struct trace_array *tr = inode->i_private;
5057
5058 trace_array_put(tr);
5059 return 0;
5060}
5061
5062static int tracing_single_release_tr(struct inode *inode, struct file *file)
5063{
5064 struct trace_array *tr = inode->i_private;
5065
5066 trace_array_put(tr);
5067
5068 return single_release(inode, file);
5069}
5070
5071static int tracing_open(struct inode *inode, struct file *file)
5072{
5073 struct trace_array *tr = inode->i_private;
5074 struct trace_iterator *iter;
5075 int ret;
5076
5077 ret = tracing_check_open_get_tr(tr);
5078 if (ret)
5079 return ret;
5080
5081 /* If this file was open for write, then erase contents */
5082 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
5083 int cpu = tracing_get_cpu(inode);
5084 struct array_buffer *trace_buf = &tr->array_buffer;
5085
5086#ifdef CONFIG_TRACER_MAX_TRACE
5087 if (tr->current_trace->print_max)
5088 trace_buf = &tr->max_buffer;
5089#endif
5090
5091 if (cpu == RING_BUFFER_ALL_CPUS)
5092 tracing_reset_online_cpus(buf: trace_buf);
5093 else
5094 tracing_reset_cpu(buf: trace_buf, cpu);
5095 }
5096
5097 if (file->f_mode & FMODE_READ) {
5098 iter = __tracing_open(inode, file, snapshot: false);
5099 if (IS_ERR(ptr: iter))
5100 ret = PTR_ERR(ptr: iter);
5101 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5102 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5103 }
5104
5105 if (ret < 0)
5106 trace_array_put(tr);
5107
5108 return ret;
5109}
5110
5111/*
5112 * Some tracers are not suitable for instance buffers.
5113 * A tracer is always available for the global array (toplevel)
5114 * or if it explicitly states that it is.
5115 */
5116static bool
5117trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5118{
5119 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5120}
5121
5122/* Find the next tracer that this trace array may use */
5123static struct tracer *
5124get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5125{
5126 while (t && !trace_ok_for_array(t, tr))
5127 t = t->next;
5128
5129 return t;
5130}
5131
5132static void *
5133t_next(struct seq_file *m, void *v, loff_t *pos)
5134{
5135 struct trace_array *tr = m->private;
5136 struct tracer *t = v;
5137
5138 (*pos)++;
5139
5140 if (t)
5141 t = get_tracer_for_array(tr, t: t->next);
5142
5143 return t;
5144}
5145
5146static void *t_start(struct seq_file *m, loff_t *pos)
5147{
5148 struct trace_array *tr = m->private;
5149 struct tracer *t;
5150 loff_t l = 0;
5151
5152 mutex_lock(&trace_types_lock);
5153
5154 t = get_tracer_for_array(tr, t: trace_types);
5155 for (; t && l < *pos; t = t_next(m, v: t, pos: &l))
5156 ;
5157
5158 return t;
5159}
5160
5161static void t_stop(struct seq_file *m, void *p)
5162{
5163 mutex_unlock(lock: &trace_types_lock);
5164}
5165
5166static int t_show(struct seq_file *m, void *v)
5167{
5168 struct tracer *t = v;
5169
5170 if (!t)
5171 return 0;
5172
5173 seq_puts(m, s: t->name);
5174 if (t->next)
5175 seq_putc(m, c: ' ');
5176 else
5177 seq_putc(m, c: '\n');
5178
5179 return 0;
5180}
5181
5182static const struct seq_operations show_traces_seq_ops = {
5183 .start = t_start,
5184 .next = t_next,
5185 .stop = t_stop,
5186 .show = t_show,
5187};
5188
5189static int show_traces_open(struct inode *inode, struct file *file)
5190{
5191 struct trace_array *tr = inode->i_private;
5192 struct seq_file *m;
5193 int ret;
5194
5195 ret = tracing_check_open_get_tr(tr);
5196 if (ret)
5197 return ret;
5198
5199 ret = seq_open(file, &show_traces_seq_ops);
5200 if (ret) {
5201 trace_array_put(tr);
5202 return ret;
5203 }
5204
5205 m = file->private_data;
5206 m->private = tr;
5207
5208 return 0;
5209}
5210
5211static int show_traces_release(struct inode *inode, struct file *file)
5212{
5213 struct trace_array *tr = inode->i_private;
5214
5215 trace_array_put(tr);
5216 return seq_release(inode, file);
5217}
5218
5219static ssize_t
5220tracing_write_stub(struct file *filp, const char __user *ubuf,
5221 size_t count, loff_t *ppos)
5222{
5223 return count;
5224}
5225
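/*
 * lseek for tracing files: readable files defer to seq_lseek(), while files
 * opened write-only have nothing to seek in, so their position is simply
 * reset to zero.
 */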
5226loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5227{
5228 int ret;
5229
5230 if (file->f_mode & FMODE_READ)
5231 ret = seq_lseek(file, offset, whence);
5232 else
5233 file->f_pos = ret = 0;
5234
5235 return ret;
5236}
5237
5238static const struct file_operations tracing_fops = {
5239 .open = tracing_open,
5240 .read = seq_read,
5241 .read_iter = seq_read_iter,
5242 .splice_read = copy_splice_read,
5243 .write = tracing_write_stub,
5244 .llseek = tracing_lseek,
5245 .release = tracing_release,
5246};
5247
5248static const struct file_operations show_traces_fops = {
5249 .open = show_traces_open,
5250 .read = seq_read,
5251 .llseek = seq_lseek,
5252 .release = show_traces_release,
5253};
5254
5255static ssize_t
5256tracing_cpumask_read(struct file *filp, char __user *ubuf,
5257 size_t count, loff_t *ppos)
5258{
5259 struct trace_array *tr = file_inode(f: filp)->i_private;
5260 char *mask_str;
5261 int len;
5262
5263 len = snprintf(NULL, size: 0, fmt: "%*pb\n",
5264 cpumask_pr_args(tr->tracing_cpumask)) + 1;
5265 mask_str = kmalloc(size: len, GFP_KERNEL);
5266 if (!mask_str)
5267 return -ENOMEM;
5268
5269 len = snprintf(buf: mask_str, size: len, fmt: "%*pb\n",
5270 cpumask_pr_args(tr->tracing_cpumask));
5271 if (len >= count) {
5272 count = -EINVAL;
5273 goto out_err;
5274 }
5275 count = simple_read_from_buffer(to: ubuf, count, ppos, from: mask_str, available: len);
5276
5277out_err:
5278 kfree(objp: mask_str);
5279
5280 return count;
5281}
5282
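/*
 * Apply a new tracing cpumask: CPUs leaving the mask get their per-CPU
 * "disabled" count raised and ring-buffer recording turned off, while CPUs
 * entering it get the reverse. Done under max_lock with interrupts off so
 * it cannot race with a max-latency buffer swap.
 */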
5283int tracing_set_cpumask(struct trace_array *tr,
5284 cpumask_var_t tracing_cpumask_new)
5285{
5286 int cpu;
5287
5288 if (!tr)
5289 return -EINVAL;
5290
5291 local_irq_disable();
5292 arch_spin_lock(&tr->max_lock);
5293 for_each_tracing_cpu(cpu) {
5294 /*
5295 * Increase/decrease the disabled counter if we are
5296 * about to flip a bit in the cpumask:
5297 */
5298 if (cpumask_test_cpu(cpu, cpumask: tr->tracing_cpumask) &&
5299 !cpumask_test_cpu(cpu, cpumask: tracing_cpumask_new)) {
5300 atomic_inc(v: &per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5301 ring_buffer_record_disable_cpu(buffer: tr->array_buffer.buffer, cpu);
5302#ifdef CONFIG_TRACER_MAX_TRACE
5303 ring_buffer_record_disable_cpu(buffer: tr->max_buffer.buffer, cpu);
5304#endif
5305 }
5306 if (!cpumask_test_cpu(cpu, cpumask: tr->tracing_cpumask) &&
5307 cpumask_test_cpu(cpu, cpumask: tracing_cpumask_new)) {
5308 atomic_dec(v: &per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5309 ring_buffer_record_enable_cpu(buffer: tr->array_buffer.buffer, cpu);
5310#ifdef CONFIG_TRACER_MAX_TRACE
5311 ring_buffer_record_enable_cpu(buffer: tr->max_buffer.buffer, cpu);
5312#endif
5313 }
5314 }
5315 arch_spin_unlock(&tr->max_lock);
5316 local_irq_enable();
5317
5318 cpumask_copy(dstp: tr->tracing_cpumask, srcp: tracing_cpumask_new);
5319
5320 return 0;
5321}
5322
5323static ssize_t
5324tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5325 size_t count, loff_t *ppos)
5326{
5327 struct trace_array *tr = file_inode(f: filp)->i_private;
5328 cpumask_var_t tracing_cpumask_new;
5329 int err;
5330
5331 if (!zalloc_cpumask_var(mask: &tracing_cpumask_new, GFP_KERNEL))
5332 return -ENOMEM;
5333
5334 err = cpumask_parse_user(buf: ubuf, len: count, dstp: tracing_cpumask_new);
5335 if (err)
5336 goto err_free;
5337
5338 err = tracing_set_cpumask(tr, tracing_cpumask_new);
5339 if (err)
5340 goto err_free;
5341
5342 free_cpumask_var(mask: tracing_cpumask_new);
5343
5344 return count;
5345
5346err_free:
5347 free_cpumask_var(mask: tracing_cpumask_new);
5348
5349 return err;
5350}
5351
5352static const struct file_operations tracing_cpumask_fops = {
5353 .open = tracing_open_generic_tr,
5354 .read = tracing_cpumask_read,
5355 .write = tracing_cpumask_write,
5356 .release = tracing_release_generic_tr,
5357 .llseek = generic_file_llseek,
5358};
5359
5360static int tracing_trace_options_show(struct seq_file *m, void *v)
5361{
5362 struct tracer_opt *trace_opts;
5363 struct trace_array *tr = m->private;
5364 u32 tracer_flags;
5365 int i;
5366
5367 mutex_lock(&trace_types_lock);
5368 tracer_flags = tr->current_trace->flags->val;
5369 trace_opts = tr->current_trace->flags->opts;
5370
5371 for (i = 0; trace_options[i]; i++) {
5372 if (tr->trace_flags & (1 << i))
5373 seq_printf(m, fmt: "%s\n", trace_options[i]);
5374 else
5375 seq_printf(m, fmt: "no%s\n", trace_options[i]);
5376 }
5377
5378 for (i = 0; trace_opts[i].name; i++) {
5379 if (tracer_flags & trace_opts[i].bit)
5380 seq_printf(m, fmt: "%s\n", trace_opts[i].name);
5381 else
5382 seq_printf(m, fmt: "no%s\n", trace_opts[i].name);
5383 }
5384 mutex_unlock(lock: &trace_types_lock);
5385
5386 return 0;
5387}
5388
5389static int __set_tracer_option(struct trace_array *tr,
5390 struct tracer_flags *tracer_flags,
5391 struct tracer_opt *opts, int neg)
5392{
5393 struct tracer *trace = tracer_flags->trace;
5394 int ret;
5395
5396 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5397 if (ret)
5398 return ret;
5399
5400 if (neg)
5401 tracer_flags->val &= ~opts->bit;
5402 else
5403 tracer_flags->val |= opts->bit;
5404 return 0;
5405}
5406
5407/* Try to assign a tracer specific option */
5408static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5409{
5410 struct tracer *trace = tr->current_trace;
5411 struct tracer_flags *tracer_flags = trace->flags;
5412 struct tracer_opt *opts = NULL;
5413 int i;
5414
5415 for (i = 0; tracer_flags->opts[i].name; i++) {
5416 opts = &tracer_flags->opts[i];
5417
5418 if (strcmp(cmp, opts->name) == 0)
5419 return __set_tracer_option(tr, tracer_flags: trace->flags, opts, neg);
5420 }
5421
5422 return -EINVAL;
5423}
5424
5425/* Some tracers require overwrite to stay enabled */
5426int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5427{
5428 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5429 return -1;
5430
5431 return 0;
5432}
5433
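/*
 * Flip a single TRACE_ITER_* flag for an instance. The current tracer may
 * veto the change via ->flag_changed(), and a few flags need extra work
 * here: cmdline/TGID recording, fork following, ring-buffer overwrite mode
 * and trace_printk start/stop.
 */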
5434int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5435{
5436 int *map;
5437
5438 if ((mask == TRACE_ITER_RECORD_TGID) ||
5439 (mask == TRACE_ITER_RECORD_CMD))
5440 lockdep_assert_held(&event_mutex);
5441
5442 /* do nothing if flag is already set */
5443 if (!!(tr->trace_flags & mask) == !!enabled)
5444 return 0;
5445
5446 /* Give the tracer a chance to approve the change */
5447 if (tr->current_trace->flag_changed)
5448 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5449 return -EINVAL;
5450
5451 if (enabled)
5452 tr->trace_flags |= mask;
5453 else
5454 tr->trace_flags &= ~mask;
5455
5456 if (mask == TRACE_ITER_RECORD_CMD)
5457 trace_event_enable_cmd_record(enable: enabled);
5458
5459 if (mask == TRACE_ITER_RECORD_TGID) {
5460 if (!tgid_map) {
5461 tgid_map_max = pid_max;
5462 map = kvcalloc(n: tgid_map_max + 1, size: sizeof(*tgid_map),
5463 GFP_KERNEL);
5464
5465 /*
5466 * Pairs with smp_load_acquire() in
5467 * trace_find_tgid_ptr() to ensure that if it observes
5468 * the tgid_map we just allocated then it also observes
5469 * the corresponding tgid_map_max value.
5470 */
5471 smp_store_release(&tgid_map, map);
5472 }
5473 if (!tgid_map) {
5474 tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5475 return -ENOMEM;
5476 }
5477
5478 trace_event_enable_tgid_record(enable: enabled);
5479 }
5480
5481 if (mask == TRACE_ITER_EVENT_FORK)
5482 trace_event_follow_fork(tr, enable: enabled);
5483
5484 if (mask == TRACE_ITER_FUNC_FORK)
5485 ftrace_pid_follow_fork(tr, enable: enabled);
5486
5487 if (mask == TRACE_ITER_OVERWRITE) {
5488 ring_buffer_change_overwrite(buffer: tr->array_buffer.buffer, val: enabled);
5489#ifdef CONFIG_TRACER_MAX_TRACE
5490 ring_buffer_change_overwrite(buffer: tr->max_buffer.buffer, val: enabled);
5491#endif
5492 }
5493
5494 if (mask == TRACE_ITER_PRINTK) {
5495 trace_printk_start_stop_comm(enabled);
5496 trace_printk_control(enabled);
5497 }
5498
5499 return 0;
5500}
5501
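/*
 * Parse one option name from the trace_options file or boot parameter.
 * A leading "no" clears the flag (e.g. "overwrite" sets
 * TRACE_ITER_OVERWRITE and "nooverwrite" clears it); names not found in
 * the global trace_options[] list are passed on to the current tracer's
 * private options.
 */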
5502int trace_set_options(struct trace_array *tr, char *option)
5503{
5504 char *cmp;
5505 int neg = 0;
5506 int ret;
5507 size_t orig_len = strlen(option);
5508 int len;
5509
5510 cmp = strstrip(str: option);
5511
5512 len = str_has_prefix(str: cmp, prefix: "no");
5513 if (len)
5514 neg = 1;
5515
5516 cmp += len;
5517
5518 mutex_lock(&event_mutex);
5519 mutex_lock(&trace_types_lock);
5520
5521 ret = match_string(array: trace_options, n: -1, string: cmp);
5522 /* If no option could be set, test the specific tracer options */
5523 if (ret < 0)
5524 ret = set_tracer_option(tr, cmp, neg);
5525 else
5526 ret = set_tracer_flag(tr, mask: 1 << ret, enabled: !neg);
5527
5528 mutex_unlock(lock: &trace_types_lock);
5529 mutex_unlock(lock: &event_mutex);
5530
5531 /*
5532 * If the first trailing whitespace is replaced with '\0' by strstrip,
5533 * turn it back into a space.
5534 */
5535 if (orig_len > strlen(option))
5536 option[strlen(option)] = ' ';
5537
5538 return ret;
5539}
5540
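/*
 * Walk the comma-separated list saved from the trace_options= boot
 * parameter and apply each entry to the global trace array, restoring the
 * commas so the buffer can be parsed again later.
 */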
5541static void __init apply_trace_boot_options(void)
5542{
5543 char *buf = trace_boot_options_buf;
5544 char *option;
5545
5546 while (true) {
5547 option = strsep(&buf, ",");
5548
5549 if (!option)
5550 break;
5551
5552 if (*option)
5553 trace_set_options(tr: &global_trace, option);
5554
5555 /* Put back the comma to allow this to be called again */
5556 if (buf)
5557 *(buf - 1) = ',';
5558 }
5559}
5560
5561static ssize_t
5562tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5563 size_t cnt, loff_t *ppos)
5564{
5565 struct seq_file *m = filp->private_data;
5566 struct trace_array *tr = m->private;
5567 char buf[64];
5568 int ret;
5569
5570 if (cnt >= sizeof(buf))
5571 return -EINVAL;
5572
5573 if (copy_from_user(to: buf, from: ubuf, n: cnt))
5574 return -EFAULT;
5575
5576 buf[cnt] = 0;
5577
5578 ret = trace_set_options(tr, option: buf);
5579 if (ret < 0)
5580 return ret;
5581
5582 *ppos += cnt;
5583
5584 return cnt;
5585}
5586
5587static int tracing_trace_options_open(struct inode *inode, struct file *file)
5588{
5589 struct trace_array *tr = inode->i_private;
5590 int ret;
5591
5592 ret = tracing_check_open_get_tr(tr);
5593 if (ret)
5594 return ret;
5595
5596 ret = single_open(file, tracing_trace_options_show, inode->i_private);
5597 if (ret < 0)
5598 trace_array_put(tr);
5599
5600 return ret;
5601}
5602
5603static const struct file_operations tracing_iter_fops = {
5604 .open = tracing_trace_options_open,
5605 .read = seq_read,
5606 .llseek = seq_lseek,
5607 .release = tracing_single_release_tr,
5608 .write = tracing_trace_options_write,
5609};
5610
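/* Mini-HOWTO text, presumably surfaced through the tracefs "README" file. */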
5611static const char readme_msg[] =
5612 "tracing mini-HOWTO:\n\n"
5613 "# echo 0 > tracing_on : quick way to disable tracing\n"
5614 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5615 " Important files:\n"
5616 " trace\t\t\t- The static contents of the buffer\n"
5617 "\t\t\t To clear the buffer write into this file: echo > trace\n"
5618 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5619 " current_tracer\t- function and latency tracers\n"
5620 " available_tracers\t- list of configured tracers for current_tracer\n"
5621 " error_log\t- error log for failed commands (that support it)\n"
5622 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
5623 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
5624 " trace_clock\t\t- change the clock used to order events\n"
5625 " local: Per cpu clock but may not be synced across CPUs\n"
5626 " global: Synced across CPUs but slows tracing down.\n"
5627 " counter: Not a clock, but just an increment\n"
5628 " uptime: Jiffy counter from time of boot\n"
5629 " perf: Same clock that perf events use\n"
5630#ifdef CONFIG_X86_64
5631 " x86-tsc: TSC cycle counter\n"
5632#endif
5633 "\n timestamp_mode\t- view the mode used to timestamp events\n"
5634 " delta: Delta difference against a buffer-wide timestamp\n"
5635 " absolute: Absolute (standalone) timestamp\n"
5636 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5637 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5638 " tracing_cpumask\t- Limit which CPUs to trace\n"
5639 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5640 "\t\t\t Remove sub-buffer with rmdir\n"
5641 " trace_options\t\t- Set format or modify how tracing happens\n"
5642 "\t\t\t Disable an option by prefixing 'no' to the\n"
5643 "\t\t\t option name\n"
5644 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5645#ifdef CONFIG_DYNAMIC_FTRACE
5646 "\n available_filter_functions - list of functions that can be filtered on\n"
5647 " set_ftrace_filter\t- echo function name in here to only trace these\n"
5648 "\t\t\t functions\n"
5649 "\t accepts: func_full_name or glob-matching-pattern\n"
5650 "\t modules: Can select a group via module\n"
5651 "\t Format: :mod:<module-name>\n"
5652 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
5653 "\t triggers: a command to perform when function is hit\n"
5654 "\t Format: <function>:<trigger>[:count]\n"
5655 "\t trigger: traceon, traceoff\n"
5656 "\t\t enable_event:<system>:<event>\n"
5657 "\t\t disable_event:<system>:<event>\n"
5658#ifdef CONFIG_STACKTRACE
5659 "\t\t stacktrace\n"
5660#endif
5661#ifdef CONFIG_TRACER_SNAPSHOT
5662 "\t\t snapshot\n"
5663#endif
5664 "\t\t dump\n"
5665 "\t\t cpudump\n"
5666 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
5667 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
5668 "\t The first one will disable tracing every time do_fault is hit\n"
5669 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
	"\t     The first time do_trap is hit and it disables tracing, the\n"
5671 "\t counter will decrement to 2. If tracing is already disabled,\n"
5672 "\t the counter will not decrement. It only decrements when the\n"
5673 "\t trigger did work\n"
5674 "\t To remove trigger without count:\n"
5675 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
5676 "\t To remove trigger with a count:\n"
5677 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5678 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
5679 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5680 "\t modules: Can select a group via module command :mod:\n"
5681 "\t Does not accept triggers\n"
5682#endif /* CONFIG_DYNAMIC_FTRACE */
5683#ifdef CONFIG_FUNCTION_TRACER
5684 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5685 "\t\t (function)\n"
5686 " set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5687 "\t\t (function)\n"
5688#endif
5689#ifdef CONFIG_FUNCTION_GRAPH_TRACER
5690 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5691 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5692 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5693#endif
5694#ifdef CONFIG_TRACER_SNAPSHOT
5695 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
5696 "\t\t\t snapshot buffer. Read the contents for more\n"
5697 "\t\t\t information\n"
5698#endif
5699#ifdef CONFIG_STACK_TRACER
5700 " stack_trace\t\t- Shows the max stack trace when active\n"
5701 " stack_max_size\t- Shows current max stack size that was traced\n"
5702 "\t\t\t Write into this file to reset the max size (trigger a\n"
5703 "\t\t\t new trace)\n"
5704#ifdef CONFIG_DYNAMIC_FTRACE
5705 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5706 "\t\t\t traces\n"
5707#endif
5708#endif /* CONFIG_STACK_TRACER */
5709#ifdef CONFIG_DYNAMIC_EVENTS
5710 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5711 "\t\t\t Write into this file to define/undefine new trace events.\n"
5712#endif
5713#ifdef CONFIG_KPROBE_EVENTS
5714 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5715 "\t\t\t Write into this file to define/undefine new trace events.\n"
5716#endif
5717#ifdef CONFIG_UPROBE_EVENTS
5718 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5719 "\t\t\t Write into this file to define/undefine new trace events.\n"
5720#endif
5721#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5722 defined(CONFIG_FPROBE_EVENTS)
5723 "\t accepts: event-definitions (one definition per line)\n"
5724#if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5725 "\t Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5726 "\t r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5727#endif
5728#ifdef CONFIG_FPROBE_EVENTS
5729 "\t f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5730 "\t t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5731#endif
5732#ifdef CONFIG_HIST_TRIGGERS
5733 "\t s:[synthetic/]<event> <field> [<field>]\n"
5734#endif
5735 "\t e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5736 "\t -:[<group>/][<event>]\n"
5737#ifdef CONFIG_KPROBE_EVENTS
5738 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5739 "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5740#endif
5741#ifdef CONFIG_UPROBE_EVENTS
5742 " place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5743#endif
5744 "\t args: <name>=fetcharg[:type]\n"
5745 "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5746#ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5747#ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5748 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5749 "\t <argname>[->field[->field|.field...]],\n"
5750#else
5751 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5752#endif
5753#else
5754 "\t $stack<index>, $stack, $retval, $comm,\n"
5755#endif
5756 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5757 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5758 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5759 "\t symstr, <type>\\[<array-size>\\]\n"
5760#ifdef CONFIG_HIST_TRIGGERS
5761 "\t field: <stype> <name>;\n"
5762 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5763 "\t [unsigned] char/int/long\n"
5764#endif
	"\t  efield: For event probes ('e' types), the field is one of the fields\n"
5766 "\t of the <attached-group>/<attached-event>.\n"
5767#endif
5768 " events/\t\t- Directory containing all trace event subsystems:\n"
5769 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5770 " events/<system>/\t- Directory containing all trace events for <system>:\n"
5771 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5772 "\t\t\t events\n"
5773 " filter\t\t- If set, only events passing filter are traced\n"
5774 " events/<system>/<event>/\t- Directory containing control files for\n"
5775 "\t\t\t <event>:\n"
5776 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5777 " filter\t\t- If set, only events passing filter are traced\n"
5778 " trigger\t\t- If set, a command to perform when event is hit\n"
5779 "\t Format: <trigger>[:count][if <filter>]\n"
5780 "\t trigger: traceon, traceoff\n"
5781 "\t enable_event:<system>:<event>\n"
5782 "\t disable_event:<system>:<event>\n"
5783#ifdef CONFIG_HIST_TRIGGERS
5784 "\t enable_hist:<system>:<event>\n"
5785 "\t disable_hist:<system>:<event>\n"
5786#endif
5787#ifdef CONFIG_STACKTRACE
5788 "\t\t stacktrace\n"
5789#endif
5790#ifdef CONFIG_TRACER_SNAPSHOT
5791 "\t\t snapshot\n"
5792#endif
5793#ifdef CONFIG_HIST_TRIGGERS
5794 "\t\t hist (see below)\n"
5795#endif
5796 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
5797 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
5798 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5799 "\t events/block/block_unplug/trigger\n"
5800 "\t The first disables tracing every time block_unplug is hit.\n"
5801 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
5802 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
5803 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5804 "\t Like function triggers, the counter is only decremented if it\n"
5805 "\t enabled or disabled tracing.\n"
5806 "\t To remove a trigger without a count:\n"
5807 "\t echo '!<trigger> > <system>/<event>/trigger\n"
5808 "\t To remove a trigger with a count:\n"
5809 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
5810 "\t Filters can be ignored when removing a trigger.\n"
5811#ifdef CONFIG_HIST_TRIGGERS
5812 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
5813 "\t Format: hist:keys=<field1[,field2,...]>\n"
5814 "\t [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5815 "\t [:values=<field1[,field2,...]>]\n"
5816 "\t [:sort=<field1[,field2,...]>]\n"
5817 "\t [:size=#entries]\n"
5818 "\t [:pause][:continue][:clear]\n"
5819 "\t [:name=histname1]\n"
5820 "\t [:nohitcount]\n"
5821 "\t [:<handler>.<action>]\n"
5822 "\t [if <filter>]\n\n"
5823 "\t Note, special fields can be used as well:\n"
5824 "\t common_timestamp - to record current timestamp\n"
5825 "\t common_cpu - to record the CPU the event happened on\n"
5826 "\n"
5827 "\t A hist trigger variable can be:\n"
5828 "\t - a reference to a field e.g. x=current_timestamp,\n"
5829 "\t - a reference to another variable e.g. y=$x,\n"
5830 "\t - a numeric literal: e.g. ms_per_sec=1000,\n"
5831 "\t - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5832 "\n"
5833 "\t hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5834 "\t multiplication(*) and division(/) operators. An operand can be either a\n"
5835 "\t variable reference, field or numeric literal.\n"
5836 "\n"
5837 "\t When a matching event is hit, an entry is added to a hash\n"
5838 "\t table using the key(s) and value(s) named, and the value of a\n"
5839 "\t sum called 'hitcount' is incremented. Keys and values\n"
5840 "\t correspond to fields in the event's format description. Keys\n"
5841 "\t can be any field, or the special string 'common_stacktrace'.\n"
5842 "\t Compound keys consisting of up to two fields can be specified\n"
5843 "\t by the 'keys' keyword. Values must correspond to numeric\n"
5844 "\t fields. Sort keys consisting of up to two fields can be\n"
5845 "\t specified using the 'sort' keyword. The sort direction can\n"
5846 "\t be modified by appending '.descending' or '.ascending' to a\n"
5847 "\t sort field. The 'size' parameter can be used to specify more\n"
5848 "\t or fewer than the default 2048 entries for the hashtable size.\n"
5849 "\t If a hist trigger is given a name using the 'name' parameter,\n"
5850 "\t its histogram data will be shared with other triggers of the\n"
5851 "\t same name, and trigger hits will update this common data.\n\n"
5852 "\t Reading the 'hist' file for the event will dump the hash\n"
5853 "\t table in its entirety to stdout. If there are multiple hist\n"
5854 "\t triggers attached to an event, there will be a table for each\n"
5855 "\t trigger in the output. The table displayed for a named\n"
5856 "\t trigger will be the same as any other instance having the\n"
5857 "\t same name. The default format used to display a given field\n"
5858 "\t can be modified by appending any of the following modifiers\n"
5859 "\t to the field name, as applicable:\n\n"
5860 "\t .hex display a number as a hex value\n"
5861 "\t .sym display an address as a symbol\n"
5862 "\t .sym-offset display an address as a symbol and offset\n"
5863 "\t .execname display a common_pid as a program name\n"
5864 "\t .syscall display a syscall id as a syscall name\n"
5865 "\t .log2 display log2 value rather than raw number\n"
5866 "\t .buckets=size display values in groups of size rather than raw number\n"
5867 "\t .usecs display a common_timestamp in microseconds\n"
5868 "\t .percent display a number of percentage value\n"
5869 "\t .graph display a bar-graph of a value\n\n"
5870 "\t The 'pause' parameter can be used to pause an existing hist\n"
5871 "\t trigger or to start a hist trigger but not log any events\n"
5872 "\t until told to do so. 'continue' can be used to start or\n"
5873 "\t restart a paused hist trigger.\n\n"
5874 "\t The 'clear' parameter will clear the contents of a running\n"
5875 "\t hist trigger and leave its current paused/active state\n"
5876 "\t unchanged.\n\n"
5877 "\t The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5878 "\t raw hitcount in the histogram.\n\n"
5879 "\t The enable_hist and disable_hist triggers can be used to\n"
5880 "\t have one event conditionally start and stop another event's\n"
5881 "\t already-attached hist trigger. The syntax is analogous to\n"
5882 "\t the enable_event and disable_event triggers.\n\n"
5883 "\t Hist trigger handlers and actions are executed whenever a\n"
5884 "\t a histogram entry is added or updated. They take the form:\n\n"
5885 "\t <handler>.<action>\n\n"
5886 "\t The available handlers are:\n\n"
5887 "\t onmatch(matching.event) - invoke on addition or update\n"
5888 "\t onmax(var) - invoke if var exceeds current max\n"
5889 "\t onchange(var) - invoke action if var changes\n\n"
5890 "\t The available actions are:\n\n"
5891 "\t trace(<synthetic_event>,param list) - generate synthetic event\n"
5892 "\t save(field,...) - save current event fields\n"
5893#ifdef CONFIG_TRACER_SNAPSHOT
5894 "\t snapshot() - snapshot the trace buffer\n\n"
5895#endif
5896#ifdef CONFIG_SYNTH_EVENTS
5897 " events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5898 "\t Write into this file to define/undefine new synthetic events.\n"
5899 "\t example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5900#endif
5901#endif
5902;
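/*
 * A minimal usage sketch of the trigger syntax documented above
 * (assuming tracefs is mounted at /sys/kernel/tracing; the chosen
 * event and field names are only illustrative):
 *
 *   cd /sys/kernel/tracing
 *   echo 'hist:keys=common_pid.execname:values=bytes_req:sort=bytes_req.descending' \
 *        > events/kmem/kmalloc/trigger
 *   cat events/kmem/kmalloc/hist
 *
 * The first command aggregates kmalloc hits per task, summing the
 * bytes_req field; the second dumps the resulting hash table.
 */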
5903
5904static ssize_t
5905tracing_readme_read(struct file *filp, char __user *ubuf,
5906 size_t cnt, loff_t *ppos)
5907{
5908 return simple_read_from_buffer(ubuf, cnt, ppos,
5909 readme_msg, strlen(readme_msg));
5910}
5911
5912static const struct file_operations tracing_readme_fops = {
5913 .open = tracing_open_generic,
5914 .read = tracing_readme_read,
5915 .llseek = generic_file_llseek,
5916};
5917
5918static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5919{
5920 int pid = ++(*pos);
5921
5922 return trace_find_tgid_ptr(pid);
5923}
5924
5925static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5926{
5927 int pid = *pos;
5928
5929 return trace_find_tgid_ptr(pid);
5930}
5931
5932static void saved_tgids_stop(struct seq_file *m, void *v)
5933{
5934}
5935
5936static int saved_tgids_show(struct seq_file *m, void *v)
5937{
5938 int *entry = (int *)v;
5939 int pid = entry - tgid_map;
5940 int tgid = *entry;
5941
5942 if (tgid == 0)
5943 return SEQ_SKIP;
5944
5945 seq_printf(m, "%d %d\n", pid, tgid);
5946 return 0;
5947}
5948
5949static const struct seq_operations tracing_saved_tgids_seq_ops = {
5950 .start = saved_tgids_start,
5951 .stop = saved_tgids_stop,
5952 .next = saved_tgids_next,
5953 .show = saved_tgids_show,
5954};
5955
5956static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5957{
5958 int ret;
5959
5960 ret = tracing_check_open_get_tr(NULL);
5961 if (ret)
5962 return ret;
5963
5964 return seq_open(filp, &tracing_saved_tgids_seq_ops);
5965}
5966
5967
5968static const struct file_operations tracing_saved_tgids_fops = {
5969 .open = tracing_saved_tgids_open,
5970 .read = seq_read,
5971 .llseek = seq_lseek,
5972 .release = seq_release,
5973};
5974
5975static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5976{
5977 unsigned int *ptr = v;
5978
5979 if (*pos || m->count)
5980 ptr++;
5981
5982 (*pos)++;
5983
5984 for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5985 ptr++) {
5986 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5987 continue;
5988
5989 return ptr;
5990 }
5991
5992 return NULL;
5993}
5994
5995static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5996{
5997 void *v;
5998 loff_t l = 0;
5999
6000 preempt_disable();
6001 arch_spin_lock(&trace_cmdline_lock);
6002
6003 v = &savedcmd->map_cmdline_to_pid[0];
6004 while (l <= *pos) {
6005 v = saved_cmdlines_next(m, v, &l);
6006 if (!v)
6007 return NULL;
6008 }
6009
6010 return v;
6011}
6012
6013static void saved_cmdlines_stop(struct seq_file *m, void *v)
6014{
6015 arch_spin_unlock(&trace_cmdline_lock);
6016 preempt_enable();
6017}
6018
6019static int saved_cmdlines_show(struct seq_file *m, void *v)
6020{
6021 char buf[TASK_COMM_LEN];
6022 unsigned int *pid = v;
6023
6024 __trace_find_cmdline(*pid, buf);
6025 seq_printf(m, "%d %s\n", *pid, buf);
6026 return 0;
6027}
6028
6029static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
6030 .start = saved_cmdlines_start,
6031 .next = saved_cmdlines_next,
6032 .stop = saved_cmdlines_stop,
6033 .show = saved_cmdlines_show,
6034};
6035
6036static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
6037{
6038 int ret;
6039
6040 ret = tracing_check_open_get_tr(NULL);
6041 if (ret)
6042 return ret;
6043
6044 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
6045}
6046
6047static const struct file_operations tracing_saved_cmdlines_fops = {
6048 .open = tracing_saved_cmdlines_open,
6049 .read = seq_read,
6050 .llseek = seq_lseek,
6051 .release = seq_release,
6052};
6053
6054static ssize_t
6055tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
6056 size_t cnt, loff_t *ppos)
6057{
6058 char buf[64];
6059 int r;
6060
6061 preempt_disable();
6062 arch_spin_lock(&trace_cmdline_lock);
6063 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
6064 arch_spin_unlock(&trace_cmdline_lock);
6065 preempt_enable();
6066
6067 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6068}
6069
6070static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
6071{
6072 kfree(s->saved_cmdlines);
6073 kfree(s->map_cmdline_to_pid);
6074 kfree(s);
6075}
6076
6077static int tracing_resize_saved_cmdlines(unsigned int val)
6078{
6079 struct saved_cmdlines_buffer *s, *savedcmd_temp;
6080
6081 s = kmalloc(sizeof(*s), GFP_KERNEL);
6082 if (!s)
6083 return -ENOMEM;
6084
6085 if (allocate_cmdlines_buffer(val, s) < 0) {
6086 kfree(s);
6087 return -ENOMEM;
6088 }
6089
6090 preempt_disable();
6091 arch_spin_lock(&trace_cmdline_lock);
6092 savedcmd_temp = savedcmd;
6093 savedcmd = s;
6094 arch_spin_unlock(&trace_cmdline_lock);
6095 preempt_enable();
6096 free_saved_cmdlines_buffer(savedcmd_temp);
6097
6098 return 0;
6099}
6100
6101static ssize_t
6102tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
6103 size_t cnt, loff_t *ppos)
6104{
6105 unsigned long val;
6106 int ret;
6107
6108 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6109 if (ret)
6110 return ret;
6111
6112 /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
6113 if (!val || val > PID_MAX_DEFAULT)
6114 return -EINVAL;
6115
6116 ret = tracing_resize_saved_cmdlines((unsigned int)val);
6117 if (ret < 0)
6118 return ret;
6119
6120 *ppos += cnt;
6121
6122 return cnt;
6123}
6124
6125static const struct file_operations tracing_saved_cmdlines_size_fops = {
6126 .open = tracing_open_generic,
6127 .read = tracing_saved_cmdlines_size_read,
6128 .write = tracing_saved_cmdlines_size_write,
6129};
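/*
 * Example (a sketch; paths assume tracefs is mounted at
 * /sys/kernel/tracing):
 *
 *   cat /sys/kernel/tracing/saved_cmdlines_size    # current number of entries
 *   echo 4096 > /sys/kernel/tracing/saved_cmdlines_size
 *
 * The write handler above rejects 0 and anything above PID_MAX_DEFAULT,
 * then swaps in a freshly allocated cmdline cache of the requested size.
 */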
6130
6131#ifdef CONFIG_TRACE_EVAL_MAP_FILE
6132static union trace_eval_map_item *
6133update_eval_map(union trace_eval_map_item *ptr)
6134{
6135 if (!ptr->map.eval_string) {
6136 if (ptr->tail.next) {
6137 ptr = ptr->tail.next;
6138 /* Set ptr to the next real item (skip head) */
6139 ptr++;
6140 } else
6141 return NULL;
6142 }
6143 return ptr;
6144}
6145
6146static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6147{
6148 union trace_eval_map_item *ptr = v;
6149
6150 /*
6151 * Paranoid! If ptr points to end, we don't want to increment past it.
6152 * This really should never happen.
6153 */
6154 (*pos)++;
6155 ptr = update_eval_map(ptr);
6156 if (WARN_ON_ONCE(!ptr))
6157 return NULL;
6158
6159 ptr++;
6160 ptr = update_eval_map(ptr);
6161
6162 return ptr;
6163}
6164
6165static void *eval_map_start(struct seq_file *m, loff_t *pos)
6166{
6167 union trace_eval_map_item *v;
6168 loff_t l = 0;
6169
6170 mutex_lock(&trace_eval_mutex);
6171
6172 v = trace_eval_maps;
6173 if (v)
6174 v++;
6175
6176 while (v && l < *pos) {
6177 v = eval_map_next(m, v, &l);
6178 }
6179
6180 return v;
6181}
6182
6183static void eval_map_stop(struct seq_file *m, void *v)
6184{
6185 mutex_unlock(&trace_eval_mutex);
6186}
6187
6188static int eval_map_show(struct seq_file *m, void *v)
6189{
6190 union trace_eval_map_item *ptr = v;
6191
6192 seq_printf(m, "%s %ld (%s)\n",
6193 ptr->map.eval_string, ptr->map.eval_value,
6194 ptr->map.system);
6195
6196 return 0;
6197}
6198
6199static const struct seq_operations tracing_eval_map_seq_ops = {
6200 .start = eval_map_start,
6201 .next = eval_map_next,
6202 .stop = eval_map_stop,
6203 .show = eval_map_show,
6204};
6205
6206static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6207{
6208 int ret;
6209
6210 ret = tracing_check_open_get_tr(NULL);
6211 if (ret)
6212 return ret;
6213
6214 return seq_open(filp, &tracing_eval_map_seq_ops);
6215}
6216
6217static const struct file_operations tracing_eval_map_fops = {
6218 .open = tracing_eval_map_open,
6219 .read = seq_read,
6220 .llseek = seq_lseek,
6221 .release = seq_release,
6222};
6223
6224static inline union trace_eval_map_item *
6225trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6226{
6227 /* Return tail of array given the head */
6228 return ptr + ptr->head.length + 1;
6229}
6230
6231static void
6232trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6233 int len)
6234{
6235 struct trace_eval_map **stop;
6236 struct trace_eval_map **map;
6237 union trace_eval_map_item *map_array;
6238 union trace_eval_map_item *ptr;
6239
6240 stop = start + len;
6241
6242 /*
6243 * The trace_eval_maps contains the map plus a head and tail item,
6244 * where the head holds the module and length of array, and the
6245 * tail holds a pointer to the next list.
6246 */
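 /*
 * Roughly, for a module contributing N maps the allocation is:
 *
 *   [ head | map 0 | map 1 | ... | map N-1 | tail ]
 *
 * and trace_eval_jmp_to_tail() steps from the head entry straight
 * to the tail entry.
 */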
6247 map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6248 if (!map_array) {
6249 pr_warn("Unable to allocate trace eval mapping\n");
6250 return;
6251 }
6252
6253 mutex_lock(&trace_eval_mutex);
6254
6255 if (!trace_eval_maps)
6256 trace_eval_maps = map_array;
6257 else {
6258 ptr = trace_eval_maps;
6259 for (;;) {
6260 ptr = trace_eval_jmp_to_tail(ptr);
6261 if (!ptr->tail.next)
6262 break;
6263 ptr = ptr->tail.next;
6264
6265 }
6266 ptr->tail.next = map_array;
6267 }
6268 map_array->head.mod = mod;
6269 map_array->head.length = len;
6270 map_array++;
6271
6272 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6273 map_array->map = **map;
6274 map_array++;
6275 }
6276 memset(map_array, 0, sizeof(*map_array));
6277
6278 mutex_unlock(&trace_eval_mutex);
6279}
6280
6281static void trace_create_eval_file(struct dentry *d_tracer)
6282{
6283 trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6284 NULL, &tracing_eval_map_fops);
6285}
6286
6287#else /* CONFIG_TRACE_EVAL_MAP_FILE */
6288static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6289static inline void trace_insert_eval_map_file(struct module *mod,
6290 struct trace_eval_map **start, int len) { }
6291#endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6292
6293static void trace_insert_eval_map(struct module *mod,
6294 struct trace_eval_map **start, int len)
6295{
6296 struct trace_eval_map **map;
6297
6298 if (len <= 0)
6299 return;
6300
6301 map = start;
6302
6303 trace_event_eval_update(map, len);
6304
6305 trace_insert_eval_map_file(mod, start, len);
6306}
6307
6308static ssize_t
6309tracing_set_trace_read(struct file *filp, char __user *ubuf,
6310 size_t cnt, loff_t *ppos)
6311{
6312 struct trace_array *tr = filp->private_data;
6313 char buf[MAX_TRACER_SIZE+2];
6314 int r;
6315
6316 mutex_lock(&trace_types_lock);
6317 r = sprintf(buf, "%s\n", tr->current_trace->name);
6318 mutex_unlock(&trace_types_lock);
6319
6320 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6321}
6322
6323int tracer_init(struct tracer *t, struct trace_array *tr)
6324{
6325 tracing_reset_online_cpus(&tr->array_buffer);
6326 return t->init(tr);
6327}
6328
6329static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6330{
6331 int cpu;
6332
6333 for_each_tracing_cpu(cpu)
6334 per_cpu_ptr(buf->data, cpu)->entries = val;
6335}
6336
6337static void update_buffer_entries(struct array_buffer *buf, int cpu)
6338{
6339 if (cpu == RING_BUFFER_ALL_CPUS) {
6340 set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
6341 } else {
6342 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
6343 }
6344}
6345
6346#ifdef CONFIG_TRACER_MAX_TRACE
6347/* resize @trace_buf's buffer to the size of @size_buf's entries */
6348static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6349 struct array_buffer *size_buf, int cpu_id)
6350{
6351 int cpu, ret = 0;
6352
6353 if (cpu_id == RING_BUFFER_ALL_CPUS) {
6354 for_each_tracing_cpu(cpu) {
6355 ret = ring_buffer_resize(trace_buf->buffer,
6356 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6357 if (ret < 0)
6358 break;
6359 per_cpu_ptr(trace_buf->data, cpu)->entries =
6360 per_cpu_ptr(size_buf->data, cpu)->entries;
6361 }
6362 } else {
6363 ret = ring_buffer_resize(trace_buf->buffer,
6364 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6365 if (ret == 0)
6366 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6367 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6368 }
6369
6370 return ret;
6371}
6372#endif /* CONFIG_TRACER_MAX_TRACE */
6373
6374static int __tracing_resize_ring_buffer(struct trace_array *tr,
6375 unsigned long size, int cpu)
6376{
6377 int ret;
6378
6379 /*
6380 * If kernel or user changes the size of the ring buffer
6381 * we use the size that was given, and we can forget about
6382 * expanding it later.
6383 */
6384 trace_set_ring_buffer_expanded(tr);
6385
6386 /* May be called before buffers are initialized */
6387 if (!tr->array_buffer.buffer)
6388 return 0;
6389
6390 ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6391 if (ret < 0)
6392 return ret;
6393
6394#ifdef CONFIG_TRACER_MAX_TRACE
6395 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6396 !tr->current_trace->use_max_tr)
6397 goto out;
6398
6399 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6400 if (ret < 0) {
6401 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6402 &tr->array_buffer, cpu);
6403 if (r < 0) {
6404 /*
6405 * AARGH! We are left with different
6406 * size max buffer!!!!
6407 * The max buffer is our "snapshot" buffer.
6408 * When a tracer needs a snapshot (one of the
6409 * latency tracers), it swaps the max buffer
6410 * with the saved snapshot. We succeeded in
6411 * updating the size of the main buffer, but failed to
6412 * update the size of the max buffer. But when we tried
6413 * to reset the main buffer to the original size, we
6414 * failed there too. This is very unlikely to
6415 * happen, but if it does, warn and kill all
6416 * tracing.
6417 */
6418 WARN_ON(1);
6419 tracing_disabled = 1;
6420 }
6421 return ret;
6422 }
6423
6424 update_buffer_entries(&tr->max_buffer, cpu);
6425
6426 out:
6427#endif /* CONFIG_TRACER_MAX_TRACE */
6428
6429 update_buffer_entries(&tr->array_buffer, cpu);
6430
6431 return ret;
6432}
6433
6434ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6435 unsigned long size, int cpu_id)
6436{
6437 int ret;
6438
6439 mutex_lock(&trace_types_lock);
6440
6441 if (cpu_id != RING_BUFFER_ALL_CPUS) {
6442 /* make sure, this cpu is enabled in the mask */
6443 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6444 ret = -EINVAL;
6445 goto out;
6446 }
6447 }
6448
6449 ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6450 if (ret < 0)
6451 ret = -ENOMEM;
6452
6453out:
6454 mutex_unlock(&trace_types_lock);
6455
6456 return ret;
6457}
6458
6459
6460/**
6461 * tracing_update_buffers - used by tracing facility to expand ring buffers
6462 * @tr: The tracing instance
6463 *
6464 * To save memory when tracing is never used on a system that has it
6465 * configured in, the ring buffers are set to a minimum size. But once
6466 * a user starts to use the tracing facility, they need to grow
6467 * to their default size.
6468 *
6469 * This function is to be called when a tracer is about to be used.
6470 */
6471int tracing_update_buffers(struct trace_array *tr)
6472{
6473 int ret = 0;
6474
6475 mutex_lock(&trace_types_lock);
6476 if (!tr->ring_buffer_expanded)
6477 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6478 RING_BUFFER_ALL_CPUS);
6479 mutex_unlock(&trace_types_lock);
6480
6481 return ret;
6482}
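/*
 * Visible effect (a sketch): before the first user of tracing, reading
 * buffer_size_kb typically shows something like "7 (expanded: 1408)";
 * the first tracer or enabled event triggers this expansion and the
 * file then reports the full per-CPU size. The exact numbers depend on
 * the architecture page size and the trace_buf_size= boot parameter.
 */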
6483
6484struct trace_option_dentry;
6485
6486static void
6487create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6488
6489/*
6490 * Used to clear out the tracer before deletion of an instance.
6491 * Must have trace_types_lock held.
6492 */
6493static void tracing_set_nop(struct trace_array *tr)
6494{
6495 if (tr->current_trace == &nop_trace)
6496 return;
6497
6498 tr->current_trace->enabled--;
6499
6500 if (tr->current_trace->reset)
6501 tr->current_trace->reset(tr);
6502
6503 tr->current_trace = &nop_trace;
6504}
6505
6506static bool tracer_options_updated;
6507
6508static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6509{
6510 /* Only enable if the directory has been created already. */
6511 if (!tr->dir)
6512 return;
6513
6514 /* Only create trace option files after update_tracer_options has finished */
6515 if (!tracer_options_updated)
6516 return;
6517
6518 create_trace_option_files(tr, t);
6519}
6520
6521int tracing_set_tracer(struct trace_array *tr, const char *buf)
6522{
6523 struct tracer *t;
6524#ifdef CONFIG_TRACER_MAX_TRACE
6525 bool had_max_tr;
6526#endif
6527 int ret = 0;
6528
6529 mutex_lock(&trace_types_lock);
6530
6531 if (!tr->ring_buffer_expanded) {
6532 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6533 RING_BUFFER_ALL_CPUS);
6534 if (ret < 0)
6535 goto out;
6536 ret = 0;
6537 }
6538
6539 for (t = trace_types; t; t = t->next) {
6540 if (strcmp(t->name, buf) == 0)
6541 break;
6542 }
6543 if (!t) {
6544 ret = -EINVAL;
6545 goto out;
6546 }
6547 if (t == tr->current_trace)
6548 goto out;
6549
6550#ifdef CONFIG_TRACER_SNAPSHOT
6551 if (t->use_max_tr) {
6552 local_irq_disable();
6553 arch_spin_lock(&tr->max_lock);
6554 if (tr->cond_snapshot)
6555 ret = -EBUSY;
6556 arch_spin_unlock(&tr->max_lock);
6557 local_irq_enable();
6558 if (ret)
6559 goto out;
6560 }
6561#endif
6562 /* Some tracers won't work on kernel command line */
6563 if (system_state < SYSTEM_RUNNING && t->noboot) {
6564 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6565 t->name);
6566 goto out;
6567 }
6568
6569 /* Some tracers are only allowed for the top level buffer */
6570 if (!trace_ok_for_array(t, tr)) {
6571 ret = -EINVAL;
6572 goto out;
6573 }
6574
6575 /* If trace pipe files are being read, we can't change the tracer */
6576 if (tr->trace_ref) {
6577 ret = -EBUSY;
6578 goto out;
6579 }
6580
6581 trace_branch_disable();
6582
6583 tr->current_trace->enabled--;
6584
6585 if (tr->current_trace->reset)
6586 tr->current_trace->reset(tr);
6587
6588#ifdef CONFIG_TRACER_MAX_TRACE
6589 had_max_tr = tr->current_trace->use_max_tr;
6590
6591 /* Current trace needs to be nop_trace before synchronize_rcu */
6592 tr->current_trace = &nop_trace;
6593
6594 if (had_max_tr && !t->use_max_tr) {
6595 /*
6596 * We need to make sure that the update_max_tr sees that
6597 * current_trace changed to nop_trace to keep it from
6598 * swapping the buffers after we resize it.
6599 * The update_max_tr is called from interrupts disabled
6600 * so a synchronize_rcu() is sufficient.
6601 */
6602 synchronize_rcu();
6603 free_snapshot(tr);
6604 }
6605
6606 if (t->use_max_tr && !tr->allocated_snapshot) {
6607 ret = tracing_alloc_snapshot_instance(tr);
6608 if (ret < 0)
6609 goto out;
6610 }
6611#else
6612 tr->current_trace = &nop_trace;
6613#endif
6614
6615 if (t->init) {
6616 ret = tracer_init(t, tr);
6617 if (ret)
6618 goto out;
6619 }
6620
6621 tr->current_trace = t;
6622 tr->current_trace->enabled++;
6623 trace_branch_enable(tr);
6624 out:
6625 mutex_unlock(&trace_types_lock);
6626
6627 return ret;
6628}
6629
6630static ssize_t
6631tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6632 size_t cnt, loff_t *ppos)
6633{
6634 struct trace_array *tr = filp->private_data;
6635 char buf[MAX_TRACER_SIZE+1];
6636 char *name;
6637 size_t ret;
6638 int err;
6639
6640 ret = cnt;
6641
6642 if (cnt > MAX_TRACER_SIZE)
6643 cnt = MAX_TRACER_SIZE;
6644
6645 if (copy_from_user(buf, ubuf, cnt))
6646 return -EFAULT;
6647
6648 buf[cnt] = 0;
6649
6650 name = strim(buf);
6651
6652 err = tracing_set_tracer(tr, name);
6653 if (err)
6654 return err;
6655
6656 *ppos += ret;
6657
6658 return ret;
6659}
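/*
 * Example (a sketch; path assumes a mounted tracefs and that the
 * function_graph tracer is built in):
 *
 *   echo function_graph > /sys/kernel/tracing/current_tracer
 *   cat /sys/kernel/tracing/current_tracer
 *
 * Unknown tracer names are rejected with -EINVAL, and the write fails
 * with -EBUSY while trace_pipe readers hold a reference on the instance.
 */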
6660
6661static ssize_t
6662tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6663 size_t cnt, loff_t *ppos)
6664{
6665 char buf[64];
6666 int r;
6667
6668 r = snprintf(buf, sizeof(buf), "%ld\n",
6669 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6670 if (r > sizeof(buf))
6671 r = sizeof(buf);
6672 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6673}
6674
6675static ssize_t
6676tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6677 size_t cnt, loff_t *ppos)
6678{
6679 unsigned long val;
6680 int ret;
6681
6682 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6683 if (ret)
6684 return ret;
6685
6686 *ptr = val * 1000;
6687
6688 return cnt;
6689}
6690
6691static ssize_t
6692tracing_thresh_read(struct file *filp, char __user *ubuf,
6693 size_t cnt, loff_t *ppos)
6694{
6695 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6696}
6697
6698static ssize_t
6699tracing_thresh_write(struct file *filp, const char __user *ubuf,
6700 size_t cnt, loff_t *ppos)
6701{
6702 struct trace_array *tr = filp->private_data;
6703 int ret;
6704
6705 mutex_lock(&trace_types_lock);
6706 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6707 if (ret < 0)
6708 goto out;
6709
6710 if (tr->current_trace->update_thresh) {
6711 ret = tr->current_trace->update_thresh(tr);
6712 if (ret < 0)
6713 goto out;
6714 }
6715
6716 ret = cnt;
6717out:
6718 mutex_unlock(&trace_types_lock);
6719
6720 return ret;
6721}
6722
6723#ifdef CONFIG_TRACER_MAX_TRACE
6724
6725static ssize_t
6726tracing_max_lat_read(struct file *filp, char __user *ubuf,
6727 size_t cnt, loff_t *ppos)
6728{
6729 struct trace_array *tr = filp->private_data;
6730
6731 return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6732}
6733
6734static ssize_t
6735tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6736 size_t cnt, loff_t *ppos)
6737{
6738 struct trace_array *tr = filp->private_data;
6739
6740 return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6741}
6742
6743#endif
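/*
 * Note on units: tracing_thresh and the per-instance max_latency are
 * stored in nanoseconds but exposed in microseconds; tracing_nsecs_write()
 * multiplies the written value by 1000 and tracing_nsecs_read() converts
 * back with nsecs_to_usecs(). For example (path assumes a mounted tracefs):
 *
 *   echo 100 > /sys/kernel/tracing/tracing_thresh    # 100 usecs
 */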
6744
6745static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6746{
6747 if (cpu == RING_BUFFER_ALL_CPUS) {
6748 if (cpumask_empty(tr->pipe_cpumask)) {
6749 cpumask_setall(tr->pipe_cpumask);
6750 return 0;
6751 }
6752 } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6753 cpumask_set_cpu(cpu, tr->pipe_cpumask);
6754 return 0;
6755 }
6756 return -EBUSY;
6757}
6758
6759static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6760{
6761 if (cpu == RING_BUFFER_ALL_CPUS) {
6762 WARN_ON(!cpumask_full(tr->pipe_cpumask));
6763 cpumask_clear(tr->pipe_cpumask);
6764 } else {
6765 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6766 cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6767 }
6768}
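/*
 * The pipe_cpumask handling above makes trace_pipe opens mutually
 * exclusive: opening the top-level trace_pipe claims every CPU, while
 * opening per_cpu/cpuN/trace_pipe claims only CPU N. A second open that
 * overlaps an existing claim fails with -EBUSY, e.g. (illustrative):
 *
 *   cat trace_pipe &             # claims all CPUs
 *   cat per_cpu/cpu0/trace_pipe  # -EBUSY until the first reader exits
 */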
6769
6770static int tracing_open_pipe(struct inode *inode, struct file *filp)
6771{
6772 struct trace_array *tr = inode->i_private;
6773 struct trace_iterator *iter;
6774 int cpu;
6775 int ret;
6776
6777 ret = tracing_check_open_get_tr(tr);
6778 if (ret)
6779 return ret;
6780
6781 mutex_lock(&trace_types_lock);
6782 cpu = tracing_get_cpu(inode);
6783 ret = open_pipe_on_cpu(tr, cpu);
6784 if (ret)
6785 goto fail_pipe_on_cpu;
6786
6787 /* create a buffer to store the information to pass to userspace */
6788 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6789 if (!iter) {
6790 ret = -ENOMEM;
6791 goto fail_alloc_iter;
6792 }
6793
6794 trace_seq_init(&iter->seq);
6795 iter->trace = tr->current_trace;
6796
6797 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6798 ret = -ENOMEM;
6799 goto fail;
6800 }
6801
6802 /* trace pipe does not show start of buffer */
6803 cpumask_setall(iter->started);
6804
6805 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6806 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6807
6808 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6809 if (trace_clocks[tr->clock_id].in_ns)
6810 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6811
6812 iter->tr = tr;
6813 iter->array_buffer = &tr->array_buffer;
6814 iter->cpu_file = cpu;
6815 mutex_init(&iter->mutex);
6816 filp->private_data = iter;
6817
6818 if (iter->trace->pipe_open)
6819 iter->trace->pipe_open(iter);
6820
6821 nonseekable_open(inode, filp);
6822
6823 tr->trace_ref++;
6824
6825 mutex_unlock(&trace_types_lock);
6826 return ret;
6827
6828fail:
6829 kfree(iter);
6830fail_alloc_iter:
6831 close_pipe_on_cpu(tr, cpu);
6832fail_pipe_on_cpu:
6833 __trace_array_put(tr);
6834 mutex_unlock(&trace_types_lock);
6835 return ret;
6836}
6837
6838static int tracing_release_pipe(struct inode *inode, struct file *file)
6839{
6840 struct trace_iterator *iter = file->private_data;
6841 struct trace_array *tr = inode->i_private;
6842
6843 mutex_lock(&trace_types_lock);
6844
6845 tr->trace_ref--;
6846
6847 if (iter->trace->pipe_close)
6848 iter->trace->pipe_close(iter);
6849 close_pipe_on_cpu(tr, iter->cpu_file);
6850 mutex_unlock(&trace_types_lock);
6851
6852 free_trace_iter_content(iter);
6853 kfree(iter);
6854
6855 trace_array_put(tr);
6856
6857 return 0;
6858}
6859
6860static __poll_t
6861trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6862{
6863 struct trace_array *tr = iter->tr;
6864
6865 /* Iterators are static, they should be filled or empty */
6866 if (trace_buffer_iter(iter, iter->cpu_file))
6867 return EPOLLIN | EPOLLRDNORM;
6868
6869 if (tr->trace_flags & TRACE_ITER_BLOCK)
6870 /*
6871 * Always select as readable when in blocking mode
6872 */
6873 return EPOLLIN | EPOLLRDNORM;
6874 else
6875 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6876 filp, poll_table, iter->tr->buffer_percent);
6877}
6878
6879static __poll_t
6880tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6881{
6882 struct trace_iterator *iter = filp->private_data;
6883
6884 return trace_poll(iter, filp, poll_table);
6885}
6886
6887/* Must be called with iter->mutex held. */
6888static int tracing_wait_pipe(struct file *filp)
6889{
6890 struct trace_iterator *iter = filp->private_data;
6891 int ret;
6892
6893 while (trace_empty(iter)) {
6894
6895 if ((filp->f_flags & O_NONBLOCK)) {
6896 return -EAGAIN;
6897 }
6898
6899 /*
6900 * We block until we read something and tracing is disabled.
6901 * We still block if tracing is disabled, but we have never
6902 * read anything. This allows a user to cat this file, and
6903 * then enable tracing. But after we have read something,
6904 * we give an EOF when tracing is again disabled.
6905 *
6906 * iter->pos will be 0 if we haven't read anything.
6907 */
6908 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6909 break;
6910
6911 mutex_unlock(&iter->mutex);
6912
6913 ret = wait_on_pipe(iter, 0);
6914
6915 mutex_lock(&iter->mutex);
6916
6917 if (ret)
6918 return ret;
6919 }
6920
6921 return 1;
6922}
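/*
 * In practice this is what gives "cat trace_pipe" its blocking,
 * consuming semantics: the read sleeps while the buffer is empty
 * (unless O_NONBLOCK was requested, in which case it returns -EAGAIN)
 * and only reports EOF once something has been read and tracing has
 * been turned off.
 */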
6923
6924/*
6925 * Consumer reader.
6926 */
6927static ssize_t
6928tracing_read_pipe(struct file *filp, char __user *ubuf,
6929 size_t cnt, loff_t *ppos)
6930{
6931 struct trace_iterator *iter = filp->private_data;
6932 ssize_t sret;
6933
6934 /*
6935 * Avoid more than one consumer on a single file descriptor
6936 * This is just a matter of trace coherency; the ring buffer itself
6937 * is protected.
6938 */
6939 mutex_lock(&iter->mutex);
6940
6941 /* return any leftover data */
6942 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6943 if (sret != -EBUSY)
6944 goto out;
6945
6946 trace_seq_init(&iter->seq);
6947
6948 if (iter->trace->read) {
6949 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6950 if (sret)
6951 goto out;
6952 }
6953
6954waitagain:
6955 sret = tracing_wait_pipe(filp);
6956 if (sret <= 0)
6957 goto out;
6958
6959 /* stop when tracing is finished */
6960 if (trace_empty(iter)) {
6961 sret = 0;
6962 goto out;
6963 }
6964
6965 if (cnt >= PAGE_SIZE)
6966 cnt = PAGE_SIZE - 1;
6967
6968 /* reset all but tr, trace, and overruns */
6969 trace_iterator_reset(iter);
6970 cpumask_clear(iter->started);
6971 trace_seq_init(&iter->seq);
6972
6973 trace_event_read_lock();
6974 trace_access_lock(iter->cpu_file);
6975 while (trace_find_next_entry_inc(iter) != NULL) {
6976 enum print_line_t ret;
6977 int save_len = iter->seq.seq.len;
6978
6979 ret = print_trace_line(iter);
6980 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6981 /*
6982 * If one print_trace_line() fills the entire trace_seq in one shot,
6983 * trace_seq_to_user() will return -EBUSY because save_len == 0.
6984 * In this case, we need to consume it, otherwise the loop will peek
6985 * this event again next time, resulting in an infinite loop.
6986 */
6987 if (save_len == 0) {
6988 iter->seq.full = 0;
6989 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6990 trace_consume(iter);
6991 break;
6992 }
6993
6994 /* In other cases, don't print partial lines */
6995 iter->seq.seq.len = save_len;
6996 break;
6997 }
6998 if (ret != TRACE_TYPE_NO_CONSUME)
6999 trace_consume(iter);
7000
7001 if (trace_seq_used(&iter->seq) >= cnt)
7002 break;
7003
7004 /*
7005 * Setting the full flag means we reached the trace_seq buffer
7006 * size and we should leave by partial output condition above.
7007 * One of the trace_seq_* functions is not used properly.
7008 */
7009 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
7010 iter->ent->type);
7011 }
7012 trace_access_unlock(iter->cpu_file);
7013 trace_event_read_unlock();
7014
7015 /* Now copy what we have to the user */
7016 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
7017 if (iter->seq.readpos >= trace_seq_used(&iter->seq))
7018 trace_seq_init(&iter->seq);
7019
7020 /*
7021 * If there was nothing to send to user, in spite of consuming trace
7022 * entries, go back to wait for more entries.
7023 */
7024 if (sret == -EBUSY)
7025 goto waitagain;
7026
7027out:
7028 mutex_unlock(&iter->mutex);
7029
7030 return sret;
7031}
7032
7033static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
7034 unsigned int idx)
7035{
7036 __free_page(spd->pages[idx]);
7037}
7038
7039static size_t
7040tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
7041{
7042 size_t count;
7043 int save_len;
7044 int ret;
7045
7046 /* Seq buffer is page-sized, exactly what we need. */
7047 for (;;) {
7048 save_len = iter->seq.seq.len;
7049 ret = print_trace_line(iter);
7050
7051 if (trace_seq_has_overflowed(&iter->seq)) {
7052 iter->seq.seq.len = save_len;
7053 break;
7054 }
7055
7056 /*
7057 * This should not be hit, because it should only
7058 * be set if the iter->seq overflowed. But check it
7059 * anyway to be safe.
7060 */
7061 if (ret == TRACE_TYPE_PARTIAL_LINE) {
7062 iter->seq.seq.len = save_len;
7063 break;
7064 }
7065
7066 count = trace_seq_used(&iter->seq) - save_len;
7067 if (rem < count) {
7068 rem = 0;
7069 iter->seq.seq.len = save_len;
7070 break;
7071 }
7072
7073 if (ret != TRACE_TYPE_NO_CONSUME)
7074 trace_consume(iter);
7075 rem -= count;
7076 if (!trace_find_next_entry_inc(iter)) {
7077 rem = 0;
7078 iter->ent = NULL;
7079 break;
7080 }
7081 }
7082
7083 return rem;
7084}
7085
7086static ssize_t tracing_splice_read_pipe(struct file *filp,
7087 loff_t *ppos,
7088 struct pipe_inode_info *pipe,
7089 size_t len,
7090 unsigned int flags)
7091{
7092 struct page *pages_def[PIPE_DEF_BUFFERS];
7093 struct partial_page partial_def[PIPE_DEF_BUFFERS];
7094 struct trace_iterator *iter = filp->private_data;
7095 struct splice_pipe_desc spd = {
7096 .pages = pages_def,
7097 .partial = partial_def,
7098 .nr_pages = 0, /* This gets updated below. */
7099 .nr_pages_max = PIPE_DEF_BUFFERS,
7100 .ops = &default_pipe_buf_ops,
7101 .spd_release = tracing_spd_release_pipe,
7102 };
7103 ssize_t ret;
7104 size_t rem;
7105 unsigned int i;
7106
7107 if (splice_grow_spd(pipe, &spd))
7108 return -ENOMEM;
7109
7110 mutex_lock(&iter->mutex);
7111
7112 if (iter->trace->splice_read) {
7113 ret = iter->trace->splice_read(iter, filp,
7114 ppos, pipe, len, flags);
7115 if (ret)
7116 goto out_err;
7117 }
7118
7119 ret = tracing_wait_pipe(filp);
7120 if (ret <= 0)
7121 goto out_err;
7122
7123 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
7124 ret = -EFAULT;
7125 goto out_err;
7126 }
7127
7128 trace_event_read_lock();
7129 trace_access_lock(iter->cpu_file);
7130
7131 /* Fill as many pages as possible. */
7132 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
7133 spd.pages[i] = alloc_page(GFP_KERNEL);
7134 if (!spd.pages[i])
7135 break;
7136
7137 rem = tracing_fill_pipe_page(rem, iter);
7138
7139 /* Copy the data into the page, so we can start over. */
7140 ret = trace_seq_to_buffer(&iter->seq,
7141 page_address(spd.pages[i]),
7142 trace_seq_used(&iter->seq));
7143 if (ret < 0) {
7144 __free_page(spd.pages[i]);
7145 break;
7146 }
7147 spd.partial[i].offset = 0;
7148 spd.partial[i].len = trace_seq_used(&iter->seq);
7149
7150 trace_seq_init(&iter->seq);
7151 }
7152
7153 trace_access_unlock(iter->cpu_file);
7154 trace_event_read_unlock();
7155 mutex_unlock(&iter->mutex);
7156
7157 spd.nr_pages = i;
7158
7159 if (i)
7160 ret = splice_to_pipe(pipe, &spd);
7161 else
7162 ret = 0;
7163out:
7164 splice_shrink_spd(&spd);
7165 return ret;
7166
7167out_err:
7168 mutex_unlock(&iter->mutex);
7169 goto out;
7170}
7171
7172static ssize_t
7173tracing_entries_read(struct file *filp, char __user *ubuf,
7174 size_t cnt, loff_t *ppos)
7175{
7176 struct inode *inode = file_inode(filp);
7177 struct trace_array *tr = inode->i_private;
7178 int cpu = tracing_get_cpu(inode);
7179 char buf[64];
7180 int r = 0;
7181 ssize_t ret;
7182
7183 mutex_lock(&trace_types_lock);
7184
7185 if (cpu == RING_BUFFER_ALL_CPUS) {
7186 int cpu, buf_size_same;
7187 unsigned long size;
7188
7189 size = 0;
7190 buf_size_same = 1;
7191 /* check if all cpu sizes are same */
7192 for_each_tracing_cpu(cpu) {
7193 /* fill in the size from first enabled cpu */
7194 if (size == 0)
7195 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7196 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7197 buf_size_same = 0;
7198 break;
7199 }
7200 }
7201
7202 if (buf_size_same) {
7203 if (!tr->ring_buffer_expanded)
7204 r = sprintf(buf, "%lu (expanded: %lu)\n",
7205 size >> 10,
7206 trace_buf_size >> 10);
7207 else
7208 r = sprintf(buf, "%lu\n", size >> 10);
7209 } else
7210 r = sprintf(buf, "X\n");
7211 } else
7212 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7213
7214 mutex_unlock(&trace_types_lock);
7215
7216 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7217 return ret;
7218}
7219
7220static ssize_t
7221tracing_entries_write(struct file *filp, const char __user *ubuf,
7222 size_t cnt, loff_t *ppos)
7223{
7224 struct inode *inode = file_inode(filp);
7225 struct trace_array *tr = inode->i_private;
7226 unsigned long val;
7227 int ret;
7228
7229 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7230 if (ret)
7231 return ret;
7232
7233 /* must have at least 1 entry */
7234 if (!val)
7235 return -EINVAL;
7236
7237 /* value is in KB */
7238 val <<= 10;
7239 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7240 if (ret < 0)
7241 return ret;
7242
7243 *ppos += cnt;
7244
7245 return cnt;
7246}
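/*
 * Example (a sketch; paths assume tracefs is mounted at
 * /sys/kernel/tracing):
 *
 *   echo 8192 > /sys/kernel/tracing/buffer_size_kb              # all CPUs
 *   echo 8192 > /sys/kernel/tracing/per_cpu/cpu1/buffer_size_kb
 *
 * The value is per CPU and in KB; writing 0 is rejected with -EINVAL.
 */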
7247
7248static ssize_t
7249tracing_total_entries_read(struct file *filp, char __user *ubuf,
7250 size_t cnt, loff_t *ppos)
7251{
7252 struct trace_array *tr = filp->private_data;
7253 char buf[64];
7254 int r, cpu;
7255 unsigned long size = 0, expanded_size = 0;
7256
7257 mutex_lock(&trace_types_lock);
7258 for_each_tracing_cpu(cpu) {
7259 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7260 if (!tr->ring_buffer_expanded)
7261 expanded_size += trace_buf_size >> 10;
7262 }
7263 if (tr->ring_buffer_expanded)
7264 r = sprintf(buf, "%lu\n", size);
7265 else
7266 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7267 mutex_unlock(&trace_types_lock);
7268
7269 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7270}
7271
7272static ssize_t
7273tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7274 size_t cnt, loff_t *ppos)
7275{
7276 /*
7277 * There is no need to read what the user has written; this function
7278 * exists just to make sure that there is no error when "echo" is used.
7279 */
7280
7281 *ppos += cnt;
7282
7283 return cnt;
7284}
7285
7286static int
7287tracing_free_buffer_release(struct inode *inode, struct file *filp)
7288{
7289 struct trace_array *tr = inode->i_private;
7290
7291 /* disable tracing ? */
7292 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7293 tracer_tracing_off(tr);
7294 /* resize the ring buffer to 0 */
7295 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7296
7297 trace_array_put(tr);
7298
7299 return 0;
7300}
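/*
 * Usage note (illustrative): writing anything to free_buffer shrinks the
 * ring buffer to zero when the file is released, and with the
 * TRACE_ITER_STOP_ON_FREE option set it also turns tracing off first:
 *
 *   echo > /sys/kernel/tracing/free_buffer
 */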
7301
7302static ssize_t
7303tracing_mark_write(struct file *filp, const char __user *ubuf,
7304 size_t cnt, loff_t *fpos)
7305{
7306 struct trace_array *tr = filp->private_data;
7307 struct ring_buffer_event *event;
7308 enum event_trigger_type tt = ETT_NONE;
7309 struct trace_buffer *buffer;
7310 struct print_entry *entry;
7311 ssize_t written;
7312 int size;
7313 int len;
7314
7315/* Used in tracing_mark_raw_write() as well */
7316#define FAULTED_STR "<faulted>"
7317#define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7318
7319 if (tracing_disabled)
7320 return -EINVAL;
7321
7322 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7323 return -EINVAL;
7324
7325 if (cnt > TRACE_BUF_SIZE)
7326 cnt = TRACE_BUF_SIZE;
7327
7328 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7329
7330 size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
7331
7332 /* If less than "<faulted>", then make sure we can still add that */
7333 if (cnt < FAULTED_SIZE)
7334 size += FAULTED_SIZE - cnt;
7335
7336 buffer = tr->array_buffer.buffer;
7337 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7338 tracing_gen_ctx());
7339 if (unlikely(!event))
7340 /* Ring buffer disabled, return as if not open for write */
7341 return -EBADF;
7342
7343 entry = ring_buffer_event_data(event);
7344 entry->ip = _THIS_IP_;
7345
7346 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7347 if (len) {
7348 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7349 cnt = FAULTED_SIZE;
7350 written = -EFAULT;
7351 } else
7352 written = cnt;
7353
7354 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7355 /* do not add \n before testing triggers, but add \0 */
7356 entry->buf[cnt] = '\0';
7357 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7358 }
7359
7360 if (entry->buf[cnt - 1] != '\n') {
7361 entry->buf[cnt] = '\n';
7362 entry->buf[cnt + 1] = '\0';
7363 } else
7364 entry->buf[cnt] = '\0';
7365
7366 if (static_branch_unlikely(&trace_marker_exports_enabled))
7367 ftrace_exports(event, TRACE_EXPORT_MARKER);
7368 __buffer_unlock_commit(buffer, event);
7369
7370 if (tt)
7371 event_triggers_post_call(tr->trace_marker_file, tt);
7372
7373 return written;
7374}
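/*
 * Example (a sketch; path assumes a mounted tracefs):
 *
 *   echo "hello from user space" > /sys/kernel/tracing/trace_marker
 *
 * The string shows up in the trace output as a print event; writes are
 * capped at TRACE_BUF_SIZE, and if the copy from user space faults the
 * marker is recorded as "<faulted>".
 */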
7375
7376/* Limit it for now to 3K (including tag) */
7377#define RAW_DATA_MAX_SIZE (1024*3)
7378
7379static ssize_t
7380tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7381 size_t cnt, loff_t *fpos)
7382{
7383 struct trace_array *tr = filp->private_data;
7384 struct ring_buffer_event *event;
7385 struct trace_buffer *buffer;
7386 struct raw_data_entry *entry;
7387 ssize_t written;
7388 int size;
7389 int len;
7390
7391#define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7392
7393 if (tracing_disabled)
7394 return -EINVAL;
7395
7396 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7397 return -EINVAL;
7398
7399 /* The marker must at least have a tag id */
7400 if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7401 return -EINVAL;
7402
7403 if (cnt > TRACE_BUF_SIZE)
7404 cnt = TRACE_BUF_SIZE;
7405
7406 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7407
7408 size = sizeof(*entry) + cnt;
7409 if (cnt < FAULT_SIZE_ID)
7410 size += FAULT_SIZE_ID - cnt;
7411
7412 buffer = tr->array_buffer.buffer;
7413 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7414 tracing_gen_ctx());
7415 if (!event)
7416 /* Ring buffer disabled, return as if not open for write */
7417 return -EBADF;
7418
7419 entry = ring_buffer_event_data(event);
7420
7421 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7422 if (len) {
7423 entry->id = -1;
7424 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7425 written = -EFAULT;
7426 } else
7427 written = cnt;
7428
7429 __buffer_unlock_commit(buffer, event);
7430
7431 return written;
7432}
7433
7434static int tracing_clock_show(struct seq_file *m, void *v)
7435{
7436 struct trace_array *tr = m->private;
7437 int i;
7438
7439 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7440 seq_printf(m,
7441 "%s%s%s%s", i ? " " : "",
7442 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7443 i == tr->clock_id ? "]" : "");
7444 seq_putc(m, '\n');
7445
7446 return 0;
7447}
7448
7449int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7450{
7451 int i;
7452
7453 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7454 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7455 break;
7456 }
7457 if (i == ARRAY_SIZE(trace_clocks))
7458 return -EINVAL;
7459
7460 mutex_lock(&trace_types_lock);
7461
7462 tr->clock_id = i;
7463
7464 ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7465
7466 /*
7467 * New clock may not be consistent with the previous clock.
7468 * Reset the buffer so that it doesn't have incomparable timestamps.
7469 */
7470 tracing_reset_online_cpus(&tr->array_buffer);
7471
7472#ifdef CONFIG_TRACER_MAX_TRACE
7473 if (tr->max_buffer.buffer)
7474 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7475 tracing_reset_online_cpus(&tr->max_buffer);
7476#endif
7477
7478 mutex_unlock(&trace_types_lock);
7479
7480 return 0;
7481}
7482
7483static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7484 size_t cnt, loff_t *fpos)
7485{
7486 struct seq_file *m = filp->private_data;
7487 struct trace_array *tr = m->private;
7488 char buf[64];
7489 const char *clockstr;
7490 int ret;
7491
7492 if (cnt >= sizeof(buf))
7493 return -EINVAL;
7494
7495 if (copy_from_user(buf, ubuf, cnt))
7496 return -EFAULT;
7497
7498 buf[cnt] = 0;
7499
7500 clockstr = strstrip(buf);
7501
7502 ret = tracing_set_clock(tr, clockstr);
7503 if (ret)
7504 return ret;
7505
7506 *fpos += cnt;
7507
7508 return cnt;
7509}
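/*
 * Example (a sketch; path assumes a mounted tracefs):
 *
 *   cat /sys/kernel/tracing/trace_clock     # e.g. "[local] global ... mono ..."
 *   echo mono > /sys/kernel/tracing/trace_clock
 *
 * As noted in tracing_set_clock(), switching clocks resets the buffers,
 * since timestamps from different clocks are not comparable.
 */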
7510
7511static int tracing_clock_open(struct inode *inode, struct file *file)
7512{
7513 struct trace_array *tr = inode->i_private;
7514 int ret;
7515
7516 ret = tracing_check_open_get_tr(tr);
7517 if (ret)
7518 return ret;
7519
7520 ret = single_open(file, tracing_clock_show, inode->i_private);
7521 if (ret < 0)
7522 trace_array_put(tr);
7523
7524 return ret;
7525}
7526
7527static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7528{
7529 struct trace_array *tr = m->private;
7530
7531 mutex_lock(&trace_types_lock);
7532
7533 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7534 seq_puts(m, "delta [absolute]\n");
7535 else
7536 seq_puts(m, "[delta] absolute\n");
7537
7538 mutex_unlock(&trace_types_lock);
7539
7540 return 0;
7541}
7542
7543static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7544{
7545 struct trace_array *tr = inode->i_private;
7546 int ret;
7547
7548 ret = tracing_check_open_get_tr(tr);
7549 if (ret)
7550 return ret;
7551
7552 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7553 if (ret < 0)
7554 trace_array_put(tr);
7555
7556 return ret;
7557}
7558
7559u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7560{
7561 if (rbe == this_cpu_read(trace_buffered_event))
7562 return ring_buffer_time_stamp(buffer);
7563
7564 return ring_buffer_event_time_stamp(buffer, rbe);
7565}
7566
7567/*
7568 * Set or disable using the per CPU trace_buffer_event when possible.
7569 */
7570int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7571{
7572 int ret = 0;
7573
7574 mutex_lock(&trace_types_lock);
7575
7576 if (set && tr->no_filter_buffering_ref++)
7577 goto out;
7578
7579 if (!set) {
7580 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7581 ret = -EINVAL;
7582 goto out;
7583 }
7584
7585 --tr->no_filter_buffering_ref;
7586 }
7587 out:
7588 mutex_unlock(&trace_types_lock);
7589
7590 return ret;
7591}
7592
7593struct ftrace_buffer_info {
7594 struct trace_iterator iter;
7595 void *spare;
7596 unsigned int spare_cpu;
7597 unsigned int read;
7598};
7599
7600#ifdef CONFIG_TRACER_SNAPSHOT
7601static int tracing_snapshot_open(struct inode *inode, struct file *file)
7602{
7603 struct trace_array *tr = inode->i_private;
7604 struct trace_iterator *iter;
7605 struct seq_file *m;
7606 int ret;
7607
7608 ret = tracing_check_open_get_tr(tr);
7609 if (ret)
7610 return ret;
7611
7612 if (file->f_mode & FMODE_READ) {
7613 iter = __tracing_open(inode, file, true);
7614 if (IS_ERR(iter))
7615 ret = PTR_ERR(iter);
7616 } else {
7617 /* Writes still need the seq_file to hold the private data */
7618 ret = -ENOMEM;
7619 m = kzalloc(sizeof(*m), GFP_KERNEL);
7620 if (!m)
7621 goto out;
7622 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7623 if (!iter) {
7624 kfree(m);
7625 goto out;
7626 }
7627 ret = 0;
7628
7629 iter->tr = tr;
7630 iter->array_buffer = &tr->max_buffer;
7631 iter->cpu_file = tracing_get_cpu(inode);
7632 m->private = iter;
7633 file->private_data = m;
7634 }
7635out:
7636 if (ret < 0)
7637 trace_array_put(tr);
7638
7639 return ret;
7640}
7641
7642static void tracing_swap_cpu_buffer(void *tr)
7643{
7644 update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7645}
7646
7647static ssize_t
7648tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7649 loff_t *ppos)
7650{
7651 struct seq_file *m = filp->private_data;
7652 struct trace_iterator *iter = m->private;
7653 struct trace_array *tr = iter->tr;
7654 unsigned long val;
7655 int ret;
7656
7657 ret = tracing_update_buffers(tr);
7658 if (ret < 0)
7659 return ret;
7660
7661 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7662 if (ret)
7663 return ret;
7664
7665 mutex_lock(&trace_types_lock);
7666
7667 if (tr->current_trace->use_max_tr) {
7668 ret = -EBUSY;
7669 goto out;
7670 }
7671
7672 local_irq_disable();
7673 arch_spin_lock(&tr->max_lock);
7674 if (tr->cond_snapshot)
7675 ret = -EBUSY;
7676 arch_spin_unlock(&tr->max_lock);
7677 local_irq_enable();
7678 if (ret)
7679 goto out;
7680
7681 switch (val) {
7682 case 0:
7683 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7684 ret = -EINVAL;
7685 break;
7686 }
7687 if (tr->allocated_snapshot)
7688 free_snapshot(tr);
7689 break;
7690 case 1:
7691/* Only allow per-cpu swap if the ring buffer supports it */
7692#ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7693 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7694 ret = -EINVAL;
7695 break;
7696 }
7697#endif
7698 if (tr->allocated_snapshot)
7699 ret = resize_buffer_duplicate_size(&tr->max_buffer,
7700 &tr->array_buffer, iter->cpu_file);
7701 else
7702 ret = tracing_alloc_snapshot_instance(tr);
7703 if (ret < 0)
7704 break;
7705 /* Now, we're going to swap */
7706 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7707 local_irq_disable();
7708 update_max_tr(tr, current, smp_processor_id(), NULL);
7709 local_irq_enable();
7710 } else {
7711 smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7712 (void *)tr, 1);
7713 }
7714 break;
7715 default:
7716 if (tr->allocated_snapshot) {
7717 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7718 tracing_reset_online_cpus(&tr->max_buffer);
7719 else
7720 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7721 }
7722 break;
7723 }
7724
7725 if (ret >= 0) {
7726 *ppos += cnt;
7727 ret = cnt;
7728 }
7729out:
7730 mutex_unlock(&trace_types_lock);
7731 return ret;
7732}
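/*
 * The values accepted by the snapshot file above (illustrative, via a
 * mounted tracefs):
 *
 *   echo 0 > snapshot   # free the snapshot buffer (all-CPU file only)
 *   echo 1 > snapshot   # allocate if needed and swap in a snapshot
 *   echo 2 > snapshot   # clear the snapshot buffer without freeing it
 *
 * Any value other than 0 or 1 clears; 2 is simply the conventional choice.
 */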
7733
7734static int tracing_snapshot_release(struct inode *inode, struct file *file)
7735{
7736 struct seq_file *m = file->private_data;
7737 int ret;
7738
7739 ret = tracing_release(inode, file);
7740
7741 if (file->f_mode & FMODE_READ)
7742 return ret;
7743
7744 /* If write only, the seq_file is just a stub */
7745 if (m)
7746 kfree(m->private);
7747 kfree(m);
7748
7749 return 0;
7750}
7751
7752static int tracing_buffers_open(struct inode *inode, struct file *filp);
7753static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7754 size_t count, loff_t *ppos);
7755static int tracing_buffers_release(struct inode *inode, struct file *file);
7756static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7757 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7758
7759static int snapshot_raw_open(struct inode *inode, struct file *filp)
7760{
7761 struct ftrace_buffer_info *info;
7762 int ret;
7763
7764 /* The following checks for tracefs lockdown */
7765 ret = tracing_buffers_open(inode, filp);
7766 if (ret < 0)
7767 return ret;
7768
7769 info = filp->private_data;
7770
7771 if (info->iter.trace->use_max_tr) {
7772 tracing_buffers_release(inode, filp);
7773 return -EBUSY;
7774 }
7775
7776 info->iter.snapshot = true;
7777 info->iter.array_buffer = &info->iter.tr->max_buffer;
7778
7779 return ret;
7780}
7781
7782#endif /* CONFIG_TRACER_SNAPSHOT */
7783
7784
7785static const struct file_operations tracing_thresh_fops = {
7786 .open = tracing_open_generic,
7787 .read = tracing_thresh_read,
7788 .write = tracing_thresh_write,
7789 .llseek = generic_file_llseek,
7790};
7791
7792#ifdef CONFIG_TRACER_MAX_TRACE
7793static const struct file_operations tracing_max_lat_fops = {
7794 .open = tracing_open_generic_tr,
7795 .read = tracing_max_lat_read,
7796 .write = tracing_max_lat_write,
7797 .llseek = generic_file_llseek,
7798 .release = tracing_release_generic_tr,
7799};
7800#endif
7801
7802static const struct file_operations set_tracer_fops = {
7803 .open = tracing_open_generic_tr,
7804 .read = tracing_set_trace_read,
7805 .write = tracing_set_trace_write,
7806 .llseek = generic_file_llseek,
7807 .release = tracing_release_generic_tr,
7808};
7809
7810static const struct file_operations tracing_pipe_fops = {
7811 .open = tracing_open_pipe,
7812 .poll = tracing_poll_pipe,
7813 .read = tracing_read_pipe,
7814 .splice_read = tracing_splice_read_pipe,
7815 .release = tracing_release_pipe,
7816 .llseek = no_llseek,
7817};
7818
7819static const struct file_operations tracing_entries_fops = {
7820 .open = tracing_open_generic_tr,
7821 .read = tracing_entries_read,
7822 .write = tracing_entries_write,
7823 .llseek = generic_file_llseek,
7824 .release = tracing_release_generic_tr,
7825};
7826
7827static const struct file_operations tracing_total_entries_fops = {
7828 .open = tracing_open_generic_tr,
7829 .read = tracing_total_entries_read,
7830 .llseek = generic_file_llseek,
7831 .release = tracing_release_generic_tr,
7832};
7833
7834static const struct file_operations tracing_free_buffer_fops = {
7835 .open = tracing_open_generic_tr,
7836 .write = tracing_free_buffer_write,
7837 .release = tracing_free_buffer_release,
7838};
7839
7840static const struct file_operations tracing_mark_fops = {
7841 .open = tracing_mark_open,
7842 .write = tracing_mark_write,
7843 .release = tracing_release_generic_tr,
7844};
7845
7846static const struct file_operations tracing_mark_raw_fops = {
7847 .open = tracing_mark_open,
7848 .write = tracing_mark_raw_write,
7849 .release = tracing_release_generic_tr,
7850};
7851
7852static const struct file_operations trace_clock_fops = {
7853 .open = tracing_clock_open,
7854 .read = seq_read,
7855 .llseek = seq_lseek,
7856 .release = tracing_single_release_tr,
7857 .write = tracing_clock_write,
7858};
7859
7860static const struct file_operations trace_time_stamp_mode_fops = {
7861 .open = tracing_time_stamp_mode_open,
7862 .read = seq_read,
7863 .llseek = seq_lseek,
7864 .release = tracing_single_release_tr,
7865};
7866
7867#ifdef CONFIG_TRACER_SNAPSHOT
7868static const struct file_operations snapshot_fops = {
7869 .open = tracing_snapshot_open,
7870 .read = seq_read,
7871 .write = tracing_snapshot_write,
7872 .llseek = tracing_lseek,
7873 .release = tracing_snapshot_release,
7874};
7875
7876static const struct file_operations snapshot_raw_fops = {
7877 .open = snapshot_raw_open,
7878 .read = tracing_buffers_read,
7879 .release = tracing_buffers_release,
7880 .splice_read = tracing_buffers_splice_read,
7881 .llseek = no_llseek,
7882};
7883
7884#endif /* CONFIG_TRACER_SNAPSHOT */
7885
7886/*
7887 * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7888 * @filp: The active open file structure
7889 * @ubuf: The user space buffer holding the value to be written
7890 * @cnt: The number of bytes to write
7891 * @ppos: The current "file" position
7892 *
7893 * This function implements the write interface for a struct trace_min_max_param.
7894 * The filp->private_data must point to a trace_min_max_param structure that
7895 * defines where to write the value, the min and the max acceptable values,
7896 * and a lock to protect the write.
7897 */
7898static ssize_t
7899trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7900{
7901 struct trace_min_max_param *param = filp->private_data;
7902 u64 val;
7903 int err;
7904
7905 if (!param)
7906 return -EFAULT;
7907
7908 err = kstrtoull_from_user(s: ubuf, count: cnt, base: 10, res: &val);
7909 if (err)
7910 return err;
7911
7912 if (param->lock)
7913 mutex_lock(param->lock);
7914
7915 if (param->min && val < *param->min)
7916 err = -EINVAL;
7917
7918 if (param->max && val > *param->max)
7919 err = -EINVAL;
7920
7921 if (!err)
7922 *param->val = val;
7923
7924 if (param->lock)
7925 mutex_unlock(lock: param->lock);
7926
7927 if (err)
7928 return err;
7929
7930 return cnt;
7931}
7932
7933/*
7934 * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7935 * @filp: The active open file structure
7936 * @ubuf: The userspace provided buffer to read value into
7937 * @cnt: The maximum number of bytes to read
7938 * @ppos: The current "file" position
7939 *
7940 * This function implements the read interface for a struct trace_min_max_param.
7941 * The filp->private_data must point to a trace_min_max_param struct with valid
7942 * data.
7943 */
7944static ssize_t
7945trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7946{
7947 struct trace_min_max_param *param = filp->private_data;
7948 char buf[U64_STR_SIZE];
7949 int len;
7950 u64 val;
7951
7952 if (!param)
7953 return -EFAULT;
7954
7955 val = *param->val;
7956
7957 if (cnt > sizeof(buf))
7958 cnt = sizeof(buf);
7959
7960 len = snprintf(buf, size: sizeof(buf), fmt: "%llu\n", val);
7961
7962 return simple_read_from_buffer(to: ubuf, count: cnt, ppos, from: buf, available: len);
7963}
7964
7965const struct file_operations trace_min_max_fops = {
7966 .open = tracing_open_generic,
7967 .read = trace_min_max_read,
7968 .write = trace_min_max_write,
7969};
7970
7971#define TRACING_LOG_ERRS_MAX 8
7972#define TRACING_LOG_LOC_MAX 128
7973
7974#define CMD_PREFIX " Command: "
7975
7976struct err_info {
7977 const char **errs; /* ptr to loc-specific array of err strings */
7978 u8 type; /* index into errs -> specific err string */
7979 u16 pos; /* caret position */
7980 u64 ts;
7981};
7982
7983struct tracing_log_err {
7984 struct list_head list;
7985 struct err_info info;
7986 char loc[TRACING_LOG_LOC_MAX]; /* err location */
7987 char *cmd; /* what caused err */
7988};
7989
7990static DEFINE_MUTEX(tracing_err_log_lock);
7991
7992static struct tracing_log_err *alloc_tracing_log_err(int len)
7993{
7994 struct tracing_log_err *err;
7995
7996 err = kzalloc(size: sizeof(*err), GFP_KERNEL);
7997 if (!err)
7998 return ERR_PTR(error: -ENOMEM);
7999
8000 err->cmd = kzalloc(size: len, GFP_KERNEL);
8001 if (!err->cmd) {
8002 kfree(objp: err);
8003 return ERR_PTR(error: -ENOMEM);
8004 }
8005
8006 return err;
8007}
8008
8009static void free_tracing_log_err(struct tracing_log_err *err)
8010{
8011 kfree(objp: err->cmd);
8012 kfree(objp: err);
8013}
8014
8015static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
8016 int len)
8017{
8018 struct tracing_log_err *err;
8019 char *cmd;
8020
8021 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
8022 err = alloc_tracing_log_err(len);
8023 if (PTR_ERR(ptr: err) != -ENOMEM)
8024 tr->n_err_log_entries++;
8025
8026 return err;
8027 }
8028 cmd = kzalloc(size: len, GFP_KERNEL);
8029 if (!cmd)
8030 return ERR_PTR(error: -ENOMEM);
8031 err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
8032 kfree(objp: err->cmd);
8033 err->cmd = cmd;
8034 list_del(entry: &err->list);
8035
8036 return err;
8037}
8038
8039/**
8040 * err_pos - find the position of a string within a command for error careting
8041 * @cmd: The tracing command that caused the error
8042 * @str: The string to position the caret at within @cmd
8043 *
8044 * Finds the position of the first occurrence of @str within @cmd. The
8045 * return value can be passed to tracing_log_err() for caret placement
8046 * within @cmd.
8047 *
8048 * Returns the index within @cmd of the first occurrence of @str or 0
8049 * if @str was not found.
8050 */
8051unsigned int err_pos(char *cmd, const char *str)
8052{
8053 char *found;
8054
8055 if (WARN_ON(!strlen(cmd)))
8056 return 0;
8057
8058 found = strstr(cmd, str);
8059 if (found)
8060 return found - cmd;
8061
8062 return 0;
8063}
8064
8065/**
8066 * tracing_log_err - write an error to the tracing error log
8067 * @tr: The associated trace array for the error (NULL for top level array)
8068 * @loc: A string describing where the error occurred
8069 * @cmd: The tracing command that caused the error
8070 * @errs: The array of loc-specific static error strings
8071 * @type: The index into errs[], which produces the specific static err string
8072 * @pos: The position the caret should be placed in the cmd
8073 *
8074 * Writes an error into tracing/error_log of the form:
8075 *
8076 * <loc>: error: <text>
8077 * Command: <cmd>
8078 * ^
8079 *
8080 * tracing/error_log is a small log file containing the last
8081 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated
8082 * unless there has been a tracing error; the log can be cleared, and
8083 * its memory freed, by writing the empty string to it in truncation
8084 * mode, i.e. echo > tracing/error_log.
8085 *
8086 * NOTE: the @errs array along with the @type param are used to
8087 * produce a static error string - this string is not copied and saved
8088 * when the error is logged - only a pointer to it is saved. See
8089 * existing callers for examples of how static strings are typically
8090 * defined for use with tracing_log_err().
8091 */
8092void tracing_log_err(struct trace_array *tr,
8093 const char *loc, const char *cmd,
8094 const char **errs, u8 type, u16 pos)
8095{
8096 struct tracing_log_err *err;
8097 int len = 0;
8098
8099 if (!tr)
8100 tr = &global_trace;
8101
8102 len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8103
8104 mutex_lock(&tracing_err_log_lock);
8105 err = get_tracing_log_err(tr, len);
8106 if (PTR_ERR(ptr: err) == -ENOMEM) {
8107 mutex_unlock(lock: &tracing_err_log_lock);
8108 return;
8109 }
8110
8111 snprintf(buf: err->loc, TRACING_LOG_LOC_MAX, fmt: "%s: error: ", loc);
8112 snprintf(buf: err->cmd, size: len, fmt: "\n" CMD_PREFIX "%s\n", cmd);
8113
8114 err->info.errs = errs;
8115 err->info.type = type;
8116 err->info.pos = pos;
8117 err->info.ts = local_clock();
8118
8119 list_add_tail(new: &err->list, head: &tr->err_log);
8120 mutex_unlock(lock: &tracing_err_log_lock);
8121}
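/*
 * Illustrative sketch, not part of the original file: how a command
 * parser typically reports a failure through tracing_log_err() and
 * err_pos() above.  The "hist" location tag, the error-string array and
 * the field name are invented; see the hist trigger and synthetic event
 * code for real callers.
 */
#if 0
static const char *example_errs[] = {
	"Duplicate field name",
	"Field not found",
};

static void example_report_error(struct trace_array *tr, char *cmd,
				 const char *field)
{
	/* Place the caret under the first occurrence of @field in @cmd */
	tracing_log_err(tr, "hist", cmd, example_errs,
			1, err_pos(cmd, field));
}
#endif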
8122
8123static void clear_tracing_err_log(struct trace_array *tr)
8124{
8125 struct tracing_log_err *err, *next;
8126
8127 mutex_lock(&tracing_err_log_lock);
8128 list_for_each_entry_safe(err, next, &tr->err_log, list) {
8129 list_del(entry: &err->list);
8130 free_tracing_log_err(err);
8131 }
8132
8133 tr->n_err_log_entries = 0;
8134 mutex_unlock(lock: &tracing_err_log_lock);
8135}
8136
8137static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8138{
8139 struct trace_array *tr = m->private;
8140
8141 mutex_lock(&tracing_err_log_lock);
8142
8143 return seq_list_start(head: &tr->err_log, pos: *pos);
8144}
8145
8146static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8147{
8148 struct trace_array *tr = m->private;
8149
8150 return seq_list_next(v, head: &tr->err_log, ppos: pos);
8151}
8152
8153static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8154{
8155 mutex_unlock(lock: &tracing_err_log_lock);
8156}
8157
8158static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8159{
8160 u16 i;
8161
8162 for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8163 seq_putc(m, c: ' ');
8164 for (i = 0; i < pos; i++)
8165 seq_putc(m, c: ' ');
8166 seq_puts(m, s: "^\n");
8167}
8168
8169static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8170{
8171 struct tracing_log_err *err = v;
8172
8173 if (err) {
8174 const char *err_text = err->info.errs[err->info.type];
8175 u64 sec = err->info.ts;
8176 u32 nsec;
8177
8178 nsec = do_div(sec, NSEC_PER_SEC);
8179 seq_printf(m, fmt: "[%5llu.%06u] %s%s", sec, nsec / 1000,
8180 err->loc, err_text);
8181 seq_printf(m, fmt: "%s", err->cmd);
8182 tracing_err_log_show_pos(m, pos: err->info.pos);
8183 }
8184
8185 return 0;
8186}
8187
8188static const struct seq_operations tracing_err_log_seq_ops = {
8189 .start = tracing_err_log_seq_start,
8190 .next = tracing_err_log_seq_next,
8191 .stop = tracing_err_log_seq_stop,
8192 .show = tracing_err_log_seq_show
8193};
8194
8195static int tracing_err_log_open(struct inode *inode, struct file *file)
8196{
8197 struct trace_array *tr = inode->i_private;
8198 int ret = 0;
8199
8200 ret = tracing_check_open_get_tr(tr);
8201 if (ret)
8202 return ret;
8203
8204 /* If this file was opened for write, then erase contents */
8205 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8206 clear_tracing_err_log(tr);
8207
8208 if (file->f_mode & FMODE_READ) {
8209 ret = seq_open(file, &tracing_err_log_seq_ops);
8210 if (!ret) {
8211 struct seq_file *m = file->private_data;
8212 m->private = tr;
8213 } else {
8214 trace_array_put(tr);
8215 }
8216 }
8217 return ret;
8218}
8219
8220static ssize_t tracing_err_log_write(struct file *file,
8221 const char __user *buffer,
8222 size_t count, loff_t *ppos)
8223{
8224 return count;
8225}
8226
8227static int tracing_err_log_release(struct inode *inode, struct file *file)
8228{
8229 struct trace_array *tr = inode->i_private;
8230
8231 trace_array_put(tr);
8232
8233 if (file->f_mode & FMODE_READ)
8234 seq_release(inode, file);
8235
8236 return 0;
8237}
8238
8239static const struct file_operations tracing_err_log_fops = {
8240 .open = tracing_err_log_open,
8241 .write = tracing_err_log_write,
8242 .read = seq_read,
8243 .llseek = tracing_lseek,
8244 .release = tracing_err_log_release,
8245};
8246
8247static int tracing_buffers_open(struct inode *inode, struct file *filp)
8248{
8249 struct trace_array *tr = inode->i_private;
8250 struct ftrace_buffer_info *info;
8251 int ret;
8252
8253 ret = tracing_check_open_get_tr(tr);
8254 if (ret)
8255 return ret;
8256
8257 info = kvzalloc(size: sizeof(*info), GFP_KERNEL);
8258 if (!info) {
8259 trace_array_put(tr);
8260 return -ENOMEM;
8261 }
8262
8263 mutex_lock(&trace_types_lock);
8264
8265 info->iter.tr = tr;
8266 info->iter.cpu_file = tracing_get_cpu(inode);
8267 info->iter.trace = tr->current_trace;
8268 info->iter.array_buffer = &tr->array_buffer;
8269 info->spare = NULL;
8270 /* Force reading ring buffer for first read */
8271 info->read = (unsigned int)-1;
8272
8273 filp->private_data = info;
8274
8275 tr->trace_ref++;
8276
8277 mutex_unlock(lock: &trace_types_lock);
8278
8279 ret = nonseekable_open(inode, filp);
8280 if (ret < 0)
8281 trace_array_put(tr);
8282
8283 return ret;
8284}
8285
8286static __poll_t
8287tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8288{
8289 struct ftrace_buffer_info *info = filp->private_data;
8290 struct trace_iterator *iter = &info->iter;
8291
8292 return trace_poll(iter, filp, poll_table);
8293}
8294
8295static ssize_t
8296tracing_buffers_read(struct file *filp, char __user *ubuf,
8297 size_t count, loff_t *ppos)
8298{
8299 struct ftrace_buffer_info *info = filp->private_data;
8300 struct trace_iterator *iter = &info->iter;
8301 ssize_t ret = 0;
8302 ssize_t size;
8303
8304 if (!count)
8305 return 0;
8306
8307#ifdef CONFIG_TRACER_MAX_TRACE
8308 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8309 return -EBUSY;
8310#endif
8311
8312 if (!info->spare) {
8313 info->spare = ring_buffer_alloc_read_page(buffer: iter->array_buffer->buffer,
8314 cpu: iter->cpu_file);
8315 if (IS_ERR(ptr: info->spare)) {
8316 ret = PTR_ERR(ptr: info->spare);
8317 info->spare = NULL;
8318 } else {
8319 info->spare_cpu = iter->cpu_file;
8320 }
8321 }
8322 if (!info->spare)
8323 return ret;
8324
8325 /* Do we have previous read data to read? */
8326 if (info->read < PAGE_SIZE)
8327 goto read;
8328
8329 again:
8330 trace_access_lock(cpu: iter->cpu_file);
8331 ret = ring_buffer_read_page(buffer: iter->array_buffer->buffer,
8332 data_page: &info->spare,
8333 len: count,
8334 cpu: iter->cpu_file, full: 0);
8335 trace_access_unlock(cpu: iter->cpu_file);
8336
8337 if (ret < 0) {
8338 if (trace_empty(iter)) {
8339 if ((filp->f_flags & O_NONBLOCK))
8340 return -EAGAIN;
8341
8342 ret = wait_on_pipe(iter, full: 0);
8343 if (ret)
8344 return ret;
8345
8346 goto again;
8347 }
8348 return 0;
8349 }
8350
8351 info->read = 0;
8352 read:
8353 size = PAGE_SIZE - info->read;
8354 if (size > count)
8355 size = count;
8356
8357 ret = copy_to_user(to: ubuf, from: info->spare + info->read, n: size);
8358 if (ret == size)
8359 return -EFAULT;
8360
8361 size -= ret;
8362
8363 *ppos += size;
8364 info->read += size;
8365
8366 return size;
8367}
8368
8369static int tracing_buffers_release(struct inode *inode, struct file *file)
8370{
8371 struct ftrace_buffer_info *info = file->private_data;
8372 struct trace_iterator *iter = &info->iter;
8373
8374 mutex_lock(&trace_types_lock);
8375
8376 iter->tr->trace_ref--;
8377
8378 __trace_array_put(this_tr: iter->tr);
8379
8380 iter->wait_index++;
8381 /* Make sure the waiters see the new wait_index */
8382 smp_wmb();
8383
8384 ring_buffer_wake_waiters(buffer: iter->array_buffer->buffer, cpu: iter->cpu_file);
8385
8386 if (info->spare)
8387 ring_buffer_free_read_page(buffer: iter->array_buffer->buffer,
8388 cpu: info->spare_cpu, data: info->spare);
8389 kvfree(addr: info);
8390
8391 mutex_unlock(lock: &trace_types_lock);
8392
8393 return 0;
8394}
8395
8396struct buffer_ref {
8397 struct trace_buffer *buffer;
8398 void *page;
8399 int cpu;
8400 refcount_t refcount;
8401};
8402
8403static void buffer_ref_release(struct buffer_ref *ref)
8404{
8405 if (!refcount_dec_and_test(r: &ref->refcount))
8406 return;
8407 ring_buffer_free_read_page(buffer: ref->buffer, cpu: ref->cpu, data: ref->page);
8408 kfree(objp: ref);
8409}
8410
8411static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8412 struct pipe_buffer *buf)
8413{
8414 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8415
8416 buffer_ref_release(ref);
8417 buf->private = 0;
8418}
8419
8420static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8421 struct pipe_buffer *buf)
8422{
8423 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8424
8425 if (refcount_read(r: &ref->refcount) > INT_MAX/2)
8426 return false;
8427
8428 refcount_inc(r: &ref->refcount);
8429 return true;
8430}
8431
8432/* Pipe buffer operations for a buffer. */
8433static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8434 .release = buffer_pipe_buf_release,
8435 .get = buffer_pipe_buf_get,
8436};
8437
8438/*
8439 * Callback from splice_to_pipe(), used to release pages left in the
8440 * spd if we errored out while filling the pipe.
8441 */
8442static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8443{
8444 struct buffer_ref *ref =
8445 (struct buffer_ref *)spd->partial[i].private;
8446
8447 buffer_ref_release(ref);
8448 spd->partial[i].private = 0;
8449}
8450
8451static ssize_t
8452tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8453 struct pipe_inode_info *pipe, size_t len,
8454 unsigned int flags)
8455{
8456 struct ftrace_buffer_info *info = file->private_data;
8457 struct trace_iterator *iter = &info->iter;
8458 struct partial_page partial_def[PIPE_DEF_BUFFERS];
8459 struct page *pages_def[PIPE_DEF_BUFFERS];
8460 struct splice_pipe_desc spd = {
8461 .pages = pages_def,
8462 .partial = partial_def,
8463 .nr_pages_max = PIPE_DEF_BUFFERS,
8464 .ops = &buffer_pipe_buf_ops,
8465 .spd_release = buffer_spd_release,
8466 };
8467 struct buffer_ref *ref;
8468 int entries, i;
8469 ssize_t ret = 0;
8470
8471#ifdef CONFIG_TRACER_MAX_TRACE
8472 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8473 return -EBUSY;
8474#endif
8475
8476 if (*ppos & (PAGE_SIZE - 1))
8477 return -EINVAL;
8478
8479 if (len & (PAGE_SIZE - 1)) {
8480 if (len < PAGE_SIZE)
8481 return -EINVAL;
8482 len &= PAGE_MASK;
8483 }
8484
8485 if (splice_grow_spd(pipe, &spd))
8486 return -ENOMEM;
8487
8488 again:
8489 trace_access_lock(cpu: iter->cpu_file);
8490 entries = ring_buffer_entries_cpu(buffer: iter->array_buffer->buffer, cpu: iter->cpu_file);
8491
8492 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8493 struct page *page;
8494 int r;
8495
8496 ref = kzalloc(size: sizeof(*ref), GFP_KERNEL);
8497 if (!ref) {
8498 ret = -ENOMEM;
8499 break;
8500 }
8501
8502 refcount_set(r: &ref->refcount, n: 1);
8503 ref->buffer = iter->array_buffer->buffer;
8504 ref->page = ring_buffer_alloc_read_page(buffer: ref->buffer, cpu: iter->cpu_file);
8505 if (IS_ERR(ptr: ref->page)) {
8506 ret = PTR_ERR(ptr: ref->page);
8507 ref->page = NULL;
8508 kfree(objp: ref);
8509 break;
8510 }
8511 ref->cpu = iter->cpu_file;
8512
8513 r = ring_buffer_read_page(buffer: ref->buffer, data_page: &ref->page,
8514 len, cpu: iter->cpu_file, full: 1);
8515 if (r < 0) {
8516 ring_buffer_free_read_page(buffer: ref->buffer, cpu: ref->cpu,
8517 data: ref->page);
8518 kfree(objp: ref);
8519 break;
8520 }
8521
8522 page = virt_to_page(ref->page);
8523
8524 spd.pages[i] = page;
8525 spd.partial[i].len = PAGE_SIZE;
8526 spd.partial[i].offset = 0;
8527 spd.partial[i].private = (unsigned long)ref;
8528 spd.nr_pages++;
8529 *ppos += PAGE_SIZE;
8530
8531 entries = ring_buffer_entries_cpu(buffer: iter->array_buffer->buffer, cpu: iter->cpu_file);
8532 }
8533
8534 trace_access_unlock(cpu: iter->cpu_file);
8535 spd.nr_pages = i;
8536
8537 /* did we read anything? */
8538 if (!spd.nr_pages) {
8539 long wait_index;
8540
8541 if (ret)
8542 goto out;
8543
8544 ret = -EAGAIN;
8545 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8546 goto out;
8547
8548 wait_index = READ_ONCE(iter->wait_index);
8549
8550 ret = wait_on_pipe(iter, full: iter->tr->buffer_percent);
8551 if (ret)
8552 goto out;
8553
8554 /* No need to wait after waking up when tracing is off */
8555 if (!tracer_tracing_is_on(tr: iter->tr))
8556 goto out;
8557
8558 /* Make sure we see the new wait_index */
8559 smp_rmb();
8560 if (wait_index != iter->wait_index)
8561 goto out;
8562
8563 goto again;
8564 }
8565
8566 ret = splice_to_pipe(pipe, &spd);
8567out:
8568 splice_shrink_spd(&spd);
8569
8570 return ret;
8571}
8572
8573/* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8574static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8575{
8576 struct ftrace_buffer_info *info = file->private_data;
8577 struct trace_iterator *iter = &info->iter;
8578
8579 if (cmd)
8580 return -ENOIOCTLCMD;
8581
8582 mutex_lock(&trace_types_lock);
8583
8584 iter->wait_index++;
8585 /* Make sure the waiters see the new wait_index */
8586 smp_wmb();
8587
8588 ring_buffer_wake_waiters(buffer: iter->array_buffer->buffer, cpu: iter->cpu_file);
8589
8590 mutex_unlock(lock: &trace_types_lock);
8591 return 0;
8592}
8593
8594static const struct file_operations tracing_buffers_fops = {
8595 .open = tracing_buffers_open,
8596 .read = tracing_buffers_read,
8597 .poll = tracing_buffers_poll,
8598 .release = tracing_buffers_release,
8599 .splice_read = tracing_buffers_splice_read,
8600 .unlocked_ioctl = tracing_buffers_ioctl,
8601 .llseek = no_llseek,
8602};
8603
8604static ssize_t
8605tracing_stats_read(struct file *filp, char __user *ubuf,
8606 size_t count, loff_t *ppos)
8607{
8608 struct inode *inode = file_inode(f: filp);
8609 struct trace_array *tr = inode->i_private;
8610 struct array_buffer *trace_buf = &tr->array_buffer;
8611 int cpu = tracing_get_cpu(inode);
8612 struct trace_seq *s;
8613 unsigned long cnt;
8614 unsigned long long t;
8615 unsigned long usec_rem;
8616
8617 s = kmalloc(size: sizeof(*s), GFP_KERNEL);
8618 if (!s)
8619 return -ENOMEM;
8620
8621 trace_seq_init(s);
8622
8623 cnt = ring_buffer_entries_cpu(buffer: trace_buf->buffer, cpu);
8624 trace_seq_printf(s, fmt: "entries: %ld\n", cnt);
8625
8626 cnt = ring_buffer_overrun_cpu(buffer: trace_buf->buffer, cpu);
8627 trace_seq_printf(s, fmt: "overrun: %ld\n", cnt);
8628
8629 cnt = ring_buffer_commit_overrun_cpu(buffer: trace_buf->buffer, cpu);
8630 trace_seq_printf(s, fmt: "commit overrun: %ld\n", cnt);
8631
8632 cnt = ring_buffer_bytes_cpu(buffer: trace_buf->buffer, cpu);
8633 trace_seq_printf(s, fmt: "bytes: %ld\n", cnt);
8634
8635 if (trace_clocks[tr->clock_id].in_ns) {
8636 /* local or global for trace_clock */
8637 t = ns2usecs(nsec: ring_buffer_oldest_event_ts(buffer: trace_buf->buffer, cpu));
8638 usec_rem = do_div(t, USEC_PER_SEC);
8639 trace_seq_printf(s, fmt: "oldest event ts: %5llu.%06lu\n",
8640 t, usec_rem);
8641
8642 t = ns2usecs(nsec: ring_buffer_time_stamp(buffer: trace_buf->buffer));
8643 usec_rem = do_div(t, USEC_PER_SEC);
8644 trace_seq_printf(s, fmt: "now ts: %5llu.%06lu\n", t, usec_rem);
8645 } else {
8646 /* counter or tsc mode for trace_clock */
8647 trace_seq_printf(s, fmt: "oldest event ts: %llu\n",
8648 ring_buffer_oldest_event_ts(buffer: trace_buf->buffer, cpu));
8649
8650 trace_seq_printf(s, fmt: "now ts: %llu\n",
8651 ring_buffer_time_stamp(buffer: trace_buf->buffer));
8652 }
8653
8654 cnt = ring_buffer_dropped_events_cpu(buffer: trace_buf->buffer, cpu);
8655 trace_seq_printf(s, fmt: "dropped events: %ld\n", cnt);
8656
8657 cnt = ring_buffer_read_events_cpu(buffer: trace_buf->buffer, cpu);
8658 trace_seq_printf(s, fmt: "read events: %ld\n", cnt);
8659
8660 count = simple_read_from_buffer(to: ubuf, count, ppos,
8661 from: s->buffer, available: trace_seq_used(s));
8662
8663 kfree(objp: s);
8664
8665 return count;
8666}
8667
8668static const struct file_operations tracing_stats_fops = {
8669 .open = tracing_open_generic_tr,
8670 .read = tracing_stats_read,
8671 .llseek = generic_file_llseek,
8672 .release = tracing_release_generic_tr,
8673};
8674
8675#ifdef CONFIG_DYNAMIC_FTRACE
8676
8677static ssize_t
8678tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8679 size_t cnt, loff_t *ppos)
8680{
8681 ssize_t ret;
8682 char *buf;
8683 int r;
8684
8685 /* 256 should be plenty to hold the amount needed */
8686 buf = kmalloc(size: 256, GFP_KERNEL);
8687 if (!buf)
8688 return -ENOMEM;
8689
8690 r = scnprintf(buf, size: 256, fmt: "%ld pages:%ld groups: %ld\n",
8691 ftrace_update_tot_cnt,
8692 ftrace_number_of_pages,
8693 ftrace_number_of_groups);
8694
8695 ret = simple_read_from_buffer(to: ubuf, count: cnt, ppos, from: buf, available: r);
8696 kfree(objp: buf);
8697 return ret;
8698}
8699
8700static const struct file_operations tracing_dyn_info_fops = {
8701 .open = tracing_open_generic,
8702 .read = tracing_read_dyn_info,
8703 .llseek = generic_file_llseek,
8704};
8705#endif /* CONFIG_DYNAMIC_FTRACE */
8706
8707#if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8708static void
8709ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8710 struct trace_array *tr, struct ftrace_probe_ops *ops,
8711 void *data)
8712{
8713 tracing_snapshot_instance(tr);
8714}
8715
8716static void
8717ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8718 struct trace_array *tr, struct ftrace_probe_ops *ops,
8719 void *data)
8720{
8721 struct ftrace_func_mapper *mapper = data;
8722 long *count = NULL;
8723
8724 if (mapper)
8725 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8726
8727 if (count) {
8728
8729 if (*count <= 0)
8730 return;
8731
8732 (*count)--;
8733 }
8734
8735 tracing_snapshot_instance(tr);
8736}
8737
8738static int
8739ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8740 struct ftrace_probe_ops *ops, void *data)
8741{
8742 struct ftrace_func_mapper *mapper = data;
8743 long *count = NULL;
8744
8745 seq_printf(m, fmt: "%ps:", (void *)ip);
8746
8747 seq_puts(m, s: "snapshot");
8748
8749 if (mapper)
8750 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8751
8752 if (count)
8753 seq_printf(m, fmt: ":count=%ld\n", *count);
8754 else
8755 seq_puts(m, s: ":unlimited\n");
8756
8757 return 0;
8758}
8759
8760static int
8761ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8762 unsigned long ip, void *init_data, void **data)
8763{
8764 struct ftrace_func_mapper *mapper = *data;
8765
8766 if (!mapper) {
8767 mapper = allocate_ftrace_func_mapper();
8768 if (!mapper)
8769 return -ENOMEM;
8770 *data = mapper;
8771 }
8772
8773 return ftrace_func_mapper_add_ip(mapper, ip, data: init_data);
8774}
8775
8776static void
8777ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8778 unsigned long ip, void *data)
8779{
8780 struct ftrace_func_mapper *mapper = data;
8781
8782 if (!ip) {
8783 if (!mapper)
8784 return;
8785 free_ftrace_func_mapper(mapper, NULL);
8786 return;
8787 }
8788
8789 ftrace_func_mapper_remove_ip(mapper, ip);
8790}
8791
8792static struct ftrace_probe_ops snapshot_probe_ops = {
8793 .func = ftrace_snapshot,
8794 .print = ftrace_snapshot_print,
8795};
8796
8797static struct ftrace_probe_ops snapshot_count_probe_ops = {
8798 .func = ftrace_count_snapshot,
8799 .print = ftrace_snapshot_print,
8800 .init = ftrace_snapshot_init,
8801 .free = ftrace_snapshot_free,
8802};
8803
8804static int
8805ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8806 char *glob, char *cmd, char *param, int enable)
8807{
8808 struct ftrace_probe_ops *ops;
8809 void *count = (void *)-1;
8810 char *number;
8811 int ret;
8812
8813 if (!tr)
8814 return -ENODEV;
8815
8816 /* hash funcs only work with set_ftrace_filter */
8817 if (!enable)
8818 return -EINVAL;
8819
8820 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8821
8822 if (glob[0] == '!')
8823 return unregister_ftrace_function_probe_func(glob: glob+1, tr, ops);
8824
8825 if (!param)
8826 goto out_reg;
8827
8828 number = strsep(&param, ":");
8829
8830 if (!strlen(number))
8831 goto out_reg;
8832
8833 /*
8834 * We use the callback data field (which is a pointer)
8835 * as our counter.
8836 */
8837 ret = kstrtoul(s: number, base: 0, res: (unsigned long *)&count);
8838 if (ret)
8839 return ret;
8840
8841 out_reg:
8842 ret = tracing_alloc_snapshot_instance(tr);
8843 if (ret < 0)
8844 goto out;
8845
8846 ret = register_ftrace_function_probe(glob, tr, ops, data: count);
8847
8848 out:
8849 return ret < 0 ? ret : 0;
8850}
8851
8852static struct ftrace_func_command ftrace_snapshot_cmd = {
8853 .name = "snapshot",
8854 .func = ftrace_trace_snapshot_callback,
8855};
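/*
 * Usage sketch, not part of the original file: once register_snapshot_cmd()
 * below has run, the probe is armed through set_ftrace_filter, e.g.
 *
 *   echo 'schedule:snapshot'    > set_ftrace_filter   # snapshot on every hit
 *   echo 'schedule:snapshot:5'  > set_ftrace_filter   # only the first 5 hits
 *   echo '!schedule:snapshot'   > set_ftrace_filter   # remove the probe
 *
 * The optional count after the second ':' is what the strsep()/kstrtoul()
 * path in ftrace_trace_snapshot_callback() parses into the probe data.
 */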
8856
8857static __init int register_snapshot_cmd(void)
8858{
8859 return register_ftrace_command(cmd: &ftrace_snapshot_cmd);
8860}
8861#else
8862static inline __init int register_snapshot_cmd(void) { return 0; }
8863#endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8864
8865static struct dentry *tracing_get_dentry(struct trace_array *tr)
8866{
8867 if (WARN_ON(!tr->dir))
8868 return ERR_PTR(error: -ENODEV);
8869
8870 /* Top directory uses NULL as the parent */
8871 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8872 return NULL;
8873
8874 /* All sub buffers have a descriptor */
8875 return tr->dir;
8876}
8877
8878static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8879{
8880 struct dentry *d_tracer;
8881
8882 if (tr->percpu_dir)
8883 return tr->percpu_dir;
8884
8885 d_tracer = tracing_get_dentry(tr);
8886 if (IS_ERR(ptr: d_tracer))
8887 return NULL;
8888
8889 tr->percpu_dir = tracefs_create_dir(name: "per_cpu", parent: d_tracer);
8890
8891 MEM_FAIL(!tr->percpu_dir,
8892 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8893
8894 return tr->percpu_dir;
8895}
8896
8897static struct dentry *
8898trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8899 void *data, long cpu, const struct file_operations *fops)
8900{
8901 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8902
8903 if (ret) /* See tracing_get_cpu() */
8904 d_inode(dentry: ret)->i_cdev = (void *)(cpu + 1);
8905 return ret;
8906}
8907
8908static void
8909tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8910{
8911 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8912 struct dentry *d_cpu;
8913 char cpu_dir[30]; /* 30 characters should be more than enough */
8914
8915 if (!d_percpu)
8916 return;
8917
8918 snprintf(buf: cpu_dir, size: 30, fmt: "cpu%ld", cpu);
8919 d_cpu = tracefs_create_dir(name: cpu_dir, parent: d_percpu);
8920 if (!d_cpu) {
8921 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8922 return;
8923 }
8924
8925 /* per cpu trace_pipe */
8926 trace_create_cpu_file(name: "trace_pipe", TRACE_MODE_READ, parent: d_cpu,
8927 data: tr, cpu, fops: &tracing_pipe_fops);
8928
8929 /* per cpu trace */
8930 trace_create_cpu_file(name: "trace", TRACE_MODE_WRITE, parent: d_cpu,
8931 data: tr, cpu, fops: &tracing_fops);
8932
8933 trace_create_cpu_file(name: "trace_pipe_raw", TRACE_MODE_READ, parent: d_cpu,
8934 data: tr, cpu, fops: &tracing_buffers_fops);
8935
8936 trace_create_cpu_file(name: "stats", TRACE_MODE_READ, parent: d_cpu,
8937 data: tr, cpu, fops: &tracing_stats_fops);
8938
8939 trace_create_cpu_file(name: "buffer_size_kb", TRACE_MODE_READ, parent: d_cpu,
8940 data: tr, cpu, fops: &tracing_entries_fops);
8941
8942#ifdef CONFIG_TRACER_SNAPSHOT
8943 trace_create_cpu_file(name: "snapshot", TRACE_MODE_WRITE, parent: d_cpu,
8944 data: tr, cpu, fops: &snapshot_fops);
8945
8946 trace_create_cpu_file(name: "snapshot_raw", TRACE_MODE_READ, parent: d_cpu,
8947 data: tr, cpu, fops: &snapshot_raw_fops);
8948#endif
8949}
8950
8951#ifdef CONFIG_FTRACE_SELFTEST
8952/* Let selftest have access to static functions in this file */
8953#include "trace_selftest.c"
8954#endif
8955
8956static ssize_t
8957trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8958 loff_t *ppos)
8959{
8960 struct trace_option_dentry *topt = filp->private_data;
8961 char *buf;
8962
8963 if (topt->flags->val & topt->opt->bit)
8964 buf = "1\n";
8965 else
8966 buf = "0\n";
8967
8968 return simple_read_from_buffer(to: ubuf, count: cnt, ppos, from: buf, available: 2);
8969}
8970
8971static ssize_t
8972trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8973 loff_t *ppos)
8974{
8975 struct trace_option_dentry *topt = filp->private_data;
8976 unsigned long val;
8977 int ret;
8978
8979 ret = kstrtoul_from_user(s: ubuf, count: cnt, base: 10, res: &val);
8980 if (ret)
8981 return ret;
8982
8983 if (val != 0 && val != 1)
8984 return -EINVAL;
8985
8986 if (!!(topt->flags->val & topt->opt->bit) != val) {
8987 mutex_lock(&trace_types_lock);
8988 ret = __set_tracer_option(tr: topt->tr, tracer_flags: topt->flags,
8989 opts: topt->opt, neg: !val);
8990 mutex_unlock(lock: &trace_types_lock);
8991 if (ret)
8992 return ret;
8993 }
8994
8995 *ppos += cnt;
8996
8997 return cnt;
8998}
8999
9000static int tracing_open_options(struct inode *inode, struct file *filp)
9001{
9002 struct trace_option_dentry *topt = inode->i_private;
9003 int ret;
9004
9005 ret = tracing_check_open_get_tr(tr: topt->tr);
9006 if (ret)
9007 return ret;
9008
9009 filp->private_data = inode->i_private;
9010 return 0;
9011}
9012
9013static int tracing_release_options(struct inode *inode, struct file *file)
9014{
9015 struct trace_option_dentry *topt = file->private_data;
9016
9017 trace_array_put(topt->tr);
9018 return 0;
9019}
9020
9021static const struct file_operations trace_options_fops = {
9022 .open = tracing_open_options,
9023 .read = trace_options_read,
9024 .write = trace_options_write,
9025 .llseek = generic_file_llseek,
9026 .release = tracing_release_options,
9027};
9028
9029/*
9030 * In order to pass in both the trace_array descriptor as well as the index
9031 * to the flag that the trace option file represents, the trace_array
9032 * has a character array of trace_flags_index[], which holds the index
9033 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9034 * The address of this character array is passed to the flag option file
9035 * read/write callbacks.
9036 *
9037 * In order to extract both the index and the trace_array descriptor,
9038 * get_tr_index() uses the following algorithm.
9039 *
9040 * idx = *ptr;
9041 *
9042 * The pointer itself is the address of one element of that index
9043 * array, so dereferencing it yields that element's own index
9044 * (remember index[1] == 1).
9045 *
9046 * Then, subtracting that index from the pointer lands at the start
9047 * of the array:
9048 * ptr - idx == &index[0]
9049 *
9050 * Then a simple container_of() from that pointer gets us to the
9051 * trace_array descriptor.
9052 */
9053static void get_tr_index(void *data, struct trace_array **ptr,
9054 unsigned int *pindex)
9055{
9056 *pindex = *(unsigned char *)data;
9057
9058 *ptr = container_of(data - *pindex, struct trace_array,
9059 trace_flags_index);
9060}
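/*
 * Worked illustration, not part of the original file: if @data points at
 * trace_flags_index[3], then *data == 3 and data - 3 is the start of
 * trace_flags_index[], from which container_of() recovers the enclosing
 * trace_array.  The helper below only demonstrates the contract and is
 * not used anywhere.
 */
#if 0
static void example_get_tr_index(struct trace_array *tr)
{
	struct trace_array *found;
	unsigned int index;

	get_tr_index(&tr->trace_flags_index[3], &found, &index);
	/* Here: found == tr and index == 3 */
}
#endif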
9061
9062static ssize_t
9063trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9064 loff_t *ppos)
9065{
9066 void *tr_index = filp->private_data;
9067 struct trace_array *tr;
9068 unsigned int index;
9069 char *buf;
9070
9071 get_tr_index(data: tr_index, ptr: &tr, pindex: &index);
9072
9073 if (tr->trace_flags & (1 << index))
9074 buf = "1\n";
9075 else
9076 buf = "0\n";
9077
9078 return simple_read_from_buffer(to: ubuf, count: cnt, ppos, from: buf, available: 2);
9079}
9080
9081static ssize_t
9082trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9083 loff_t *ppos)
9084{
9085 void *tr_index = filp->private_data;
9086 struct trace_array *tr;
9087 unsigned int index;
9088 unsigned long val;
9089 int ret;
9090
9091 get_tr_index(data: tr_index, ptr: &tr, pindex: &index);
9092
9093 ret = kstrtoul_from_user(s: ubuf, count: cnt, base: 10, res: &val);
9094 if (ret)
9095 return ret;
9096
9097 if (val != 0 && val != 1)
9098 return -EINVAL;
9099
9100 mutex_lock(&event_mutex);
9101 mutex_lock(&trace_types_lock);
9102 ret = set_tracer_flag(tr, mask: 1 << index, enabled: val);
9103 mutex_unlock(lock: &trace_types_lock);
9104 mutex_unlock(lock: &event_mutex);
9105
9106 if (ret < 0)
9107 return ret;
9108
9109 *ppos += cnt;
9110
9111 return cnt;
9112}
9113
9114static const struct file_operations trace_options_core_fops = {
9115 .open = tracing_open_generic,
9116 .read = trace_options_core_read,
9117 .write = trace_options_core_write,
9118 .llseek = generic_file_llseek,
9119};
9120
9121struct dentry *trace_create_file(const char *name,
9122 umode_t mode,
9123 struct dentry *parent,
9124 void *data,
9125 const struct file_operations *fops)
9126{
9127 struct dentry *ret;
9128
9129 ret = tracefs_create_file(name, mode, parent, data, fops);
9130 if (!ret)
9131 pr_warn("Could not create tracefs '%s' entry\n", name);
9132
9133 return ret;
9134}
9135
9136
9137static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9138{
9139 struct dentry *d_tracer;
9140
9141 if (tr->options)
9142 return tr->options;
9143
9144 d_tracer = tracing_get_dentry(tr);
9145 if (IS_ERR(ptr: d_tracer))
9146 return NULL;
9147
9148 tr->options = tracefs_create_dir(name: "options", parent: d_tracer);
9149 if (!tr->options) {
9150 pr_warn("Could not create tracefs directory 'options'\n");
9151 return NULL;
9152 }
9153
9154 return tr->options;
9155}
9156
9157static void
9158create_trace_option_file(struct trace_array *tr,
9159 struct trace_option_dentry *topt,
9160 struct tracer_flags *flags,
9161 struct tracer_opt *opt)
9162{
9163 struct dentry *t_options;
9164
9165 t_options = trace_options_init_dentry(tr);
9166 if (!t_options)
9167 return;
9168
9169 topt->flags = flags;
9170 topt->opt = opt;
9171 topt->tr = tr;
9172
9173 topt->entry = trace_create_file(name: opt->name, TRACE_MODE_WRITE,
9174 parent: t_options, data: topt, fops: &trace_options_fops);
9175
9176}
9177
9178static void
9179create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9180{
9181 struct trace_option_dentry *topts;
9182 struct trace_options *tr_topts;
9183 struct tracer_flags *flags;
9184 struct tracer_opt *opts;
9185 int cnt;
9186 int i;
9187
9188 if (!tracer)
9189 return;
9190
9191 flags = tracer->flags;
9192
9193 if (!flags || !flags->opts)
9194 return;
9195
9196 /*
9197 * If this is an instance, only create flags for tracers
9198 * the instance may have.
9199 */
9200 if (!trace_ok_for_array(t: tracer, tr))
9201 return;
9202
9203 for (i = 0; i < tr->nr_topts; i++) {
9204 /* Make sure there are no duplicate flags. */
9205 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9206 return;
9207 }
9208
9209 opts = flags->opts;
9210
9211 for (cnt = 0; opts[cnt].name; cnt++)
9212 ;
9213
9214 topts = kcalloc(n: cnt + 1, size: sizeof(*topts), GFP_KERNEL);
9215 if (!topts)
9216 return;
9217
9218 tr_topts = krealloc(objp: tr->topts, new_size: sizeof(*tr->topts) * (tr->nr_topts + 1),
9219 GFP_KERNEL);
9220 if (!tr_topts) {
9221 kfree(objp: topts);
9222 return;
9223 }
9224
9225 tr->topts = tr_topts;
9226 tr->topts[tr->nr_topts].tracer = tracer;
9227 tr->topts[tr->nr_topts].topts = topts;
9228 tr->nr_topts++;
9229
9230 for (cnt = 0; opts[cnt].name; cnt++) {
9231 create_trace_option_file(tr, topt: &topts[cnt], flags,
9232 opt: &opts[cnt]);
9233 MEM_FAIL(topts[cnt].entry == NULL,
9234 "Failed to create trace option: %s",
9235 opts[cnt].name);
9236 }
9237}
9238
9239static struct dentry *
9240create_trace_option_core_file(struct trace_array *tr,
9241 const char *option, long index)
9242{
9243 struct dentry *t_options;
9244
9245 t_options = trace_options_init_dentry(tr);
9246 if (!t_options)
9247 return NULL;
9248
9249 return trace_create_file(name: option, TRACE_MODE_WRITE, parent: t_options,
9250 data: (void *)&tr->trace_flags_index[index],
9251 fops: &trace_options_core_fops);
9252}
9253
9254static void create_trace_options_dir(struct trace_array *tr)
9255{
9256 struct dentry *t_options;
9257 bool top_level = tr == &global_trace;
9258 int i;
9259
9260 t_options = trace_options_init_dentry(tr);
9261 if (!t_options)
9262 return;
9263
9264 for (i = 0; trace_options[i]; i++) {
9265 if (top_level ||
9266 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9267 create_trace_option_core_file(tr, option: trace_options[i], index: i);
9268 }
9269}
9270
9271static ssize_t
9272rb_simple_read(struct file *filp, char __user *ubuf,
9273 size_t cnt, loff_t *ppos)
9274{
9275 struct trace_array *tr = filp->private_data;
9276 char buf[64];
9277 int r;
9278
9279 r = tracer_tracing_is_on(tr);
9280 r = sprintf(buf, fmt: "%d\n", r);
9281
9282 return simple_read_from_buffer(to: ubuf, count: cnt, ppos, from: buf, available: r);
9283}
9284
9285static ssize_t
9286rb_simple_write(struct file *filp, const char __user *ubuf,
9287 size_t cnt, loff_t *ppos)
9288{
9289 struct trace_array *tr = filp->private_data;
9290 struct trace_buffer *buffer = tr->array_buffer.buffer;
9291 unsigned long val;
9292 int ret;
9293
9294 ret = kstrtoul_from_user(s: ubuf, count: cnt, base: 10, res: &val);
9295 if (ret)
9296 return ret;
9297
9298 if (buffer) {
9299 mutex_lock(&trace_types_lock);
9300 if (!!val == tracer_tracing_is_on(tr)) {
9301 val = 0; /* do nothing */
9302 } else if (val) {
9303 tracer_tracing_on(tr);
9304 if (tr->current_trace->start)
9305 tr->current_trace->start(tr);
9306 } else {
9307 tracer_tracing_off(tr);
9308 if (tr->current_trace->stop)
9309 tr->current_trace->stop(tr);
9310 /* Wake up any waiters */
9311 ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9312 }
9313 mutex_unlock(lock: &trace_types_lock);
9314 }
9315
9316 (*ppos)++;
9317
9318 return cnt;
9319}
9320
9321static const struct file_operations rb_simple_fops = {
9322 .open = tracing_open_generic_tr,
9323 .read = rb_simple_read,
9324 .write = rb_simple_write,
9325 .release = tracing_release_generic_tr,
9326 .llseek = default_llseek,
9327};
9328
9329static ssize_t
9330buffer_percent_read(struct file *filp, char __user *ubuf,
9331 size_t cnt, loff_t *ppos)
9332{
9333 struct trace_array *tr = filp->private_data;
9334 char buf[64];
9335 int r;
9336
9337 r = tr->buffer_percent;
9338 r = sprintf(buf, fmt: "%d\n", r);
9339
9340 return simple_read_from_buffer(to: ubuf, count: cnt, ppos, from: buf, available: r);
9341}
9342
9343static ssize_t
9344buffer_percent_write(struct file *filp, const char __user *ubuf,
9345 size_t cnt, loff_t *ppos)
9346{
9347 struct trace_array *tr = filp->private_data;
9348 unsigned long val;
9349 int ret;
9350
9351 ret = kstrtoul_from_user(s: ubuf, count: cnt, base: 10, res: &val);
9352 if (ret)
9353 return ret;
9354
9355 if (val > 100)
9356 return -EINVAL;
9357
9358 tr->buffer_percent = val;
9359
9360 (*ppos)++;
9361
9362 return cnt;
9363}
9364
9365static const struct file_operations buffer_percent_fops = {
9366 .open = tracing_open_generic_tr,
9367 .read = buffer_percent_read,
9368 .write = buffer_percent_write,
9369 .release = tracing_release_generic_tr,
9370 .llseek = default_llseek,
9371};
9372
9373static struct dentry *trace_instance_dir;
9374
9375static void
9376init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9377
9378static int
9379allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9380{
9381 enum ring_buffer_flags rb_flags;
9382
9383 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9384
9385 buf->tr = tr;
9386
9387 buf->buffer = ring_buffer_alloc(size, rb_flags);
9388 if (!buf->buffer)
9389 return -ENOMEM;
9390
9391 buf->data = alloc_percpu(struct trace_array_cpu);
9392 if (!buf->data) {
9393 ring_buffer_free(buffer: buf->buffer);
9394 buf->buffer = NULL;
9395 return -ENOMEM;
9396 }
9397
9398 /* Allocate the first page for all buffers */
9399 set_buffer_entries(buf: &tr->array_buffer,
9400 val: ring_buffer_size(buffer: tr->array_buffer.buffer, cpu: 0));
9401
9402 return 0;
9403}
9404
9405static void free_trace_buffer(struct array_buffer *buf)
9406{
9407 if (buf->buffer) {
9408 ring_buffer_free(buffer: buf->buffer);
9409 buf->buffer = NULL;
9410 free_percpu(pdata: buf->data);
9411 buf->data = NULL;
9412 }
9413}
9414
9415static int allocate_trace_buffers(struct trace_array *tr, int size)
9416{
9417 int ret;
9418
9419 ret = allocate_trace_buffer(tr, buf: &tr->array_buffer, size);
9420 if (ret)
9421 return ret;
9422
9423#ifdef CONFIG_TRACER_MAX_TRACE
9424 ret = allocate_trace_buffer(tr, buf: &tr->max_buffer,
9425 size: allocate_snapshot ? size : 1);
9426 if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9427 free_trace_buffer(buf: &tr->array_buffer);
9428 return -ENOMEM;
9429 }
9430 tr->allocated_snapshot = allocate_snapshot;
9431
9432 allocate_snapshot = false;
9433#endif
9434
9435 return 0;
9436}
9437
9438static void free_trace_buffers(struct trace_array *tr)
9439{
9440 if (!tr)
9441 return;
9442
9443 free_trace_buffer(buf: &tr->array_buffer);
9444
9445#ifdef CONFIG_TRACER_MAX_TRACE
9446 free_trace_buffer(buf: &tr->max_buffer);
9447#endif
9448}
9449
9450static void init_trace_flags_index(struct trace_array *tr)
9451{
9452 int i;
9453
9454 /* Used by the trace options files */
9455 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9456 tr->trace_flags_index[i] = i;
9457}
9458
9459static void __update_tracer_options(struct trace_array *tr)
9460{
9461 struct tracer *t;
9462
9463 for (t = trace_types; t; t = t->next)
9464 add_tracer_options(tr, t);
9465}
9466
9467static void update_tracer_options(struct trace_array *tr)
9468{
9469 mutex_lock(&trace_types_lock);
9470 tracer_options_updated = true;
9471 __update_tracer_options(tr);
9472 mutex_unlock(lock: &trace_types_lock);
9473}
9474
9475/* Must have trace_types_lock held */
9476struct trace_array *trace_array_find(const char *instance)
9477{
9478 struct trace_array *tr, *found = NULL;
9479
9480 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9481 if (tr->name && strcmp(tr->name, instance) == 0) {
9482 found = tr;
9483 break;
9484 }
9485 }
9486
9487 return found;
9488}
9489
9490struct trace_array *trace_array_find_get(const char *instance)
9491{
9492 struct trace_array *tr;
9493
9494 mutex_lock(&trace_types_lock);
9495 tr = trace_array_find(instance);
9496 if (tr)
9497 tr->ref++;
9498 mutex_unlock(lock: &trace_types_lock);
9499
9500 return tr;
9501}
9502
9503static int trace_array_create_dir(struct trace_array *tr)
9504{
9505 int ret;
9506
9507 tr->dir = tracefs_create_dir(name: tr->name, parent: trace_instance_dir);
9508 if (!tr->dir)
9509 return -EINVAL;
9510
9511 ret = event_trace_add_tracer(parent: tr->dir, tr);
9512 if (ret) {
9513 tracefs_remove(dentry: tr->dir);
9514 return ret;
9515 }
9516
9517 init_tracer_tracefs(tr, d_tracer: tr->dir);
9518 __update_tracer_options(tr);
9519
9520 return ret;
9521}
9522
9523static struct trace_array *trace_array_create(const char *name)
9524{
9525 struct trace_array *tr;
9526 int ret;
9527
9528 ret = -ENOMEM;
9529 tr = kzalloc(size: sizeof(*tr), GFP_KERNEL);
9530 if (!tr)
9531 return ERR_PTR(error: ret);
9532
9533 tr->name = kstrdup(s: name, GFP_KERNEL);
9534 if (!tr->name)
9535 goto out_free_tr;
9536
9537 if (!alloc_cpumask_var(mask: &tr->tracing_cpumask, GFP_KERNEL))
9538 goto out_free_tr;
9539
9540 if (!zalloc_cpumask_var(mask: &tr->pipe_cpumask, GFP_KERNEL))
9541 goto out_free_tr;
9542
9543 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9544
9545 cpumask_copy(dstp: tr->tracing_cpumask, cpu_all_mask);
9546
9547 raw_spin_lock_init(&tr->start_lock);
9548
9549 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9550
9551 tr->current_trace = &nop_trace;
9552
9553 INIT_LIST_HEAD(list: &tr->systems);
9554 INIT_LIST_HEAD(list: &tr->events);
9555 INIT_LIST_HEAD(list: &tr->hist_vars);
9556 INIT_LIST_HEAD(list: &tr->err_log);
9557
9558 if (allocate_trace_buffers(tr, size: trace_buf_size) < 0)
9559 goto out_free_tr;
9560
9561 /* The ring buffer is expanded by default */
9562 trace_set_ring_buffer_expanded(tr);
9563
9564 if (ftrace_allocate_ftrace_ops(tr) < 0)
9565 goto out_free_tr;
9566
9567 ftrace_init_trace_array(tr);
9568
9569 init_trace_flags_index(tr);
9570
9571 if (trace_instance_dir) {
9572 ret = trace_array_create_dir(tr);
9573 if (ret)
9574 goto out_free_tr;
9575 } else
9576 __trace_early_add_events(tr);
9577
9578 list_add(new: &tr->list, head: &ftrace_trace_arrays);
9579
9580 tr->ref++;
9581
9582 return tr;
9583
9584 out_free_tr:
9585 ftrace_free_ftrace_ops(tr);
9586 free_trace_buffers(tr);
9587 free_cpumask_var(mask: tr->pipe_cpumask);
9588 free_cpumask_var(mask: tr->tracing_cpumask);
9589 kfree(objp: tr->name);
9590 kfree(objp: tr);
9591
9592 return ERR_PTR(error: ret);
9593}
9594
9595static int instance_mkdir(const char *name)
9596{
9597 struct trace_array *tr;
9598 int ret;
9599
9600 mutex_lock(&event_mutex);
9601 mutex_lock(&trace_types_lock);
9602
9603 ret = -EEXIST;
9604 if (trace_array_find(instance: name))
9605 goto out_unlock;
9606
9607 tr = trace_array_create(name);
9608
9609 ret = PTR_ERR_OR_ZERO(ptr: tr);
9610
9611out_unlock:
9612 mutex_unlock(lock: &trace_types_lock);
9613 mutex_unlock(lock: &event_mutex);
9614 return ret;
9615}
9616
9617/**
9618 * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9619 * @name: The name of the trace array to be looked up/created.
9620 *
9621 * Returns a pointer to the trace array with the given name, or NULL
9622 * if it cannot be created.
9623 *
9624 * NOTE: This function increments the reference counter associated with the
9625 * trace array returned. This makes sure it cannot be freed while in use.
9626 * Use trace_array_put() once the trace array is no longer needed.
9627 * If the trace_array is to be freed, trace_array_destroy() needs to
9628 * be called after the trace_array_put(), or simply let user space delete
9629 * it from the tracefs instances directory. But until the
9630 * trace_array_put() is called, user space can not delete it.
9631 *
9632 */
9633struct trace_array *trace_array_get_by_name(const char *name)
9634{
9635 struct trace_array *tr;
9636
9637 mutex_lock(&event_mutex);
9638 mutex_lock(&trace_types_lock);
9639
9640 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9641 if (tr->name && strcmp(tr->name, name) == 0)
9642 goto out_unlock;
9643 }
9644
9645 tr = trace_array_create(name);
9646
9647 if (IS_ERR(ptr: tr))
9648 tr = NULL;
9649out_unlock:
9650 if (tr)
9651 tr->ref++;
9652
9653 mutex_unlock(lock: &trace_types_lock);
9654 mutex_unlock(lock: &event_mutex);
9655 return tr;
9656}
9657EXPORT_SYMBOL_GPL(trace_array_get_by_name);
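/*
 * Illustrative sketch, not part of the original file: typical module use
 * of the instance API documented above.  The "example" instance name is
 * invented, and trace_array_printk() is assumed to be the usual way a
 * module writes into its private instance.
 */
#if 0
static struct trace_array *example_tr;

static int __init example_instance_init(void)
{
	example_tr = trace_array_get_by_name("example");
	if (!example_tr)
		return -ENOMEM;

	trace_array_printk(example_tr, _THIS_IP_, "instance created\n");
	return 0;
}

static void __exit example_instance_exit(void)
{
	/* Drop our reference, then tear the instance down */
	trace_array_put(example_tr);
	trace_array_destroy(example_tr);
}
#endif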
9658
9659static int __remove_instance(struct trace_array *tr)
9660{
9661 int i;
9662
9663 /* Reference counter for a newly created trace array = 1. */
9664 if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9665 return -EBUSY;
9666
9667 list_del(entry: &tr->list);
9668
9669 /* Disable all the flags that were enabled coming in */
9670 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9671 if ((1 << i) & ZEROED_TRACE_FLAGS)
9672 set_tracer_flag(tr, mask: 1 << i, enabled: 0);
9673 }
9674
9675 tracing_set_nop(tr);
9676 clear_ftrace_function_probes(tr);
9677 event_trace_del_tracer(tr);
9678 ftrace_clear_pids(tr);
9679 ftrace_destroy_function_files(tr);
9680 tracefs_remove(dentry: tr->dir);
9681 free_percpu(pdata: tr->last_func_repeats);
9682 free_trace_buffers(tr);
9683 clear_tracing_err_log(tr);
9684
9685 for (i = 0; i < tr->nr_topts; i++) {
9686 kfree(objp: tr->topts[i].topts);
9687 }
9688 kfree(objp: tr->topts);
9689
9690 free_cpumask_var(mask: tr->pipe_cpumask);
9691 free_cpumask_var(mask: tr->tracing_cpumask);
9692 kfree(objp: tr->name);
9693 kfree(objp: tr);
9694
9695 return 0;
9696}
9697
9698int trace_array_destroy(struct trace_array *this_tr)
9699{
9700 struct trace_array *tr;
9701 int ret;
9702
9703 if (!this_tr)
9704 return -EINVAL;
9705
9706 mutex_lock(&event_mutex);
9707 mutex_lock(&trace_types_lock);
9708
9709 ret = -ENODEV;
9710
9711 /* Make sure the trace array exists before destroying it. */
9712 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9713 if (tr == this_tr) {
9714 ret = __remove_instance(tr);
9715 break;
9716 }
9717 }
9718
9719 mutex_unlock(lock: &trace_types_lock);
9720 mutex_unlock(lock: &event_mutex);
9721
9722 return ret;
9723}
9724EXPORT_SYMBOL_GPL(trace_array_destroy);
9725
9726static int instance_rmdir(const char *name)
9727{
9728 struct trace_array *tr;
9729 int ret;
9730
9731 mutex_lock(&event_mutex);
9732 mutex_lock(&trace_types_lock);
9733
9734 ret = -ENODEV;
9735 tr = trace_array_find(instance: name);
9736 if (tr)
9737 ret = __remove_instance(tr);
9738
9739 mutex_unlock(lock: &trace_types_lock);
9740 mutex_unlock(lock: &event_mutex);
9741
9742 return ret;
9743}
9744
9745static __init void create_trace_instances(struct dentry *d_tracer)
9746{
9747 struct trace_array *tr;
9748
9749 trace_instance_dir = tracefs_create_instance_dir(name: "instances", parent: d_tracer,
9750 mkdir: instance_mkdir,
9751 rmdir: instance_rmdir);
9752 if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9753 return;
9754
9755 mutex_lock(&event_mutex);
9756 mutex_lock(&trace_types_lock);
9757
9758 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9759 if (!tr->name)
9760 continue;
9761 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9762 "Failed to create instance directory\n"))
9763 break;
9764 }
9765
9766 mutex_unlock(lock: &trace_types_lock);
9767 mutex_unlock(lock: &event_mutex);
9768}
9769
9770static void
9771init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9772{
9773 int cpu;
9774
9775 trace_create_file(name: "available_tracers", TRACE_MODE_READ, parent: d_tracer,
9776 data: tr, fops: &show_traces_fops);
9777
9778 trace_create_file(name: "current_tracer", TRACE_MODE_WRITE, parent: d_tracer,
9779 data: tr, fops: &set_tracer_fops);
9780
9781 trace_create_file(name: "tracing_cpumask", TRACE_MODE_WRITE, parent: d_tracer,
9782 data: tr, fops: &tracing_cpumask_fops);
9783
9784 trace_create_file(name: "trace_options", TRACE_MODE_WRITE, parent: d_tracer,
9785 data: tr, fops: &tracing_iter_fops);
9786
9787 trace_create_file(name: "trace", TRACE_MODE_WRITE, parent: d_tracer,
9788 data: tr, fops: &tracing_fops);
9789
9790 trace_create_file(name: "trace_pipe", TRACE_MODE_READ, parent: d_tracer,
9791 data: tr, fops: &tracing_pipe_fops);
9792
9793 trace_create_file(name: "buffer_size_kb", TRACE_MODE_WRITE, parent: d_tracer,
9794 data: tr, fops: &tracing_entries_fops);
9795
9796 trace_create_file(name: "buffer_total_size_kb", TRACE_MODE_READ, parent: d_tracer,
9797 data: tr, fops: &tracing_total_entries_fops);
9798
9799 trace_create_file(name: "free_buffer", mode: 0200, parent: d_tracer,
9800 data: tr, fops: &tracing_free_buffer_fops);
9801
9802 trace_create_file(name: "trace_marker", mode: 0220, parent: d_tracer,
9803 data: tr, fops: &tracing_mark_fops);
9804
9805 tr->trace_marker_file = __find_event_file(tr, system: "ftrace", event: "print");
9806
9807 trace_create_file(name: "trace_marker_raw", mode: 0220, parent: d_tracer,
9808 data: tr, fops: &tracing_mark_raw_fops);
9809
9810 trace_create_file(name: "trace_clock", TRACE_MODE_WRITE, parent: d_tracer, data: tr,
9811 fops: &trace_clock_fops);
9812
9813 trace_create_file(name: "tracing_on", TRACE_MODE_WRITE, parent: d_tracer,
9814 data: tr, fops: &rb_simple_fops);
9815
9816 trace_create_file(name: "timestamp_mode", TRACE_MODE_READ, parent: d_tracer, data: tr,
9817 fops: &trace_time_stamp_mode_fops);
9818
9819 tr->buffer_percent = 50;
9820
9821 trace_create_file(name: "buffer_percent", TRACE_MODE_WRITE, parent: d_tracer,
9822 data: tr, fops: &buffer_percent_fops);
9823
9824 create_trace_options_dir(tr);
9825
9826#ifdef CONFIG_TRACER_MAX_TRACE
9827 trace_create_maxlat_file(tr, d_tracer);
9828#endif
9829
9830 if (ftrace_create_function_files(tr, parent: d_tracer))
9831 MEM_FAIL(1, "Could not allocate function filter files");
9832
9833#ifdef CONFIG_TRACER_SNAPSHOT
9834 trace_create_file(name: "snapshot", TRACE_MODE_WRITE, parent: d_tracer,
9835 data: tr, fops: &snapshot_fops);
9836#endif
9837
9838 trace_create_file(name: "error_log", TRACE_MODE_WRITE, parent: d_tracer,
9839 data: tr, fops: &tracing_err_log_fops);
9840
9841 for_each_tracing_cpu(cpu)
9842 tracing_init_tracefs_percpu(tr, cpu);
9843
9844 ftrace_init_tracefs(tr, d_tracer);
9845}
9846
9847static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9848{
9849 struct vfsmount *mnt;
9850 struct file_system_type *type;
9851
9852 /*
9853 * To maintain backward compatibility for tools that mount
9854 * debugfs to get to the tracing facility, tracefs is automatically
9855 * mounted to the debugfs/tracing directory.
9856 */
9857 type = get_fs_type(name: "tracefs");
9858 if (!type)
9859 return NULL;
9860 mnt = vfs_submount(mountpoint: mntpt, type, name: "tracefs", NULL);
9861 put_filesystem(fs: type);
9862 if (IS_ERR(ptr: mnt))
9863 return NULL;
9864 mntget(mnt);
9865
9866 return mnt;
9867}
9868
9869/**
9870 * tracing_init_dentry - initialize top level trace array
9871 *
9872 * This is called when creating files or directories in the tracing
9873 * directory. It is called via fs_initcall() by any of the boot up code
9874 * and expects to return the dentry of the top level tracing directory.
9875 */
9876int tracing_init_dentry(void)
9877{
9878 struct trace_array *tr = &global_trace;
9879
9880 if (security_locked_down(what: LOCKDOWN_TRACEFS)) {
9881 pr_warn("Tracing disabled due to lockdown\n");
9882 return -EPERM;
9883 }
9884
9885 /* The top level trace array uses NULL as parent */
9886 if (tr->dir)
9887 return 0;
9888
9889 if (WARN_ON(!tracefs_initialized()))
9890 return -ENODEV;
9891
9892 /*
9893 * As there may still be users that expect the tracing
9894 * files to exist in debugfs/tracing, we must automount
9895 * the tracefs file system there, so older tools still
9896 * work with the newer kernel.
9897 */
9898 tr->dir = debugfs_create_automount(name: "tracing", NULL,
9899 f: trace_automount, NULL);
9900
9901 return 0;
9902}
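
/*
 * Note that /sys/kernel/tracing is the native tracefs mount point; the
 * automount above only keeps legacy paths such as
 * /sys/kernel/debug/tracing working for tools that still go through
 * debugfs.
 */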

extern struct trace_eval_map *__start_ftrace_eval_maps[];
extern struct trace_eval_map *__stop_ftrace_eval_maps[];

static struct workqueue_struct *eval_map_wq __initdata;
static struct work_struct eval_map_work __initdata;
static struct work_struct tracerfs_init_work __initdata;

static void __init eval_map_work_func(struct work_struct *work)
{
	int len;

	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
}
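
/*
 * eval_map_work_func() above inserts the eval maps that
 * TRACE_DEFINE_ENUM() and TRACE_DEFINE_SIZEOF() place between the
 * __start/__stop_ftrace_eval_maps linker symbols. Inserting them lets the
 * enum/sizeof symbols used in event print formats be resolved to their
 * numeric values, so user space parsers of the format files keep working.
 */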

static int __init trace_eval_init(void)
{
	INIT_WORK(&eval_map_work, eval_map_work_func);

	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
	if (!eval_map_wq) {
		pr_err("Unable to allocate eval_map_wq\n");
		/* Fall back to doing the work synchronously here */
		eval_map_work_func(&eval_map_work);
		return -ENOMEM;
	}

	queue_work(eval_map_wq, &eval_map_work);
	return 0;
}

subsys_initcall(trace_eval_init);

static int __init trace_eval_sync(void)
{
	/* Make sure the eval map updates are finished */
	if (eval_map_wq)
		destroy_workqueue(eval_map_wq);
	return 0;
}

late_initcall_sync(trace_eval_sync);


#ifdef CONFIG_MODULES
static void trace_module_add_evals(struct module *mod)
{
	if (!mod->num_trace_evals)
		return;

	/*
	 * Modules with bad taint do not have events created, do
	 * not bother with enums either.
	 */
	if (trace_module_has_bad_taint(mod))
		return;

	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
}

#ifdef CONFIG_TRACE_EVAL_MAP_FILE
static void trace_module_remove_evals(struct module *mod)
{
	union trace_eval_map_item *map;
	union trace_eval_map_item **last = &trace_eval_maps;

	if (!mod->num_trace_evals)
		return;

	mutex_lock(&trace_eval_mutex);

	map = trace_eval_maps;

	while (map) {
		if (map->head.mod == mod)
			break;
		map = trace_eval_jmp_to_tail(map);
		last = &map->tail.next;
		map = map->tail.next;
	}
	if (!map)
		goto out;

	*last = trace_eval_jmp_to_tail(map)->tail.next;
	kfree(map);
 out:
	mutex_unlock(&trace_eval_mutex);
}
#else
static inline void trace_module_remove_evals(struct module *mod) { }
#endif /* CONFIG_TRACE_EVAL_MAP_FILE */

static int trace_module_notify(struct notifier_block *self,
			       unsigned long val, void *data)
{
	struct module *mod = data;

	switch (val) {
	case MODULE_STATE_COMING:
		trace_module_add_evals(mod);
		break;
	case MODULE_STATE_GOING:
		trace_module_remove_evals(mod);
		break;
	}

	return NOTIFY_OK;
}

static struct notifier_block trace_module_nb = {
	.notifier_call = trace_module_notify,
	.priority = 0,
};
#endif /* CONFIG_MODULES */

static __init void tracer_init_tracefs_work_func(struct work_struct *work)
{

	event_trace_init();

	init_tracer_tracefs(&global_trace, NULL);
	ftrace_init_tracefs_toplevel(&global_trace, NULL);

	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
			  &global_trace, &tracing_thresh_fops);

	trace_create_file("README", TRACE_MODE_READ, NULL,
			  NULL, &tracing_readme_fops);

	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
			  NULL, &tracing_saved_cmdlines_fops);

	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
			  NULL, &tracing_saved_cmdlines_size_fops);

	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
			  NULL, &tracing_saved_tgids_fops);

	trace_create_eval_file(NULL);

#ifdef CONFIG_MODULES
	register_module_notifier(&trace_module_nb);
#endif

#ifdef CONFIG_DYNAMIC_FTRACE
	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
			  NULL, &tracing_dyn_info_fops);
#endif

	create_trace_instances(NULL);

	update_tracer_options(&global_trace);
}

static __init int tracer_init_tracefs(void)
{
	int ret;

	trace_access_lock_init();

	ret = tracing_init_dentry();
	if (ret)
		return 0;

	if (eval_map_wq) {
		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
		queue_work(eval_map_wq, &tracerfs_init_work);
	} else {
		tracer_init_tracefs_work_func(NULL);
	}

	rv_init_interface();

	return 0;
}

fs_initcall(tracer_init_tracefs);

static int trace_die_panic_handler(struct notifier_block *self,
				   unsigned long ev, void *unused);

static struct notifier_block trace_panic_notifier = {
	.notifier_call = trace_die_panic_handler,
	.priority = INT_MAX - 1,
};

static struct notifier_block trace_die_notifier = {
	.notifier_call = trace_die_panic_handler,
	.priority = INT_MAX - 1,
};

/*
 * The idea is to execute the following die/panic callback early, in order
 * to avoid showing irrelevant information in the trace (like other panic
 * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
 * warnings get disabled (to prevent potential log flooding).
 */
static int trace_die_panic_handler(struct notifier_block *self,
				   unsigned long ev, void *unused)
{
	if (!ftrace_dump_on_oops)
		return NOTIFY_DONE;

	/* The die notifier requires DIE_OOPS to trigger */
	if (self == &trace_die_notifier && ev != DIE_OOPS)
		return NOTIFY_DONE;

	ftrace_dump(ftrace_dump_on_oops);

	return NOTIFY_DONE;
}
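
/*
 * For reference: ftrace_dump_on_oops is normally enabled with the
 * "ftrace_dump_on_oops" kernel command line option (optionally
 * "ftrace_dump_on_oops=orig_cpu" to dump only the CPU that triggered the
 * oops) or through the kernel.ftrace_dump_on_oops sysctl.
 */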

/*
 * The printk buffer is limited to 1024 bytes; we really don't need it
 * that big here. Nothing should be printing 1000 characters anyway.
 */
#define TRACE_MAX_PRINT		1000

/*
 * Define here KERN_TRACE so that we have one place to modify
 * it if we decide to change what log level the ftrace dump
 * should be at.
 */
#define KERN_TRACE		KERN_EMERG

void
trace_printk_seq(struct trace_seq *s)
{
	/* Probably should print a warning here. */
	if (s->seq.len >= TRACE_MAX_PRINT)
		s->seq.len = TRACE_MAX_PRINT;

	/*
	 * More paranoid code. Although the buffer size is set to
	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
	 * an extra layer of protection.
	 */
	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
		s->seq.len = s->seq.size - 1;

	/* Should be zero terminated, but we are paranoid. */
	s->buffer[s->seq.len] = 0;

	printk(KERN_TRACE "%s", s->buffer);

	trace_seq_init(s);
}

void trace_init_global_iter(struct trace_iterator *iter)
{
	iter->tr = &global_trace;
	iter->trace = iter->tr->current_trace;
	iter->cpu_file = RING_BUFFER_ALL_CPUS;
	iter->array_buffer = &global_trace.array_buffer;

	if (iter->trace && iter->trace->open)
		iter->trace->open(iter);

	/* Annotate start of buffers if we had overruns */
	if (ring_buffer_overruns(iter->array_buffer->buffer))
		iter->iter_flags |= TRACE_FILE_ANNOTATE;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[iter->tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;

	/* Can not use kmalloc for iter.temp and iter.fmt */
	iter->temp = static_temp_buf;
	iter->temp_size = STATIC_TEMP_BUF_SIZE;
	iter->fmt = static_fmt_buf;
	iter->fmt_size = STATIC_FMT_BUF_SIZE;
}

void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
{
	/* use static because iter can be a bit big for the stack */
	static struct trace_iterator iter;
	static atomic_t dump_running;
	struct trace_array *tr = &global_trace;
	unsigned int old_userobj;
	unsigned long flags;
	int cnt = 0, cpu;

	/* Only allow one dump user at a time. */
	if (atomic_inc_return(&dump_running) != 1) {
		atomic_dec(&dump_running);
		return;
	}

	/*
	 * Always turn off tracing when we dump.
	 * We don't need to show trace output of what happens
	 * between multiple crashes.
	 *
	 * If the user does a sysrq-z, then they can re-enable
	 * tracing with echo 1 > tracing_on.
	 */
	tracing_off();

	local_irq_save(flags);

	/* Simulate the iterator */
	trace_init_global_iter(&iter);

	for_each_tracing_cpu(cpu) {
		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
	}

	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;

	/* don't look at user memory in panic mode */
	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;

	switch (oops_dump_mode) {
	case DUMP_ALL:
		iter.cpu_file = RING_BUFFER_ALL_CPUS;
		break;
	case DUMP_ORIG:
		iter.cpu_file = raw_smp_processor_id();
		break;
	case DUMP_NONE:
		goto out_enable;
	default:
		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
		iter.cpu_file = RING_BUFFER_ALL_CPUS;
	}

	printk(KERN_TRACE "Dumping ftrace buffer:\n");

	/* Did function tracer already get disabled? */
	if (ftrace_is_dead()) {
		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
	}

	/*
	 * We need to stop all tracing on all CPUS to read
	 * the next buffer. This is a bit expensive, but is
	 * not done often. We print all that we can read,
	 * and then release the locks again.
	 */

	while (!trace_empty(&iter)) {

		if (!cnt)
			printk(KERN_TRACE "---------------------------------\n");

		cnt++;

		trace_iterator_reset(&iter);
		iter.iter_flags |= TRACE_FILE_LAT_FMT;

		if (trace_find_next_entry_inc(&iter) != NULL) {
			int ret;

			ret = print_trace_line(&iter);
			if (ret != TRACE_TYPE_NO_CONSUME)
				trace_consume(&iter);
		}
		touch_nmi_watchdog();

		trace_printk_seq(&iter.seq);
	}

	if (!cnt)
		printk(KERN_TRACE "   (ftrace buffer empty)\n");
	else
		printk(KERN_TRACE "---------------------------------\n");

 out_enable:
	tr->trace_flags |= old_userobj;

	for_each_tracing_cpu(cpu) {
		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
	}
	atomic_dec(&dump_running);
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(ftrace_dump);
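
/*
 * For illustration only (my_device_is_wedged() is a hypothetical helper):
 * since ftrace_dump() is exported, a GPL module could dump the trace
 * buffers from its own error path, e.g.:
 *
 *	if (WARN_ON(my_device_is_wedged(dev)))
 *		ftrace_dump(DUMP_ALL);
 *
 * Using DUMP_ORIG instead would restrict the dump to the calling CPU.
 */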

#define WRITE_BUFSIZE  4096

ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
				size_t count, loff_t *ppos,
				int (*createfn)(const char *))
{
	char *kbuf, *buf, *tmp;
	int ret = 0;
	size_t done = 0;
	size_t size;

	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
	if (!kbuf)
		return -ENOMEM;

	while (done < count) {
		size = count - done;

		if (size >= WRITE_BUFSIZE)
			size = WRITE_BUFSIZE - 1;

		if (copy_from_user(kbuf, buffer + done, size)) {
			ret = -EFAULT;
			goto out;
		}
		kbuf[size] = '\0';
		buf = kbuf;
		do {
			tmp = strchr(buf, '\n');
			if (tmp) {
				*tmp = '\0';
				size = tmp - buf + 1;
			} else {
				size = strlen(buf);
				if (done + size < count) {
					if (buf != kbuf)
						break;
					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
					pr_warn("Line length is too long: Should be less than %d\n",
						WRITE_BUFSIZE - 2);
					ret = -EINVAL;
					goto out;
				}
			}
			done += size;

			/* Remove comments */
			tmp = strchr(buf, '#');

			if (tmp)
				*tmp = '\0';

			ret = createfn(buf);
			if (ret)
				goto out;
			buf += size;

		} while (done < count);
	}
	ret = done;

out:
	kfree(kbuf);

	return ret;
}
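
/*
 * For illustration only (create_my_command(), my_parse_one_command() and
 * my_write() are hypothetical names): callers such as the dynamic event
 * interfaces pass a callback that is invoked once per '\n'-separated,
 * '#'-stripped command line, roughly:
 *
 *	static int create_my_command(const char *raw_command)
 *	{
 *		return my_parse_one_command(raw_command);
 *	}
 *
 *	static ssize_t my_write(struct file *file, const char __user *ubuf,
 *				size_t count, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, ubuf, count, ppos,
 *					       create_my_command);
 *	}
 */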

#ifdef CONFIG_TRACER_MAX_TRACE
__init static bool tr_needs_alloc_snapshot(const char *name)
{
	char *test;
	int len = strlen(name);
	bool ret;

	if (!boot_snapshot_index)
		return false;

	if (strncmp(name, boot_snapshot_info, len) == 0 &&
	    boot_snapshot_info[len] == '\t')
		return true;

	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
	if (!test)
		return false;

	sprintf(test, "\t%s\t", name);
	ret = strstr(boot_snapshot_info, test) == NULL;
	kfree(test);
	return ret;
}

__init static void do_allocate_snapshot(const char *name)
{
	if (!tr_needs_alloc_snapshot(name))
		return;

	/*
	 * When allocate_snapshot is set, the next call to
	 * allocate_trace_buffers() (called by trace_array_get_by_name())
	 * will allocate the snapshot buffer. That will also clear
	 * this flag.
	 */
	allocate_snapshot = true;
}
#else
static inline void do_allocate_snapshot(const char *name) { }
#endif

__init static void enable_instances(void)
{
	struct trace_array *tr;
	char *curr_str;
	char *str;
	char *tok;

	/* A tab is always appended */
	boot_instance_info[boot_instance_index - 1] = '\0';
	str = boot_instance_info;

	while ((curr_str = strsep(&str, "\t"))) {

		tok = strsep(&curr_str, ",");

		if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
			do_allocate_snapshot(tok);

		tr = trace_array_get_by_name(tok);
		if (!tr) {
			pr_warn("Failed to create instance buffer %s\n", curr_str);
			continue;
		}
		/* Allow user space to delete it */
		trace_array_put(tr);

		while ((tok = strsep(&curr_str, ","))) {
			early_enable_events(tr, tok, true);
		}
	}
}
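
/*
 * boot_instance_info is filled from the "trace_instance=" kernel command
 * line parameter; for example (the instance name and events below are
 * only illustrative):
 *
 *	trace_instance=foo,sched:sched_switch,irq:irq_handler_entry
 *
 * creates the "foo" instance at boot and enables the listed events in it.
 */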

__init static int tracer_alloc_buffers(void)
{
	int ring_buf_size;
	int ret = -ENOMEM;


	if (security_locked_down(LOCKDOWN_TRACEFS)) {
		pr_warn("Tracing disabled due to lockdown\n");
		return -EPERM;
	}

	/*
	 * Make sure we don't accidentally add more trace options
	 * than we have bits for.
	 */
	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);

	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
		goto out;

	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
		goto out_free_buffer_mask;

	/* Only allocate trace_printk buffers if a trace_printk exists */
	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
		/* Must be called before global_trace.buffer is allocated */
		trace_printk_init_buffers();

	/* To save memory, keep the ring buffer size to its minimum */
	if (global_trace.ring_buffer_expanded)
		ring_buf_size = trace_buf_size;
	else
		ring_buf_size = 1;

	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&global_trace.start_lock);

	/*
	 * The prepare callback allocates some memory for the ring buffer. We
	 * don't free the buffer if the CPU goes down. If we were to free
	 * the buffer, then the user would lose any trace that was in the
	 * buffer. The memory will be removed once the "instance" is removed.
	 */
	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
				      "trace/RB:prepare", trace_rb_cpu_prepare,
				      NULL);
	if (ret < 0)
		goto out_free_cpumask;
	/* Used for event triggers */
	ret = -ENOMEM;
	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
	if (!temp_buffer)
		goto out_rm_hp_state;

	if (trace_create_savedcmd() < 0)
		goto out_free_temp_buffer;

	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
		goto out_free_savedcmd;

	/* TODO: make the number of buffers hot pluggable with CPUS */
	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
		goto out_free_pipe_cpumask;
	}
	if (global_trace.buffer_disabled)
		tracing_off();

	if (trace_boot_clock) {
		ret = tracing_set_clock(&global_trace, trace_boot_clock);
		if (ret < 0)
			pr_warn("Trace clock %s not defined, going back to default\n",
				trace_boot_clock);
	}

	/*
	 * register_tracer() might reference current_trace, so it
	 * needs to be set before we register anything. This is
	 * just a bootstrap of current_trace anyway.
	 */
	global_trace.current_trace = &nop_trace;

	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

	ftrace_init_global_array_ops(&global_trace);

	init_trace_flags_index(&global_trace);

	register_tracer(&nop_trace);

	/* Function tracing may start here (via kernel command line) */
	init_function_trace();

	/* All seems OK, enable tracing */
	tracing_disabled = 0;

	atomic_notifier_chain_register(&panic_notifier_list,
				       &trace_panic_notifier);

	register_die_notifier(&trace_die_notifier);

	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;

	INIT_LIST_HEAD(&global_trace.systems);
	INIT_LIST_HEAD(&global_trace.events);
	INIT_LIST_HEAD(&global_trace.hist_vars);
	INIT_LIST_HEAD(&global_trace.err_log);
	list_add(&global_trace.list, &ftrace_trace_arrays);

	apply_trace_boot_options();

	register_snapshot_cmd();

	test_can_verify();

	return 0;

out_free_pipe_cpumask:
	free_cpumask_var(global_trace.pipe_cpumask);
out_free_savedcmd:
	free_saved_cmdlines_buffer(savedcmd);
out_free_temp_buffer:
	ring_buffer_free(temp_buffer);
out_rm_hp_state:
	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
out_free_cpumask:
	free_cpumask_var(global_trace.tracing_cpumask);
out_free_buffer_mask:
	free_cpumask_var(tracing_buffer_mask);
out:
	return ret;
}

void __init ftrace_boot_snapshot(void)
{
#ifdef CONFIG_TRACER_MAX_TRACE
	struct trace_array *tr;

	if (!snapshot_at_boot)
		return;

	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
		if (!tr->allocated_snapshot)
			continue;

		tracing_snapshot_instance(tr);
		trace_array_puts(tr, "** Boot snapshot taken **\n");
	}
#endif
}

void __init early_trace_init(void)
{
	if (tracepoint_printk) {
		tracepoint_print_iter =
			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
		if (MEM_FAIL(!tracepoint_print_iter,
			     "Failed to allocate trace iterator\n"))
			tracepoint_printk = 0;
		else
			static_key_enable(&tracepoint_printk_key.key);
	}
	tracer_alloc_buffers();

	init_events();
}

void __init trace_init(void)
{
	trace_event_init();

	if (boot_instance_index)
		enable_instances();
}

__init static void clear_boot_tracer(void)
{
	/*
	 * The default bootup tracer name is stored in an init-section
	 * buffer. This function is called at late_initcall time; if the
	 * boot tracer was never registered by then, clear the pointer so
	 * that a later registration does not access the buffer after the
	 * init memory has been freed.
	 */
	if (!default_bootup_tracer)
		return;

	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
	       default_bootup_tracer);
	default_bootup_tracer = NULL;
}

#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
__init static void tracing_set_default_clock(void)
{
	/* sched_clock_stable() is determined in late_initcall */
	if (!trace_boot_clock && !sched_clock_stable()) {
		if (security_locked_down(LOCKDOWN_TRACEFS)) {
			pr_warn("Can not set tracing clock due to lockdown\n");
			return;
		}

		printk(KERN_WARNING
		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
		       "If you want to keep using the local clock, then add:\n"
		       "  \"trace_clock=local\"\n"
		       "on the kernel command line\n");
		tracing_set_clock(&global_trace, "global");
	}
}
#else
static inline void tracing_set_default_clock(void) { }
#endif

__init static int late_trace_init(void)
{
	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
		static_key_disable(&tracepoint_printk_key.key);
		tracepoint_printk = 0;
	}

	tracing_set_default_clock();
	clear_boot_tracer();
	return 0;
}

late_initcall_sync(late_trace_init);