1// SPDX-License-Identifier: GPL-2.0
2/*
3 * ring buffer based function tracer
4 *
5 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7 *
8 * Originally taken from the RT patch by:
9 * Arnaldo Carvalho de Melo <acme@redhat.com>
10 *
11 * Based on code from the latency_tracer, that is:
12 * Copyright (C) 2004-2006 Ingo Molnar
13 * Copyright (C) 2004 Nadia Yvette Chambers
14 */
15#include <linux/ring_buffer.h>
16#include <generated/utsrelease.h>
17#include <linux/stacktrace.h>
18#include <linux/writeback.h>
19#include <linux/kallsyms.h>
20#include <linux/seq_file.h>
21#include <linux/notifier.h>
22#include <linux/irqflags.h>
23#include <linux/debugfs.h>
24#include <linux/tracefs.h>
25#include <linux/pagemap.h>
26#include <linux/hardirq.h>
27#include <linux/linkage.h>
28#include <linux/uaccess.h>
29#include <linux/vmalloc.h>
30#include <linux/ftrace.h>
31#include <linux/module.h>
32#include <linux/percpu.h>
33#include <linux/splice.h>
34#include <linux/kdebug.h>
35#include <linux/string.h>
36#include <linux/mount.h>
37#include <linux/rwsem.h>
38#include <linux/slab.h>
39#include <linux/ctype.h>
40#include <linux/init.h>
41#include <linux/poll.h>
42#include <linux/nmi.h>
43#include <linux/fs.h>
44#include <linux/trace.h>
45#include <linux/sched/clock.h>
46#include <linux/sched/rt.h>
47
48#include "trace.h"
49#include "trace_output.h"
50
51/*
52 * On boot up, the ring buffer is set to the minimum size, so that
53 * we do not waste memory on systems that are not using tracing.
54 */
55bool ring_buffer_expanded;
56
57/*
58 * We need to change this state when a selftest is running.
 * A selftest will look into the ring buffer to count the entries
 * inserted during the selftest, although concurrent insertions into
 * the ring buffer, such as trace_printk(), could occur at the same
 * time, giving false positive or negative results.
63 */
64static bool __read_mostly tracing_selftest_running;
65
66/*
67 * If a tracer is running, we do not want to run SELFTEST.
68 */
69bool __read_mostly tracing_selftest_disabled;
70
71/* Pipe tracepoints to printk */
72struct trace_iterator *tracepoint_print_iter;
73int tracepoint_printk;
74static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
75
76/* For tracers that don't implement custom flags */
77static struct tracer_opt dummy_tracer_opt[] = {
78 { }
79};
80
81static int
82dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
83{
84 return 0;
85}
86
87/*
 * To prevent the comm cache from being overwritten when no
 * tracing is active, only save the comm when a trace event
 * occurs.
91 */
92static DEFINE_PER_CPU(bool, trace_taskinfo_save);
93
94/*
95 * Kill all tracing for good (never come back).
 * It is initialized to 1, and is set back to zero only when the
 * initialization of the tracer is successful; that is the only
 * place that clears it.
99 */
100static int tracing_disabled = 1;
101
102cpumask_var_t __read_mostly tracing_buffer_mask;
103
104/*
105 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
106 *
 * If there is an oops (or kernel panic) and ftrace_dump_on_oops
 * is set, then ftrace_dump() is called. This will output the contents
 * of the ftrace buffers to the console. This is very useful for
 * capturing traces that lead to crashes and outputting them to a
 * serial console.
 *
 * It is off by default, but you can enable it either by specifying
 * "ftrace_dump_on_oops" on the kernel command line, or by setting
 * /proc/sys/kernel/ftrace_dump_on_oops.
 * Set it to 1 to dump the buffers of all CPUs.
 * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
118 */
119
120enum ftrace_dump_mode ftrace_dump_on_oops;
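
/*
 * Example (illustrative): booting with "ftrace_dump_on_oops=orig_cpu" dumps
 * only the buffer of the CPU that triggered the oops, while
 * "echo 1 > /proc/sys/kernel/ftrace_dump_on_oops" at run time enables
 * dumping the buffers of all CPUs.
 */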
121
122/* When set, tracing will stop when a WARN*() is hit */
123int __disable_trace_on_warning;
124
125#ifdef CONFIG_TRACE_EVAL_MAP_FILE
126/* Map of enums to their values, for "eval_map" file */
127struct trace_eval_map_head {
128 struct module *mod;
129 unsigned long length;
130};
131
132union trace_eval_map_item;
133
134struct trace_eval_map_tail {
135 /*
136 * "end" is first and points to NULL as it must be different
137 * than "mod" or "eval_string"
138 */
139 union trace_eval_map_item *next;
140 const char *end; /* points to NULL */
141};
142
143static DEFINE_MUTEX(trace_eval_mutex);
144
145/*
146 * The trace_eval_maps are saved in an array with two extra elements,
147 * one at the beginning, and one at the end. The beginning item contains
148 * the count of the saved maps (head.length), and the module they
149 * belong to if not built in (head.mod). The ending item contains a
150 * pointer to the next array of saved eval_map items.
151 */
152union trace_eval_map_item {
153 struct trace_eval_map map;
154 struct trace_eval_map_head head;
155 struct trace_eval_map_tail tail;
156};
157
158static union trace_eval_map_item *trace_eval_maps;
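
/*
 * Illustrative layout of one saved array (a sketch of the scheme described
 * in the comment above):
 *
 *	[0]     head: .mod = owning module (or NULL), .length = N
 *	[1..N]  the N saved trace_eval_map entries
 *	[N+1]   tail: .next = pointer to the next saved array (or NULL)
 */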
159#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
160
161static int tracing_set_tracer(struct trace_array *tr, const char *buf);
162
163#define MAX_TRACER_SIZE 100
164static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
165static char *default_bootup_tracer;
166
167static bool allocate_snapshot;
168
169static int __init set_cmdline_ftrace(char *str)
170{
171 strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
172 default_bootup_tracer = bootup_tracer_buf;
173 /* We are using ftrace early, expand it */
174 ring_buffer_expanded = true;
175 return 1;
176}
177__setup("ftrace=", set_cmdline_ftrace);
178
179static int __init set_ftrace_dump_on_oops(char *str)
180{
181 if (*str++ != '=' || !*str) {
182 ftrace_dump_on_oops = DUMP_ALL;
183 return 1;
184 }
185
186 if (!strcmp("orig_cpu", str)) {
187 ftrace_dump_on_oops = DUMP_ORIG;
188 return 1;
189 }
190
191 return 0;
192}
193__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
194
195static int __init stop_trace_on_warning(char *str)
196{
197 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
198 __disable_trace_on_warning = 1;
199 return 1;
200}
201__setup("traceoff_on_warning", stop_trace_on_warning);
202
203static int __init boot_alloc_snapshot(char *str)
204{
205 allocate_snapshot = true;
206 /* We also need the main ring buffer expanded */
207 ring_buffer_expanded = true;
208 return 1;
209}
210__setup("alloc_snapshot", boot_alloc_snapshot);
211
212
213static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
214
215static int __init set_trace_boot_options(char *str)
216{
217 strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
218 return 0;
219}
220__setup("trace_options=", set_trace_boot_options);
221
222static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
223static char *trace_boot_clock __initdata;
224
225static int __init set_trace_boot_clock(char *str)
226{
227 strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
228 trace_boot_clock = trace_boot_clock_buf;
229 return 0;
230}
231__setup("trace_clock=", set_trace_boot_clock);
232
233static int __init set_tracepoint_printk(char *str)
234{
235 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
236 tracepoint_printk = 1;
237 return 1;
238}
239__setup("tp_printk", set_tracepoint_printk);
240
241unsigned long long ns2usecs(u64 nsec)
242{
243 nsec += 500;
244 do_div(nsec, 1000);
245 return nsec;
246}
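
/*
 * For example, ns2usecs(1499) returns 1 and ns2usecs(1500) returns 2:
 * the +500 rounds to the nearest microsecond before dividing by 1000.
 */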
247
248/* trace_flags holds trace_options default values */
249#define TRACE_DEFAULT_FLAGS \
250 (FUNCTION_DEFAULT_FLAGS | \
251 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
252 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
253 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
254 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
255
256/* trace_options that are only supported by global_trace */
257#define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
258 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
259
260/* trace_flags that are default zero for instances */
261#define ZEROED_TRACE_FLAGS \
262 (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
263
264/*
265 * The global_trace is the descriptor that holds the top-level tracing
266 * buffers for the live tracing.
267 */
268static struct trace_array global_trace = {
269 .trace_flags = TRACE_DEFAULT_FLAGS,
270};
271
272LIST_HEAD(ftrace_trace_arrays);
273
274int trace_array_get(struct trace_array *this_tr)
275{
276 struct trace_array *tr;
277 int ret = -ENODEV;
278
279 mutex_lock(&trace_types_lock);
280 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
281 if (tr == this_tr) {
282 tr->ref++;
283 ret = 0;
284 break;
285 }
286 }
287 mutex_unlock(&trace_types_lock);
288
289 return ret;
290}
291
292static void __trace_array_put(struct trace_array *this_tr)
293{
294 WARN_ON(!this_tr->ref);
295 this_tr->ref--;
296}
297
298void trace_array_put(struct trace_array *this_tr)
299{
300 mutex_lock(&trace_types_lock);
301 __trace_array_put(this_tr);
302 mutex_unlock(&trace_types_lock);
303}
304
305int call_filter_check_discard(struct trace_event_call *call, void *rec,
306 struct ring_buffer *buffer,
307 struct ring_buffer_event *event)
308{
309 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
310 !filter_match_preds(call->filter, rec)) {
311 __trace_event_discard_commit(buffer, event);
312 return 1;
313 }
314
315 return 0;
316}
317
318void trace_free_pid_list(struct trace_pid_list *pid_list)
319{
320 vfree(pid_list->pids);
321 kfree(pid_list);
322}
323
324/**
325 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
326 * @filtered_pids: The list of pids to check
327 * @search_pid: The PID to find in @filtered_pids
328 *
 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
330 */
331bool
332trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
333{
334 /*
335 * If pid_max changed after filtered_pids was created, we
336 * by default ignore all pids greater than the previous pid_max.
337 */
338 if (search_pid >= filtered_pids->pid_max)
339 return false;
340
341 return test_bit(search_pid, filtered_pids->pids);
342}
343
344/**
345 * trace_ignore_this_task - should a task be ignored for tracing
346 * @filtered_pids: The list of pids to check
347 * @task: The task that should be ignored if not filtered
348 *
349 * Checks if @task should be traced or not from @filtered_pids.
350 * Returns true if @task should *NOT* be traced.
351 * Returns false if @task should be traced.
352 */
353bool
354trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
355{
356 /*
357 * Return false, because if filtered_pids does not exist,
358 * all pids are good to trace.
359 */
360 if (!filtered_pids)
361 return false;
362
363 return !trace_find_filtered_pid(filtered_pids, task->pid);
364}
365
366/**
367 * trace_pid_filter_add_remove_task - Add or remove a task from a pid_list
368 * @pid_list: The list to modify
369 * @self: The current task for fork or NULL for exit
370 * @task: The task to add or remove
371 *
 * When adding a task, and if @self is defined, the task is only added if
 * @self is also included in @pid_list. This happens on fork, and tasks
 * should only be added when the parent is listed. If @self is NULL, then
 * the @task pid will be removed from the list, which would happen on exit
 * of a task.
377 */
378void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
379 struct task_struct *self,
380 struct task_struct *task)
381{
382 if (!pid_list)
383 return;
384
385 /* For forks, we only add if the forking task is listed */
386 if (self) {
387 if (!trace_find_filtered_pid(pid_list, self->pid))
388 return;
389 }
390
391 /* Sorry, but we don't support pid_max changing after setting */
392 if (task->pid >= pid_list->pid_max)
393 return;
394
395 /* "self" is set for forks, and NULL for exits */
396 if (self)
397 set_bit(task->pid, pid_list->pids);
398 else
399 clear_bit(task->pid, pid_list->pids);
400}
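
/*
 * For example (illustrative): on fork, @self is the parent, so the child's
 * pid bit is set only when the parent's pid is already in @pid_list. On
 * exit, @self is NULL and the exiting task's pid bit is simply cleared.
 */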
401
402/**
403 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
404 * @pid_list: The pid list to show
 * @v: The last pid that was shown (+1 of the actual pid to let zero be displayed)
406 * @pos: The position of the file
407 *
408 * This is used by the seq_file "next" operation to iterate the pids
409 * listed in a trace_pid_list structure.
410 *
411 * Returns the pid+1 as we want to display pid of zero, but NULL would
412 * stop the iteration.
413 */
414void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
415{
416 unsigned long pid = (unsigned long)v;
417
418 (*pos)++;
419
	/* pid already is +1 of the actual previous bit */
421 pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
422
423 /* Return pid + 1 to allow zero to be represented */
424 if (pid < pid_list->pid_max)
425 return (void *)(pid + 1);
426
427 return NULL;
428}
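
/*
 * Illustrative: because pids are stored off by one, a return value of
 * (void *)1 means pid 0 is in the list, while a NULL return ends the
 * seq_file iteration.
 */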
429
430/**
431 * trace_pid_start - Used for seq_file to start reading pid lists
432 * @pid_list: The pid list to show
433 * @pos: The position of the file
434 *
435 * This is used by seq_file "start" operation to start the iteration
436 * of listing pids.
437 *
438 * Returns the pid+1 as we want to display pid of zero, but NULL would
439 * stop the iteration.
440 */
441void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
442{
443 unsigned long pid;
444 loff_t l = 0;
445
446 pid = find_first_bit(pid_list->pids, pid_list->pid_max);
447 if (pid >= pid_list->pid_max)
448 return NULL;
449
450 /* Return pid + 1 so that zero can be the exit value */
451 for (pid++; pid && l < *pos;
452 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
453 ;
454 return (void *)pid;
455}
456
457/**
458 * trace_pid_show - show the current pid in seq_file processing
459 * @m: The seq_file structure to write into
460 * @v: A void pointer of the pid (+1) value to display
461 *
462 * Can be directly used by seq_file operations to display the current
463 * pid value.
464 */
465int trace_pid_show(struct seq_file *m, void *v)
466{
467 unsigned long pid = (unsigned long)v - 1;
468
469 seq_printf(m, "%lu\n", pid);
470 return 0;
471}
472
473/* 128 should be much more than enough */
474#define PID_BUF_SIZE 127
475
476int trace_pid_write(struct trace_pid_list *filtered_pids,
477 struct trace_pid_list **new_pid_list,
478 const char __user *ubuf, size_t cnt)
479{
480 struct trace_pid_list *pid_list;
481 struct trace_parser parser;
482 unsigned long val;
483 int nr_pids = 0;
484 ssize_t read = 0;
485 ssize_t ret = 0;
486 loff_t pos;
487 pid_t pid;
488
489 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
490 return -ENOMEM;
491
492 /*
	 * Always create a new array when the user adds new pids; the
	 * write is an all-or-nothing operation. If the operation fails,
	 * then the current list is not modified.
497 */
498 pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
499 if (!pid_list)
500 return -ENOMEM;
501
502 pid_list->pid_max = READ_ONCE(pid_max);
503
504 /* Only truncating will shrink pid_max */
505 if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
506 pid_list->pid_max = filtered_pids->pid_max;
507
508 pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
509 if (!pid_list->pids) {
510 kfree(pid_list);
511 return -ENOMEM;
512 }
513
514 if (filtered_pids) {
515 /* copy the current bits to the new max */
516 for_each_set_bit(pid, filtered_pids->pids,
517 filtered_pids->pid_max) {
518 set_bit(pid, pid_list->pids);
519 nr_pids++;
520 }
521 }
522
523 while (cnt > 0) {
524
525 pos = 0;
526
527 ret = trace_get_user(&parser, ubuf, cnt, &pos);
528 if (ret < 0 || !trace_parser_loaded(&parser))
529 break;
530
531 read += ret;
532 ubuf += ret;
533 cnt -= ret;
534
535 ret = -EINVAL;
536 if (kstrtoul(parser.buffer, 0, &val))
537 break;
538 if (val >= pid_list->pid_max)
539 break;
540
541 pid = (pid_t)val;
542
543 set_bit(pid, pid_list->pids);
544 nr_pids++;
545
546 trace_parser_clear(&parser);
547 ret = 0;
548 }
549 trace_parser_put(&parser);
550
551 if (ret < 0) {
552 trace_free_pid_list(pid_list);
553 return ret;
554 }
555
556 if (!nr_pids) {
557 /* Cleared the list of pids */
558 trace_free_pid_list(pid_list);
559 read = ret;
560 pid_list = NULL;
561 }
562
563 *new_pid_list = pid_list;
564
565 return read;
566}
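
/*
 * Illustrative example: writing "123 456" builds a fresh bitmap with bits
 * 123 and 456 set, in addition to any bits copied over from @filtered_pids.
 * If no pids end up set at all, a NULL list is installed, which clears the
 * filter entirely.
 */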
567
568static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
569{
570 u64 ts;
571
572 /* Early boot up does not have a buffer yet */
573 if (!buf->buffer)
574 return trace_clock_local();
575
576 ts = ring_buffer_time_stamp(buf->buffer, cpu);
577 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
578
579 return ts;
580}
581
582u64 ftrace_now(int cpu)
583{
584 return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
585}
586
587/**
588 * tracing_is_enabled - Show if global_trace has been disabled
589 *
 * Shows if the global trace has been enabled or not. It uses the
 * mirror flag "buffer_disabled" so it can be used in fast paths, such
 * as by the irqsoff tracer. But it may be inaccurate due to races. If
 * you need to know the accurate state, use tracing_is_on(), which is a
 * little slower but accurate.
595 */
596int tracing_is_enabled(void)
597{
598 /*
599 * For quick access (irqsoff uses this in fast path), just
600 * return the mirror variable of the state of the ring buffer.
601 * It's a little racy, but we don't really care.
602 */
603 smp_rmb();
604 return !global_trace.buffer_disabled;
605}
606
607/*
608 * trace_buf_size is the size in bytes that is allocated
609 * for a buffer. Note, the number of bytes is always rounded
610 * to page size.
611 *
 * This number is purposely set to a low value of 16384.
 * If a dump on oops happens, not having to wait for a huge
 * amount of output is much appreciated. In any case, this is
 * configurable at both boot time and run time.
616 */
617#define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
618
619static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
620
621/* trace_types holds a link list of available tracers. */
622static struct tracer *trace_types __read_mostly;
623
624/*
625 * trace_types_lock is used to protect the trace_types list.
626 */
627DEFINE_MUTEX(trace_types_lock);
628
629/*
630 * serialize the access of the ring buffer
631 *
 * The ring buffer serializes readers, but that is only low level protection.
 * The validity of the events (returned by ring_buffer_peek() etc.)
 * is not protected by the ring buffer.
 *
 * The content of events may become garbage if we allow other processes
 * to consume these events concurrently:
 *   A) the page of the consumed events may become a normal page
 *      (not a reader page) in the ring buffer, and this page will be
 *      rewritten by the event producer.
 *   B) the page of the consumed events may become a page for splice_read,
 *      and this page will be returned to the system.
 *
 * These primitives allow multiple processes to access different per-cpu
 * ring buffers concurrently.
 *
 * These primitives don't distinguish read-only and read-consume access.
 * Multiple read-only accesses are also serialized.
649 */
650
651#ifdef CONFIG_SMP
652static DECLARE_RWSEM(all_cpu_access_lock);
653static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
654
655static inline void trace_access_lock(int cpu)
656{
657 if (cpu == RING_BUFFER_ALL_CPUS) {
658 /* gain it for accessing the whole ring buffer. */
659 down_write(&all_cpu_access_lock);
660 } else {
661 /* gain it for accessing a cpu ring buffer. */
662
663 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
664 down_read(&all_cpu_access_lock);
665
666 /* Secondly block other access to this @cpu ring buffer. */
667 mutex_lock(&per_cpu(cpu_access_lock, cpu));
668 }
669}
670
671static inline void trace_access_unlock(int cpu)
672{
673 if (cpu == RING_BUFFER_ALL_CPUS) {
674 up_write(&all_cpu_access_lock);
675 } else {
676 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
677 up_read(&all_cpu_access_lock);
678 }
679}
680
681static inline void trace_access_lock_init(void)
682{
683 int cpu;
684
685 for_each_possible_cpu(cpu)
686 mutex_init(&per_cpu(cpu_access_lock, cpu));
687}
688
689#else
690
691static DEFINE_MUTEX(access_lock);
692
693static inline void trace_access_lock(int cpu)
694{
695 (void)cpu;
696 mutex_lock(&access_lock);
697}
698
699static inline void trace_access_unlock(int cpu)
700{
701 (void)cpu;
702 mutex_unlock(&access_lock);
703}
704
705static inline void trace_access_lock_init(void)
706{
707}
708
709#endif
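
/*
 * Typical (illustrative) pairing of the primitives above when reading
 * events from a single CPU's buffer:
 *
 *	trace_access_lock(cpu);
 *	event = ring_buffer_peek(buffer, cpu, &ts, &lost_events);
 *	...
 *	trace_access_unlock(cpu);
 */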
710
711#ifdef CONFIG_STACKTRACE
712static void __ftrace_trace_stack(struct ring_buffer *buffer,
713 unsigned long flags,
714 int skip, int pc, struct pt_regs *regs);
715static inline void ftrace_trace_stack(struct trace_array *tr,
716 struct ring_buffer *buffer,
717 unsigned long flags,
718 int skip, int pc, struct pt_regs *regs);
719
720#else
721static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
722 unsigned long flags,
723 int skip, int pc, struct pt_regs *regs)
724{
725}
726static inline void ftrace_trace_stack(struct trace_array *tr,
727 struct ring_buffer *buffer,
728 unsigned long flags,
729 int skip, int pc, struct pt_regs *regs)
730{
731}
732
733#endif
734
735static __always_inline void
736trace_event_setup(struct ring_buffer_event *event,
737 int type, unsigned long flags, int pc)
738{
739 struct trace_entry *ent = ring_buffer_event_data(event);
740
741 tracing_generic_entry_update(ent, flags, pc);
742 ent->type = type;
743}
744
745static __always_inline struct ring_buffer_event *
746__trace_buffer_lock_reserve(struct ring_buffer *buffer,
747 int type,
748 unsigned long len,
749 unsigned long flags, int pc)
750{
751 struct ring_buffer_event *event;
752
753 event = ring_buffer_lock_reserve(buffer, len);
754 if (event != NULL)
755 trace_event_setup(event, type, flags, pc);
756
757 return event;
758}
759
760void tracer_tracing_on(struct trace_array *tr)
761{
762 if (tr->trace_buffer.buffer)
763 ring_buffer_record_on(tr->trace_buffer.buffer);
764 /*
765 * This flag is looked at when buffers haven't been allocated
766 * yet, or by some tracers (like irqsoff), that just want to
767 * know if the ring buffer has been disabled, but it can handle
	 * races where it gets disabled while we still do a record.
769 * As the check is in the fast path of the tracers, it is more
770 * important to be fast than accurate.
771 */
772 tr->buffer_disabled = 0;
773 /* Make the flag seen by readers */
774 smp_wmb();
775}
776
777/**
778 * tracing_on - enable tracing buffers
779 *
780 * This function enables tracing buffers that may have been
781 * disabled with tracing_off.
782 */
783void tracing_on(void)
784{
785 tracer_tracing_on(&global_trace);
786}
787EXPORT_SYMBOL_GPL(tracing_on);
788
789
790static __always_inline void
791__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
792{
793 __this_cpu_write(trace_taskinfo_save, true);
794
795 /* If this is the temp buffer, we need to commit fully */
796 if (this_cpu_read(trace_buffered_event) == event) {
797 /* Length is in event->array[0] */
798 ring_buffer_write(buffer, event->array[0], &event->array[1]);
799 /* Release the temp buffer */
800 this_cpu_dec(trace_buffered_event_cnt);
801 } else
802 ring_buffer_unlock_commit(buffer, event);
803}
804
805/**
806 * __trace_puts - write a constant string into the trace buffer.
807 * @ip: The address of the caller
808 * @str: The constant string to write
809 * @size: The size of the string.
810 */
811int __trace_puts(unsigned long ip, const char *str, int size)
812{
813 struct ring_buffer_event *event;
814 struct ring_buffer *buffer;
815 struct print_entry *entry;
816 unsigned long irq_flags;
817 int alloc;
818 int pc;
819
820 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
821 return 0;
822
823 pc = preempt_count();
824
825 if (unlikely(tracing_selftest_running || tracing_disabled))
826 return 0;
827
828 alloc = sizeof(*entry) + size + 2; /* possible \n added */
829
830 local_save_flags(irq_flags);
831 buffer = global_trace.trace_buffer.buffer;
832 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
833 irq_flags, pc);
834 if (!event)
835 return 0;
836
837 entry = ring_buffer_event_data(event);
838 entry->ip = ip;
839
840 memcpy(&entry->buf, str, size);
841
842 /* Add a newline if necessary */
843 if (entry->buf[size - 1] != '\n') {
844 entry->buf[size] = '\n';
845 entry->buf[size + 1] = '\0';
846 } else
847 entry->buf[size] = '\0';
848
849 __buffer_unlock_commit(buffer, event);
850 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
851
852 return size;
853}
854EXPORT_SYMBOL_GPL(__trace_puts);
855
856/**
857 * __trace_bputs - write the pointer to a constant string into trace buffer
858 * @ip: The address of the caller
859 * @str: The constant string to write to the buffer to
860 */
861int __trace_bputs(unsigned long ip, const char *str)
862{
863 struct ring_buffer_event *event;
864 struct ring_buffer *buffer;
865 struct bputs_entry *entry;
866 unsigned long irq_flags;
867 int size = sizeof(struct bputs_entry);
868 int pc;
869
870 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
871 return 0;
872
873 pc = preempt_count();
874
875 if (unlikely(tracing_selftest_running || tracing_disabled))
876 return 0;
877
878 local_save_flags(irq_flags);
879 buffer = global_trace.trace_buffer.buffer;
880 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
881 irq_flags, pc);
882 if (!event)
883 return 0;
884
885 entry = ring_buffer_event_data(event);
886 entry->ip = ip;
887 entry->str = str;
888
889 __buffer_unlock_commit(buffer, event);
890 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
891
892 return 1;
893}
894EXPORT_SYMBOL_GPL(__trace_bputs);
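
/*
 * Note: both of the above are normally reached through the trace_puts()
 * helper, which (roughly) selects __trace_bputs() when the string is a
 * compile-time constant and falls back to __trace_puts() otherwise.
 */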
895
896#ifdef CONFIG_TRACER_SNAPSHOT
897void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
898{
899 struct tracer *tracer = tr->current_trace;
900 unsigned long flags;
901
902 if (in_nmi()) {
903 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
904 internal_trace_puts("*** snapshot is being ignored ***\n");
905 return;
906 }
907
908 if (!tr->allocated_snapshot) {
909 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
910 internal_trace_puts("*** stopping trace here! ***\n");
911 tracing_off();
912 return;
913 }
914
	/* Note, the snapshot cannot be used while the current tracer uses it */
916 if (tracer->use_max_tr) {
917 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
918 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
919 return;
920 }
921
922 local_irq_save(flags);
923 update_max_tr(tr, current, smp_processor_id(), cond_data);
924 local_irq_restore(flags);
925}
926
927void tracing_snapshot_instance(struct trace_array *tr)
928{
929 tracing_snapshot_instance_cond(tr, NULL);
930}
931
932/**
933 * tracing_snapshot - take a snapshot of the current buffer.
934 *
935 * This causes a swap between the snapshot buffer and the current live
936 * tracing buffer. You can use this to take snapshots of the live
937 * trace when some condition is triggered, but continue to trace.
938 *
 * Note, make sure to allocate the snapshot either with
 * tracing_snapshot_alloc(), or manually with:
 * echo 1 > /sys/kernel/debug/tracing/snapshot
942 *
943 * If the snapshot buffer is not allocated, it will stop tracing.
944 * Basically making a permanent snapshot.
945 */
946void tracing_snapshot(void)
947{
948 struct trace_array *tr = &global_trace;
949
950 tracing_snapshot_instance(tr);
951}
952EXPORT_SYMBOL_GPL(tracing_snapshot);
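
/*
 * Illustrative usage: make sure the spare buffer exists (e.g. by calling
 * tracing_snapshot_alloc() from a context that may sleep), then call
 * tracing_snapshot() at the point of interest to preserve the trace leading
 * up to it while tracing continues in the live buffer.
 */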
953
954/**
955 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
956 * @tr: The tracing instance to snapshot
957 * @cond_data: The data to be tested conditionally, and possibly saved
958 *
959 * This is the same as tracing_snapshot() except that the snapshot is
960 * conditional - the snapshot will only happen if the
961 * cond_snapshot.update() implementation receiving the cond_data
962 * returns true, which means that the trace array's cond_snapshot
963 * update() operation used the cond_data to determine whether the
964 * snapshot should be taken, and if it was, presumably saved it along
965 * with the snapshot.
966 */
967void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
968{
969 tracing_snapshot_instance_cond(tr, cond_data);
970}
971EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
972
973/**
974 * tracing_snapshot_cond_data - get the user data associated with a snapshot
975 * @tr: The tracing instance
976 *
977 * When the user enables a conditional snapshot using
978 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
979 * with the snapshot. This accessor is used to retrieve it.
980 *
 * Should not be called from cond_snapshot.update(), since it takes
 * the tr->max_lock lock, which the code calling
 * cond_snapshot.update() has already taken.
984 *
985 * Returns the cond_data associated with the trace array's snapshot.
986 */
987void *tracing_cond_snapshot_data(struct trace_array *tr)
988{
989 void *cond_data = NULL;
990
991 arch_spin_lock(&tr->max_lock);
992
993 if (tr->cond_snapshot)
994 cond_data = tr->cond_snapshot->cond_data;
995
996 arch_spin_unlock(&tr->max_lock);
997
998 return cond_data;
999}
1000EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1001
1002static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
1003 struct trace_buffer *size_buf, int cpu_id);
1004static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
1005
1006int tracing_alloc_snapshot_instance(struct trace_array *tr)
1007{
1008 int ret;
1009
1010 if (!tr->allocated_snapshot) {
1011
1012 /* allocate spare buffer */
1013 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1014 &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
1015 if (ret < 0)
1016 return ret;
1017
1018 tr->allocated_snapshot = true;
1019 }
1020
1021 return 0;
1022}
1023
1024static void free_snapshot(struct trace_array *tr)
1025{
1026 /*
	 * We don't free the ring buffer; instead, we resize it because
	 * the max_tr ring buffer has some state (e.g. ring->clock) and
	 * we want to preserve it.
1030 */
1031 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1032 set_buffer_entries(&tr->max_buffer, 1);
1033 tracing_reset_online_cpus(&tr->max_buffer);
1034 tr->allocated_snapshot = false;
1035}
1036
1037/**
1038 * tracing_alloc_snapshot - allocate snapshot buffer.
1039 *
1040 * This only allocates the snapshot buffer if it isn't already
1041 * allocated - it doesn't also take a snapshot.
1042 *
1043 * This is meant to be used in cases where the snapshot buffer needs
1044 * to be set up for events that can't sleep but need to be able to
1045 * trigger a snapshot.
1046 */
1047int tracing_alloc_snapshot(void)
1048{
1049 struct trace_array *tr = &global_trace;
1050 int ret;
1051
1052 ret = tracing_alloc_snapshot_instance(tr);
1053 WARN_ON(ret < 0);
1054
1055 return ret;
1056}
1057EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1058
1059/**
1060 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1061 *
1062 * This is similar to tracing_snapshot(), but it will allocate the
1063 * snapshot buffer if it isn't already allocated. Use this only
1064 * where it is safe to sleep, as the allocation may sleep.
1065 *
1066 * This causes a swap between the snapshot buffer and the current live
1067 * tracing buffer. You can use this to take snapshots of the live
1068 * trace when some condition is triggered, but continue to trace.
1069 */
1070void tracing_snapshot_alloc(void)
1071{
1072 int ret;
1073
1074 ret = tracing_alloc_snapshot();
1075 if (ret < 0)
1076 return;
1077
1078 tracing_snapshot();
1079}
1080EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1081
1082/**
1083 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1084 * @tr: The tracing instance
1085 * @cond_data: User data to associate with the snapshot
1086 * @update: Implementation of the cond_snapshot update function
1087 *
1088 * Check whether the conditional snapshot for the given instance has
1089 * already been enabled, or if the current tracer is already using a
1090 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1091 * save the cond_data and update function inside.
1092 *
1093 * Returns 0 if successful, error otherwise.
1094 */
1095int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1096 cond_update_fn_t update)
1097{
1098 struct cond_snapshot *cond_snapshot;
1099 int ret = 0;
1100
1101 cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1102 if (!cond_snapshot)
1103 return -ENOMEM;
1104
1105 cond_snapshot->cond_data = cond_data;
1106 cond_snapshot->update = update;
1107
1108 mutex_lock(&trace_types_lock);
1109
1110 ret = tracing_alloc_snapshot_instance(tr);
1111 if (ret)
1112 goto fail_unlock;
1113
1114 if (tr->current_trace->use_max_tr) {
1115 ret = -EBUSY;
1116 goto fail_unlock;
1117 }
1118
1119 /*
1120 * The cond_snapshot can only change to NULL without the
1121 * trace_types_lock. We don't care if we race with it going
1122 * to NULL, but we want to make sure that it's not set to
1123 * something other than NULL when we get here, which we can
1124 * do safely with only holding the trace_types_lock and not
1125 * having to take the max_lock.
1126 */
1127 if (tr->cond_snapshot) {
1128 ret = -EBUSY;
1129 goto fail_unlock;
1130 }
1131
1132 arch_spin_lock(&tr->max_lock);
1133 tr->cond_snapshot = cond_snapshot;
1134 arch_spin_unlock(&tr->max_lock);
1135
1136 mutex_unlock(&trace_types_lock);
1137
1138 return ret;
1139
1140 fail_unlock:
1141 mutex_unlock(&trace_types_lock);
1142 kfree(cond_snapshot);
1143 return ret;
1144}
1145EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1146
1147/**
1148 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1149 * @tr: The tracing instance
1150 *
1151 * Check whether the conditional snapshot for the given instance is
1152 * enabled; if so, free the cond_snapshot associated with it,
1153 * otherwise return -EINVAL.
1154 *
1155 * Returns 0 if successful, error otherwise.
1156 */
1157int tracing_snapshot_cond_disable(struct trace_array *tr)
1158{
1159 int ret = 0;
1160
1161 arch_spin_lock(&tr->max_lock);
1162
1163 if (!tr->cond_snapshot)
1164 ret = -EINVAL;
1165 else {
1166 kfree(tr->cond_snapshot);
1167 tr->cond_snapshot = NULL;
1168 }
1169
1170 arch_spin_unlock(&tr->max_lock);
1171
1172 return ret;
1173}
1174EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1175#else
1176void tracing_snapshot(void)
1177{
1178 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1179}
1180EXPORT_SYMBOL_GPL(tracing_snapshot);
1181void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1182{
1183 WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1184}
1185EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1186int tracing_alloc_snapshot(void)
1187{
1188 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1189 return -ENODEV;
1190}
1191EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1192void tracing_snapshot_alloc(void)
1193{
1194 /* Give warning */
1195 tracing_snapshot();
1196}
1197EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1198void *tracing_cond_snapshot_data(struct trace_array *tr)
1199{
1200 return NULL;
1201}
1202EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1203int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1204{
1205 return -ENODEV;
1206}
1207EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1208int tracing_snapshot_cond_disable(struct trace_array *tr)
1209{
1210 return false;
1211}
1212EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1213#endif /* CONFIG_TRACER_SNAPSHOT */
1214
1215void tracer_tracing_off(struct trace_array *tr)
1216{
1217 if (tr->trace_buffer.buffer)
1218 ring_buffer_record_off(tr->trace_buffer.buffer);
1219 /*
1220 * This flag is looked at when buffers haven't been allocated
1221 * yet, or by some tracers (like irqsoff), that just want to
1222 * know if the ring buffer has been disabled, but it can handle
	 * races where it gets disabled while we still do a record.
1224 * As the check is in the fast path of the tracers, it is more
1225 * important to be fast than accurate.
1226 */
1227 tr->buffer_disabled = 1;
1228 /* Make the flag seen by readers */
1229 smp_wmb();
1230}
1231
1232/**
1233 * tracing_off - turn off tracing buffers
1234 *
1235 * This function stops the tracing buffers from recording data.
1236 * It does not disable any overhead the tracers themselves may
1237 * be causing. This function simply causes all recording to
1238 * the ring buffers to fail.
1239 */
1240void tracing_off(void)
1241{
1242 tracer_tracing_off(&global_trace);
1243}
1244EXPORT_SYMBOL_GPL(tracing_off);
1245
1246void disable_trace_on_warning(void)
1247{
1248 if (__disable_trace_on_warning)
1249 tracing_off();
1250}
1251
1252/**
 * tracer_tracing_is_on - show the real state of the ring buffer
 * @tr: the trace array to check
 *
 * Shows the real state of the ring buffer, whether it is enabled or not.
1257 */
1258bool tracer_tracing_is_on(struct trace_array *tr)
1259{
1260 if (tr->trace_buffer.buffer)
1261 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1262 return !tr->buffer_disabled;
1263}
1264
1265/**
1266 * tracing_is_on - show state of ring buffers enabled
1267 */
1268int tracing_is_on(void)
1269{
1270 return tracer_tracing_is_on(&global_trace);
1271}
1272EXPORT_SYMBOL_GPL(tracing_is_on);
1273
1274static int __init set_buf_size(char *str)
1275{
1276 unsigned long buf_size;
1277
1278 if (!str)
1279 return 0;
1280 buf_size = memparse(str, &str);
1281 /* nr_entries can not be zero */
1282 if (buf_size == 0)
1283 return 0;
1284 trace_buf_size = buf_size;
1285 return 1;
1286}
1287__setup("trace_buf_size=", set_buf_size);
1288
1289static int __init set_tracing_thresh(char *str)
1290{
1291 unsigned long threshold;
1292 int ret;
1293
1294 if (!str)
1295 return 0;
1296 ret = kstrtoul(str, 0, &threshold);
1297 if (ret < 0)
1298 return 0;
1299 tracing_thresh = threshold * 1000;
1300 return 1;
1301}
1302__setup("tracing_thresh=", set_tracing_thresh);
1303
1304unsigned long nsecs_to_usecs(unsigned long nsecs)
1305{
1306 return nsecs / 1000;
1307}
1308
1309/*
1310 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1311 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1312 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1313 * of strings in the order that the evals (enum) were defined.
1314 */
1315#undef C
1316#define C(a, b) b
1317
/* These must match the bit positions in trace_iterator_flags */
1319static const char *trace_options[] = {
1320 TRACE_FLAGS
1321 NULL
1322};
1323
1324static struct {
1325 u64 (*func)(void);
1326 const char *name;
1327 int in_ns; /* is this clock in nanoseconds? */
1328} trace_clocks[] = {
1329 { trace_clock_local, "local", 1 },
1330 { trace_clock_global, "global", 1 },
1331 { trace_clock_counter, "counter", 0 },
1332 { trace_clock_jiffies, "uptime", 0 },
1333 { trace_clock, "perf", 1 },
1334 { ktime_get_mono_fast_ns, "mono", 1 },
1335 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1336 { ktime_get_boot_fast_ns, "boot", 1 },
1337 ARCH_TRACE_CLOCKS
1338};
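
/*
 * Example (illustrative): booting with "trace_clock=mono" selects the
 * ktime_get_mono_fast_ns() based clock from the table above; the name is
 * stashed by set_trace_boot_clock() and applied later during tracing init.
 */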
1339
1340bool trace_clock_in_ns(struct trace_array *tr)
1341{
1342 if (trace_clocks[tr->clock_id].in_ns)
1343 return true;
1344
1345 return false;
1346}
1347
1348/*
1349 * trace_parser_get_init - gets the buffer for trace parser
1350 */
1351int trace_parser_get_init(struct trace_parser *parser, int size)
1352{
1353 memset(parser, 0, sizeof(*parser));
1354
1355 parser->buffer = kmalloc(size, GFP_KERNEL);
1356 if (!parser->buffer)
1357 return 1;
1358
1359 parser->size = size;
1360 return 0;
1361}
1362
1363/*
1364 * trace_parser_put - frees the buffer for trace parser
1365 */
1366void trace_parser_put(struct trace_parser *parser)
1367{
1368 kfree(parser->buffer);
1369 parser->buffer = NULL;
1370}
1371
1372/*
1373 * trace_get_user - reads the user input string separated by space
1374 * (matched by isspace(ch))
1375 *
1376 * For each string found the 'struct trace_parser' is updated,
1377 * and the function returns.
1378 *
1379 * Returns number of bytes read.
1380 *
1381 * See kernel/trace/trace.h for 'struct trace_parser' details.
1382 */
1383int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1384 size_t cnt, loff_t *ppos)
1385{
1386 char ch;
1387 size_t read = 0;
1388 ssize_t ret;
1389
1390 if (!*ppos)
1391 trace_parser_clear(parser);
1392
1393 ret = get_user(ch, ubuf++);
1394 if (ret)
1395 goto out;
1396
1397 read++;
1398 cnt--;
1399
1400 /*
	 * If the parser is not finished with the last write,
	 * continue reading the user input without skipping spaces.
1403 */
1404 if (!parser->cont) {
1405 /* skip white space */
1406 while (cnt && isspace(ch)) {
1407 ret = get_user(ch, ubuf++);
1408 if (ret)
1409 goto out;
1410 read++;
1411 cnt--;
1412 }
1413
1414 parser->idx = 0;
1415
1416 /* only spaces were written */
1417 if (isspace(ch) || !ch) {
1418 *ppos += read;
1419 ret = read;
1420 goto out;
1421 }
1422 }
1423
1424 /* read the non-space input */
1425 while (cnt && !isspace(ch) && ch) {
1426 if (parser->idx < parser->size - 1)
1427 parser->buffer[parser->idx++] = ch;
1428 else {
1429 ret = -EINVAL;
1430 goto out;
1431 }
1432 ret = get_user(ch, ubuf++);
1433 if (ret)
1434 goto out;
1435 read++;
1436 cnt--;
1437 }
1438
1439 /* We either got finished input or we have to wait for another call. */
1440 if (isspace(ch) || !ch) {
1441 parser->buffer[parser->idx] = 0;
1442 parser->cont = false;
1443 } else if (parser->idx < parser->size - 1) {
1444 parser->cont = true;
1445 parser->buffer[parser->idx++] = ch;
1446 /* Make sure the parsed string always terminates with '\0'. */
1447 parser->buffer[parser->idx] = 0;
1448 } else {
1449 ret = -EINVAL;
1450 goto out;
1451 }
1452
1453 *ppos += read;
1454 ret = read;
1455
1456out:
1457 return ret;
1458}
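
/*
 * Illustrative behaviour: if userspace writes "foo bar", the first call
 * fills parser->buffer with "foo" and a subsequent call returns "bar".
 * A write that ends mid-word sets parser->cont so the next call appends
 * to the same token instead of starting a new one.
 */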
1459
1460/* TODO add a seq_buf_to_buffer() */
1461static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1462{
1463 int len;
1464
1465 if (trace_seq_used(s) <= s->seq.readpos)
1466 return -EBUSY;
1467
1468 len = trace_seq_used(s) - s->seq.readpos;
1469 if (cnt > len)
1470 cnt = len;
1471 memcpy(buf, s->buffer + s->seq.readpos, cnt);
1472
1473 s->seq.readpos += cnt;
1474 return cnt;
1475}
1476
1477unsigned long __read_mostly tracing_thresh;
1478
1479#ifdef CONFIG_TRACER_MAX_TRACE
1480/*
1481 * Copy the new maximum trace into the separate maximum-trace
1482 * structure. (this way the maximum trace is permanently saved,
1483 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1484 */
1485static void
1486__update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1487{
1488 struct trace_buffer *trace_buf = &tr->trace_buffer;
1489 struct trace_buffer *max_buf = &tr->max_buffer;
1490 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1491 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1492
1493 max_buf->cpu = cpu;
1494 max_buf->time_start = data->preempt_timestamp;
1495
1496 max_data->saved_latency = tr->max_latency;
1497 max_data->critical_start = data->critical_start;
1498 max_data->critical_end = data->critical_end;
1499
1500 strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1501 max_data->pid = tsk->pid;
1502 /*
1503 * If tsk == current, then use current_uid(), as that does not use
1504 * RCU. The irq tracer can be called out of RCU scope.
1505 */
1506 if (tsk == current)
1507 max_data->uid = current_uid();
1508 else
1509 max_data->uid = task_uid(tsk);
1510
1511 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1512 max_data->policy = tsk->policy;
1513 max_data->rt_priority = tsk->rt_priority;
1514
1515 /* record this tasks comm */
1516 tracing_record_cmdline(tsk);
1517}
1518
1519/**
1520 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1521 * @tr: tracer
1522 * @tsk: the task with the latency
1523 * @cpu: The cpu that initiated the trace.
1524 * @cond_data: User data associated with a conditional snapshot
1525 *
1526 * Flip the buffers between the @tr and the max_tr and record information
1527 * about which task was the cause of this latency.
1528 */
1529void
1530update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1531 void *cond_data)
1532{
1533 if (tr->stop_count)
1534 return;
1535
1536 WARN_ON_ONCE(!irqs_disabled());
1537
1538 if (!tr->allocated_snapshot) {
1539 /* Only the nop tracer should hit this when disabling */
1540 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1541 return;
1542 }
1543
1544 arch_spin_lock(&tr->max_lock);
1545
1546 /* Inherit the recordable setting from trace_buffer */
1547 if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1548 ring_buffer_record_on(tr->max_buffer.buffer);
1549 else
1550 ring_buffer_record_off(tr->max_buffer.buffer);
1551
1552#ifdef CONFIG_TRACER_SNAPSHOT
1553 if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1554 goto out_unlock;
1555#endif
1556 swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1557
1558 __update_max_tr(tr, tsk, cpu);
1559
1560 out_unlock:
1561 arch_spin_unlock(&tr->max_lock);
1562}
1563
1564/**
1565 * update_max_tr_single - only copy one trace over, and reset the rest
 * @tr: tracer
 * @tsk: task with the latency
 * @cpu: the cpu of the buffer to copy.
1569 *
1570 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1571 */
1572void
1573update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1574{
1575 int ret;
1576
1577 if (tr->stop_count)
1578 return;
1579
1580 WARN_ON_ONCE(!irqs_disabled());
1581 if (!tr->allocated_snapshot) {
1582 /* Only the nop tracer should hit this when disabling */
1583 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1584 return;
1585 }
1586
1587 arch_spin_lock(&tr->max_lock);
1588
1589 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1590
1591 if (ret == -EBUSY) {
1592 /*
1593 * We failed to swap the buffer due to a commit taking
1594 * place on this CPU. We fail to record, but we reset
1595 * the max trace buffer (no one writes directly to it)
1596 * and flag that it failed.
1597 */
1598 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1599 "Failed to swap buffers due to commit in progress\n");
1600 }
1601
1602 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1603
1604 __update_max_tr(tr, tsk, cpu);
1605 arch_spin_unlock(&tr->max_lock);
1606}
1607#endif /* CONFIG_TRACER_MAX_TRACE */
1608
1609static int wait_on_pipe(struct trace_iterator *iter, int full)
1610{
1611 /* Iterators are static, they should be filled or empty */
1612 if (trace_buffer_iter(iter, iter->cpu_file))
1613 return 0;
1614
1615 return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1616 full);
1617}
1618
1619#ifdef CONFIG_FTRACE_STARTUP_TEST
1620static bool selftests_can_run;
1621
1622struct trace_selftests {
1623 struct list_head list;
1624 struct tracer *type;
1625};
1626
1627static LIST_HEAD(postponed_selftests);
1628
1629static int save_selftest(struct tracer *type)
1630{
1631 struct trace_selftests *selftest;
1632
1633 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1634 if (!selftest)
1635 return -ENOMEM;
1636
1637 selftest->type = type;
1638 list_add(&selftest->list, &postponed_selftests);
1639 return 0;
1640}
1641
1642static int run_tracer_selftest(struct tracer *type)
1643{
1644 struct trace_array *tr = &global_trace;
1645 struct tracer *saved_tracer = tr->current_trace;
1646 int ret;
1647
1648 if (!type->selftest || tracing_selftest_disabled)
1649 return 0;
1650
1651 /*
1652 * If a tracer registers early in boot up (before scheduling is
1653 * initialized and such), then do not run its selftests yet.
1654 * Instead, run it a little later in the boot process.
1655 */
1656 if (!selftests_can_run)
1657 return save_selftest(type);
1658
1659 /*
1660 * Run a selftest on this tracer.
1661 * Here we reset the trace buffer, and set the current
1662 * tracer to be this tracer. The tracer can then run some
1663 * internal tracing to verify that everything is in order.
1664 * If we fail, we do not register this tracer.
1665 */
1666 tracing_reset_online_cpus(&tr->trace_buffer);
1667
1668 tr->current_trace = type;
1669
1670#ifdef CONFIG_TRACER_MAX_TRACE
1671 if (type->use_max_tr) {
1672 /* If we expanded the buffers, make sure the max is expanded too */
1673 if (ring_buffer_expanded)
1674 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1675 RING_BUFFER_ALL_CPUS);
1676 tr->allocated_snapshot = true;
1677 }
1678#endif
1679
1680 /* the test is responsible for initializing and enabling */
1681 pr_info("Testing tracer %s: ", type->name);
1682 ret = type->selftest(type, tr);
1683 /* the test is responsible for resetting too */
1684 tr->current_trace = saved_tracer;
1685 if (ret) {
1686 printk(KERN_CONT "FAILED!\n");
1687 /* Add the warning after printing 'FAILED' */
1688 WARN_ON(1);
1689 return -1;
1690 }
1691 /* Only reset on passing, to avoid touching corrupted buffers */
1692 tracing_reset_online_cpus(&tr->trace_buffer);
1693
1694#ifdef CONFIG_TRACER_MAX_TRACE
1695 if (type->use_max_tr) {
1696 tr->allocated_snapshot = false;
1697
1698 /* Shrink the max buffer again */
1699 if (ring_buffer_expanded)
1700 ring_buffer_resize(tr->max_buffer.buffer, 1,
1701 RING_BUFFER_ALL_CPUS);
1702 }
1703#endif
1704
1705 printk(KERN_CONT "PASSED\n");
1706 return 0;
1707}
1708
1709static __init int init_trace_selftests(void)
1710{
1711 struct trace_selftests *p, *n;
1712 struct tracer *t, **last;
1713 int ret;
1714
1715 selftests_can_run = true;
1716
1717 mutex_lock(&trace_types_lock);
1718
1719 if (list_empty(&postponed_selftests))
1720 goto out;
1721
1722 pr_info("Running postponed tracer tests:\n");
1723
1724 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1725 ret = run_tracer_selftest(p->type);
1726 /* If the test fails, then warn and remove from available_tracers */
1727 if (ret < 0) {
1728 WARN(1, "tracer: %s failed selftest, disabling\n",
1729 p->type->name);
1730 last = &trace_types;
1731 for (t = trace_types; t; t = t->next) {
1732 if (t == p->type) {
1733 *last = t->next;
1734 break;
1735 }
1736 last = &t->next;
1737 }
1738 }
1739 list_del(&p->list);
1740 kfree(p);
1741 }
1742
1743 out:
1744 mutex_unlock(&trace_types_lock);
1745
1746 return 0;
1747}
1748core_initcall(init_trace_selftests);
1749#else
1750static inline int run_tracer_selftest(struct tracer *type)
1751{
1752 return 0;
1753}
1754#endif /* CONFIG_FTRACE_STARTUP_TEST */
1755
1756static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1757
1758static void __init apply_trace_boot_options(void);
1759
1760/**
1761 * register_tracer - register a tracer with the ftrace system.
 * @type: the plugin for the tracer
1763 *
1764 * Register a new plugin tracer.
1765 */
1766int __init register_tracer(struct tracer *type)
1767{
1768 struct tracer *t;
1769 int ret = 0;
1770
1771 if (!type->name) {
1772 pr_info("Tracer must have a name\n");
1773 return -1;
1774 }
1775
1776 if (strlen(type->name) >= MAX_TRACER_SIZE) {
1777 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1778 return -1;
1779 }
1780
1781 mutex_lock(&trace_types_lock);
1782
1783 tracing_selftest_running = true;
1784
1785 for (t = trace_types; t; t = t->next) {
1786 if (strcmp(type->name, t->name) == 0) {
1787 /* already found */
1788 pr_info("Tracer %s already registered\n",
1789 type->name);
1790 ret = -1;
1791 goto out;
1792 }
1793 }
1794
1795 if (!type->set_flag)
1796 type->set_flag = &dummy_set_flag;
1797 if (!type->flags) {
		/* allocate a dummy tracer_flags */
1799 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1800 if (!type->flags) {
1801 ret = -ENOMEM;
1802 goto out;
1803 }
1804 type->flags->val = 0;
1805 type->flags->opts = dummy_tracer_opt;
1806 } else
1807 if (!type->flags->opts)
1808 type->flags->opts = dummy_tracer_opt;
1809
1810 /* store the tracer for __set_tracer_option */
1811 type->flags->trace = type;
1812
1813 ret = run_tracer_selftest(type);
1814 if (ret < 0)
1815 goto out;
1816
1817 type->next = trace_types;
1818 trace_types = type;
1819 add_tracer_options(&global_trace, type);
1820
1821 out:
1822 tracing_selftest_running = false;
1823 mutex_unlock(&trace_types_lock);
1824
1825 if (ret || !default_bootup_tracer)
1826 goto out_unlock;
1827
1828 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1829 goto out_unlock;
1830
1831 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1832 /* Do we want this tracer to start on bootup? */
1833 tracing_set_tracer(&global_trace, type->name);
1834 default_bootup_tracer = NULL;
1835
1836 apply_trace_boot_options();
1837
	/* disable other selftests, since this will break them. */
1839 tracing_selftest_disabled = true;
1840#ifdef CONFIG_FTRACE_STARTUP_TEST
1841 printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1842 type->name);
1843#endif
1844
1845 out_unlock:
1846 return ret;
1847}
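
/*
 * Illustrative registration by a built-in tracer (a sketch, not taken from
 * any particular tracer):
 *
 *	static struct tracer foo_tracer __read_mostly = {
 *		.name	= "foo",
 *		.init	= foo_trace_init,
 *		.reset	= foo_trace_reset,
 *	};
 *
 *	static __init int init_foo_trace(void)
 *	{
 *		return register_tracer(&foo_tracer);
 *	}
 *	core_initcall(init_foo_trace);
 */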
1848
1849void tracing_reset(struct trace_buffer *buf, int cpu)
1850{
1851 struct ring_buffer *buffer = buf->buffer;
1852
1853 if (!buffer)
1854 return;
1855
1856 ring_buffer_record_disable(buffer);
1857
1858 /* Make sure all commits have finished */
1859 synchronize_rcu();
1860 ring_buffer_reset_cpu(buffer, cpu);
1861
1862 ring_buffer_record_enable(buffer);
1863}
1864
1865void tracing_reset_online_cpus(struct trace_buffer *buf)
1866{
1867 struct ring_buffer *buffer = buf->buffer;
1868 int cpu;
1869
1870 if (!buffer)
1871 return;
1872
1873 ring_buffer_record_disable(buffer);
1874
1875 /* Make sure all commits have finished */
1876 synchronize_rcu();
1877
1878 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1879
1880 for_each_online_cpu(cpu)
1881 ring_buffer_reset_cpu(buffer, cpu);
1882
1883 ring_buffer_record_enable(buffer);
1884}
1885
1886/* Must have trace_types_lock held */
1887void tracing_reset_all_online_cpus(void)
1888{
1889 struct trace_array *tr;
1890
1891 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1892 if (!tr->clear_trace)
1893 continue;
1894 tr->clear_trace = false;
1895 tracing_reset_online_cpus(&tr->trace_buffer);
1896#ifdef CONFIG_TRACER_MAX_TRACE
1897 tracing_reset_online_cpus(&tr->max_buffer);
1898#endif
1899 }
1900}
1901
1902static int *tgid_map;
1903
1904#define SAVED_CMDLINES_DEFAULT 128
1905#define NO_CMDLINE_MAP UINT_MAX
1906static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1907struct saved_cmdlines_buffer {
1908 unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1909 unsigned *map_cmdline_to_pid;
1910 unsigned cmdline_num;
1911 int cmdline_idx;
1912 char *saved_cmdlines;
1913};
1914static struct saved_cmdlines_buffer *savedcmd;
1915
/* temporarily disable recording */
1917static atomic_t trace_record_taskinfo_disabled __read_mostly;
1918
1919static inline char *get_saved_cmdlines(int idx)
1920{
1921 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1922}
1923
1924static inline void set_cmdline(int idx, const char *cmdline)
1925{
1926 strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1927}
1928
1929static int allocate_cmdlines_buffer(unsigned int val,
1930 struct saved_cmdlines_buffer *s)
1931{
1932 s->map_cmdline_to_pid = kmalloc_array(val,
1933 sizeof(*s->map_cmdline_to_pid),
1934 GFP_KERNEL);
1935 if (!s->map_cmdline_to_pid)
1936 return -ENOMEM;
1937
1938 s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1939 if (!s->saved_cmdlines) {
1940 kfree(s->map_cmdline_to_pid);
1941 return -ENOMEM;
1942 }
1943
1944 s->cmdline_idx = 0;
1945 s->cmdline_num = val;
1946 memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1947 sizeof(s->map_pid_to_cmdline));
1948 memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1949 val * sizeof(*s->map_cmdline_to_pid));
1950
1951 return 0;
1952}
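
/*
 * Note: the memset() calls above work byte-wise, which is fine here because
 * NO_CMDLINE_MAP is UINT_MAX: every byte is 0xff, so each entry still ends
 * up as the intended all-ones value.
 */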
1953
1954static int trace_create_savedcmd(void)
1955{
1956 int ret;
1957
1958 savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1959 if (!savedcmd)
1960 return -ENOMEM;
1961
1962 ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1963 if (ret < 0) {
1964 kfree(savedcmd);
1965 savedcmd = NULL;
1966 return -ENOMEM;
1967 }
1968
1969 return 0;
1970}
1971
1972int is_tracing_stopped(void)
1973{
1974 return global_trace.stop_count;
1975}
1976
1977/**
1978 * tracing_start - quick start of the tracer
1979 *
1980 * If tracing is enabled but was stopped by tracing_stop,
1981 * this will start the tracer back up.
1982 */
1983void tracing_start(void)
1984{
1985 struct ring_buffer *buffer;
1986 unsigned long flags;
1987
1988 if (tracing_disabled)
1989 return;
1990
1991 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1992 if (--global_trace.stop_count) {
1993 if (global_trace.stop_count < 0) {
1994 /* Someone screwed up their debugging */
1995 WARN_ON_ONCE(1);
1996 global_trace.stop_count = 0;
1997 }
1998 goto out;
1999 }
2000
2001 /* Prevent the buffers from switching */
2002 arch_spin_lock(&global_trace.max_lock);
2003
2004 buffer = global_trace.trace_buffer.buffer;
2005 if (buffer)
2006 ring_buffer_record_enable(buffer);
2007
2008#ifdef CONFIG_TRACER_MAX_TRACE
2009 buffer = global_trace.max_buffer.buffer;
2010 if (buffer)
2011 ring_buffer_record_enable(buffer);
2012#endif
2013
2014 arch_spin_unlock(&global_trace.max_lock);
2015
2016 out:
2017 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2018}
2019
2020static void tracing_start_tr(struct trace_array *tr)
2021{
2022 struct ring_buffer *buffer;
2023 unsigned long flags;
2024
2025 if (tracing_disabled)
2026 return;
2027
2028 /* If global, we need to also start the max tracer */
2029 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2030 return tracing_start();
2031
2032 raw_spin_lock_irqsave(&tr->start_lock, flags);
2033
2034 if (--tr->stop_count) {
2035 if (tr->stop_count < 0) {
2036 /* Someone screwed up their debugging */
2037 WARN_ON_ONCE(1);
2038 tr->stop_count = 0;
2039 }
2040 goto out;
2041 }
2042
2043 buffer = tr->trace_buffer.buffer;
2044 if (buffer)
2045 ring_buffer_record_enable(buffer);
2046
2047 out:
2048 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2049}
2050
2051/**
2052 * tracing_stop - quick stop of the tracer
2053 *
2054 * Lightweight way to stop tracing. Use in conjunction with
2055 * tracing_start.
2056 */
2057void tracing_stop(void)
2058{
2059 struct ring_buffer *buffer;
2060 unsigned long flags;
2061
2062 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2063 if (global_trace.stop_count++)
2064 goto out;
2065
2066 /* Prevent the buffers from switching */
2067 arch_spin_lock(&global_trace.max_lock);
2068
2069 buffer = global_trace.trace_buffer.buffer;
2070 if (buffer)
2071 ring_buffer_record_disable(buffer);
2072
2073#ifdef CONFIG_TRACER_MAX_TRACE
2074 buffer = global_trace.max_buffer.buffer;
2075 if (buffer)
2076 ring_buffer_record_disable(buffer);
2077#endif
2078
2079 arch_spin_unlock(&global_trace.max_lock);
2080
2081 out:
2082 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2083}
2084
2085static void tracing_stop_tr(struct trace_array *tr)
2086{
2087 struct ring_buffer *buffer;
2088 unsigned long flags;
2089
2090 /* If global, we need to also stop the max tracer */
2091 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2092 return tracing_stop();
2093
2094 raw_spin_lock_irqsave(&tr->start_lock, flags);
2095 if (tr->stop_count++)
2096 goto out;
2097
2098 buffer = tr->trace_buffer.buffer;
2099 if (buffer)
2100 ring_buffer_record_disable(buffer);
2101
2102 out:
2103 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2104}
2105
2106static int trace_save_cmdline(struct task_struct *tsk)
2107{
2108 unsigned pid, idx;
2109
2110 /* treat recording of idle task as a success */
2111 if (!tsk->pid)
2112 return 1;
2113
2114 if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2115 return 0;
2116
2117 /*
2118 * It's not the end of the world if we don't get
2119 * the lock, but we also don't want to spin
2120 * nor do we want to disable interrupts,
2121 * so if we miss here, then better luck next time.
2122 */
2123 if (!arch_spin_trylock(&trace_cmdline_lock))
2124 return 0;
2125
2126 idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2127 if (idx == NO_CMDLINE_MAP) {
2128 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2129
2130 /*
2131 * Check whether the cmdline buffer at idx has a pid
2132 * mapped. We are going to overwrite that entry so we
2133 * need to clear the map_pid_to_cmdline. Otherwise we
2134 * would read the new comm for the old pid.
2135 */
2136 pid = savedcmd->map_cmdline_to_pid[idx];
2137 if (pid != NO_CMDLINE_MAP)
2138 savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2139
2140 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2141 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2142
2143 savedcmd->cmdline_idx = idx;
2144 }
2145
2146 set_cmdline(idx, tsk->comm);
2147
2148 arch_spin_unlock(&trace_cmdline_lock);
2149
2150 return 1;
2151}
2152
2153static void __trace_find_cmdline(int pid, char comm[])
2154{
2155 unsigned map;
2156
2157 if (!pid) {
2158 strcpy(comm, "<idle>");
2159 return;
2160 }
2161
2162 if (WARN_ON_ONCE(pid < 0)) {
2163 strcpy(comm, "<XXX>");
2164 return;
2165 }
2166
2167 if (pid > PID_MAX_DEFAULT) {
2168 strcpy(comm, "<...>");
2169 return;
2170 }
2171
2172 map = savedcmd->map_pid_to_cmdline[pid];
2173 if (map != NO_CMDLINE_MAP)
2174 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2175 else
2176 strcpy(comm, "<...>");
2177}
2178
2179void trace_find_cmdline(int pid, char comm[])
2180{
2181 preempt_disable();
2182 arch_spin_lock(&trace_cmdline_lock);
2183
2184 __trace_find_cmdline(pid, comm);
2185
2186 arch_spin_unlock(&trace_cmdline_lock);
2187 preempt_enable();
2188}
2189
2190int trace_find_tgid(int pid)
2191{
2192 if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2193 return 0;
2194
2195 return tgid_map[pid];
2196}
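
/*
 * Illustrative sketch (not part of the original file, kept out of the
 * build with #if 0; the function name is hypothetical): how the two
 * lookup helpers above are typically used together when rendering an
 * event.  trace_find_cmdline() fills in the cached comm (or "<...>" if
 * the pid was never recorded) and trace_find_tgid() returns 0 when no
 * tgid was saved.
 */
#if 0
static void example_print_task(struct trace_seq *s, int pid)
{
	char comm[TASK_COMM_LEN];
	int tgid = trace_find_tgid(pid);

	trace_find_cmdline(pid, comm);
	trace_seq_printf(s, "%16s-%-5d (%5d)", comm, pid, tgid ? tgid : pid);
}
#endif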
2197
2198static int trace_save_tgid(struct task_struct *tsk)
2199{
2200 /* treat recording of idle task as a success */
2201 if (!tsk->pid)
2202 return 1;
2203
2204 if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2205 return 0;
2206
2207 tgid_map[tsk->pid] = tsk->tgid;
2208 return 1;
2209}
2210
2211static bool tracing_record_taskinfo_skip(int flags)
2212{
2213 if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2214 return true;
2215 if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2216 return true;
2217 if (!__this_cpu_read(trace_taskinfo_save))
2218 return true;
2219 return false;
2220}
2221
2222/**
2223 * tracing_record_taskinfo - record the task info of a task
2224 *
2225 * @task: task to record
2226 * @flags: TRACE_RECORD_CMDLINE for recording comm
2227 *         TRACE_RECORD_TGID for recording tgid
2228 */
2229void tracing_record_taskinfo(struct task_struct *task, int flags)
2230{
2231 bool done;
2232
2233 if (tracing_record_taskinfo_skip(flags))
2234 return;
2235
2236 /*
2237 * Record as much task information as possible. If some fail, continue
2238 * to try to record the others.
2239 */
2240 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2241 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2242
2243 /* If recording any information failed, retry again soon. */
2244 if (!done)
2245 return;
2246
2247 __this_cpu_write(trace_taskinfo_save, false);
2248}
2249
2250/**
2251 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2252 *
2253 * @prev: previous task during sched_switch
2254 * @next: next task during sched_switch
2255 * @flags: TRACE_RECORD_CMDLINE for recording comm
2256 *         TRACE_RECORD_TGID for recording tgid
2257 */
2258void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2259 struct task_struct *next, int flags)
2260{
2261 bool done;
2262
2263 if (tracing_record_taskinfo_skip(flags))
2264 return;
2265
2266 /*
2267 * Record as much task information as possible. If some fail, continue
2268 * to try to record the others.
2269 */
2270 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2271 done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2272 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2273 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2274
2275 /* If recording any information failed, retry again soon. */
2276 if (!done)
2277 return;
2278
2279 __this_cpu_write(trace_taskinfo_save, false);
2280}
2281
2282/* Helpers to record a specific task information */
2283void tracing_record_cmdline(struct task_struct *task)
2284{
2285 tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2286}
2287
2288void tracing_record_tgid(struct task_struct *task)
2289{
2290 tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2291}
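
/*
 * Illustrative sketch (hypothetical probe, kept out of the build with
 * #if 0): recording both pieces of task info in one call, so that later
 * trace output can resolve the pid again via trace_find_cmdline() and
 * trace_find_tgid().
 */
#if 0
static void example_record_task(struct task_struct *p)
{
	tracing_record_taskinfo(p, TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID);
}
#endif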
2292
2293/*
2294 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2295 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2296 * simplifies those functions and keeps them in sync.
2297 */
2298enum print_line_t trace_handle_return(struct trace_seq *s)
2299{
2300 return trace_seq_has_overflowed(s) ?
2301 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2302}
2303EXPORT_SYMBOL_GPL(trace_handle_return);
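
/*
 * Illustrative sketch (hypothetical output callback, kept out of the
 * build with #if 0): the usual way trace_handle_return() is used at the
 * end of a trace_event output function, folding the trace_seq overflow
 * check into the return value.
 */
#if 0
static enum print_line_t example_event_output(struct trace_iterator *iter,
					      int flags,
					      struct trace_event *event)
{
	struct trace_seq *s = &iter->seq;

	trace_seq_printf(s, "example event on cpu %d\n", iter->cpu);

	return trace_handle_return(s);
}
#endif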
2304
2305void
2306tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2307 int pc)
2308{
2309 struct task_struct *tsk = current;
2310
2311 entry->preempt_count = pc & 0xff;
2312 entry->pid = (tsk) ? tsk->pid : 0;
2313 entry->flags =
2314#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2315 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2316#else
2317 TRACE_FLAG_IRQS_NOSUPPORT |
2318#endif
2319 ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) |
2320 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2321 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2322 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2323 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2324}
2325EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2326
2327struct ring_buffer_event *
2328trace_buffer_lock_reserve(struct ring_buffer *buffer,
2329 int type,
2330 unsigned long len,
2331 unsigned long flags, int pc)
2332{
2333 return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2334}
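
/*
 * Illustrative sketch (hypothetical function, kept out of the build with
 * #if 0): the reserve / fill / commit pattern that
 * trace_buffer_lock_reserve() is part of.  trace_function() further down
 * does the same thing, with filter checking and ftrace_exports() added.
 */
#if 0
static void example_write_fn_entry(struct trace_array *tr, unsigned long ip,
				   unsigned long parent_ip,
				   unsigned long flags, int pc)
{
	struct ring_buffer *buffer = tr->trace_buffer.buffer;
	struct ring_buffer_event *event;
	struct ftrace_entry *entry;

	event = trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
					  flags, pc);
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->ip = ip;
	entry->parent_ip = parent_ip;

	/* trace_buffer_unlock_commit_nostack() is defined further down */
	trace_buffer_unlock_commit_nostack(buffer, event);
}
#endif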
2335
2336DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2337DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2338static int trace_buffered_event_ref;
2339
2340/**
2341 * trace_buffered_event_enable - enable buffering events
2342 *
2343 * When events are being filtered, it is quicker to use a temporary
2344 * buffer to write the event data into if there's a likely chance
2345 * that it will not be committed. Discarding an already reserved
2346 * event from the ring buffer is not as fast as committing it, and
2347 * is much slower than copying the data and committing in one shot.
2348 *
2349 * When an event is to be filtered, per-CPU buffers are allocated to
2350 * write the event data into. If the event is then filtered and
2351 * discarded, it is simply dropped; otherwise, the entire data is
2352 * committed to the ring buffer in one shot.
2353 */
2354void trace_buffered_event_enable(void)
2355{
2356 struct ring_buffer_event *event;
2357 struct page *page;
2358 int cpu;
2359
2360 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2361
2362 if (trace_buffered_event_ref++)
2363 return;
2364
2365 for_each_tracing_cpu(cpu) {
2366 page = alloc_pages_node(cpu_to_node(cpu),
2367 GFP_KERNEL | __GFP_NORETRY, 0);
2368 if (!page)
2369 goto failed;
2370
2371 event = page_address(page);
2372 memset(event, 0, sizeof(*event));
2373
2374 per_cpu(trace_buffered_event, cpu) = event;
2375
2376 preempt_disable();
2377 if (cpu == smp_processor_id() &&
2378 this_cpu_read(trace_buffered_event) !=
2379 per_cpu(trace_buffered_event, cpu))
2380 WARN_ON_ONCE(1);
2381 preempt_enable();
2382 }
2383
2384 return;
2385 failed:
2386 trace_buffered_event_disable();
2387}
2388
2389static void enable_trace_buffered_event(void *data)
2390{
2391 /* Probably not needed, but do it anyway */
2392 smp_rmb();
2393 this_cpu_dec(trace_buffered_event_cnt);
2394}
2395
2396static void disable_trace_buffered_event(void *data)
2397{
2398 this_cpu_inc(trace_buffered_event_cnt);
2399}
2400
2401/**
2402 * trace_buffered_event_disable - disable buffering events
2403 *
2404 * When a filter is removed, it is faster to not use the buffered
2405 * events, and to commit directly into the ring buffer. Free up
2406 * the temp buffers when there are no more users. This requires
2407 * special synchronization with current events.
2408 */
2409void trace_buffered_event_disable(void)
2410{
2411 int cpu;
2412
2413 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2414
2415 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2416 return;
2417
2418 if (--trace_buffered_event_ref)
2419 return;
2420
2421 preempt_disable();
2422 /* For each CPU, set the buffer as used. */
2423 smp_call_function_many(tracing_buffer_mask,
2424 disable_trace_buffered_event, NULL, 1);
2425 preempt_enable();
2426
2427 /* Wait for all current users to finish */
2428 synchronize_rcu();
2429
2430 for_each_tracing_cpu(cpu) {
2431 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2432 per_cpu(trace_buffered_event, cpu) = NULL;
2433 }
2434 /*
2435 * Make sure trace_buffered_event is NULL before clearing
2436 * trace_buffered_event_cnt.
2437 */
2438 smp_wmb();
2439
2440 preempt_disable();
2441 /* Do the work on each cpu */
2442 smp_call_function_many(tracing_buffer_mask,
2443 enable_trace_buffered_event, NULL, 1);
2444 preempt_enable();
2445}
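
/*
 * Illustrative sketch (hypothetical helpers, kept out of the build with
 * #if 0) of the calling convention expected by the two functions above:
 * both are ref-counted and must be called with event_mutex held,
 * typically when an event filter is added or removed.
 */
#if 0
static void example_filter_added(void)
{
	mutex_lock(&event_mutex);
	trace_buffered_event_enable();	/* allocates per-CPU pages on first user */
	mutex_unlock(&event_mutex);
}

static void example_filter_removed(void)
{
	mutex_lock(&event_mutex);
	trace_buffered_event_disable();	/* frees the pages on the last user */
	mutex_unlock(&event_mutex);
}
#endif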
2446
2447static struct ring_buffer *temp_buffer;
2448
2449struct ring_buffer_event *
2450trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2451 struct trace_event_file *trace_file,
2452 int type, unsigned long len,
2453 unsigned long flags, int pc)
2454{
2455 struct ring_buffer_event *entry;
2456 int val;
2457
2458 *current_rb = trace_file->tr->trace_buffer.buffer;
2459
2460 if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2461 (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2462 (entry = this_cpu_read(trace_buffered_event))) {
2463 /* Try to use the per cpu buffer first */
2464 val = this_cpu_inc_return(trace_buffered_event_cnt);
2465 if (val == 1) {
2466 trace_event_setup(entry, type, flags, pc);
2467 entry->array[0] = len;
2468 return entry;
2469 }
2470 this_cpu_dec(trace_buffered_event_cnt);
2471 }
2472
2473 entry = __trace_buffer_lock_reserve(*current_rb,
2474 type, len, flags, pc);
2475 /*
2476	 * If tracing is off, but we have triggers enabled,
2477	 * we still need to look at the event data. Use the temp_buffer
2478	 * to store the trace event for the trigger to use. It is recursion
2479	 * safe and will not be recorded anywhere.
2480 */
2481 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2482 *current_rb = temp_buffer;
2483 entry = __trace_buffer_lock_reserve(*current_rb,
2484 type, len, flags, pc);
2485 }
2486 return entry;
2487}
2488EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2489
2490static DEFINE_SPINLOCK(tracepoint_iter_lock);
2491static DEFINE_MUTEX(tracepoint_printk_mutex);
2492
2493static void output_printk(struct trace_event_buffer *fbuffer)
2494{
2495 struct trace_event_call *event_call;
2496 struct trace_event *event;
2497 unsigned long flags;
2498 struct trace_iterator *iter = tracepoint_print_iter;
2499
2500 /* We should never get here if iter is NULL */
2501 if (WARN_ON_ONCE(!iter))
2502 return;
2503
2504 event_call = fbuffer->trace_file->event_call;
2505 if (!event_call || !event_call->event.funcs ||
2506 !event_call->event.funcs->trace)
2507 return;
2508
2509 event = &fbuffer->trace_file->event_call->event;
2510
2511 spin_lock_irqsave(&tracepoint_iter_lock, flags);
2512 trace_seq_init(&iter->seq);
2513 iter->ent = fbuffer->entry;
2514 event_call->event.funcs->trace(iter, 0, event);
2515 trace_seq_putc(&iter->seq, 0);
2516 printk("%s", iter->seq.buffer);
2517
2518 spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2519}
2520
2521int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2522 void __user *buffer, size_t *lenp,
2523 loff_t *ppos)
2524{
2525 int save_tracepoint_printk;
2526 int ret;
2527
2528 mutex_lock(&tracepoint_printk_mutex);
2529 save_tracepoint_printk = tracepoint_printk;
2530
2531 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2532
2533 /*
2534	 * This will force an early exit, as tracepoint_printk
2535	 * is always zero when tracepoint_print_iter is not allocated.
2536 */
2537 if (!tracepoint_print_iter)
2538 tracepoint_printk = 0;
2539
2540 if (save_tracepoint_printk == tracepoint_printk)
2541 goto out;
2542
2543 if (tracepoint_printk)
2544 static_key_enable(&tracepoint_printk_key.key);
2545 else
2546 static_key_disable(&tracepoint_printk_key.key);
2547
2548 out:
2549 mutex_unlock(&tracepoint_printk_mutex);
2550
2551 return ret;
2552}
2553
2554void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2555{
2556 if (static_key_false(&tracepoint_printk_key.key))
2557 output_printk(fbuffer);
2558
2559 event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2560 fbuffer->event, fbuffer->entry,
2561 fbuffer->flags, fbuffer->pc);
2562}
2563EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2564
2565/*
2566 * Skip 3:
2567 *
2568 * trace_buffer_unlock_commit_regs()
2569 * trace_event_buffer_commit()
2570 * trace_event_raw_event_xxx()
2571 */
2572# define STACK_SKIP 3
2573
2574void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2575 struct ring_buffer *buffer,
2576 struct ring_buffer_event *event,
2577 unsigned long flags, int pc,
2578 struct pt_regs *regs)
2579{
2580 __buffer_unlock_commit(buffer, event);
2581
2582 /*
2583 * If regs is not set, then skip the necessary functions.
2584 * Note, we can still get here via blktrace, wakeup tracer
2585 * and mmiotrace, but that's ok if they lose a function or
2586 * two. They are not that meaningful.
2587 */
2588 ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2589 ftrace_trace_userstack(buffer, flags, pc);
2590}
2591
2592/*
2593 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2594 */
2595void
2596trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2597 struct ring_buffer_event *event)
2598{
2599 __buffer_unlock_commit(buffer, event);
2600}
2601
2602static void
2603trace_process_export(struct trace_export *export,
2604 struct ring_buffer_event *event)
2605{
2606 struct trace_entry *entry;
2607 unsigned int size = 0;
2608
2609 entry = ring_buffer_event_data(event);
2610 size = ring_buffer_event_length(event);
2611 export->write(export, entry, size);
2612}
2613
2614static DEFINE_MUTEX(ftrace_export_lock);
2615
2616static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2617
2618static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2619
2620static inline void ftrace_exports_enable(void)
2621{
2622 static_branch_enable(&ftrace_exports_enabled);
2623}
2624
2625static inline void ftrace_exports_disable(void)
2626{
2627 static_branch_disable(&ftrace_exports_enabled);
2628}
2629
2630static void ftrace_exports(struct ring_buffer_event *event)
2631{
2632 struct trace_export *export;
2633
2634 preempt_disable_notrace();
2635
2636 export = rcu_dereference_raw_notrace(ftrace_exports_list);
2637 while (export) {
2638 trace_process_export(export, event);
2639 export = rcu_dereference_raw_notrace(export->next);
2640 }
2641
2642 preempt_enable_notrace();
2643}
2644
2645static inline void
2646add_trace_export(struct trace_export **list, struct trace_export *export)
2647{
2648 rcu_assign_pointer(export->next, *list);
2649 /*
2650	 * We are adding export to the list, but another
2651	 * CPU might be walking that list. We need to make sure
2652	 * the export->next pointer is valid before another CPU sees
2653	 * the export pointer included in the list.
2654 */
2655 rcu_assign_pointer(*list, export);
2656}
2657
2658static inline int
2659rm_trace_export(struct trace_export **list, struct trace_export *export)
2660{
2661 struct trace_export **p;
2662
2663 for (p = list; *p != NULL; p = &(*p)->next)
2664 if (*p == export)
2665 break;
2666
2667 if (*p != export)
2668 return -1;
2669
2670 rcu_assign_pointer(*p, (*p)->next);
2671
2672 return 0;
2673}
2674
2675static inline void
2676add_ftrace_export(struct trace_export **list, struct trace_export *export)
2677{
2678 if (*list == NULL)
2679 ftrace_exports_enable();
2680
2681 add_trace_export(list, export);
2682}
2683
2684static inline int
2685rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2686{
2687 int ret;
2688
2689 ret = rm_trace_export(list, export);
2690 if (*list == NULL)
2691 ftrace_exports_disable();
2692
2693 return ret;
2694}
2695
2696int register_ftrace_export(struct trace_export *export)
2697{
2698 if (WARN_ON_ONCE(!export->write))
2699 return -1;
2700
2701 mutex_lock(&ftrace_export_lock);
2702
2703 add_ftrace_export(&ftrace_exports_list, export);
2704
2705 mutex_unlock(&ftrace_export_lock);
2706
2707 return 0;
2708}
2709EXPORT_SYMBOL_GPL(register_ftrace_export);
2710
2711int unregister_ftrace_export(struct trace_export *export)
2712{
2713 int ret;
2714
2715 mutex_lock(&ftrace_export_lock);
2716
2717 ret = rm_ftrace_export(&ftrace_exports_list, export);
2718
2719 mutex_unlock(&ftrace_export_lock);
2720
2721 return ret;
2722}
2723EXPORT_SYMBOL_GPL(unregister_ftrace_export);
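
/*
 * Illustrative sketch (hypothetical module code, kept out of the build
 * with #if 0): a minimal trace_export client.  The ->write() callback
 * receives the raw binary trace entry and its length, exactly as passed
 * by trace_process_export() above; see include/linux/trace.h for the
 * authoritative prototype.
 */
#if 0
static void example_export_write(struct trace_export *export,
				 const void *entry, unsigned int size)
{
	pr_debug("exporting a trace entry of %u bytes\n", size);
}

static struct trace_export example_export = {
	.write	= example_export_write,
};

static int __init example_export_init(void)
{
	return register_ftrace_export(&example_export);
}

static void __exit example_export_exit(void)
{
	unregister_ftrace_export(&example_export);
}
#endif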
2724
2725void
2726trace_function(struct trace_array *tr,
2727 unsigned long ip, unsigned long parent_ip, unsigned long flags,
2728 int pc)
2729{
2730 struct trace_event_call *call = &event_function;
2731 struct ring_buffer *buffer = tr->trace_buffer.buffer;
2732 struct ring_buffer_event *event;
2733 struct ftrace_entry *entry;
2734
2735 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2736 flags, pc);
2737 if (!event)
2738 return;
2739 entry = ring_buffer_event_data(event);
2740 entry->ip = ip;
2741 entry->parent_ip = parent_ip;
2742
2743 if (!call_filter_check_discard(call, entry, buffer, event)) {
2744 if (static_branch_unlikely(&ftrace_exports_enabled))
2745 ftrace_exports(event);
2746 __buffer_unlock_commit(buffer, event);
2747 }
2748}
2749
2750#ifdef CONFIG_STACKTRACE
2751
2752#define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2753struct ftrace_stack {
2754 unsigned long calls[FTRACE_STACK_MAX_ENTRIES];
2755};
2756
2757static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2758static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2759
2760static void __ftrace_trace_stack(struct ring_buffer *buffer,
2761 unsigned long flags,
2762 int skip, int pc, struct pt_regs *regs)
2763{
2764 struct trace_event_call *call = &event_kernel_stack;
2765 struct ring_buffer_event *event;
2766 struct stack_entry *entry;
2767 struct stack_trace trace;
2768 int use_stack;
2769 int size = FTRACE_STACK_ENTRIES;
2770
2771 trace.nr_entries = 0;
2772 trace.skip = skip;
2773
2774 /*
2775 * Add one, for this function and the call to save_stack_trace()
2776 * If regs is set, then these functions will not be in the way.
2777 */
2778#ifndef CONFIG_UNWINDER_ORC
2779 if (!regs)
2780 trace.skip++;
2781#endif
2782
2783 /*
2784	 * Since events can happen in NMIs there's no safe way to
2785	 * use the per-CPU ftrace_stack. We reserve it and if an interrupt
2786	 * or NMI comes in, it will just have to use the default of
2787	 * FTRACE_STACK_ENTRIES entries, written directly into the trace event.
2788 */
2789 preempt_disable_notrace();
2790
2791 use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2792 /*
2793 * We don't need any atomic variables, just a barrier.
2794 * If an interrupt comes in, we don't care, because it would
2795 * have exited and put the counter back to what we want.
2796 * We just need a barrier to keep gcc from moving things
2797 * around.
2798 */
2799 barrier();
2800 if (use_stack == 1) {
2801 trace.entries = this_cpu_ptr(ftrace_stack.calls);
2802 trace.max_entries = FTRACE_STACK_MAX_ENTRIES;
2803
2804 if (regs)
2805 save_stack_trace_regs(regs, &trace);
2806 else
2807 save_stack_trace(&trace);
2808
2809 if (trace.nr_entries > size)
2810 size = trace.nr_entries;
2811 } else
2812 /* From now on, use_stack is a boolean */
2813 use_stack = 0;
2814
2815 size *= sizeof(unsigned long);
2816
2817 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2818 sizeof(*entry) + size, flags, pc);
2819 if (!event)
2820 goto out;
2821 entry = ring_buffer_event_data(event);
2822
2823 memset(&entry->caller, 0, size);
2824
2825 if (use_stack)
2826 memcpy(&entry->caller, trace.entries,
2827 trace.nr_entries * sizeof(unsigned long));
2828 else {
2829 trace.max_entries = FTRACE_STACK_ENTRIES;
2830 trace.entries = entry->caller;
2831 if (regs)
2832 save_stack_trace_regs(regs, &trace);
2833 else
2834 save_stack_trace(&trace);
2835 }
2836
2837 entry->size = trace.nr_entries;
2838
2839 if (!call_filter_check_discard(call, entry, buffer, event))
2840 __buffer_unlock_commit(buffer, event);
2841
2842 out:
2843 /* Again, don't let gcc optimize things here */
2844 barrier();
2845 __this_cpu_dec(ftrace_stack_reserve);
2846 preempt_enable_notrace();
2847
2848}
2849
2850static inline void ftrace_trace_stack(struct trace_array *tr,
2851 struct ring_buffer *buffer,
2852 unsigned long flags,
2853 int skip, int pc, struct pt_regs *regs)
2854{
2855 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2856 return;
2857
2858 __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2859}
2860
2861void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2862 int pc)
2863{
2864 struct ring_buffer *buffer = tr->trace_buffer.buffer;
2865
2866 if (rcu_is_watching()) {
2867 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2868 return;
2869 }
2870
2871 /*
2872 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2873 * but if the above rcu_is_watching() failed, then the NMI
2874 * triggered someplace critical, and rcu_irq_enter() should
2875 * not be called from NMI.
2876 */
2877 if (unlikely(in_nmi()))
2878 return;
2879
2880 rcu_irq_enter_irqson();
2881 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2882 rcu_irq_exit_irqson();
2883}
2884
2885/**
2886 * trace_dump_stack - record a stack back trace in the trace buffer
2887 * @skip: Number of functions to skip (helper handlers)
2888 */
2889void trace_dump_stack(int skip)
2890{
2891 unsigned long flags;
2892
2893 if (tracing_disabled || tracing_selftest_running)
2894 return;
2895
2896 local_save_flags(flags);
2897
2898#ifndef CONFIG_UNWINDER_ORC
2899 /* Skip 1 to skip this function. */
2900 skip++;
2901#endif
2902 __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2903 flags, skip, preempt_count(), NULL);
2904}
2905EXPORT_SYMBOL_GPL(trace_dump_stack);
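
/*
 * Illustrative sketch (hypothetical caller, kept out of the build with
 * #if 0): recording the current kernel stack into the global trace
 * buffer from a point of interest.  With @skip == 0 the dump starts at
 * the immediate caller of trace_dump_stack().
 */
#if 0
static void example_report_odd_state(void)
{
	trace_dump_stack(0);
}
#endif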
2906
2907static DEFINE_PER_CPU(int, user_stack_count);
2908
2909void
2910ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2911{
2912 struct trace_event_call *call = &event_user_stack;
2913 struct ring_buffer_event *event;
2914 struct userstack_entry *entry;
2915 struct stack_trace trace;
2916
2917 if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2918 return;
2919
2920 /*
2921	 * NMIs cannot handle page faults, even with fixups.
2922	 * Saving the user stack can (and often does) fault.
2923 */
2924 if (unlikely(in_nmi()))
2925 return;
2926
2927 /*
2928 * prevent recursion, since the user stack tracing may
2929 * trigger other kernel events.
2930 */
2931 preempt_disable();
2932 if (__this_cpu_read(user_stack_count))
2933 goto out;
2934
2935 __this_cpu_inc(user_stack_count);
2936
2937 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2938 sizeof(*entry), flags, pc);
2939 if (!event)
2940 goto out_drop_count;
2941 entry = ring_buffer_event_data(event);
2942
2943 entry->tgid = current->tgid;
2944 memset(&entry->caller, 0, sizeof(entry->caller));
2945
2946 trace.nr_entries = 0;
2947 trace.max_entries = FTRACE_STACK_ENTRIES;
2948 trace.skip = 0;
2949 trace.entries = entry->caller;
2950
2951 save_stack_trace_user(&trace);
2952 if (!call_filter_check_discard(call, entry, buffer, event))
2953 __buffer_unlock_commit(buffer, event);
2954
2955 out_drop_count:
2956 __this_cpu_dec(user_stack_count);
2957 out:
2958 preempt_enable();
2959}
2960
2961#ifdef UNUSED
2962static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2963{
2964	ftrace_trace_userstack(tr->trace_buffer.buffer, flags, preempt_count());
2965}
2966#endif /* UNUSED */
2967
2968#endif /* CONFIG_STACKTRACE */
2969
2970/* created for use with alloc_percpu */
2971struct trace_buffer_struct {
2972 int nesting;
2973 char buffer[4][TRACE_BUF_SIZE];
2974};
2975
2976static struct trace_buffer_struct *trace_percpu_buffer;
2977
2978/*
2979 * This allows for lockless recording, with one buffer per nesting level
2980 * (normal, softirq, irq, NMI). If we're nested too deeply, this returns NULL.
2981 */
2982static char *get_trace_buf(void)
2983{
2984 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2985
2986 if (!buffer || buffer->nesting >= 4)
2987 return NULL;
2988
2989 buffer->nesting++;
2990
2991 /* Interrupts must see nesting incremented before we use the buffer */
2992 barrier();
2993 return &buffer->buffer[buffer->nesting][0];
2994}
2995
2996static void put_trace_buf(void)
2997{
2998 /* Don't let the decrement of nesting leak before this */
2999 barrier();
3000 this_cpu_dec(trace_percpu_buffer->nesting);
3001}
3002
3003static int alloc_percpu_trace_buffer(void)
3004{
3005 struct trace_buffer_struct *buffers;
3006
3007 buffers = alloc_percpu(struct trace_buffer_struct);
3008 if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
3009 return -ENOMEM;
3010
3011 trace_percpu_buffer = buffers;
3012 return 0;
3013}
3014
3015static int buffers_allocated;
3016
3017void trace_printk_init_buffers(void)
3018{
3019 if (buffers_allocated)
3020 return;
3021
3022 if (alloc_percpu_trace_buffer())
3023 return;
3024
3025 /* trace_printk() is for debug use only. Don't use it in production. */
3026
3027 pr_warn("\n");
3028 pr_warn("**********************************************************\n");
3029 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3030 pr_warn("** **\n");
3031 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
3032 pr_warn("** **\n");
3033 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
3034 pr_warn("** unsafe for production use. **\n");
3035 pr_warn("** **\n");
3036 pr_warn("** If you see this message and you are not debugging **\n");
3037 pr_warn("** the kernel, report this immediately to your vendor! **\n");
3038 pr_warn("** **\n");
3039 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3040 pr_warn("**********************************************************\n");
3041
3042 /* Expand the buffers to set size */
3043 tracing_update_buffers();
3044
3045 buffers_allocated = 1;
3046
3047 /*
3048 * trace_printk_init_buffers() can be called by modules.
3049 * If that happens, then we need to start cmdline recording
3050 * directly here. If the global_trace.buffer is already
3051 * allocated here, then this was called by module code.
3052 */
3053 if (global_trace.trace_buffer.buffer)
3054 tracing_start_cmdline_record();
3055}
3056
3057void trace_printk_start_comm(void)
3058{
3059 /* Start tracing comms if trace printk is set */
3060 if (!buffers_allocated)
3061 return;
3062 tracing_start_cmdline_record();
3063}
3064
3065static void trace_printk_start_stop_comm(int enabled)
3066{
3067 if (!buffers_allocated)
3068 return;
3069
3070 if (enabled)
3071 tracing_start_cmdline_record();
3072 else
3073 tracing_stop_cmdline_record();
3074}
3075
3076/**
3077 * trace_vbprintk - write binary msg to tracing buffer
3078 * @ip: The address of the caller
 * @fmt: The format string to write to the buffer
 * @args: Arguments for @fmt
3079 */
3080int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3081{
3082 struct trace_event_call *call = &event_bprint;
3083 struct ring_buffer_event *event;
3084 struct ring_buffer *buffer;
3085 struct trace_array *tr = &global_trace;
3086 struct bprint_entry *entry;
3087 unsigned long flags;
3088 char *tbuffer;
3089 int len = 0, size, pc;
3090
3091 if (unlikely(tracing_selftest_running || tracing_disabled))
3092 return 0;
3093
3094 /* Don't pollute graph traces with trace_vprintk internals */
3095 pause_graph_tracing();
3096
3097 pc = preempt_count();
3098 preempt_disable_notrace();
3099
3100 tbuffer = get_trace_buf();
3101 if (!tbuffer) {
3102 len = 0;
3103 goto out_nobuffer;
3104 }
3105
3106 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3107
3108 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3109 goto out;
3110
3111 local_save_flags(flags);
3112 size = sizeof(*entry) + sizeof(u32) * len;
3113 buffer = tr->trace_buffer.buffer;
3114 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3115 flags, pc);
3116 if (!event)
3117 goto out;
3118 entry = ring_buffer_event_data(event);
3119 entry->ip = ip;
3120 entry->fmt = fmt;
3121
3122 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3123 if (!call_filter_check_discard(call, entry, buffer, event)) {
3124 __buffer_unlock_commit(buffer, event);
3125 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3126 }
3127
3128out:
3129 put_trace_buf();
3130
3131out_nobuffer:
3132 preempt_enable_notrace();
3133 unpause_graph_tracing();
3134
3135 return len;
3136}
3137EXPORT_SYMBOL_GPL(trace_vbprintk);
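
/*
 * Illustrative sketch (hypothetical debug hook, kept out of the build
 * with #if 0): trace_vbprintk() is normally reached through the
 * trace_printk() macro, which takes the binary (bprintk) path when the
 * format string is a compile-time constant.
 */
#if 0
static void example_debug_hook(int cpu, u64 delta)
{
	trace_printk("cpu %d saw a delta of %llu ns\n",
		     cpu, (unsigned long long)delta);
}
#endif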
3138
3139__printf(3, 0)
3140static int
3141__trace_array_vprintk(struct ring_buffer *buffer,
3142 unsigned long ip, const char *fmt, va_list args)
3143{
3144 struct trace_event_call *call = &event_print;
3145 struct ring_buffer_event *event;
3146 int len = 0, size, pc;
3147 struct print_entry *entry;
3148 unsigned long flags;
3149 char *tbuffer;
3150
3151 if (tracing_disabled || tracing_selftest_running)
3152 return 0;
3153
3154 /* Don't pollute graph traces with trace_vprintk internals */
3155 pause_graph_tracing();
3156
3157 pc = preempt_count();
3158 preempt_disable_notrace();
3159
3160
3161 tbuffer = get_trace_buf();
3162 if (!tbuffer) {
3163 len = 0;
3164 goto out_nobuffer;
3165 }
3166
3167 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3168
3169 local_save_flags(flags);
3170 size = sizeof(*entry) + len + 1;
3171 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3172 flags, pc);
3173 if (!event)
3174 goto out;
3175 entry = ring_buffer_event_data(event);
3176 entry->ip = ip;
3177
3178 memcpy(&entry->buf, tbuffer, len + 1);
3179 if (!call_filter_check_discard(call, entry, buffer, event)) {
3180 __buffer_unlock_commit(buffer, event);
3181 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3182 }
3183
3184out:
3185 put_trace_buf();
3186
3187out_nobuffer:
3188 preempt_enable_notrace();
3189 unpause_graph_tracing();
3190
3191 return len;
3192}
3193
3194__printf(3, 0)
3195int trace_array_vprintk(struct trace_array *tr,
3196 unsigned long ip, const char *fmt, va_list args)
3197{
3198 return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3199}
3200
3201__printf(3, 0)
3202int trace_array_printk(struct trace_array *tr,
3203 unsigned long ip, const char *fmt, ...)
3204{
3205 int ret;
3206 va_list ap;
3207
3208 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3209 return 0;
3210
3211 va_start(ap, fmt);
3212 ret = trace_array_vprintk(tr, ip, fmt, ap);
3213 va_end(ap);
3214 return ret;
3215}
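
/*
 * Illustrative sketch (hypothetical caller, kept out of the build with
 * #if 0): writing a printk-style message into a specific trace instance
 * rather than into the global buffer.
 */
#if 0
static void example_instance_note(struct trace_array *tr, int value)
{
	trace_array_printk(tr, _THIS_IP_, "example value: %d\n", value);
}
#endif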
3216
3217__printf(3, 4)
3218int trace_array_printk_buf(struct ring_buffer *buffer,
3219 unsigned long ip, const char *fmt, ...)
3220{
3221 int ret;
3222 va_list ap;
3223
3224 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3225 return 0;
3226
3227 va_start(ap, fmt);
3228 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3229 va_end(ap);
3230 return ret;
3231}
3232
3233__printf(2, 0)
3234int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3235{
3236 return trace_array_vprintk(&global_trace, ip, fmt, args);
3237}
3238EXPORT_SYMBOL_GPL(trace_vprintk);
3239
3240static void trace_iterator_increment(struct trace_iterator *iter)
3241{
3242 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3243
3244 iter->idx++;
3245 if (buf_iter)
3246 ring_buffer_read(buf_iter, NULL);
3247}
3248
3249static struct trace_entry *
3250peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3251 unsigned long *lost_events)
3252{
3253 struct ring_buffer_event *event;
3254 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3255
3256 if (buf_iter)
3257 event = ring_buffer_iter_peek(buf_iter, ts);
3258 else
3259 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3260 lost_events);
3261
3262 if (event) {
3263 iter->ent_size = ring_buffer_event_length(event);
3264 return ring_buffer_event_data(event);
3265 }
3266 iter->ent_size = 0;
3267 return NULL;
3268}
3269
3270static struct trace_entry *
3271__find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3272 unsigned long *missing_events, u64 *ent_ts)
3273{
3274 struct ring_buffer *buffer = iter->trace_buffer->buffer;
3275 struct trace_entry *ent, *next = NULL;
3276 unsigned long lost_events = 0, next_lost = 0;
3277 int cpu_file = iter->cpu_file;
3278 u64 next_ts = 0, ts;
3279 int next_cpu = -1;
3280 int next_size = 0;
3281 int cpu;
3282
3283 /*
3284	 * If we are in a per-CPU trace file, don't bother iterating over
3285	 * all CPUs; peek at that CPU directly.
3286 */
3287 if (cpu_file > RING_BUFFER_ALL_CPUS) {
3288 if (ring_buffer_empty_cpu(buffer, cpu_file))
3289 return NULL;
3290 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3291 if (ent_cpu)
3292 *ent_cpu = cpu_file;
3293
3294 return ent;
3295 }
3296
3297 for_each_tracing_cpu(cpu) {
3298
3299 if (ring_buffer_empty_cpu(buffer, cpu))
3300 continue;
3301
3302 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3303
3304 /*
3305 * Pick the entry with the smallest timestamp:
3306 */
3307 if (ent && (!next || ts < next_ts)) {
3308 next = ent;
3309 next_cpu = cpu;
3310 next_ts = ts;
3311 next_lost = lost_events;
3312 next_size = iter->ent_size;
3313 }
3314 }
3315
3316 iter->ent_size = next_size;
3317
3318 if (ent_cpu)
3319 *ent_cpu = next_cpu;
3320
3321 if (ent_ts)
3322 *ent_ts = next_ts;
3323
3324 if (missing_events)
3325 *missing_events = next_lost;
3326
3327 return next;
3328}
3329
3330/* Find the next real entry, without updating the iterator itself */
3331struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3332 int *ent_cpu, u64 *ent_ts)
3333{
3334 return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3335}
3336
3337/* Find the next real entry, and increment the iterator to the next entry */
3338void *trace_find_next_entry_inc(struct trace_iterator *iter)
3339{
3340 iter->ent = __find_next_entry(iter, &iter->cpu,
3341 &iter->lost_events, &iter->ts);
3342
3343 if (iter->ent)
3344 trace_iterator_increment(iter);
3345
3346 return iter->ent ? iter : NULL;
3347}
3348
3349static void trace_consume(struct trace_iterator *iter)
3350{
3351 ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3352 &iter->lost_events);
3353}
3354
3355static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3356{
3357 struct trace_iterator *iter = m->private;
3358 int i = (int)*pos;
3359 void *ent;
3360
3361 WARN_ON_ONCE(iter->leftover);
3362
3363 (*pos)++;
3364
3365 /* can't go backwards */
3366 if (iter->idx > i)
3367 return NULL;
3368
3369 if (iter->idx < 0)
3370 ent = trace_find_next_entry_inc(iter);
3371 else
3372 ent = iter;
3373
3374 while (ent && iter->idx < i)
3375 ent = trace_find_next_entry_inc(iter);
3376
3377 iter->pos = *pos;
3378
3379 return ent;
3380}
3381
3382void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3383{
3384 struct ring_buffer_event *event;
3385 struct ring_buffer_iter *buf_iter;
3386 unsigned long entries = 0;
3387 u64 ts;
3388
3389 per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3390
3391 buf_iter = trace_buffer_iter(iter, cpu);
3392 if (!buf_iter)
3393 return;
3394
3395 ring_buffer_iter_reset(buf_iter);
3396
3397 /*
3398	 * With the max latency tracers it is possible that a reset
3399	 * never took place on a CPU. This shows up as entries whose
3400	 * timestamp is before the start of the buffer.
3401 */
3402 while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3403 if (ts >= iter->trace_buffer->time_start)
3404 break;
3405 entries++;
3406 ring_buffer_read(buf_iter, NULL);
3407 }
3408
3409 per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3410}
3411
3412/*
3413 * The current tracer is copied to avoid holding a global lock
3414 * all around.
3415 */
3416static void *s_start(struct seq_file *m, loff_t *pos)
3417{
3418 struct trace_iterator *iter = m->private;
3419 struct trace_array *tr = iter->tr;
3420 int cpu_file = iter->cpu_file;
3421 void *p = NULL;
3422 loff_t l = 0;
3423 int cpu;
3424
3425 /*
3426 * copy the tracer to avoid using a global lock all around.
3427	 * iter->trace is a copy of current_trace; the pointer to the
3428 * name may be used instead of a strcmp(), as iter->trace->name
3429 * will point to the same string as current_trace->name.
3430 */
3431 mutex_lock(&trace_types_lock);
3432 if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3433 *iter->trace = *tr->current_trace;
3434 mutex_unlock(&trace_types_lock);
3435
3436#ifdef CONFIG_TRACER_MAX_TRACE
3437 if (iter->snapshot && iter->trace->use_max_tr)
3438 return ERR_PTR(-EBUSY);
3439#endif
3440
3441 if (!iter->snapshot)
3442 atomic_inc(&trace_record_taskinfo_disabled);
3443
3444 if (*pos != iter->pos) {
3445 iter->ent = NULL;
3446 iter->cpu = 0;
3447 iter->idx = -1;
3448
3449 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3450 for_each_tracing_cpu(cpu)
3451 tracing_iter_reset(iter, cpu);
3452 } else
3453 tracing_iter_reset(iter, cpu_file);
3454
3455 iter->leftover = 0;
3456 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3457 ;
3458
3459 } else {
3460 /*
3461 * If we overflowed the seq_file before, then we want
3462 * to just reuse the trace_seq buffer again.
3463 */
3464 if (iter->leftover)
3465 p = iter;
3466 else {
3467 l = *pos - 1;
3468 p = s_next(m, p, &l);
3469 }
3470 }
3471
3472 trace_event_read_lock();
3473 trace_access_lock(cpu_file);
3474 return p;
3475}
3476
3477static void s_stop(struct seq_file *m, void *p)
3478{
3479 struct trace_iterator *iter = m->private;
3480
3481#ifdef CONFIG_TRACER_MAX_TRACE
3482 if (iter->snapshot && iter->trace->use_max_tr)
3483 return;
3484#endif
3485
3486 if (!iter->snapshot)
3487 atomic_dec(&trace_record_taskinfo_disabled);
3488
3489 trace_access_unlock(iter->cpu_file);
3490 trace_event_read_unlock();
3491}
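
/*
 * Illustrative sketch (kept out of the build with #if 0): s_start(),
 * s_next() and s_stop() above, together with the s_show() callback
 * defined later in this file, are wired into a seq_operations table
 * along these lines.
 */
#if 0
static const struct seq_operations example_trace_seq_ops = {
	.start	= s_start,
	.next	= s_next,
	.stop	= s_stop,
	.show	= s_show,
};
#endif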
3492
3493static void
3494get_total_entries(struct trace_buffer *buf,
3495 unsigned long *total, unsigned long *entries)
3496{
3497 unsigned long count;
3498 int cpu;
3499
3500 *total = 0;
3501 *entries = 0;
3502
3503 for_each_tracing_cpu(cpu) {
3504 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3505 /*
3506 * If this buffer has skipped entries, then we hold all
3507 * entries for the trace and we need to ignore the
3508 * ones before the time stamp.
3509 */
3510 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3511 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3512 /* total is the same as the entries */
3513 *total += count;
3514 } else
3515 *total += count +
3516 ring_buffer_overrun_cpu(buf->buffer, cpu);
3517 *entries += count;
3518 }
3519}
3520
3521static void print_lat_help_header(struct seq_file *m)
3522{
3523 seq_puts(m, "# _------=> CPU# \n"
3524 "# / _-----=> irqs-off \n"
3525 "# | / _----=> need-resched \n"
3526 "# || / _---=> hardirq/softirq \n"
3527 "# ||| / _--=> preempt-depth \n"
3528 "# |||| / delay \n"
3529 "# cmd pid ||||| time | caller \n"
3530 "# \\ / ||||| \\ | / \n");
3531}
3532
3533static void print_event_info(struct