1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * OS Noise Tracer: computes the OS Noise suffered by a running thread. |
4 | * Timerlat Tracer: measures the wakeup latency of a timer triggered IRQ and thread. |
5 | * |
6 | * Based on "hwlat_detector" tracer by: |
7 | * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com> |
8 | * Copyright (C) 2013-2016 Steven Rostedt, Red Hat, Inc. <srostedt@redhat.com> |
9 | * With feedback from Clark Williams <williams@redhat.com> |
10 | * |
11 | * And also based on the rtsl tracer presented on: |
12 | * DE OLIVEIRA, Daniel Bristot, et al. Demystifying the real-time linux |
13 | * scheduling latency. In: 32nd Euromicro Conference on Real-Time Systems |
 * (ECRTS 2020). Schloss Dagstuhl-Leibniz-Zentrum für Informatik, 2020.
15 | * |
16 | * Copyright (C) 2021 Daniel Bristot de Oliveira, Red Hat, Inc. <bristot@redhat.com> |
17 | */ |
18 | |
19 | #include <linux/kthread.h> |
20 | #include <linux/tracefs.h> |
21 | #include <linux/uaccess.h> |
22 | #include <linux/cpumask.h> |
23 | #include <linux/delay.h> |
24 | #include <linux/sched/clock.h> |
25 | #include <uapi/linux/sched/types.h> |
26 | #include <linux/sched.h> |
27 | #include "trace.h" |
28 | |
29 | #ifdef CONFIG_X86_LOCAL_APIC |
30 | #include <asm/trace/irq_vectors.h> |
31 | #undef TRACE_INCLUDE_PATH |
32 | #undef TRACE_INCLUDE_FILE |
33 | #endif /* CONFIG_X86_LOCAL_APIC */ |
34 | |
35 | #include <trace/events/irq.h> |
36 | #include <trace/events/sched.h> |
37 | |
38 | #define CREATE_TRACE_POINTS |
39 | #include <trace/events/osnoise.h> |
40 | |
41 | /* |
42 | * Default values. |
43 | */ |
44 | #define BANNER "osnoise: " |
45 | #define DEFAULT_SAMPLE_PERIOD 1000000 /* 1s */ |
46 | #define DEFAULT_SAMPLE_RUNTIME 1000000 /* 1s */ |
47 | |
48 | #define DEFAULT_TIMERLAT_PERIOD 1000 /* 1ms */ |
49 | #define DEFAULT_TIMERLAT_PRIO 95 /* FIFO 95 */ |
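
/*
 * These defaults can be changed at runtime via tracefs (the examples below
 * assume tracefs mounted at /sys/kernel/tracing):
 *
 *	echo 500000 > /sys/kernel/tracing/osnoise/runtime_us
 *	echo 1000000 > /sys/kernel/tracing/osnoise/period_us
 *
 * which makes the sampling threads run for 0.5s out of every 1s period.
 */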
50 | |
51 | /* |
52 | * osnoise/options entries. |
53 | */ |
54 | enum osnoise_options_index { |
55 | OSN_DEFAULTS = 0, |
56 | OSN_WORKLOAD, |
57 | OSN_PANIC_ON_STOP, |
58 | OSN_PREEMPT_DISABLE, |
59 | OSN_IRQ_DISABLE, |
60 | OSN_MAX |
61 | }; |
62 | |
static const char * const osnoise_options_str[OSN_MAX] = {
	"DEFAULTS",
	"OSNOISE_WORKLOAD",
	"PANIC_ON_STOP",
	"OSNOISE_PREEMPT_DISABLE",
	"OSNOISE_IRQ_DISABLE",
};
69 | |
70 | #define OSN_DEFAULT_OPTIONS 0x2 |
71 | static unsigned long osnoise_options = OSN_DEFAULT_OPTIONS; |
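
/*
 * OSN_DEFAULT_OPTIONS == 0x2 == BIT(OSN_WORKLOAD): only the tracer's own
 * workload threads are enabled by default. A minimal sketch of how an
 * option bit is queried (dispatch_workload() is a hypothetical helper):
 *
 *	if (test_bit(OSN_WORKLOAD, &osnoise_options))
 *		dispatch_workload();
 */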
72 | |
73 | /* |
74 | * trace_array of the enabled osnoise/timerlat instances. |
75 | */ |
76 | struct osnoise_instance { |
77 | struct list_head list; |
78 | struct trace_array *tr; |
79 | }; |
80 | |
81 | static struct list_head osnoise_instances; |
82 | |
83 | static bool osnoise_has_registered_instances(void) |
84 | { |
85 | return !!list_first_or_null_rcu(&osnoise_instances, |
86 | struct osnoise_instance, |
87 | list); |
88 | } |
89 | |
90 | /* |
91 | * osnoise_instance_registered - check if a tr is already registered |
92 | */ |
93 | static int osnoise_instance_registered(struct trace_array *tr) |
94 | { |
95 | struct osnoise_instance *inst; |
96 | int found = 0; |
97 | |
98 | rcu_read_lock(); |
99 | list_for_each_entry_rcu(inst, &osnoise_instances, list) { |
100 | if (inst->tr == tr) |
101 | found = 1; |
102 | } |
103 | rcu_read_unlock(); |
104 | |
105 | return found; |
106 | } |
107 | |
108 | /* |
109 | * osnoise_register_instance - register a new trace instance |
110 | * |
111 | * Register a trace_array *tr in the list of instances running |
112 | * osnoise/timerlat tracers. |
113 | */ |
114 | static int osnoise_register_instance(struct trace_array *tr) |
115 | { |
116 | struct osnoise_instance *inst; |
117 | |
118 | /* |
119 | * register/unregister serialization is provided by trace's |
120 | * trace_types_lock. |
121 | */ |
122 | lockdep_assert_held(&trace_types_lock); |
123 | |
	inst = kmalloc(sizeof(*inst), GFP_KERNEL);
	if (!inst)
		return -ENOMEM;

	INIT_LIST_HEAD_RCU(&inst->list);
	inst->tr = tr;
	list_add_tail_rcu(&inst->list, &osnoise_instances);
131 | |
132 | return 0; |
133 | } |
134 | |
135 | /* |
136 | * osnoise_unregister_instance - unregister a registered trace instance |
137 | * |
138 | * Remove the trace_array *tr from the list of instances running |
139 | * osnoise/timerlat tracers. |
140 | */ |
141 | static void osnoise_unregister_instance(struct trace_array *tr) |
142 | { |
143 | struct osnoise_instance *inst; |
144 | int found = 0; |
145 | |
146 | /* |
147 | * register/unregister serialization is provided by trace's |
148 | * trace_types_lock. |
149 | */ |
150 | list_for_each_entry_rcu(inst, &osnoise_instances, list, |
151 | lockdep_is_held(&trace_types_lock)) { |
152 | if (inst->tr == tr) { |
			list_del_rcu(&inst->list);
154 | found = 1; |
155 | break; |
156 | } |
157 | } |
158 | |
159 | if (!found) |
160 | return; |
161 | |
162 | kvfree_rcu_mightsleep(inst); |
163 | } |
164 | |
165 | /* |
166 | * NMI runtime info. |
167 | */ |
168 | struct osn_nmi { |
169 | u64 count; |
170 | u64 delta_start; |
171 | }; |
172 | |
173 | /* |
174 | * IRQ runtime info. |
175 | */ |
176 | struct osn_irq { |
177 | u64 count; |
178 | u64 arrival_time; |
179 | u64 delta_start; |
180 | }; |
181 | |
182 | #define IRQ_CONTEXT 0 |
183 | #define THREAD_CONTEXT 1 |
184 | #define THREAD_URET 2 |
185 | /* |
 * softirq runtime info.
187 | */ |
188 | struct osn_softirq { |
189 | u64 count; |
190 | u64 arrival_time; |
191 | u64 delta_start; |
192 | }; |
193 | |
194 | /* |
195 | * thread runtime info. |
196 | */ |
197 | struct osn_thread { |
198 | u64 count; |
199 | u64 arrival_time; |
200 | u64 delta_start; |
201 | }; |
202 | |
203 | /* |
204 | * Runtime information: this structure saves the runtime information used by |
205 | * one sampling thread. |
206 | */ |
207 | struct osnoise_variables { |
208 | struct task_struct *kthread; |
209 | bool sampling; |
210 | pid_t pid; |
211 | struct osn_nmi nmi; |
212 | struct osn_irq irq; |
213 | struct osn_softirq softirq; |
214 | struct osn_thread thread; |
215 | local_t int_counter; |
216 | }; |
217 | |
218 | /* |
219 | * Per-cpu runtime information. |
220 | */ |
221 | static DEFINE_PER_CPU(struct osnoise_variables, per_cpu_osnoise_var); |
222 | |
223 | /* |
 * this_cpu_osn_var - Return the per-cpu osnoise_variables of the current CPU
225 | */ |
226 | static inline struct osnoise_variables *this_cpu_osn_var(void) |
227 | { |
228 | return this_cpu_ptr(&per_cpu_osnoise_var); |
229 | } |
230 | |
231 | #ifdef CONFIG_TIMERLAT_TRACER |
232 | /* |
233 | * Runtime information for the timer mode. |
234 | */ |
235 | struct timerlat_variables { |
236 | struct task_struct *kthread; |
237 | struct hrtimer timer; |
238 | u64 rel_period; |
239 | u64 abs_period; |
240 | bool tracing_thread; |
241 | u64 count; |
242 | bool uthread_migrate; |
243 | }; |
244 | |
245 | static DEFINE_PER_CPU(struct timerlat_variables, per_cpu_timerlat_var); |
246 | |
247 | /* |
 * this_cpu_tmr_var - Return the per-cpu timerlat_variables of the current CPU
249 | */ |
250 | static inline struct timerlat_variables *this_cpu_tmr_var(void) |
251 | { |
252 | return this_cpu_ptr(&per_cpu_timerlat_var); |
253 | } |
254 | |
255 | /* |
 * tlat_var_reset - Reset the values of all per-cpu timerlat_variables
257 | */ |
258 | static inline void tlat_var_reset(void) |
259 | { |
260 | struct timerlat_variables *tlat_var; |
261 | int cpu; |
262 | /* |
263 | * So far, all the values are initialized as 0, so |
264 | * zeroing the structure is perfect. |
265 | */ |
266 | for_each_cpu(cpu, cpu_online_mask) { |
267 | tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu); |
268 | memset(tlat_var, 0, sizeof(*tlat_var)); |
269 | } |
270 | } |
271 | #else /* CONFIG_TIMERLAT_TRACER */ |
272 | #define tlat_var_reset() do {} while (0) |
273 | #endif /* CONFIG_TIMERLAT_TRACER */ |
274 | |
275 | /* |
 * osn_var_reset - Reset the values of all per-cpu osnoise_variables
277 | */ |
278 | static inline void osn_var_reset(void) |
279 | { |
280 | struct osnoise_variables *osn_var; |
281 | int cpu; |
282 | |
283 | /* |
284 | * So far, all the values are initialized as 0, so |
285 | * zeroing the structure is perfect. |
286 | */ |
287 | for_each_cpu(cpu, cpu_online_mask) { |
288 | osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu); |
289 | memset(osn_var, 0, sizeof(*osn_var)); |
290 | } |
291 | } |
292 | |
293 | /* |
 * osn_var_reset_all - Reset the per-cpu osnoise and timerlat variables
295 | */ |
296 | static inline void osn_var_reset_all(void) |
297 | { |
298 | osn_var_reset(); |
299 | tlat_var_reset(); |
300 | } |
301 | |
302 | /* |
303 | * Tells NMIs to call back to the osnoise tracer to record timestamps. |
304 | */ |
305 | bool trace_osnoise_callback_enabled; |
306 | |
307 | /* |
308 | * osnoise sample structure definition. Used to store the statistics of a |
309 | * sample run. |
310 | */ |
311 | struct osnoise_sample { |
312 | u64 runtime; /* runtime */ |
313 | u64 noise; /* noise */ |
314 | u64 max_sample; /* max single noise sample */ |
315 | int hw_count; /* # HW (incl. hypervisor) interference */ |
316 | int nmi_count; /* # NMIs during this sample */ |
317 | int irq_count; /* # IRQs during this sample */ |
318 | int softirq_count; /* # softirqs during this sample */ |
319 | int thread_count; /* # threads during this sample */ |
320 | }; |
321 | |
322 | #ifdef CONFIG_TIMERLAT_TRACER |
323 | /* |
324 | * timerlat sample structure definition. Used to store the statistics of |
325 | * a sample run. |
326 | */ |
327 | struct timerlat_sample { |
328 | u64 timer_latency; /* timer_latency */ |
329 | unsigned int seqnum; /* unique sequence */ |
330 | int context; /* timer context */ |
331 | }; |
332 | #endif |
333 | |
334 | /* |
335 | * Protect the interface. |
336 | */ |
337 | static struct mutex interface_lock; |
338 | |
339 | /* |
340 | * Tracer data. |
341 | */ |
342 | static struct osnoise_data { |
343 | u64 sample_period; /* total sampling period */ |
344 | u64 sample_runtime; /* active sampling portion of period */ |
345 | u64 stop_tracing; /* stop trace in the internal operation (loop/irq) */ |
346 | u64 stop_tracing_total; /* stop trace in the final operation (report/thread) */ |
347 | #ifdef CONFIG_TIMERLAT_TRACER |
348 | u64 timerlat_period; /* timerlat period */ |
	u64 print_stack; /* print the IRQ stack if latency > this value (us) */
350 | int timerlat_tracer; /* timerlat tracer */ |
351 | #endif |
	bool tainted; /* inform users and developers about a problem */
353 | } osnoise_data = { |
354 | .sample_period = DEFAULT_SAMPLE_PERIOD, |
355 | .sample_runtime = DEFAULT_SAMPLE_RUNTIME, |
356 | .stop_tracing = 0, |
357 | .stop_tracing_total = 0, |
358 | #ifdef CONFIG_TIMERLAT_TRACER |
359 | .print_stack = 0, |
360 | .timerlat_period = DEFAULT_TIMERLAT_PERIOD, |
361 | .timerlat_tracer = 0, |
362 | #endif |
363 | }; |
364 | |
365 | #ifdef CONFIG_TIMERLAT_TRACER |
366 | static inline bool timerlat_enabled(void) |
367 | { |
368 | return osnoise_data.timerlat_tracer; |
369 | } |
370 | |
371 | static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var) |
372 | { |
373 | struct timerlat_variables *tlat_var = this_cpu_tmr_var(); |
374 | /* |
	 * If the timerlat is enabled, but the irq handler did
	 * not run yet to set tracing_thread, do not trace.
377 | */ |
378 | if (!tlat_var->tracing_thread) { |
379 | osn_var->softirq.arrival_time = 0; |
380 | osn_var->softirq.delta_start = 0; |
381 | return 0; |
382 | } |
383 | return 1; |
384 | } |
385 | |
386 | static inline int timerlat_thread_exit(struct osnoise_variables *osn_var) |
387 | { |
388 | struct timerlat_variables *tlat_var = this_cpu_tmr_var(); |
389 | /* |
	 * If the timerlat is enabled, but the irq handler did
	 * not run yet to set tracing_thread, do not trace.
392 | */ |
393 | if (!tlat_var->tracing_thread) { |
394 | osn_var->thread.delta_start = 0; |
395 | osn_var->thread.arrival_time = 0; |
396 | return 0; |
397 | } |
398 | return 1; |
399 | } |
400 | #else /* CONFIG_TIMERLAT_TRACER */ |
401 | static inline bool timerlat_enabled(void) |
402 | { |
403 | return false; |
404 | } |
405 | |
406 | static inline int timerlat_softirq_exit(struct osnoise_variables *osn_var) |
407 | { |
408 | return 1; |
409 | } |
410 | static inline int timerlat_thread_exit(struct osnoise_variables *osn_var) |
411 | { |
412 | return 1; |
413 | } |
414 | #endif |
415 | |
416 | #ifdef CONFIG_PREEMPT_RT |
417 | /* |
418 | * Print the osnoise header info. |
419 | */ |
420 | static void print_osnoise_headers(struct seq_file *s) |
421 | { |
422 | if (osnoise_data.tainted) |
423 | seq_puts(s, "# osnoise is tainted!\n" ); |
424 | |
425 | seq_puts(s, "# _-------=> irqs-off\n" ); |
426 | seq_puts(s, "# / _------=> need-resched\n" ); |
427 | seq_puts(s, "# | / _-----=> need-resched-lazy\n" ); |
428 | seq_puts(s, "# || / _----=> hardirq/softirq\n" ); |
429 | seq_puts(s, "# ||| / _---=> preempt-depth\n" ); |
430 | seq_puts(s, "# |||| / _--=> preempt-lazy-depth\n" ); |
431 | seq_puts(s, "# ||||| / _-=> migrate-disable\n" ); |
432 | |
433 | seq_puts(s, "# |||||| / " ); |
434 | seq_puts(s, " MAX\n" ); |
435 | |
436 | seq_puts(s, "# ||||| / " ); |
437 | seq_puts(s, " SINGLE Interference counters:\n" ); |
438 | |
439 | seq_puts(s, "# ||||||| RUNTIME " ); |
440 | seq_puts(s, " NOISE %% OF CPU NOISE +-----------------------------+\n" ); |
441 | |
442 | seq_puts(s, "# TASK-PID CPU# ||||||| TIMESTAMP IN US " ); |
443 | seq_puts(s, " IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD\n" ); |
444 | |
445 | seq_puts(s, "# | | | ||||||| | | " ); |
446 | seq_puts(s, " | | | | | | | |\n" ); |
447 | } |
448 | #else /* CONFIG_PREEMPT_RT */ |
static void print_osnoise_headers(struct seq_file *s)
{
	if (osnoise_data.tainted)
		seq_puts(s, "# osnoise is tainted!\n");

	seq_puts(s, "# _-----=> irqs-off\n");
	seq_puts(s, "# / _----=> need-resched\n");
	seq_puts(s, "# | / _---=> hardirq/softirq\n");
	seq_puts(s, "# || / _--=> preempt-depth\n");
	seq_puts(s, "# ||| / _-=> migrate-disable ");
	seq_puts(s, " MAX\n");
	seq_puts(s, "# |||| / delay ");
	seq_puts(s, " SINGLE Interference counters:\n");

	seq_puts(s, "# ||||| RUNTIME ");
	seq_puts(s, " NOISE %% OF CPU NOISE +-----------------------------+\n");

	seq_puts(s, "# TASK-PID CPU# ||||| TIMESTAMP IN US ");
	seq_puts(s, " IN US AVAILABLE IN US HW NMI IRQ SIRQ THREAD\n");

	seq_puts(s, "# | | | ||||| | | ");
	seq_puts(s, " | | | | | | | |\n");
471 | } |
472 | #endif /* CONFIG_PREEMPT_RT */ |
473 | |
474 | /* |
475 | * osnoise_taint - report an osnoise error. |
476 | */ |
477 | #define osnoise_taint(msg) ({ \ |
478 | struct osnoise_instance *inst; \ |
479 | struct trace_buffer *buffer; \ |
480 | \ |
481 | rcu_read_lock(); \ |
482 | list_for_each_entry_rcu(inst, &osnoise_instances, list) { \ |
483 | buffer = inst->tr->array_buffer.buffer; \ |
484 | trace_array_printk_buf(buffer, _THIS_IP_, msg); \ |
485 | } \ |
486 | rcu_read_unlock(); \ |
487 | osnoise_data.tainted = true; \ |
488 | }) |
489 | |
490 | /* |
491 | * Record an osnoise_sample into the tracer buffer. |
492 | */ |
493 | static void |
494 | __trace_osnoise_sample(struct osnoise_sample *sample, struct trace_buffer *buffer) |
495 | { |
496 | struct trace_event_call *call = &event_osnoise; |
497 | struct ring_buffer_event *event; |
498 | struct osnoise_entry *entry; |
499 | |
	event = trace_buffer_lock_reserve(buffer, TRACE_OSNOISE, sizeof(*entry),
					  tracing_gen_ctx());
502 | if (!event) |
503 | return; |
504 | entry = ring_buffer_event_data(event); |
505 | entry->runtime = sample->runtime; |
506 | entry->noise = sample->noise; |
507 | entry->max_sample = sample->max_sample; |
508 | entry->hw_count = sample->hw_count; |
509 | entry->nmi_count = sample->nmi_count; |
510 | entry->irq_count = sample->irq_count; |
511 | entry->softirq_count = sample->softirq_count; |
512 | entry->thread_count = sample->thread_count; |
513 | |
	if (!call_filter_check_discard(call, entry, buffer, event))
515 | trace_buffer_unlock_commit_nostack(buffer, event); |
516 | } |
517 | |
518 | /* |
519 | * Record an osnoise_sample on all osnoise instances. |
520 | */ |
521 | static void trace_osnoise_sample(struct osnoise_sample *sample) |
522 | { |
523 | struct osnoise_instance *inst; |
524 | struct trace_buffer *buffer; |
525 | |
526 | rcu_read_lock(); |
527 | list_for_each_entry_rcu(inst, &osnoise_instances, list) { |
528 | buffer = inst->tr->array_buffer.buffer; |
529 | __trace_osnoise_sample(sample, buffer); |
530 | } |
531 | rcu_read_unlock(); |
532 | } |
533 | |
534 | #ifdef CONFIG_TIMERLAT_TRACER |
535 | /* |
536 | * Print the timerlat header info. |
537 | */ |
538 | #ifdef CONFIG_PREEMPT_RT |
539 | static void print_timerlat_headers(struct seq_file *s) |
540 | { |
541 | seq_puts(s, "# _-------=> irqs-off\n" ); |
542 | seq_puts(s, "# / _------=> need-resched\n" ); |
543 | seq_puts(s, "# | / _-----=> need-resched-lazy\n" ); |
544 | seq_puts(s, "# || / _----=> hardirq/softirq\n" ); |
545 | seq_puts(s, "# ||| / _---=> preempt-depth\n" ); |
546 | seq_puts(s, "# |||| / _--=> preempt-lazy-depth\n" ); |
547 | seq_puts(s, "# ||||| / _-=> migrate-disable\n" ); |
548 | seq_puts(s, "# |||||| /\n" ); |
549 | seq_puts(s, "# ||||||| ACTIVATION\n" ); |
550 | seq_puts(s, "# TASK-PID CPU# ||||||| TIMESTAMP ID " ); |
551 | seq_puts(s, " CONTEXT LATENCY\n" ); |
552 | seq_puts(s, "# | | | ||||||| | | " ); |
553 | seq_puts(s, " | |\n" ); |
554 | } |
555 | #else /* CONFIG_PREEMPT_RT */ |
static void print_timerlat_headers(struct seq_file *s)
{
	seq_puts(s, "# _-----=> irqs-off\n");
	seq_puts(s, "# / _----=> need-resched\n");
	seq_puts(s, "# | / _---=> hardirq/softirq\n");
	seq_puts(s, "# || / _--=> preempt-depth\n");
	seq_puts(s, "# ||| / _-=> migrate-disable\n");
	seq_puts(s, "# |||| / delay\n");
	seq_puts(s, "# ||||| ACTIVATION\n");
	seq_puts(s, "# TASK-PID CPU# ||||| TIMESTAMP ID ");
	seq_puts(s, " CONTEXT LATENCY\n");
	seq_puts(s, "# | | | ||||| | | ");
	seq_puts(s, " | |\n");
569 | } |
570 | #endif /* CONFIG_PREEMPT_RT */ |
571 | |
572 | static void |
573 | __trace_timerlat_sample(struct timerlat_sample *sample, struct trace_buffer *buffer) |
574 | { |
575 | struct trace_event_call *call = &event_osnoise; |
576 | struct ring_buffer_event *event; |
577 | struct timerlat_entry *entry; |
578 | |
	event = trace_buffer_lock_reserve(buffer, TRACE_TIMERLAT, sizeof(*entry),
					  tracing_gen_ctx());
581 | if (!event) |
582 | return; |
583 | entry = ring_buffer_event_data(event); |
584 | entry->seqnum = sample->seqnum; |
585 | entry->context = sample->context; |
586 | entry->timer_latency = sample->timer_latency; |
587 | |
	if (!call_filter_check_discard(call, entry, buffer, event))
589 | trace_buffer_unlock_commit_nostack(buffer, event); |
590 | } |
591 | |
592 | /* |
 * Record a timerlat_sample on all osnoise instances.
594 | */ |
595 | static void trace_timerlat_sample(struct timerlat_sample *sample) |
596 | { |
597 | struct osnoise_instance *inst; |
598 | struct trace_buffer *buffer; |
599 | |
600 | rcu_read_lock(); |
601 | list_for_each_entry_rcu(inst, &osnoise_instances, list) { |
602 | buffer = inst->tr->array_buffer.buffer; |
603 | __trace_timerlat_sample(sample, buffer); |
604 | } |
605 | rcu_read_unlock(); |
606 | } |
607 | |
608 | #ifdef CONFIG_STACKTRACE |
609 | |
610 | #define MAX_CALLS 256 |
611 | |
612 | /* |
613 | * Stack trace will take place only at IRQ level, so, no need |
614 | * to control nesting here. |
615 | */ |
616 | struct trace_stack { |
617 | int stack_size; |
618 | int nr_entries; |
619 | unsigned long calls[MAX_CALLS]; |
620 | }; |
621 | |
622 | static DEFINE_PER_CPU(struct trace_stack, trace_stack); |
623 | |
624 | /* |
625 | * timerlat_save_stack - save a stack trace without printing |
626 | * |
627 | * Save the current stack trace without printing. The |
628 | * stack will be printed later, after the end of the measurement. |
629 | */ |
630 | static void timerlat_save_stack(int skip) |
631 | { |
632 | unsigned int size, nr_entries; |
633 | struct trace_stack *fstack; |
634 | |
635 | fstack = this_cpu_ptr(&trace_stack); |
636 | |
637 | size = ARRAY_SIZE(fstack->calls); |
638 | |
	nr_entries = stack_trace_save(fstack->calls, size, skip);

	fstack->stack_size = nr_entries * sizeof(unsigned long);
	fstack->nr_entries = nr_entries;
}
647 | |
648 | static void |
649 | __timerlat_dump_stack(struct trace_buffer *buffer, struct trace_stack *fstack, unsigned int size) |
650 | { |
651 | struct trace_event_call *call = &event_osnoise; |
652 | struct ring_buffer_event *event; |
653 | struct stack_entry *entry; |
654 | |
	event = trace_buffer_lock_reserve(buffer, TRACE_STACK, sizeof(*entry) + size,
					  tracing_gen_ctx());
657 | if (!event) |
658 | return; |
659 | |
660 | entry = ring_buffer_event_data(event); |
661 | |
662 | memcpy(&entry->caller, fstack->calls, size); |
663 | entry->size = fstack->nr_entries; |
664 | |
	if (!call_filter_check_discard(call, entry, buffer, event))
666 | trace_buffer_unlock_commit_nostack(buffer, event); |
667 | } |
668 | |
669 | /* |
670 | * timerlat_dump_stack - dump a stack trace previously saved |
671 | */ |
672 | static void timerlat_dump_stack(u64 latency) |
673 | { |
674 | struct osnoise_instance *inst; |
675 | struct trace_buffer *buffer; |
676 | struct trace_stack *fstack; |
677 | unsigned int size; |
678 | |
679 | /* |
680 | * trace only if latency > print_stack config, if enabled. |
681 | */ |
682 | if (!osnoise_data.print_stack || osnoise_data.print_stack > latency) |
683 | return; |
684 | |
685 | preempt_disable_notrace(); |
686 | fstack = this_cpu_ptr(&trace_stack); |
687 | size = fstack->stack_size; |
688 | |
689 | rcu_read_lock(); |
690 | list_for_each_entry_rcu(inst, &osnoise_instances, list) { |
691 | buffer = inst->tr->array_buffer.buffer; |
692 | __timerlat_dump_stack(buffer, fstack, size); |
693 | |
694 | } |
695 | rcu_read_unlock(); |
696 | preempt_enable_notrace(); |
697 | } |
698 | #else /* CONFIG_STACKTRACE */ |
#define timerlat_dump_stack(latency) do {} while (0)
700 | #define timerlat_save_stack(a) do {} while (0) |
701 | #endif /* CONFIG_STACKTRACE */ |
702 | #endif /* CONFIG_TIMERLAT_TRACER */ |
703 | |
704 | /* |
705 | * Macros to encapsulate the time capturing infrastructure. |
706 | */ |
707 | #define time_get() trace_clock_local() |
708 | #define time_to_us(x) div_u64(x, 1000) |
709 | #define time_sub(a, b) ((a) - (b)) |
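
/*
 * A minimal usage sketch of the macros above; values are in nanoseconds
 * until converted for reporting:
 *
 *	u64 start, now;
 *
 *	start = time_get();
 *	...
 *	now = time_get();
 *	pr_debug("elapsed: %llu us\n", time_to_us(time_sub(now, start)));
 */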
710 | |
711 | /* |
712 | * cond_move_irq_delta_start - Forward the delta_start of a running IRQ |
713 | * |
714 | * If an IRQ is preempted by an NMI, its delta_start is pushed forward |
715 | * to discount the NMI interference. |
716 | * |
717 | * See get_int_safe_duration(). |
718 | */ |
719 | static inline void |
720 | cond_move_irq_delta_start(struct osnoise_variables *osn_var, u64 duration) |
721 | { |
722 | if (osn_var->irq.delta_start) |
723 | osn_var->irq.delta_start += duration; |
724 | } |
725 | |
726 | #ifndef CONFIG_PREEMPT_RT |
727 | /* |
728 | * cond_move_softirq_delta_start - Forward the delta_start of a running softirq. |
729 | * |
730 | * If a softirq is preempted by an IRQ or NMI, its delta_start is pushed |
731 | * forward to discount the interference. |
732 | * |
733 | * See get_int_safe_duration(). |
734 | */ |
735 | static inline void |
736 | cond_move_softirq_delta_start(struct osnoise_variables *osn_var, u64 duration) |
737 | { |
738 | if (osn_var->softirq.delta_start) |
739 | osn_var->softirq.delta_start += duration; |
740 | } |
741 | #else /* CONFIG_PREEMPT_RT */ |
742 | #define cond_move_softirq_delta_start(osn_var, duration) do {} while (0) |
743 | #endif |
744 | |
745 | /* |
746 | * cond_move_thread_delta_start - Forward the delta_start of a running thread |
747 | * |
 * If a noisy thread is preempted by a softirq, IRQ or NMI, its delta_start
749 | * is pushed forward to discount the interference. |
750 | * |
751 | * See get_int_safe_duration(). |
752 | */ |
753 | static inline void |
754 | cond_move_thread_delta_start(struct osnoise_variables *osn_var, u64 duration) |
755 | { |
756 | if (osn_var->thread.delta_start) |
757 | osn_var->thread.delta_start += duration; |
758 | } |
759 | |
760 | /* |
761 | * get_int_safe_duration - Get the duration of a window |
762 | * |
 * The irq, softirq and thread variables need to have their duration without
764 | * the interference from higher priority interrupts. Instead of keeping a |
765 | * variable to discount the interrupt interference from these variables, the |
766 | * starting time of these variables are pushed forward with the interrupt's |
767 | * duration. In this way, a single variable is used to: |
768 | * |
769 | * - Know if a given window is being measured. |
770 | * - Account its duration. |
771 | * - Discount the interference. |
772 | * |
773 | * To avoid getting inconsistent values, e.g.,: |
774 | * |
775 | * now = time_get() |
776 | * ---> interrupt! |
 * delta_start += int duration;
778 | * <--- |
779 | * duration = now - delta_start; |
780 | * |
781 | * result: negative duration if the variable duration before the |
782 | * interrupt was smaller than the interrupt execution. |
783 | * |
 * A counter of interrupts is used. If the counter changed during the
 * measurement, the capture is retried until an interference-safe
 * duration is obtained.
786 | */ |
787 | static inline s64 |
788 | get_int_safe_duration(struct osnoise_variables *osn_var, u64 *delta_start) |
789 | { |
790 | u64 int_counter, now; |
791 | s64 duration; |
792 | |
793 | do { |
794 | int_counter = local_read(&osn_var->int_counter); |
795 | /* synchronize with interrupts */ |
796 | barrier(); |
797 | |
798 | now = time_get(); |
799 | duration = (now - *delta_start); |
800 | |
801 | /* synchronize with interrupts */ |
802 | barrier(); |
803 | } while (int_counter != local_read(&osn_var->int_counter)); |
804 | |
805 | /* |
806 | * This is an evidence of race conditions that cause |
807 | * a value to be "discounted" too much. |
808 | */ |
809 | if (duration < 0) |
810 | osnoise_taint("Negative duration!\n" ); |
811 | |
812 | *delta_start = 0; |
813 | |
814 | return duration; |
815 | } |
816 | |
817 | /* |
818 | * |
819 | * set_int_safe_time - Save the current time on *time, aware of interference |
820 | * |
821 | * Get the time, taking into consideration a possible interference from |
822 | * higher priority interrupts. |
823 | * |
824 | * See get_int_safe_duration() for an explanation. |
825 | */ |
826 | static u64 |
827 | set_int_safe_time(struct osnoise_variables *osn_var, u64 *time) |
828 | { |
829 | u64 int_counter; |
830 | |
831 | do { |
832 | int_counter = local_read(&osn_var->int_counter); |
833 | /* synchronize with interrupts */ |
834 | barrier(); |
835 | |
836 | *time = time_get(); |
837 | |
838 | /* synchronize with interrupts */ |
839 | barrier(); |
840 | } while (int_counter != local_read(&osn_var->int_counter)); |
841 | |
842 | return int_counter; |
843 | } |
844 | |
845 | #ifdef CONFIG_TIMERLAT_TRACER |
846 | /* |
 * copy_int_safe_time - Copy *src into *dst, aware of interference
848 | */ |
849 | static u64 |
850 | copy_int_safe_time(struct osnoise_variables *osn_var, u64 *dst, u64 *src) |
851 | { |
852 | u64 int_counter; |
853 | |
854 | do { |
855 | int_counter = local_read(&osn_var->int_counter); |
856 | /* synchronize with interrupts */ |
857 | barrier(); |
858 | |
859 | *dst = *src; |
860 | |
861 | /* synchronize with interrupts */ |
862 | barrier(); |
863 | } while (int_counter != local_read(&osn_var->int_counter)); |
864 | |
865 | return int_counter; |
866 | } |
867 | #endif /* CONFIG_TIMERLAT_TRACER */ |
868 | |
869 | /* |
870 | * trace_osnoise_callback - NMI entry/exit callback |
871 | * |
 * This function is called on NMI entry and exit. The bool enter
 * distinguishes between the two cases. It is used to note an NMI
 * occurrence, compute the noise caused by the NMI, and to remove the
 * noise it is potentially causing on other interference variables.
876 | */ |
877 | void trace_osnoise_callback(bool enter) |
878 | { |
879 | struct osnoise_variables *osn_var = this_cpu_osn_var(); |
880 | u64 duration; |
881 | |
882 | if (!osn_var->sampling) |
883 | return; |
884 | |
885 | /* |
886 | * Currently trace_clock_local() calls sched_clock() and the |
887 | * generic version is not NMI safe. |
888 | */ |
889 | if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) { |
890 | if (enter) { |
891 | osn_var->nmi.delta_start = time_get(); |
			local_inc(&osn_var->int_counter);
		} else {
			duration = time_get() - osn_var->nmi.delta_start;

			trace_nmi_noise(osn_var->nmi.delta_start, duration);
897 | |
898 | cond_move_irq_delta_start(osn_var, duration); |
899 | cond_move_softirq_delta_start(osn_var, duration); |
900 | cond_move_thread_delta_start(osn_var, duration); |
901 | } |
902 | } |
903 | |
904 | if (enter) |
905 | osn_var->nmi.count++; |
906 | } |
907 | |
908 | /* |
909 | * osnoise_trace_irq_entry - Note the starting of an IRQ |
910 | * |
911 | * Save the starting time of an IRQ. As IRQs are non-preemptive to other IRQs, |
 * it is safe to use a single variable (osn_var->irq) to save the statistics.
913 | * The arrival_time is used to report... the arrival time. The delta_start |
914 | * is used to compute the duration at the IRQ exit handler. See |
915 | * cond_move_irq_delta_start(). |
916 | */ |
917 | void osnoise_trace_irq_entry(int id) |
918 | { |
919 | struct osnoise_variables *osn_var = this_cpu_osn_var(); |
920 | |
921 | if (!osn_var->sampling) |
922 | return; |
923 | /* |
924 | * This value will be used in the report, but not to compute |
925 | * the execution time, so it is safe to get it unsafe. |
926 | */ |
927 | osn_var->irq.arrival_time = time_get(); |
	set_int_safe_time(osn_var, &osn_var->irq.delta_start);
	osn_var->irq.count++;

	local_inc(&osn_var->int_counter);
932 | } |
933 | |
934 | /* |
 * osnoise_trace_irq_exit - Note the end of an IRQ, save data and trace
 *
 * Computes the duration of the IRQ noise and traces it. It also discounts
 * the interference from other sources of noise that could currently be
 * accounted.
939 | */ |
940 | void osnoise_trace_irq_exit(int id, const char *desc) |
941 | { |
942 | struct osnoise_variables *osn_var = this_cpu_osn_var(); |
943 | s64 duration; |
944 | |
945 | if (!osn_var->sampling) |
946 | return; |
947 | |
	duration = get_int_safe_duration(osn_var, &osn_var->irq.delta_start);
	trace_irq_noise(id, desc, osn_var->irq.arrival_time, duration);
950 | osn_var->irq.arrival_time = 0; |
951 | cond_move_softirq_delta_start(osn_var, duration); |
952 | cond_move_thread_delta_start(osn_var, duration); |
953 | } |
954 | |
955 | /* |
 * trace_irqentry_callback - Callback to the irq:irq_handler_entry trace event
 *
 * Used to note the starting of an IRQ occurrence.
959 | */ |
960 | static void trace_irqentry_callback(void *data, int irq, |
961 | struct irqaction *action) |
962 | { |
	osnoise_trace_irq_entry(irq);
964 | } |
965 | |
966 | /* |
 * trace_irqexit_callback - Callback to the irq:irq_handler_exit trace event
 *
 * Used to note the end of an IRQ occurrence.
970 | */ |
971 | static void trace_irqexit_callback(void *data, int irq, |
972 | struct irqaction *action, int ret) |
973 | { |
	osnoise_trace_irq_exit(irq, action->name);
975 | } |
976 | |
977 | /* |
978 | * arch specific register function. |
979 | */ |
980 | int __weak osnoise_arch_register(void) |
981 | { |
982 | return 0; |
983 | } |
984 | |
985 | /* |
986 | * arch specific unregister function. |
987 | */ |
988 | void __weak osnoise_arch_unregister(void) |
989 | { |
990 | return; |
991 | } |
992 | |
993 | /* |
994 | * hook_irq_events - Hook IRQ handling events |
995 | * |
996 | * This function hooks the IRQ related callbacks to the respective trace |
997 | * events. |
998 | */ |
999 | static int hook_irq_events(void) |
1000 | { |
1001 | int ret; |
1002 | |
	ret = register_trace_irq_handler_entry(trace_irqentry_callback, NULL);
1004 | if (ret) |
1005 | goto out_err; |
1006 | |
	ret = register_trace_irq_handler_exit(trace_irqexit_callback, NULL);
1008 | if (ret) |
1009 | goto out_unregister_entry; |
1010 | |
1011 | ret = osnoise_arch_register(); |
1012 | if (ret) |
1013 | goto out_irq_exit; |
1014 | |
1015 | return 0; |
1016 | |
1017 | out_irq_exit: |
	unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL);
1019 | out_unregister_entry: |
	unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL);
1021 | out_err: |
1022 | return -EINVAL; |
1023 | } |
1024 | |
1025 | /* |
1026 | * unhook_irq_events - Unhook IRQ handling events |
1027 | * |
 * This function unhooks the IRQ related callbacks from the respective trace
 * events.
1030 | */ |
1031 | static void unhook_irq_events(void) |
1032 | { |
1033 | osnoise_arch_unregister(); |
	unregister_trace_irq_handler_exit(trace_irqexit_callback, NULL);
	unregister_trace_irq_handler_entry(trace_irqentry_callback, NULL);
1036 | } |
1037 | |
1038 | #ifndef CONFIG_PREEMPT_RT |
1039 | /* |
1040 | * trace_softirq_entry_callback - Note the starting of a softirq |
1041 | * |
1042 | * Save the starting time of a softirq. As softirqs are non-preemptive to |
 * other softirqs, it is safe to use a single variable (osn_var->softirq)
1044 | * to save the statistics. The arrival_time is used to report... the |
1045 | * arrival time. The delta_start is used to compute the duration at the |
1046 | * softirq exit handler. See cond_move_softirq_delta_start(). |
1047 | */ |
1048 | static void trace_softirq_entry_callback(void *data, unsigned int vec_nr) |
1049 | { |
1050 | struct osnoise_variables *osn_var = this_cpu_osn_var(); |
1051 | |
1052 | if (!osn_var->sampling) |
1053 | return; |
1054 | /* |
1055 | * This value will be used in the report, but not to compute |
1056 | * the execution time, so it is safe to get it unsafe. |
1057 | */ |
1058 | osn_var->softirq.arrival_time = time_get(); |
	set_int_safe_time(osn_var, &osn_var->softirq.delta_start);
	osn_var->softirq.count++;

	local_inc(&osn_var->int_counter);
1063 | } |
1064 | |
1065 | /* |
 * trace_softirq_exit_callback - Note the end of a softirq
 *
 * Computes the duration of the softirq noise and traces it. It also
 * discounts the interference from other sources of noise that could
 * currently be accounted.
1070 | */ |
1071 | static void trace_softirq_exit_callback(void *data, unsigned int vec_nr) |
1072 | { |
1073 | struct osnoise_variables *osn_var = this_cpu_osn_var(); |
1074 | s64 duration; |
1075 | |
1076 | if (!osn_var->sampling) |
1077 | return; |
1078 | |
1079 | if (unlikely(timerlat_enabled())) |
1080 | if (!timerlat_softirq_exit(osn_var)) |
1081 | return; |
1082 | |
	duration = get_int_safe_duration(osn_var, &osn_var->softirq.delta_start);
	trace_softirq_noise(vec_nr, osn_var->softirq.arrival_time, duration);
1085 | cond_move_thread_delta_start(osn_var, duration); |
1086 | osn_var->softirq.arrival_time = 0; |
1087 | } |
1088 | |
1089 | /* |
1090 | * hook_softirq_events - Hook softirq handling events |
1091 | * |
1092 | * This function hooks the softirq related callbacks to the respective trace |
1093 | * events. |
1094 | */ |
1095 | static int hook_softirq_events(void) |
1096 | { |
1097 | int ret; |
1098 | |
	ret = register_trace_softirq_entry(trace_softirq_entry_callback, NULL);
1100 | if (ret) |
1101 | goto out_err; |
1102 | |
	ret = register_trace_softirq_exit(trace_softirq_exit_callback, NULL);
1104 | if (ret) |
1105 | goto out_unreg_entry; |
1106 | |
1107 | return 0; |
1108 | |
1109 | out_unreg_entry: |
	unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL);
1111 | out_err: |
1112 | return -EINVAL; |
1113 | } |
1114 | |
1115 | /* |
1116 | * unhook_softirq_events - Unhook softirq handling events |
1117 | * |
 * This function unhooks the softirq related callbacks from the respective
 * trace events.
1120 | */ |
1121 | static void unhook_softirq_events(void) |
1122 | { |
	unregister_trace_softirq_entry(trace_softirq_entry_callback, NULL);
	unregister_trace_softirq_exit(trace_softirq_exit_callback, NULL);
1125 | } |
1126 | #else /* CONFIG_PREEMPT_RT */ |
1127 | /* |
 * softirqs run as threads on PREEMPT_RT.
1129 | */ |
1130 | static int hook_softirq_events(void) |
1131 | { |
1132 | return 0; |
1133 | } |
1134 | static void unhook_softirq_events(void) |
1135 | { |
1136 | } |
1137 | #endif |
1138 | |
1139 | /* |
1140 | * thread_entry - Record the starting of a thread noise window |
1141 | * |
1142 | * It saves the context switch time for a noisy thread, and increments |
1143 | * the interference counters. |
1144 | */ |
1145 | static void |
1146 | thread_entry(struct osnoise_variables *osn_var, struct task_struct *t) |
1147 | { |
1148 | if (!osn_var->sampling) |
1149 | return; |
1150 | /* |
1151 | * The arrival time will be used in the report, but not to compute |
1152 | * the execution time, so it is safe to get it unsafe. |
1153 | */ |
1154 | osn_var->thread.arrival_time = time_get(); |
1155 | |
	set_int_safe_time(osn_var, &osn_var->thread.delta_start);

	osn_var->thread.count++;
	local_inc(&osn_var->int_counter);
1160 | } |
1161 | |
1162 | /* |
1163 | * thread_exit - Report the end of a thread noise window |
1164 | * |
1165 | * It computes the total noise from a thread, tracing if needed. |
1166 | */ |
1167 | static void |
1168 | thread_exit(struct osnoise_variables *osn_var, struct task_struct *t) |
1169 | { |
1170 | s64 duration; |
1171 | |
1172 | if (!osn_var->sampling) |
1173 | return; |
1174 | |
1175 | if (unlikely(timerlat_enabled())) |
1176 | if (!timerlat_thread_exit(osn_var)) |
1177 | return; |
1178 | |
	duration = get_int_safe_duration(osn_var, &osn_var->thread.delta_start);

	trace_thread_noise(t, osn_var->thread.arrival_time, duration);
1182 | |
1183 | osn_var->thread.arrival_time = 0; |
1184 | } |
1185 | |
1186 | #ifdef CONFIG_TIMERLAT_TRACER |
1187 | /* |
1188 | * osnoise_stop_exception - Stop tracing and the tracer. |
1189 | */ |
1190 | static __always_inline void osnoise_stop_exception(char *msg, int cpu) |
1191 | { |
1192 | struct osnoise_instance *inst; |
1193 | struct trace_array *tr; |
1194 | |
1195 | rcu_read_lock(); |
1196 | list_for_each_entry_rcu(inst, &osnoise_instances, list) { |
1197 | tr = inst->tr; |
		trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
				       "stop tracing hit on cpu %d due to exception: %s\n",
				       smp_processor_id(),
				       msg);

		if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options))
			panic("tracer hit on cpu %d due to exception: %s\n",
			      smp_processor_id(),
			      msg);
1207 | |
1208 | tracer_tracing_off(tr); |
1209 | } |
1210 | rcu_read_unlock(); |
1211 | } |
1212 | |
1213 | /* |
1214 | * trace_sched_migrate_callback - sched:sched_migrate_task trace event handler |
1215 | * |
 * This function is hooked to the sched:sched_migrate_task trace event, and monitors
1217 | * timerlat user-space thread migration. |
1218 | */ |
1219 | static void trace_sched_migrate_callback(void *data, struct task_struct *p, int dest_cpu) |
1220 | { |
1221 | struct osnoise_variables *osn_var; |
1222 | long cpu = task_cpu(p); |
1223 | |
1224 | osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu); |
1225 | if (osn_var->pid == p->pid && dest_cpu != cpu) { |
1226 | per_cpu_ptr(&per_cpu_timerlat_var, cpu)->uthread_migrate = 1; |
1227 | osnoise_taint("timerlat user-thread migrated\n" ); |
1228 | osnoise_stop_exception(msg: "timerlat user-thread migrated" , cpu); |
1229 | } |
1230 | } |
1231 | |
1232 | static int register_migration_monitor(void) |
1233 | { |
1234 | int ret = 0; |
1235 | |
1236 | /* |
1237 | * Timerlat thread migration check is only required when running timerlat in user-space. |
1238 | * Thus, enable callback only if timerlat is set with no workload. |
1239 | */ |
1240 | if (timerlat_enabled() && !test_bit(OSN_WORKLOAD, &osnoise_options)) |
		ret = register_trace_sched_migrate_task(trace_sched_migrate_callback, NULL);
1242 | |
1243 | return ret; |
1244 | } |
1245 | |
1246 | static void unregister_migration_monitor(void) |
1247 | { |
1248 | if (timerlat_enabled() && !test_bit(OSN_WORKLOAD, &osnoise_options)) |
		unregister_trace_sched_migrate_task(trace_sched_migrate_callback, NULL);
1250 | } |
1251 | #else |
1252 | static int register_migration_monitor(void) |
1253 | { |
1254 | return 0; |
1255 | } |
1256 | static void unregister_migration_monitor(void) {} |
1257 | #endif |
1258 | /* |
1259 | * trace_sched_switch - sched:sched_switch trace event handler |
1260 | * |
1261 | * This function is hooked to the sched:sched_switch trace event, and it is |
1262 | * used to record the beginning and to report the end of a thread noise window. |
1263 | */ |
1264 | static void |
1265 | trace_sched_switch_callback(void *data, bool preempt, |
1266 | struct task_struct *p, |
1267 | struct task_struct *n, |
1268 | unsigned int prev_state) |
1269 | { |
1270 | struct osnoise_variables *osn_var = this_cpu_osn_var(); |
1271 | int workload = test_bit(OSN_WORKLOAD, &osnoise_options); |
1272 | |
	if ((p->pid != osn_var->pid) || !workload)
		thread_exit(osn_var, p);

	if ((n->pid != osn_var->pid) || !workload)
		thread_entry(osn_var, n);
1278 | } |
1279 | |
1280 | /* |
1281 | * hook_thread_events - Hook the instrumentation for thread noise |
1282 | * |
1283 | * Hook the osnoise tracer callbacks to handle the noise from other |
1284 | * threads on the necessary kernel events. |
1285 | */ |
1286 | static int hook_thread_events(void) |
1287 | { |
1288 | int ret; |
1289 | |
	ret = register_trace_sched_switch(trace_sched_switch_callback, NULL);
1291 | if (ret) |
1292 | return -EINVAL; |
1293 | |
1294 | ret = register_migration_monitor(); |
1295 | if (ret) |
1296 | goto out_unreg; |
1297 | |
1298 | return 0; |
1299 | |
1300 | out_unreg: |
	unregister_trace_sched_switch(trace_sched_switch_callback, NULL);
1302 | return -EINVAL; |
1303 | } |
1304 | |
1305 | /* |
1306 | * unhook_thread_events - unhook the instrumentation for thread noise |
1307 | * |
 * Unhook the osnoise tracer callbacks that handle the noise from other
1309 | * threads on the necessary kernel events. |
1310 | */ |
1311 | static void unhook_thread_events(void) |
1312 | { |
	unregister_trace_sched_switch(trace_sched_switch_callback, NULL);
1314 | unregister_migration_monitor(); |
1315 | } |
1316 | |
1317 | /* |
1318 | * save_osn_sample_stats - Save the osnoise_sample statistics |
1319 | * |
1320 | * Save the osnoise_sample statistics before the sampling phase. These |
 * values will be used later to compute the diff between the statistics
1322 | * before and after the osnoise sampling. |
1323 | */ |
1324 | static void |
1325 | save_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s) |
1326 | { |
1327 | s->nmi_count = osn_var->nmi.count; |
1328 | s->irq_count = osn_var->irq.count; |
1329 | s->softirq_count = osn_var->softirq.count; |
1330 | s->thread_count = osn_var->thread.count; |
1331 | } |
1332 | |
1333 | /* |
1334 | * diff_osn_sample_stats - Compute the osnoise_sample statistics |
1335 | * |
1336 | * After a sample period, compute the difference on the osnoise_sample |
1337 | * statistics. The struct osnoise_sample *s contains the statistics saved via |
1338 | * save_osn_sample_stats() before the osnoise sampling. |
1339 | */ |
1340 | static void |
1341 | diff_osn_sample_stats(struct osnoise_variables *osn_var, struct osnoise_sample *s) |
1342 | { |
1343 | s->nmi_count = osn_var->nmi.count - s->nmi_count; |
1344 | s->irq_count = osn_var->irq.count - s->irq_count; |
1345 | s->softirq_count = osn_var->softirq.count - s->softirq_count; |
1346 | s->thread_count = osn_var->thread.count - s->thread_count; |
1347 | } |
1348 | |
1349 | /* |
1350 | * osnoise_stop_tracing - Stop tracing and the tracer. |
1351 | */ |
1352 | static __always_inline void osnoise_stop_tracing(void) |
1353 | { |
1354 | struct osnoise_instance *inst; |
1355 | struct trace_array *tr; |
1356 | |
1357 | rcu_read_lock(); |
1358 | list_for_each_entry_rcu(inst, &osnoise_instances, list) { |
1359 | tr = inst->tr; |
		trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
				       "stop tracing hit on cpu %d\n", smp_processor_id());

		if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options))
			panic("tracer hit stop condition on CPU %d\n", smp_processor_id());
1365 | |
1366 | tracer_tracing_off(tr); |
1367 | } |
1368 | rcu_read_unlock(); |
1369 | } |
1370 | |
1371 | /* |
1372 | * osnoise_has_tracing_on - Check if there is at least one instance on |
1373 | */ |
1374 | static __always_inline int osnoise_has_tracing_on(void) |
1375 | { |
1376 | struct osnoise_instance *inst; |
1377 | int trace_is_on = 0; |
1378 | |
1379 | rcu_read_lock(); |
1380 | list_for_each_entry_rcu(inst, &osnoise_instances, list) |
		trace_is_on += tracer_tracing_is_on(inst->tr);
1382 | rcu_read_unlock(); |
1383 | |
1384 | return trace_is_on; |
1385 | } |
1386 | |
1387 | /* |
1388 | * notify_new_max_latency - Notify a new max latency via fsnotify interface. |
1389 | */ |
1390 | static void notify_new_max_latency(u64 latency) |
1391 | { |
1392 | struct osnoise_instance *inst; |
1393 | struct trace_array *tr; |
1394 | |
1395 | rcu_read_lock(); |
1396 | list_for_each_entry_rcu(inst, &osnoise_instances, list) { |
1397 | tr = inst->tr; |
1398 | if (tracer_tracing_is_on(tr) && tr->max_latency < latency) { |
1399 | tr->max_latency = latency; |
1400 | latency_fsnotify(tr); |
1401 | } |
1402 | } |
1403 | rcu_read_unlock(); |
1404 | } |
1405 | |
1406 | /* |
1407 | * run_osnoise - Sample the time and look for osnoise |
1408 | * |
1409 | * Used to capture the time, looking for potential osnoise latency repeatedly. |
1410 | * Different from hwlat_detector, it is called with preemption and interrupts |
 * enabled. This allows irqs, softirqs and threads to run, interfering with the
1412 | * osnoise sampling thread, as they would do with a regular thread. |
1413 | */ |
1414 | static int run_osnoise(void) |
1415 | { |
1416 | bool disable_irq = test_bit(OSN_IRQ_DISABLE, &osnoise_options); |
1417 | struct osnoise_variables *osn_var = this_cpu_osn_var(); |
1418 | u64 start, sample, last_sample; |
1419 | u64 last_int_count, int_count; |
1420 | s64 noise = 0, max_noise = 0; |
1421 | s64 total, last_total = 0; |
1422 | struct osnoise_sample s; |
1423 | bool disable_preemption; |
1424 | unsigned int threshold; |
1425 | u64 runtime, stop_in; |
1426 | u64 sum_noise = 0; |
1427 | int hw_count = 0; |
1428 | int ret = -1; |
1429 | |
1430 | /* |
1431 | * Disabling preemption is only required if IRQs are enabled, |
	 * and the option is enabled.
1433 | */ |
1434 | disable_preemption = !disable_irq && test_bit(OSN_PREEMPT_DISABLE, &osnoise_options); |
1435 | |
1436 | /* |
1437 | * Considers the current thread as the workload. |
1438 | */ |
1439 | osn_var->pid = current->pid; |
1440 | |
1441 | /* |
1442 | * Save the current stats for the diff |
1443 | */ |
	save_osn_sample_stats(osn_var, &s);
1445 | |
1446 | /* |
1447 | * if threshold is 0, use the default value of 5 us. |
1448 | */ |
1449 | threshold = tracing_thresh ? : 5000; |
1450 | |
1451 | /* |
1452 | * Apply PREEMPT and IRQ disabled options. |
1453 | */ |
1454 | if (disable_irq) |
1455 | local_irq_disable(); |
1456 | |
1457 | if (disable_preemption) |
1458 | preempt_disable(); |
1459 | |
1460 | /* |
1461 | * Make sure NMIs see sampling first |
1462 | */ |
1463 | osn_var->sampling = true; |
1464 | barrier(); |
1465 | |
1466 | /* |
1467 | * Transform the *_us config to nanoseconds to avoid the |
1468 | * division on the main loop. |
1469 | */ |
1470 | runtime = osnoise_data.sample_runtime * NSEC_PER_USEC; |
1471 | stop_in = osnoise_data.stop_tracing * NSEC_PER_USEC; |
1472 | |
1473 | /* |
	 * Start timestamp
1475 | */ |
1476 | start = time_get(); |
1477 | |
1478 | /* |
1479 | * "previous" loop. |
1480 | */ |
	last_int_count = set_int_safe_time(osn_var, &last_sample);
1482 | |
1483 | do { |
1484 | /* |
1485 | * Get sample! |
1486 | */ |
		int_count = set_int_safe_time(osn_var, &sample);
1488 | |
1489 | noise = time_sub(sample, last_sample); |
1490 | |
1491 | /* |
1492 | * This shouldn't happen. |
1493 | */ |
1494 | if (noise < 0) { |
1495 | osnoise_taint("negative noise!" ); |
1496 | goto out; |
1497 | } |
1498 | |
1499 | /* |
1500 | * Sample runtime. |
1501 | */ |
1502 | total = time_sub(sample, start); |
1503 | |
1504 | /* |
1505 | * Check for possible overflows. |
1506 | */ |
1507 | if (total < last_total) { |
1508 | osnoise_taint("total overflow!" ); |
1509 | break; |
1510 | } |
1511 | |
1512 | last_total = total; |
1513 | |
1514 | if (noise >= threshold) { |
1515 | int interference = int_count - last_int_count; |
1516 | |
1517 | if (noise > max_noise) |
1518 | max_noise = noise; |
1519 | |
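			/*
			 * No kernel-visible interference (NMI, IRQ, softirq or
			 * thread) was accounted in this gap, so the noise is
			 * attributed to hardware or hypervisor interference.
			 */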
1520 | if (!interference) |
1521 | hw_count++; |
1522 | |
1523 | sum_noise += noise; |
1524 | |
			trace_sample_threshold(last_sample, noise, interference);
1526 | |
1527 | if (osnoise_data.stop_tracing) |
1528 | if (noise > stop_in) |
1529 | osnoise_stop_tracing(); |
1530 | } |
1531 | |
1532 | /* |
		 * In some cases, notably when running on a nohz_full CPU with
		 * a stopped tick, PREEMPT_RCU has no way to account for QSs.
1535 | * This will eventually cause unwarranted noise as PREEMPT_RCU |
1536 | * will force preemption as the means of ending the current |
1537 | * grace period. We avoid this problem by calling |
1538 | * rcu_momentary_dyntick_idle(), which performs a zero duration |
1539 | * EQS allowing PREEMPT_RCU to end the current grace period. |
1540 | * This call shouldn't be wrapped inside an RCU critical |
1541 | * section. |
1542 | * |
		 * Note that in non-PREEMPT_RCU kernels QSs are handled through
		 * cond_resched().
1545 | */ |
1546 | if (IS_ENABLED(CONFIG_PREEMPT_RCU)) { |
1547 | if (!disable_irq) |
1548 | local_irq_disable(); |
1549 | |
1550 | rcu_momentary_dyntick_idle(); |
1551 | |
1552 | if (!disable_irq) |
1553 | local_irq_enable(); |
1554 | } |
1555 | |
1556 | /* |
		 * For the non-preemptive kernel config: let threads run, if
		 * they so wish, unless set not to do so.
1559 | */ |
1560 | if (!disable_irq && !disable_preemption) |
1561 | cond_resched(); |
1562 | |
1563 | last_sample = sample; |
1564 | last_int_count = int_count; |
1565 | |
1566 | } while (total < runtime && !kthread_should_stop()); |
1567 | |
1568 | /* |
	 * Finish the above in the view of interrupts.
1570 | */ |
1571 | barrier(); |
1572 | |
1573 | osn_var->sampling = false; |
1574 | |
1575 | /* |
1576 | * Make sure sampling data is no longer updated. |
1577 | */ |
1578 | barrier(); |
1579 | |
1580 | /* |
1581 | * Return to the preemptive state. |
1582 | */ |
1583 | if (disable_preemption) |
1584 | preempt_enable(); |
1585 | |
1586 | if (disable_irq) |
1587 | local_irq_enable(); |
1588 | |
1589 | /* |
1590 | * Save noise info. |
1591 | */ |
1592 | s.noise = time_to_us(sum_noise); |
1593 | s.runtime = time_to_us(total); |
1594 | s.max_sample = time_to_us(max_noise); |
1595 | s.hw_count = hw_count; |
1596 | |
1597 | /* Save interference stats info */ |
	diff_osn_sample_stats(osn_var, &s);

	trace_osnoise_sample(&s);

	notify_new_max_latency(max_noise);
1603 | |
1604 | if (osnoise_data.stop_tracing_total) |
1605 | if (s.noise > osnoise_data.stop_tracing_total) |
1606 | osnoise_stop_tracing(); |
1607 | |
1608 | return 0; |
1609 | out: |
1610 | return ret; |
1611 | } |
1612 | |
1613 | static struct cpumask osnoise_cpumask; |
1614 | static struct cpumask save_cpumask; |
1615 | |
1616 | /* |
1617 | * osnoise_sleep - sleep until the next period |
1618 | */ |
1619 | static void osnoise_sleep(bool skip_period) |
1620 | { |
1621 | u64 interval; |
1622 | ktime_t wake_time; |
1623 | |
1624 | mutex_lock(&interface_lock); |
1625 | if (skip_period) |
1626 | interval = osnoise_data.sample_period; |
1627 | else |
1628 | interval = osnoise_data.sample_period - osnoise_data.sample_runtime; |
	mutex_unlock(&interface_lock);
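
	/*
	 * For example, with the defaults (period == runtime == 1s), the
	 * interval is 0 and the thread only yields below; with period = 1s
	 * and runtime = 0.5s, it sleeps for the remaining 0.5s of the period.
	 */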
1630 | |
1631 | /* |
1632 | * differently from hwlat_detector, the osnoise tracer can run |
1633 | * without a pause because preemption is on. |
1634 | */ |
1635 | if (!interval) { |
1636 | /* Let synchronize_rcu_tasks() make progress */ |
1637 | cond_resched_tasks_rcu_qs(); |
1638 | return; |
1639 | } |
1640 | |
	wake_time = ktime_add_us(ktime_get(), interval);
	__set_current_state(TASK_INTERRUPTIBLE);

	while (schedule_hrtimeout(&wake_time, HRTIMER_MODE_ABS)) {
1645 | if (kthread_should_stop()) |
1646 | break; |
1647 | } |
1648 | } |
1649 | |
1650 | /* |
1651 | * osnoise_migration_pending - checks if the task needs to migrate |
1652 | * |
1653 | * osnoise/timerlat threads are per-cpu. If there is a pending request to |
1654 | * migrate the thread away from the current CPU, something bad has happened. |
1655 | * Play the good citizen and leave. |
1656 | * |
1657 | * Returns 0 if it is safe to continue, 1 otherwise. |
1658 | */ |
1659 | static inline int osnoise_migration_pending(void) |
1660 | { |
1661 | if (!current->migration_pending) |
1662 | return 0; |
1663 | |
1664 | /* |
1665 | * If migration is pending, there is a task waiting for the |
1666 | * tracer to enable migration. The tracer does not allow migration, |
1667 | * thus: taint and leave to unblock the blocked thread. |
1668 | */ |
1669 | osnoise_taint("migration requested to osnoise threads, leaving." ); |
1670 | |
1671 | /* |
1672 | * Unset this thread from the threads managed by the interface. |
1673 | * The tracers are responsible for cleaning their env before |
1674 | * exiting. |
1675 | */ |
1676 | mutex_lock(&interface_lock); |
1677 | this_cpu_osn_var()->kthread = NULL; |
	mutex_unlock(&interface_lock);
1679 | |
1680 | return 1; |
1681 | } |
1682 | |
1683 | /* |
1684 | * osnoise_main - The osnoise detection kernel thread |
1685 | * |
1686 | * Calls run_osnoise() function to measure the osnoise for the configured runtime, |
1687 | * every period. |
1688 | */ |
1689 | static int osnoise_main(void *data) |
1690 | { |
1691 | unsigned long flags; |
1692 | |
1693 | /* |
1694 | * This thread was created pinned to the CPU using PF_NO_SETAFFINITY. |
	 * The problem is that cgroup does not allow PF_NO_SETAFFINITY threads.
1696 | * |
1697 | * To work around this limitation, disable migration and remove the |
1698 | * flag. |
1699 | */ |
1700 | migrate_disable(); |
1701 | raw_spin_lock_irqsave(¤t->pi_lock, flags); |
1702 | current->flags &= ~(PF_NO_SETAFFINITY); |
1703 | raw_spin_unlock_irqrestore(¤t->pi_lock, flags); |
1704 | |
1705 | while (!kthread_should_stop()) { |
1706 | if (osnoise_migration_pending()) |
1707 | break; |
1708 | |
1709 | /* skip a period if tracing is off on all instances */ |
1710 | if (!osnoise_has_tracing_on()) { |
			osnoise_sleep(true);
1712 | continue; |
1713 | } |
1714 | |
1715 | run_osnoise(); |
		osnoise_sleep(false);
1717 | } |
1718 | |
1719 | migrate_enable(); |
1720 | return 0; |
1721 | } |
1722 | |
1723 | #ifdef CONFIG_TIMERLAT_TRACER |
1724 | /* |
1725 | * timerlat_irq - hrtimer handler for timerlat. |
1726 | */ |
1727 | static enum hrtimer_restart timerlat_irq(struct hrtimer *timer) |
1728 | { |
1729 | struct osnoise_variables *osn_var = this_cpu_osn_var(); |
1730 | struct timerlat_variables *tlat; |
1731 | struct timerlat_sample s; |
1732 | u64 now; |
1733 | u64 diff; |
1734 | |
1735 | /* |
1736 | * I am not sure if the timer was armed for this CPU. So, get |
1737 | * the timerlat struct from the timer itself, not from this |
1738 | * CPU. |
1739 | */ |
1740 | tlat = container_of(timer, struct timerlat_variables, timer); |
1741 | |
	now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
1743 | |
1744 | /* |
	 * Enable the osnoise: events for thread and softirq.
1746 | */ |
1747 | tlat->tracing_thread = true; |
1748 | |
1749 | osn_var->thread.arrival_time = time_get(); |
1750 | |
1751 | /* |
1752 | * A hardirq is running: the timer IRQ. It is for sure preempting |
1753 | * a thread, and potentially preempting a softirq. |
1754 | * |
1755 | * At this point, it is not interesting to know the duration of the |
1756 | * preempted thread (and maybe softirq), but how much time they will |
1757 | * delay the beginning of the execution of the timer thread. |
1758 | * |
1759 | * To get the correct (net) delay added by the softirq, its delta_start |
1760 | * is set as the IRQ one. In this way, at the return of the IRQ, the delta |
	 * start of the softirq will be zeroed, accounting then only the time
1762 | * after that. |
1763 | * |
1764 | * The thread follows the same principle. However, if a softirq is |
1765 | * running, the thread needs to receive the softirq delta_start. The |
	 * reason is that the softirq will be the last to be unfolded,
	 * resetting the thread delay to zero.
1768 | * |
1769 | * The PREEMPT_RT is a special case, though. As softirqs run as threads |
1770 | * on RT, moving the thread is enough. |
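	 *
	 * An illustrative (!PREEMPT_RT) scenario: a thread is preempted
	 * by a softirq, which is then preempted by the timer IRQ. The
	 * thread inherits the softirq delta_start, and the softirq
	 * inherits the IRQ delta_start; each context is thus charged
	 * only with the time it adds to the timerlat thread wakeup.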
1771 | */ |
1772 | if (!IS_ENABLED(CONFIG_PREEMPT_RT) && osn_var->softirq.delta_start) { |
		copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
				   &osn_var->softirq.delta_start);

		copy_int_safe_time(osn_var, &osn_var->softirq.delta_start,
				   &osn_var->irq.delta_start);
	} else {
		copy_int_safe_time(osn_var, &osn_var->thread.delta_start,
				   &osn_var->irq.delta_start);
1781 | } |
1782 | |
1783 | /* |
	 * Compare the current time with the expected time.
1785 | */ |
1786 | diff = now - tlat->abs_period; |
1787 | |
1788 | tlat->count++; |
1789 | s.seqnum = tlat->count; |
1790 | s.timer_latency = diff; |
1791 | s.context = IRQ_CONTEXT; |
1792 | |
	trace_timerlat_sample(&s);
1794 | |
1795 | if (osnoise_data.stop_tracing) { |
1796 | if (time_to_us(diff) >= osnoise_data.stop_tracing) { |
1797 | |
1798 | /* |
1799 | * At this point, if stop_tracing is set and <= print_stack, |
1800 | * print_stack is set and would be printed in the thread handler. |
1801 | * |
			 * Thus, print the stack trace here, as it is helpful to
			 * pinpoint the root cause of an IRQ latency.
1804 | */ |
1805 | if (osnoise_data.stop_tracing <= osnoise_data.print_stack) { |
				timerlat_save_stack(0);
1807 | timerlat_dump_stack(time_to_us(diff)); |
1808 | } |
1809 | |
1810 | osnoise_stop_tracing(); |
			notify_new_max_latency(diff);
1812 | |
			wake_up_process(tlat->kthread);
1814 | |
1815 | return HRTIMER_NORESTART; |
1816 | } |
1817 | } |
1818 | |
	wake_up_process(tlat->kthread);
1820 | |
1821 | if (osnoise_data.print_stack) |
		timerlat_save_stack(0);
1823 | |
1824 | return HRTIMER_NORESTART; |
1825 | } |
1826 | |
1827 | /* |
1828 | * wait_next_period - Wait for the next period for timerlat |
1829 | */ |
1830 | static int wait_next_period(struct timerlat_variables *tlat) |
1831 | { |
1832 | ktime_t next_abs_period, now; |
1833 | u64 rel_period = osnoise_data.timerlat_period * 1000; |
1834 | |
	now = hrtimer_cb_get_time(&tlat->timer);
	next_abs_period = ns_to_ktime(tlat->abs_period + rel_period);
1837 | |
1838 | /* |
1839 | * Save the next abs_period. |
1840 | */ |
	tlat->abs_period = (u64) ktime_to_ns(next_abs_period);
1842 | |
1843 | /* |
1844 | * If the new abs_period is in the past, skip the activation. |
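	 *
	 * For example, if the thread was delayed for 2.5 periods, the
	 * loop below advances abs_period by three whole periods, so the
	 * timer is armed in the future instead of firing immediately.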
1845 | */ |
	while (ktime_compare(now, next_abs_period) > 0) {
		next_abs_period = ns_to_ktime(tlat->abs_period + rel_period);
		tlat->abs_period = (u64) ktime_to_ns(next_abs_period);
1849 | } |
1850 | |
1851 | set_current_state(TASK_INTERRUPTIBLE); |
1852 | |
	hrtimer_start(&tlat->timer, next_abs_period, HRTIMER_MODE_ABS_PINNED_HARD);
1854 | schedule(); |
1855 | return 1; |
1856 | } |
1857 | |
1858 | /* |
 * timerlat_main - Timerlat main
1860 | */ |
1861 | static int timerlat_main(void *data) |
1862 | { |
1863 | struct osnoise_variables *osn_var = this_cpu_osn_var(); |
1864 | struct timerlat_variables *tlat = this_cpu_tmr_var(); |
1865 | struct timerlat_sample s; |
1866 | struct sched_param sp; |
1867 | unsigned long flags; |
1868 | u64 now, diff; |
1869 | |
1870 | /* |
	 * Make the thread RT, as that is how cyclictest is usually used.
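	 * The default priority is SCHED_FIFO 95 (DEFAULT_TIMERLAT_PRIO).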
1872 | */ |
1873 | sp.sched_priority = DEFAULT_TIMERLAT_PRIO; |
1874 | sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); |
1875 | |
1876 | /* |
1877 | * This thread was created pinned to the CPU using PF_NO_SETAFFINITY. |
	 * The problem is that cgroup does not allow PF_NO_SETAFFINITY threads.
1879 | * |
1880 | * To work around this limitation, disable migration and remove the |
1881 | * flag. |
1882 | */ |
1883 | migrate_disable(); |
1884 | raw_spin_lock_irqsave(¤t->pi_lock, flags); |
1885 | current->flags &= ~(PF_NO_SETAFFINITY); |
1886 | raw_spin_unlock_irqrestore(¤t->pi_lock, flags); |
1887 | |
1888 | tlat->count = 0; |
1889 | tlat->tracing_thread = false; |
1890 | |
	hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
1892 | tlat->timer.function = timerlat_irq; |
1893 | tlat->kthread = current; |
1894 | osn_var->pid = current->pid; |
1895 | /* |
	 * Annotate the arrival time.
1897 | */ |
	tlat->abs_period = hrtimer_cb_get_time(&tlat->timer);
1899 | |
1900 | wait_next_period(tlat); |
1901 | |
1902 | osn_var->sampling = 1; |
1903 | |
1904 | while (!kthread_should_stop()) { |
1905 | |
		now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
1907 | diff = now - tlat->abs_period; |
1908 | |
1909 | s.seqnum = tlat->count; |
1910 | s.timer_latency = diff; |
1911 | s.context = THREAD_CONTEXT; |
1912 | |
		trace_timerlat_sample(&s);
1914 | |
		notify_new_max_latency(diff);
1916 | |
1917 | timerlat_dump_stack(time_to_us(diff)); |
1918 | |
1919 | tlat->tracing_thread = false; |
1920 | if (osnoise_data.stop_tracing_total) |
1921 | if (time_to_us(diff) >= osnoise_data.stop_tracing_total) |
1922 | osnoise_stop_tracing(); |
1923 | |
1924 | if (osnoise_migration_pending()) |
1925 | break; |
1926 | |
1927 | wait_next_period(tlat); |
1928 | } |
1929 | |
	hrtimer_cancel(&tlat->timer);
1931 | migrate_enable(); |
1932 | return 0; |
1933 | } |
1934 | #else /* CONFIG_TIMERLAT_TRACER */ |
1935 | static int timerlat_main(void *data) |
1936 | { |
1937 | return 0; |
1938 | } |
1939 | #endif /* CONFIG_TIMERLAT_TRACER */ |
1940 | |
1941 | /* |
1942 | * stop_kthread - stop a workload thread |
1943 | */ |
1944 | static void stop_kthread(unsigned int cpu) |
1945 | { |
1946 | struct task_struct *kthread; |
1947 | |
1948 | kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread; |
1949 | if (kthread) { |
1950 | if (test_bit(OSN_WORKLOAD, &osnoise_options)) { |
			kthread_stop(kthread);
1952 | } else { |
1953 | /* |
1954 | * This is a user thread waiting on the timerlat_fd. We need |
1955 | * to close all users, and the best way to guarantee this is |
			 * by killing the thread. NOTE: this is a purpose-specific file.
1957 | */ |
			kill_pid(kthread->thread_pid, SIGKILL, 1);
			put_task_struct(kthread);
1960 | } |
1961 | per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL; |
1962 | } else { |
1963 | /* if no workload, just return */ |
1964 | if (!test_bit(OSN_WORKLOAD, &osnoise_options)) { |
1965 | /* |
1966 | * This is set in the osnoise tracer case. |
1967 | */ |
1968 | per_cpu(per_cpu_osnoise_var, cpu).sampling = false; |
1969 | barrier(); |
1970 | return; |
1971 | } |
1972 | } |
1973 | } |
1974 | |
1975 | /* |
1976 | * stop_per_cpu_kthread - Stop per-cpu threads |
1977 | * |
 * Stop the osnoise sampling threads. Use this on unload and at system
1979 | * shutdown. |
1980 | */ |
1981 | static void stop_per_cpu_kthreads(void) |
1982 | { |
1983 | int cpu; |
1984 | |
1985 | cpus_read_lock(); |
1986 | |
1987 | for_each_online_cpu(cpu) |
1988 | stop_kthread(cpu); |
1989 | |
1990 | cpus_read_unlock(); |
1991 | } |
1992 | |
1993 | /* |
 * start_kthread - Start a workload thread
1995 | */ |
1996 | static int start_kthread(unsigned int cpu) |
1997 | { |
1998 | struct task_struct *kthread; |
1999 | void *main = osnoise_main; |
2000 | char comm[24]; |
2001 | |
2002 | if (timerlat_enabled()) { |
		snprintf(comm, 24, "timerlat/%d", cpu);
2004 | main = timerlat_main; |
2005 | } else { |
2006 | /* if no workload, just return */ |
2007 | if (!test_bit(OSN_WORKLOAD, &osnoise_options)) { |
2008 | per_cpu(per_cpu_osnoise_var, cpu).sampling = true; |
2009 | barrier(); |
2010 | return 0; |
2011 | } |
		snprintf(comm, 24, "osnoise/%d", cpu);
2013 | } |
2014 | |
	kthread = kthread_run_on_cpu(main, NULL, cpu, comm);
2016 | |
	if (IS_ERR(kthread)) {
		pr_err(BANNER "could not start sampling thread\n");
2019 | stop_per_cpu_kthreads(); |
2020 | return -ENOMEM; |
2021 | } |
2022 | |
2023 | per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread; |
2024 | |
2025 | return 0; |
2026 | } |
2027 | |
2028 | /* |
2029 | * start_per_cpu_kthread - Kick off per-cpu osnoise sampling kthreads |
2030 | * |
 * This starts the kernel threads that will look for osnoise on the
 * CPUs in which osnoise is allowed to run.
2033 | */ |
2034 | static int start_per_cpu_kthreads(void) |
2035 | { |
2036 | struct cpumask *current_mask = &save_cpumask; |
2037 | int retval = 0; |
2038 | int cpu; |
2039 | |
2040 | if (!test_bit(OSN_WORKLOAD, &osnoise_options)) { |
2041 | if (timerlat_enabled()) |
2042 | return 0; |
2043 | } |
2044 | |
2045 | cpus_read_lock(); |
2046 | /* |
2047 | * Run only on online CPUs in which osnoise is allowed to run. |
2048 | */ |
	cpumask_and(current_mask, cpu_online_mask, &osnoise_cpumask);
2050 | |
2051 | for_each_possible_cpu(cpu) |
2052 | per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL; |
2053 | |
2054 | for_each_cpu(cpu, current_mask) { |
2055 | retval = start_kthread(cpu); |
2056 | if (retval) { |
2057 | cpus_read_unlock(); |
2058 | stop_per_cpu_kthreads(); |
2059 | return retval; |
2060 | } |
2061 | } |
2062 | |
2063 | cpus_read_unlock(); |
2064 | |
2065 | return retval; |
2066 | } |
2067 | |
2068 | #ifdef CONFIG_HOTPLUG_CPU |
2069 | static void osnoise_hotplug_workfn(struct work_struct *dummy) |
2070 | { |
2071 | unsigned int cpu = smp_processor_id(); |
2072 | |
2073 | mutex_lock(&trace_types_lock); |
2074 | |
2075 | if (!osnoise_has_registered_instances()) |
2076 | goto out_unlock_trace; |
2077 | |
2078 | mutex_lock(&interface_lock); |
2079 | cpus_read_lock(); |
2080 | |
	if (!cpumask_test_cpu(cpu, &osnoise_cpumask))
2082 | goto out_unlock; |
2083 | |
2084 | start_kthread(cpu); |
2085 | |
2086 | out_unlock: |
2087 | cpus_read_unlock(); |
	mutex_unlock(&interface_lock);
2089 | out_unlock_trace: |
	mutex_unlock(&trace_types_lock);
2091 | } |
2092 | |
2093 | static DECLARE_WORK(osnoise_hotplug_work, osnoise_hotplug_workfn); |
2094 | |
2095 | /* |
2096 | * osnoise_cpu_init - CPU hotplug online callback function |
2097 | */ |
2098 | static int osnoise_cpu_init(unsigned int cpu) |
2099 | { |
	schedule_work_on(cpu, &osnoise_hotplug_work);
2101 | return 0; |
2102 | } |
2103 | |
2104 | /* |
2105 | * osnoise_cpu_die - CPU hotplug offline callback function |
2106 | */ |
2107 | static int osnoise_cpu_die(unsigned int cpu) |
2108 | { |
2109 | stop_kthread(cpu); |
2110 | return 0; |
2111 | } |
2112 | |
2113 | static void osnoise_init_hotplug_support(void) |
2114 | { |
2115 | int ret; |
2116 | |
	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "trace/osnoise:online",
				osnoise_cpu_init, osnoise_cpu_die);
2119 | if (ret < 0) |
		pr_warn(BANNER "Error initializing cpu hotplug support\n");
2121 | |
2122 | return; |
2123 | } |
2124 | #else /* CONFIG_HOTPLUG_CPU */ |
2125 | static void osnoise_init_hotplug_support(void) |
2126 | { |
2127 | return; |
2128 | } |
2129 | #endif /* CONFIG_HOTPLUG_CPU */ |
2130 | |
2131 | /* |
2132 | * seq file functions for the osnoise/options file. |
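 *
 * Reading the file prints one token per option, space separated. With
 * the default options (only OSNOISE_WORKLOAD set), the output would be
 * (wrapped here for readability):
 *
 *	DEFAULTS OSNOISE_WORKLOAD NO_PANIC_ON_STOP
 *	NO_OSNOISE_PREEMPT_DISABLE NO_OSNOISE_IRQ_DISABLE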
2133 | */ |
2134 | static void *s_options_start(struct seq_file *s, loff_t *pos) |
2135 | { |
2136 | int option = *pos; |
2137 | |
2138 | mutex_lock(&interface_lock); |
2139 | |
2140 | if (option >= OSN_MAX) |
2141 | return NULL; |
2142 | |
2143 | return pos; |
2144 | } |
2145 | |
2146 | static void *s_options_next(struct seq_file *s, void *v, loff_t *pos) |
2147 | { |
2148 | int option = ++(*pos); |
2149 | |
2150 | if (option >= OSN_MAX) |
2151 | return NULL; |
2152 | |
2153 | return pos; |
2154 | } |
2155 | |
2156 | static int s_options_show(struct seq_file *s, void *v) |
2157 | { |
2158 | loff_t *pos = v; |
2159 | int option = *pos; |
2160 | |
2161 | if (option == OSN_DEFAULTS) { |
2162 | if (osnoise_options == OSN_DEFAULT_OPTIONS) |
			seq_printf(s, "%s", osnoise_options_str[option]);
2164 | else |
			seq_printf(s, "NO_%s", osnoise_options_str[option]);
2166 | goto out; |
2167 | } |
2168 | |
2169 | if (test_bit(option, &osnoise_options)) |
		seq_printf(s, "%s", osnoise_options_str[option]);
2171 | else |
		seq_printf(s, "NO_%s", osnoise_options_str[option]);
2173 | |
2174 | out: |
2175 | if (option != OSN_MAX) |
		seq_puts(s, " ");
2177 | |
2178 | return 0; |
2179 | } |
2180 | |
2181 | static void s_options_stop(struct seq_file *s, void *v) |
2182 | { |
	seq_puts(s, "\n");
	mutex_unlock(&interface_lock);
2185 | } |
2186 | |
2187 | static const struct seq_operations osnoise_options_seq_ops = { |
2188 | .start = s_options_start, |
2189 | .next = s_options_next, |
2190 | .show = s_options_show, |
2191 | .stop = s_options_stop |
2192 | }; |
2193 | |
2194 | static int osnoise_options_open(struct inode *inode, struct file *file) |
2195 | { |
2196 | return seq_open(file, &osnoise_options_seq_ops); |
2197 | }; |
2198 | |
2199 | /** |
2200 | * osnoise_options_write - Write function for "options" entry |
2201 | * @filp: The active open file structure |
2202 | * @ubuf: The user buffer that contains the value to write |
2203 | * @cnt: The maximum number of bytes to write to "file" |
2204 | * @ppos: The current position in @file |
2205 | * |
2206 | * Writing the option name sets the option, writing the "NO_" |
2207 | * prefix in front of the option name disables it. |
2208 | * |
2209 | * Writing "DEFAULTS" resets the option values to the default ones. |
2210 | */ |
2211 | static ssize_t osnoise_options_write(struct file *filp, const char __user *ubuf, |
2212 | size_t cnt, loff_t *ppos) |
2213 | { |
2214 | int running, option, enable, retval; |
2215 | char buf[256], *option_str; |
2216 | |
2217 | if (cnt >= 256) |
2218 | return -EINVAL; |
2219 | |
	if (copy_from_user(buf, ubuf, cnt))
2221 | return -EFAULT; |
2222 | |
2223 | buf[cnt] = 0; |
2224 | |
2225 | if (strncmp(buf, "NO_" , 3)) { |
2226 | option_str = strstrip(str: buf); |
2227 | enable = true; |
2228 | } else { |
		option_str = strstrip(&buf[3]);
2230 | enable = false; |
2231 | } |
2232 | |
	option = match_string(osnoise_options_str, OSN_MAX, option_str);
2234 | if (option < 0) |
2235 | return -EINVAL; |
2236 | |
2237 | /* |
2238 | * trace_types_lock is taken to avoid concurrency on start/stop. |
2239 | */ |
2240 | mutex_lock(&trace_types_lock); |
2241 | running = osnoise_has_registered_instances(); |
2242 | if (running) |
2243 | stop_per_cpu_kthreads(); |
2244 | |
2245 | mutex_lock(&interface_lock); |
2246 | /* |
2247 | * avoid CPU hotplug operations that might read options. |
2248 | */ |
2249 | cpus_read_lock(); |
2250 | |
2251 | retval = cnt; |
2252 | |
2253 | if (enable) { |
2254 | if (option == OSN_DEFAULTS) |
2255 | osnoise_options = OSN_DEFAULT_OPTIONS; |
2256 | else |
			set_bit(option, &osnoise_options);
2258 | } else { |
2259 | if (option == OSN_DEFAULTS) |
2260 | retval = -EINVAL; |
2261 | else |
			clear_bit(option, &osnoise_options);
2263 | } |
2264 | |
2265 | cpus_read_unlock(); |
	mutex_unlock(&interface_lock);
2267 | |
2268 | if (running) |
2269 | start_per_cpu_kthreads(); |
	mutex_unlock(&trace_types_lock);
2271 | |
2272 | return retval; |
2273 | } |
2274 | |
2275 | /* |
2276 | * osnoise_cpus_read - Read function for reading the "cpus" file |
2277 | * @filp: The active open file structure |
2278 | * @ubuf: The userspace provided buffer to read value into |
 * @count: The maximum number of bytes to read
2280 | * @ppos: The current "file" position |
2281 | * |
2282 | * Prints the "cpus" output into the user-provided buffer. |
2283 | */ |
2284 | static ssize_t |
2285 | osnoise_cpus_read(struct file *filp, char __user *ubuf, size_t count, |
2286 | loff_t *ppos) |
2287 | { |
2288 | char *mask_str; |
2289 | int len; |
2290 | |
2291 | mutex_lock(&interface_lock); |
2292 | |
	len = snprintf(NULL, 0, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask)) + 1;
	mask_str = kmalloc(len, GFP_KERNEL);
2295 | if (!mask_str) { |
2296 | count = -ENOMEM; |
2297 | goto out_unlock; |
2298 | } |
2299 | |
	len = snprintf(mask_str, len, "%*pbl\n", cpumask_pr_args(&osnoise_cpumask));
2301 | if (len >= count) { |
2302 | count = -EINVAL; |
2303 | goto out_free; |
2304 | } |
2305 | |
	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
2307 | |
2308 | out_free: |
	kfree(mask_str);
2310 | out_unlock: |
	mutex_unlock(&interface_lock);
2312 | |
2313 | return count; |
2314 | } |
2315 | |
2316 | /* |
2317 | * osnoise_cpus_write - Write function for "cpus" entry |
2318 | * @filp: The active open file structure |
2319 | * @ubuf: The user buffer that contains the value to write |
 * @count: The maximum number of bytes to write to "file"
2321 | * @ppos: The current position in @file |
2322 | * |
2323 | * This function provides a write implementation for the "cpus" |
2324 | * interface to the osnoise trace. By default, it lists all CPUs, |
2325 | * in this way, allowing osnoise threads to run on any online CPU |
 * of the system. Writing a cpumask via this interface restricts the
 * execution of osnoise to that set of CPUs. Why not use "tracing_cpumask"?
2328 | * Because the user might be interested in tracing what is running on |
2329 | * other CPUs. For instance, one might run osnoise in one HT CPU |
2330 | * while observing what is running on the sibling HT CPU. |
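 *
 * For example, to restrict the workload to CPUs 0-3 (path assuming
 * tracefs mounted at /sys/kernel/tracing):
 *
 *	echo 0-3 > /sys/kernel/tracing/osnoise/cpus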
2331 | */ |
2332 | static ssize_t |
2333 | osnoise_cpus_write(struct file *filp, const char __user *ubuf, size_t count, |
2334 | loff_t *ppos) |
2335 | { |
2336 | cpumask_var_t osnoise_cpumask_new; |
2337 | int running, err; |
2338 | char buf[256]; |
2339 | |
2340 | if (count >= 256) |
2341 | return -EINVAL; |
2342 | |
	if (copy_from_user(buf, ubuf, count))
		return -EFAULT;
	buf[count] = '\0';
2345 | |
	if (!zalloc_cpumask_var(&osnoise_cpumask_new, GFP_KERNEL))
2347 | return -ENOMEM; |
2348 | |
	err = cpulist_parse(buf, osnoise_cpumask_new);
2350 | if (err) |
2351 | goto err_free; |
2352 | |
2353 | /* |
2354 | * trace_types_lock is taken to avoid concurrency on start/stop. |
2355 | */ |
2356 | mutex_lock(&trace_types_lock); |
2357 | running = osnoise_has_registered_instances(); |
2358 | if (running) |
2359 | stop_per_cpu_kthreads(); |
2360 | |
2361 | mutex_lock(&interface_lock); |
2362 | /* |
2363 | * osnoise_cpumask is read by CPU hotplug operations. |
2364 | */ |
2365 | cpus_read_lock(); |
2366 | |
	cpumask_copy(&osnoise_cpumask, osnoise_cpumask_new);
2368 | |
2369 | cpus_read_unlock(); |
	mutex_unlock(&interface_lock);
2371 | |
2372 | if (running) |
2373 | start_per_cpu_kthreads(); |
	mutex_unlock(&trace_types_lock);
2375 | |
	free_cpumask_var(osnoise_cpumask_new);
2377 | return count; |
2378 | |
2379 | err_free: |
	free_cpumask_var(osnoise_cpumask_new);
2381 | |
2382 | return err; |
2383 | } |
2384 | |
2385 | #ifdef CONFIG_TIMERLAT_TRACER |
2386 | static int timerlat_fd_open(struct inode *inode, struct file *file) |
2387 | { |
2388 | struct osnoise_variables *osn_var; |
2389 | struct timerlat_variables *tlat; |
2390 | long cpu = (long) inode->i_cdev; |
2391 | |
2392 | mutex_lock(&interface_lock); |
2393 | |
2394 | /* |
2395 | * This file is accessible only if timerlat is enabled, and |
2396 | * NO_OSNOISE_WORKLOAD is set. |
2397 | */ |
2398 | if (!timerlat_enabled() || test_bit(OSN_WORKLOAD, &osnoise_options)) { |
		mutex_unlock(&interface_lock);
2400 | return -EINVAL; |
2401 | } |
2402 | |
2403 | migrate_disable(); |
2404 | |
2405 | osn_var = this_cpu_osn_var(); |
2406 | |
2407 | /* |
2408 | * The osn_var->pid holds the single access to this file. |
2409 | */ |
2410 | if (osn_var->pid) { |
		mutex_unlock(&interface_lock);
2412 | migrate_enable(); |
2413 | return -EBUSY; |
2414 | } |
2415 | |
2416 | /* |
	 * timerlat tracer is a per-cpu tracer. Check if the user-space thread
	 * is pinned to a single CPU too. The tracer later monitors if the task
	 * migrates, and then disables the tracer if it does. However, it is
	 * worth doing this basic acceptance test to avoid an obviously wrong
	 * setup.
2422 | */ |
2423 | if (current->nr_cpus_allowed > 1 || cpu != smp_processor_id()) { |
		mutex_unlock(&interface_lock);
2425 | migrate_enable(); |
2426 | return -EPERM; |
2427 | } |
2428 | |
2429 | /* |
2430 | * From now on, it is good to go. |
2431 | */ |
2432 | file->private_data = inode->i_cdev; |
2433 | |
2434 | get_task_struct(current); |
2435 | |
2436 | osn_var->kthread = current; |
2437 | osn_var->pid = current->pid; |
2438 | |
2439 | /* |
2440 | * Setup is done. |
2441 | */ |
	mutex_unlock(&interface_lock);
2443 | |
2444 | tlat = this_cpu_tmr_var(); |
2445 | tlat->count = 0; |
2446 | |
	hrtimer_init(&tlat->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED_HARD);
2448 | tlat->timer.function = timerlat_irq; |
2449 | |
2450 | migrate_enable(); |
2451 | return 0; |
2452 | }; |
2453 | |
2454 | /* |
2455 | * timerlat_fd_read - Read function for "timerlat_fd" file |
2456 | * @file: The active open file structure |
2457 | * @ubuf: The userspace provided buffer to read value into |
 * @count: The maximum number of bytes to read
2459 | * @ppos: The current "file" position |
2460 | * |
2461 | * Prints 1 on timerlat, the number of interferences on osnoise, -1 on error. |
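 *
 * A minimal user-space consumer, as a sketch (error handling omitted;
 * the path assumes tracefs mounted at /sys/kernel/tracing, and the
 * thread must already be pinned to the matching CPU, e.g., via
 * sched_setaffinity()):
 *
 *	int fd = open("/sys/kernel/tracing/osnoise/per_cpu/cpu0/timerlat_fd",
 *		      O_RDONLY);
 *	char buf[1];
 *
 *	for (;;)
 *		read(fd, buf, 1);
 *
 * Each read() blocks until the next timer activation on that CPU.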
2462 | */ |
2463 | static ssize_t |
2464 | timerlat_fd_read(struct file *file, char __user *ubuf, size_t count, |
2465 | loff_t *ppos) |
2466 | { |
2467 | long cpu = (long) file->private_data; |
2468 | struct osnoise_variables *osn_var; |
2469 | struct timerlat_variables *tlat; |
2470 | struct timerlat_sample s; |
2471 | s64 diff; |
2472 | u64 now; |
2473 | |
2474 | migrate_disable(); |
2475 | |
2476 | tlat = this_cpu_tmr_var(); |
2477 | |
2478 | /* |
2479 | * While in user-space, the thread is migratable. There is nothing |
2480 | * we can do about it. |
2481 | * So, if the thread is running on another CPU, stop the machinery. |
2482 | */ |
2483 | if (cpu == smp_processor_id()) { |
2484 | if (tlat->uthread_migrate) { |
2485 | migrate_enable(); |
2486 | return -EINVAL; |
2487 | } |
2488 | } else { |
2489 | per_cpu_ptr(&per_cpu_timerlat_var, cpu)->uthread_migrate = 1; |
2490 | osnoise_taint("timerlat user thread migrate\n" ); |
2491 | osnoise_stop_tracing(); |
2492 | migrate_enable(); |
2493 | return -EINVAL; |
2494 | } |
2495 | |
2496 | osn_var = this_cpu_osn_var(); |
2497 | |
2498 | /* |
2499 | * The timerlat in user-space runs in a different order: |
2500 | * the read() starts from the execution of the previous occurrence, |
2501 | * sleeping for the next occurrence. |
2502 | * |
	 * So, skip this step if read() is entered before the first wakeup
	 * from the timerlat IRQ:
2505 | */ |
2506 | if (likely(osn_var->sampling)) { |
		now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
2508 | diff = now - tlat->abs_period; |
2509 | |
2510 | /* |
2511 | * it was not a timer firing, but some other signal? |
2512 | */ |
2513 | if (diff < 0) |
2514 | goto out; |
2515 | |
2516 | s.seqnum = tlat->count; |
2517 | s.timer_latency = diff; |
2518 | s.context = THREAD_URET; |
2519 | |
		trace_timerlat_sample(&s);
2521 | |
		notify_new_max_latency(diff);
2523 | |
2524 | tlat->tracing_thread = false; |
2525 | if (osnoise_data.stop_tracing_total) |
2526 | if (time_to_us(diff) >= osnoise_data.stop_tracing_total) |
2527 | osnoise_stop_tracing(); |
2528 | } else { |
2529 | tlat->tracing_thread = false; |
2530 | tlat->kthread = current; |
2531 | |
		/* Record now as the base from which the new period drifts */
		tlat->abs_period = hrtimer_cb_get_time(&tlat->timer);
2534 | |
2535 | osn_var->sampling = 1; |
2536 | } |
2537 | |
2538 | /* wait for the next period */ |
2539 | wait_next_period(tlat); |
2540 | |
2541 | /* This is the wakeup from this cycle */ |
	now = ktime_to_ns(hrtimer_cb_get_time(&tlat->timer));
2543 | diff = now - tlat->abs_period; |
2544 | |
2545 | /* |
2546 | * it was not a timer firing, but some other signal? |
2547 | */ |
2548 | if (diff < 0) |
2549 | goto out; |
2550 | |
2551 | s.seqnum = tlat->count; |
2552 | s.timer_latency = diff; |
2553 | s.context = THREAD_CONTEXT; |
2554 | |
	trace_timerlat_sample(&s);
2556 | |
2557 | if (osnoise_data.stop_tracing_total) { |
2558 | if (time_to_us(diff) >= osnoise_data.stop_tracing_total) { |
2559 | timerlat_dump_stack(time_to_us(diff)); |
			notify_new_max_latency(diff);
2561 | osnoise_stop_tracing(); |
2562 | } |
2563 | } |
2564 | |
2565 | out: |
2566 | migrate_enable(); |
2567 | return 0; |
2568 | } |
2569 | |
2570 | static int timerlat_fd_release(struct inode *inode, struct file *file) |
2571 | { |
2572 | struct osnoise_variables *osn_var; |
2573 | struct timerlat_variables *tlat_var; |
2574 | long cpu = (long) file->private_data; |
2575 | |
2576 | migrate_disable(); |
2577 | mutex_lock(&interface_lock); |
2578 | |
2579 | osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu); |
2580 | tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu); |
2581 | |
	hrtimer_cancel(&tlat_var->timer);
2583 | memset(tlat_var, 0, sizeof(*tlat_var)); |
2584 | |
2585 | osn_var->sampling = 0; |
2586 | osn_var->pid = 0; |
2587 | |
2588 | /* |
2589 | * We are leaving, not being stopped... see stop_kthread(); |
2590 | */ |
2591 | if (osn_var->kthread) { |
		put_task_struct(osn_var->kthread);
2593 | osn_var->kthread = NULL; |
2594 | } |
2595 | |
	mutex_unlock(&interface_lock);
2597 | migrate_enable(); |
2598 | return 0; |
2599 | } |
2600 | #endif |
2601 | |
2602 | /* |
2603 | * osnoise/runtime_us: cannot be greater than the period. |
2604 | */ |
2605 | static struct trace_min_max_param osnoise_runtime = { |
2606 | .lock = &interface_lock, |
2607 | .val = &osnoise_data.sample_runtime, |
2608 | .max = &osnoise_data.sample_period, |
2609 | .min = NULL, |
2610 | }; |
2611 | |
2612 | /* |
2613 | * osnoise/period_us: cannot be smaller than the runtime. |
2614 | */ |
2615 | static struct trace_min_max_param osnoise_period = { |
2616 | .lock = &interface_lock, |
2617 | .val = &osnoise_data.sample_period, |
2618 | .max = NULL, |
2619 | .min = &osnoise_data.sample_runtime, |
2620 | }; |
2621 | |
2622 | /* |
2623 | * osnoise/stop_tracing_us: no limit. |
2624 | */ |
2625 | static struct trace_min_max_param osnoise_stop_tracing_in = { |
2626 | .lock = &interface_lock, |
2627 | .val = &osnoise_data.stop_tracing, |
2628 | .max = NULL, |
2629 | .min = NULL, |
2630 | }; |
2631 | |
2632 | /* |
2633 | * osnoise/stop_tracing_total_us: no limit. |
2634 | */ |
2635 | static struct trace_min_max_param osnoise_stop_tracing_total = { |
2636 | .lock = &interface_lock, |
2637 | .val = &osnoise_data.stop_tracing_total, |
2638 | .max = NULL, |
2639 | .min = NULL, |
2640 | }; |
2641 | |
2642 | #ifdef CONFIG_TIMERLAT_TRACER |
2643 | /* |
2644 | * osnoise/print_stack: print the stacktrace of the IRQ handler if the total |
2645 | * latency is higher than val. |
2646 | */ |
2647 | static struct trace_min_max_param osnoise_print_stack = { |
2648 | .lock = &interface_lock, |
2649 | .val = &osnoise_data.print_stack, |
2650 | .max = NULL, |
2651 | .min = NULL, |
2652 | }; |
2653 | |
2654 | /* |
2655 | * osnoise/timerlat_period: min 100 us, max 1 s |
2656 | */ |
2657 | static u64 timerlat_min_period = 100; |
2658 | static u64 timerlat_max_period = 1000000; |
2659 | static struct trace_min_max_param timerlat_period = { |
2660 | .lock = &interface_lock, |
2661 | .val = &osnoise_data.timerlat_period, |
2662 | .max = &timerlat_max_period, |
2663 | .min = &timerlat_min_period, |
2664 | }; |
2665 | |
2666 | static const struct file_operations timerlat_fd_fops = { |
2667 | .open = timerlat_fd_open, |
2668 | .read = timerlat_fd_read, |
2669 | .release = timerlat_fd_release, |
2670 | .llseek = generic_file_llseek, |
2671 | }; |
2672 | #endif |
2673 | |
2674 | static const struct file_operations cpus_fops = { |
2675 | .open = tracing_open_generic, |
2676 | .read = osnoise_cpus_read, |
2677 | .write = osnoise_cpus_write, |
2678 | .llseek = generic_file_llseek, |
2679 | }; |
2680 | |
2681 | static const struct file_operations osnoise_options_fops = { |
2682 | .open = osnoise_options_open, |
2683 | .read = seq_read, |
2684 | .llseek = seq_lseek, |
2685 | .release = seq_release, |
2686 | .write = osnoise_options_write |
2687 | }; |
2688 | |
2689 | #ifdef CONFIG_TIMERLAT_TRACER |
2690 | #ifdef CONFIG_STACKTRACE |
2691 | static int init_timerlat_stack_tracefs(struct dentry *top_dir) |
2692 | { |
2693 | struct dentry *tmp; |
2694 | |
2695 | tmp = tracefs_create_file(name: "print_stack" , TRACE_MODE_WRITE, parent: top_dir, |
2696 | data: &osnoise_print_stack, fops: &trace_min_max_fops); |
2697 | if (!tmp) |
2698 | return -ENOMEM; |
2699 | |
2700 | return 0; |
2701 | } |
2702 | #else /* CONFIG_STACKTRACE */ |
2703 | static int init_timerlat_stack_tracefs(struct dentry *top_dir) |
2704 | { |
2705 | return 0; |
2706 | } |
2707 | #endif /* CONFIG_STACKTRACE */ |
2708 | |
2709 | static int osnoise_create_cpu_timerlat_fd(struct dentry *top_dir) |
2710 | { |
2711 | struct dentry *timerlat_fd; |
2712 | struct dentry *per_cpu; |
2713 | struct dentry *cpu_dir; |
2714 | char cpu_str[30]; /* see trace.c: tracing_init_tracefs_percpu() */ |
2715 | long cpu; |
2716 | |
2717 | /* |
	 * Why not use the tracing instance per_cpu/ dir?
2719 | * |
	 * Because osnoise/timerlat have a single workload, having
	 * multiple files like these would be a waste of memory.
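	 *
	 * The result is one per_cpu/cpu$N/timerlat_fd file per possible
	 * CPU, e.g., osnoise/per_cpu/cpu0/timerlat_fd.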
2722 | */ |
	per_cpu = tracefs_create_dir("per_cpu", top_dir);
2724 | if (!per_cpu) |
2725 | return -ENOMEM; |
2726 | |
2727 | for_each_possible_cpu(cpu) { |
		snprintf(cpu_str, 30, "cpu%ld", cpu);
		cpu_dir = tracefs_create_dir(cpu_str, per_cpu);
2730 | if (!cpu_dir) |
2731 | goto out_clean; |
2732 | |
		timerlat_fd = trace_create_file("timerlat_fd", TRACE_MODE_READ,
						cpu_dir, NULL, &timerlat_fd_fops);
2735 | if (!timerlat_fd) |
2736 | goto out_clean; |
2737 | |
2738 | /* Record the CPU */ |
		d_inode(timerlat_fd)->i_cdev = (void *)(cpu);
2740 | } |
2741 | |
2742 | return 0; |
2743 | |
2744 | out_clean: |
	tracefs_remove(per_cpu);
2746 | return -ENOMEM; |
2747 | } |
2748 | |
2749 | /* |
2750 | * init_timerlat_tracefs - A function to initialize the timerlat interface files |
2751 | */ |
2752 | static int init_timerlat_tracefs(struct dentry *top_dir) |
2753 | { |
2754 | struct dentry *tmp; |
2755 | int retval; |
2756 | |
2757 | tmp = tracefs_create_file(name: "timerlat_period_us" , TRACE_MODE_WRITE, parent: top_dir, |
2758 | data: &timerlat_period, fops: &trace_min_max_fops); |
2759 | if (!tmp) |
2760 | return -ENOMEM; |
2761 | |
2762 | retval = osnoise_create_cpu_timerlat_fd(top_dir); |
2763 | if (retval) |
2764 | return retval; |
2765 | |
2766 | return init_timerlat_stack_tracefs(top_dir); |
2767 | } |
2768 | #else /* CONFIG_TIMERLAT_TRACER */ |
2769 | static int init_timerlat_tracefs(struct dentry *top_dir) |
2770 | { |
2771 | return 0; |
2772 | } |
2773 | #endif /* CONFIG_TIMERLAT_TRACER */ |
2774 | |
2775 | /* |
2776 | * init_tracefs - A function to initialize the tracefs interface files |
2777 | * |
2778 | * This function creates entries in tracefs for "osnoise" and "timerlat". |
2779 | * It creates these directories in the tracing directory, and within that |
 * directory the user can change and view the configs.
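 *
 * After a successful run, the layout under the tracing directory is
 * roughly:
 *
 *	osnoise/period_us
 *	osnoise/runtime_us
 *	osnoise/stop_tracing_us
 *	osnoise/stop_tracing_total_us
 *	osnoise/cpus
 *	osnoise/options
 *
 * plus the timerlat entries when CONFIG_TIMERLAT_TRACER is enabled.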
2781 | */ |
2782 | static int init_tracefs(void) |
2783 | { |
2784 | struct dentry *top_dir; |
2785 | struct dentry *tmp; |
2786 | int ret; |
2787 | |
2788 | ret = tracing_init_dentry(); |
2789 | if (ret) |
2790 | return -ENOMEM; |
2791 | |
	top_dir = tracefs_create_dir("osnoise", NULL);
2793 | if (!top_dir) |
2794 | return 0; |
2795 | |
2796 | tmp = tracefs_create_file(name: "period_us" , TRACE_MODE_WRITE, parent: top_dir, |
2797 | data: &osnoise_period, fops: &trace_min_max_fops); |
2798 | if (!tmp) |
2799 | goto err; |
2800 | |
2801 | tmp = tracefs_create_file(name: "runtime_us" , TRACE_MODE_WRITE, parent: top_dir, |
2802 | data: &osnoise_runtime, fops: &trace_min_max_fops); |
2803 | if (!tmp) |
2804 | goto err; |
2805 | |
2806 | tmp = tracefs_create_file(name: "stop_tracing_us" , TRACE_MODE_WRITE, parent: top_dir, |
2807 | data: &osnoise_stop_tracing_in, fops: &trace_min_max_fops); |
2808 | if (!tmp) |
2809 | goto err; |
2810 | |
2811 | tmp = tracefs_create_file(name: "stop_tracing_total_us" , TRACE_MODE_WRITE, parent: top_dir, |
2812 | data: &osnoise_stop_tracing_total, fops: &trace_min_max_fops); |
2813 | if (!tmp) |
2814 | goto err; |
2815 | |
2816 | tmp = trace_create_file(name: "cpus" , TRACE_MODE_WRITE, parent: top_dir, NULL, fops: &cpus_fops); |
2817 | if (!tmp) |
2818 | goto err; |
2819 | |
2820 | tmp = trace_create_file(name: "options" , TRACE_MODE_WRITE, parent: top_dir, NULL, |
2821 | fops: &osnoise_options_fops); |
2822 | if (!tmp) |
2823 | goto err; |
2824 | |
2825 | ret = init_timerlat_tracefs(top_dir); |
2826 | if (ret) |
2827 | goto err; |
2828 | |
2829 | return 0; |
2830 | |
2831 | err: |
	tracefs_remove(top_dir);
2833 | return -ENOMEM; |
2834 | } |
2835 | |
2836 | static int osnoise_hook_events(void) |
2837 | { |
2838 | int retval; |
2839 | |
2840 | /* |
2841 | * Trace is already hooked, we are re-enabling from |
2842 | * a stop_tracing_*. |
2843 | */ |
2844 | if (trace_osnoise_callback_enabled) |
2845 | return 0; |
2846 | |
2847 | retval = hook_irq_events(); |
2848 | if (retval) |
2849 | return -EINVAL; |
2850 | |
2851 | retval = hook_softirq_events(); |
2852 | if (retval) |
2853 | goto out_unhook_irq; |
2854 | |
2855 | retval = hook_thread_events(); |
2856 | /* |
2857 | * All fine! |
2858 | */ |
2859 | if (!retval) |
2860 | return 0; |
2861 | |
2862 | unhook_softirq_events(); |
2863 | out_unhook_irq: |
2864 | unhook_irq_events(); |
2865 | return -EINVAL; |
2866 | } |
2867 | |
2868 | static void osnoise_unhook_events(void) |
2869 | { |
2870 | unhook_thread_events(); |
2871 | unhook_softirq_events(); |
2872 | unhook_irq_events(); |
2873 | } |
2874 | |
2875 | /* |
2876 | * osnoise_workload_start - start the workload and hook to events |
2877 | */ |
2878 | static int osnoise_workload_start(void) |
2879 | { |
2880 | int retval; |
2881 | |
2882 | /* |
2883 | * Instances need to be registered after calling workload |
2884 | * start. Hence, if there is already an instance, the |
2885 | * workload was already registered. Otherwise, this |
2886 | * code is on the way to register the first instance, |
2887 | * and the workload will start. |
2888 | */ |
2889 | if (osnoise_has_registered_instances()) |
2890 | return 0; |
2891 | |
2892 | osn_var_reset_all(); |
2893 | |
2894 | retval = osnoise_hook_events(); |
2895 | if (retval) |
2896 | return retval; |
2897 | |
2898 | /* |
2899 | * Make sure that ftrace_nmi_enter/exit() see reset values |
2900 | * before enabling trace_osnoise_callback_enabled. |
2901 | */ |
2902 | barrier(); |
2903 | trace_osnoise_callback_enabled = true; |
2904 | |
2905 | retval = start_per_cpu_kthreads(); |
2906 | if (retval) { |
2907 | trace_osnoise_callback_enabled = false; |
2908 | /* |
2909 | * Make sure that ftrace_nmi_enter/exit() see |
2910 | * trace_osnoise_callback_enabled as false before continuing. |
2911 | */ |
2912 | barrier(); |
2913 | |
2914 | osnoise_unhook_events(); |
2915 | return retval; |
2916 | } |
2917 | |
2918 | return 0; |
2919 | } |
2920 | |
2921 | /* |
2922 | * osnoise_workload_stop - stop the workload and unhook the events |
2923 | */ |
2924 | static void osnoise_workload_stop(void) |
2925 | { |
2926 | /* |
2927 | * Instances need to be unregistered before calling |
2928 | * stop. Hence, if there is a registered instance, more |
2929 | * than one instance is running, and the workload will not |
2930 | * yet stop. Otherwise, this code is on the way to disable |
2931 | * the last instance, and the workload can stop. |
2932 | */ |
2933 | if (osnoise_has_registered_instances()) |
2934 | return; |
2935 | |
2936 | /* |
2937 | * If callbacks were already disabled in a previous stop |
 * call, there is no need to disable them again.
2939 | * |
2940 | * For instance, this happens when tracing is stopped via: |
2941 | * echo 0 > tracing_on |
2942 | * echo nop > current_tracer. |
2943 | */ |
2944 | if (!trace_osnoise_callback_enabled) |
2945 | return; |
2946 | |
2947 | trace_osnoise_callback_enabled = false; |
2948 | /* |
2949 | * Make sure that ftrace_nmi_enter/exit() see |
2950 | * trace_osnoise_callback_enabled as false before continuing. |
2951 | */ |
2952 | barrier(); |
2953 | |
2954 | stop_per_cpu_kthreads(); |
2955 | |
2956 | osnoise_unhook_events(); |
2957 | } |
2958 | |
2959 | static void osnoise_tracer_start(struct trace_array *tr) |
2960 | { |
2961 | int retval; |
2962 | |
2963 | /* |
2964 | * If the instance is already registered, there is no need to |
2965 | * register it again. |
2966 | */ |
2967 | if (osnoise_instance_registered(tr)) |
2968 | return; |
2969 | |
2970 | retval = osnoise_workload_start(); |
2971 | if (retval) |
2972 | pr_err(BANNER "Error starting osnoise tracer\n" ); |
2973 | |
2974 | osnoise_register_instance(tr); |
2975 | } |
2976 | |
2977 | static void osnoise_tracer_stop(struct trace_array *tr) |
2978 | { |
2979 | osnoise_unregister_instance(tr); |
2980 | osnoise_workload_stop(); |
2981 | } |
2982 | |
2983 | static int osnoise_tracer_init(struct trace_array *tr) |
2984 | { |
2985 | /* |
2986 | * Only allow osnoise tracer if timerlat tracer is not running |
2987 | * already. |
2988 | */ |
2989 | if (timerlat_enabled()) |
2990 | return -EBUSY; |
2991 | |
2992 | tr->max_latency = 0; |
2993 | |
2994 | osnoise_tracer_start(tr); |
2995 | return 0; |
2996 | } |
2997 | |
2998 | static void osnoise_tracer_reset(struct trace_array *tr) |
2999 | { |
3000 | osnoise_tracer_stop(tr); |
3001 | } |
3002 | |
3003 | static struct tracer osnoise_tracer __read_mostly = { |
3004 | .name = "osnoise" , |
3005 | .init = osnoise_tracer_init, |
3006 | .reset = osnoise_tracer_reset, |
3007 | .start = osnoise_tracer_start, |
3008 | .stop = osnoise_tracer_stop, |
3009 | .print_header = print_osnoise_headers, |
3010 | .allow_instances = true, |
3011 | }; |
3012 | |
3013 | #ifdef CONFIG_TIMERLAT_TRACER |
3014 | static void timerlat_tracer_start(struct trace_array *tr) |
3015 | { |
3016 | int retval; |
3017 | |
3018 | /* |
3019 | * If the instance is already registered, there is no need to |
3020 | * register it again. |
3021 | */ |
3022 | if (osnoise_instance_registered(tr)) |
3023 | return; |
3024 | |
3025 | retval = osnoise_workload_start(); |
3026 | if (retval) |
3027 | pr_err(BANNER "Error starting timerlat tracer\n" ); |
3028 | |
3029 | osnoise_register_instance(tr); |
3030 | |
3031 | return; |
3032 | } |
3033 | |
3034 | static void timerlat_tracer_stop(struct trace_array *tr) |
3035 | { |
3036 | int cpu; |
3037 | |
3038 | osnoise_unregister_instance(tr); |
3039 | |
3040 | /* |
3041 | * Instruct the threads to stop only if this is the last instance. |
3042 | */ |
3043 | if (!osnoise_has_registered_instances()) { |
3044 | for_each_online_cpu(cpu) |
3045 | per_cpu(per_cpu_osnoise_var, cpu).sampling = 0; |
3046 | } |
3047 | |
3048 | osnoise_workload_stop(); |
3049 | } |
3050 | |
3051 | static int timerlat_tracer_init(struct trace_array *tr) |
3052 | { |
3053 | /* |
3054 | * Only allow timerlat tracer if osnoise tracer is not running already. |
3055 | */ |
3056 | if (osnoise_has_registered_instances() && !osnoise_data.timerlat_tracer) |
3057 | return -EBUSY; |
3058 | |
3059 | /* |
3060 | * If this is the first instance, set timerlat_tracer to block |
3061 | * osnoise tracer start. |
3062 | */ |
3063 | if (!osnoise_has_registered_instances()) |
3064 | osnoise_data.timerlat_tracer = 1; |
3065 | |
3066 | tr->max_latency = 0; |
3067 | timerlat_tracer_start(tr); |
3068 | |
3069 | return 0; |
3070 | } |
3071 | |
3072 | static void timerlat_tracer_reset(struct trace_array *tr) |
3073 | { |
3074 | timerlat_tracer_stop(tr); |
3075 | |
3076 | /* |
3077 | * If this is the last instance, reset timerlat_tracer allowing |
3078 | * osnoise to be started. |
3079 | */ |
3080 | if (!osnoise_has_registered_instances()) |
3081 | osnoise_data.timerlat_tracer = 0; |
3082 | } |
3083 | |
3084 | static struct tracer timerlat_tracer __read_mostly = { |
3085 | .name = "timerlat" , |
3086 | .init = timerlat_tracer_init, |
3087 | .reset = timerlat_tracer_reset, |
3088 | .start = timerlat_tracer_start, |
3089 | .stop = timerlat_tracer_stop, |
3090 | .print_header = print_timerlat_headers, |
3091 | .allow_instances = true, |
3092 | }; |
3093 | |
3094 | __init static int init_timerlat_tracer(void) |
3095 | { |
	return register_tracer(&timerlat_tracer);
3097 | } |
3098 | #else /* CONFIG_TIMERLAT_TRACER */ |
3099 | __init static int init_timerlat_tracer(void) |
3100 | { |
3101 | return 0; |
3102 | } |
3103 | #endif /* CONFIG_TIMERLAT_TRACER */ |
3104 | |
3105 | __init static int init_osnoise_tracer(void) |
3106 | { |
3107 | int ret; |
3108 | |
3109 | mutex_init(&interface_lock); |
3110 | |
	cpumask_copy(&osnoise_cpumask, cpu_all_mask);
3112 | |
	ret = register_tracer(&osnoise_tracer);
3114 | if (ret) { |
3115 | pr_err(BANNER "Error registering osnoise!\n" ); |
3116 | return ret; |
3117 | } |
3118 | |
3119 | ret = init_timerlat_tracer(); |
3120 | if (ret) { |
3121 | pr_err(BANNER "Error registering timerlat!\n" ); |
3122 | return ret; |
3123 | } |
3124 | |
3125 | osnoise_init_hotplug_support(); |
3126 | |
	INIT_LIST_HEAD_RCU(&osnoise_instances);
3128 | |
3129 | init_tracefs(); |
3130 | |
3131 | return 0; |
3132 | } |
3133 | late_initcall(init_osnoise_tracer); |
3134 | |