1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Benchmark synthesis of perf events such as at the start of a 'perf |
4 | * record'. Synthesis is done on the current process and the 'dummy' event |
5 | * handlers are invoked that support dump_trace but otherwise do nothing. |
6 | * |
7 | * Copyright 2019 Google LLC. |
8 | */ |
9 | #include <stdio.h> |
10 | #include "bench.h" |
11 | #include "../util/debug.h" |
12 | #include "../util/session.h" |
13 | #include "../util/stat.h" |
14 | #include "../util/synthetic-events.h" |
15 | #include "../util/target.h" |
16 | #include "../util/thread_map.h" |
17 | #include "../util/tool.h" |
18 | #include "../util/util.h" |
19 | #include <linux/atomic.h> |
20 | #include <linux/err.h> |
21 | #include <linux/time64.h> |
22 | #include <subcmd/parse-options.h> |
23 | |
/* Which benchmarks to run; set by the -s/--st and -t/--mt options. */
static bool run_st = false;
static bool run_mt = false;

/* Iteration counts used to compute the reported averages. */
static unsigned int single_iterations = 10000;
static unsigned int multi_iterations = 10;

/*
 * Range of synthesis thread counts for the multi-threaded benchmark.
 * max_threads == UINT_MAX means "not set on the command line".
 */
static unsigned int min_threads = 1;
static unsigned int max_threads = UINT_MAX;
30 | |
31 | static const struct option options[] = { |
32 | OPT_BOOLEAN('s', "st" , &run_st, "Run single threaded benchmark" ), |
33 | OPT_BOOLEAN('t', "mt" , &run_mt, "Run multi-threaded benchmark" ), |
34 | OPT_UINTEGER('m', "min-threads" , &min_threads, |
35 | "Minimum number of threads in multithreaded bench" ), |
36 | OPT_UINTEGER('M', "max-threads" , &max_threads, |
37 | "Maximum number of threads in multithreaded bench" ), |
38 | OPT_UINTEGER('i', "single-iterations" , &single_iterations, |
39 | "Number of iterations used to compute single-threaded average" ), |
40 | OPT_UINTEGER('I', "multi-iterations" , &multi_iterations, |
41 | "Number of iterations used to compute multi-threaded average" ), |
42 | OPT_END() |
43 | }; |
44 | |
/* NULL-terminated usage strings passed to the option parser. */
static const char *const bench_usage[] = {
	"perf bench internals synthesize <options>",
	NULL,
};
49 | |
50 | static atomic_t event_count; |
51 | |
52 | static int process_synthesized_event(struct perf_tool *tool __maybe_unused, |
53 | union perf_event *event __maybe_unused, |
54 | struct perf_sample *sample __maybe_unused, |
55 | struct machine *machine __maybe_unused) |
56 | { |
57 | atomic_inc(v: &event_count); |
58 | return 0; |
59 | } |
60 | |
61 | static int do_run_single_threaded(struct perf_session *session, |
62 | struct perf_thread_map *threads, |
63 | struct target *target, bool data_mmap) |
64 | { |
65 | const unsigned int nr_threads_synthesize = 1; |
66 | struct timeval start, end, diff; |
67 | u64 runtime_us; |
68 | unsigned int i; |
69 | double time_average, time_stddev, event_average, event_stddev; |
70 | int err; |
71 | struct stats time_stats, event_stats; |
72 | |
73 | init_stats(stats: &time_stats); |
74 | init_stats(stats: &event_stats); |
75 | |
76 | for (i = 0; i < single_iterations; i++) { |
77 | atomic_set(v: &event_count, i: 0); |
78 | gettimeofday(&start, NULL); |
79 | err = __machine__synthesize_threads(machine: &session->machines.host, |
80 | NULL, |
81 | target, threads, |
82 | process: process_synthesized_event, |
83 | needs_mmap: true, data_mmap, |
84 | nr_threads_synthesize); |
85 | if (err) |
86 | return err; |
87 | |
88 | gettimeofday(&end, NULL); |
89 | timersub(&end, &start, &diff); |
90 | runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; |
91 | update_stats(stats: &time_stats, val: runtime_us); |
92 | update_stats(stats: &event_stats, val: atomic_read(v: &event_count)); |
93 | } |
94 | |
95 | time_average = avg_stats(stats: &time_stats); |
96 | time_stddev = stddev_stats(stats: &time_stats); |
97 | printf(" Average %ssynthesis took: %.3f usec (+- %.3f usec)\n" , |
98 | data_mmap ? "data " : "" , time_average, time_stddev); |
99 | |
100 | event_average = avg_stats(stats: &event_stats); |
101 | event_stddev = stddev_stats(stats: &event_stats); |
102 | printf(" Average num. events: %.3f (+- %.3f)\n" , |
103 | event_average, event_stddev); |
104 | |
105 | printf(" Average time per event %.3f usec\n" , |
106 | time_average / event_average); |
107 | return 0; |
108 | } |
109 | |
110 | static int run_single_threaded(void) |
111 | { |
112 | struct perf_session *session; |
113 | struct target target = { |
114 | .pid = "self" , |
115 | }; |
116 | struct perf_thread_map *threads; |
117 | int err; |
118 | |
119 | perf_set_singlethreaded(); |
120 | session = perf_session__new(NULL, NULL); |
121 | if (IS_ERR(ptr: session)) { |
122 | pr_err("Session creation failed.\n" ); |
123 | return PTR_ERR(ptr: session); |
124 | } |
125 | threads = thread_map__new_by_pid(pid: getpid()); |
126 | if (!threads) { |
127 | pr_err("Thread map creation failed.\n" ); |
128 | err = -ENOMEM; |
129 | goto err_out; |
130 | } |
131 | |
132 | puts( |
133 | "Computing performance of single threaded perf event synthesis by\n" |
134 | "synthesizing events on the perf process itself:" ); |
135 | |
136 | err = do_run_single_threaded(session, threads, target: &target, data_mmap: false); |
137 | if (err) |
138 | goto err_out; |
139 | |
140 | err = do_run_single_threaded(session, threads, target: &target, data_mmap: true); |
141 | |
142 | err_out: |
143 | if (threads) |
144 | perf_thread_map__put(threads); |
145 | |
146 | perf_session__delete(session); |
147 | return err; |
148 | } |
149 | |
150 | static int do_run_multi_threaded(struct target *target, |
151 | unsigned int nr_threads_synthesize) |
152 | { |
153 | struct timeval start, end, diff; |
154 | u64 runtime_us; |
155 | unsigned int i; |
156 | double time_average, time_stddev, event_average, event_stddev; |
157 | int err; |
158 | struct stats time_stats, event_stats; |
159 | struct perf_session *session; |
160 | |
161 | init_stats(stats: &time_stats); |
162 | init_stats(stats: &event_stats); |
163 | for (i = 0; i < multi_iterations; i++) { |
164 | session = perf_session__new(NULL, NULL); |
165 | if (IS_ERR(ptr: session)) |
166 | return PTR_ERR(ptr: session); |
167 | |
168 | atomic_set(v: &event_count, i: 0); |
169 | gettimeofday(&start, NULL); |
170 | err = __machine__synthesize_threads(machine: &session->machines.host, |
171 | NULL, |
172 | target, NULL, |
173 | process: process_synthesized_event, |
174 | needs_mmap: true, data_mmap: false, |
175 | nr_threads_synthesize); |
176 | if (err) { |
177 | perf_session__delete(session); |
178 | return err; |
179 | } |
180 | |
181 | gettimeofday(&end, NULL); |
182 | timersub(&end, &start, &diff); |
183 | runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; |
184 | update_stats(stats: &time_stats, val: runtime_us); |
185 | update_stats(stats: &event_stats, val: atomic_read(v: &event_count)); |
186 | perf_session__delete(session); |
187 | } |
188 | |
189 | time_average = avg_stats(stats: &time_stats); |
190 | time_stddev = stddev_stats(stats: &time_stats); |
191 | printf(" Average synthesis took: %.3f usec (+- %.3f usec)\n" , |
192 | time_average, time_stddev); |
193 | |
194 | event_average = avg_stats(stats: &event_stats); |
195 | event_stddev = stddev_stats(stats: &event_stats); |
196 | printf(" Average num. events: %.3f (+- %.3f)\n" , |
197 | event_average, event_stddev); |
198 | |
199 | printf(" Average time per event %.3f usec\n" , |
200 | time_average / event_average); |
201 | return 0; |
202 | } |
203 | |
204 | static int run_multi_threaded(void) |
205 | { |
206 | struct target target = { |
207 | .cpu_list = "0" |
208 | }; |
209 | unsigned int nr_threads_synthesize; |
210 | int err; |
211 | |
212 | if (max_threads == UINT_MAX) |
213 | max_threads = sysconf(_SC_NPROCESSORS_ONLN); |
214 | |
215 | puts( |
216 | "Computing performance of multi threaded perf event synthesis by\n" |
217 | "synthesizing events on CPU 0:" ); |
218 | |
219 | for (nr_threads_synthesize = min_threads; |
220 | nr_threads_synthesize <= max_threads; |
221 | nr_threads_synthesize++) { |
222 | if (nr_threads_synthesize == 1) |
223 | perf_set_singlethreaded(); |
224 | else |
225 | perf_set_multithreaded(); |
226 | |
227 | printf(" Number of synthesis threads: %u\n" , |
228 | nr_threads_synthesize); |
229 | |
230 | err = do_run_multi_threaded(target: &target, nr_threads_synthesize); |
231 | if (err) |
232 | return err; |
233 | } |
234 | perf_set_singlethreaded(); |
235 | return 0; |
236 | } |
237 | |
238 | int bench_synthesize(int argc, const char **argv) |
239 | { |
240 | int err = 0; |
241 | |
242 | argc = parse_options(argc, argv, options, bench_usage, 0); |
243 | if (argc) { |
244 | usage_with_options(bench_usage, options); |
245 | exit(EXIT_FAILURE); |
246 | } |
247 | |
248 | /* |
249 | * If neither single threaded or multi-threaded are specified, default |
250 | * to running just single threaded. |
251 | */ |
252 | if (!run_st && !run_mt) |
253 | run_st = true; |
254 | |
255 | if (run_st) |
256 | err = run_single_threaded(); |
257 | |
258 | if (!err && run_mt) |
259 | err = run_multi_threaded(); |
260 | |
261 | return err; |
262 | } |
263 | |