1 | // SPDX-License-Identifier: GPL-2.0 |
2 | |
3 | #include <subcmd/parse-options.h> |
4 | #include <linux/hw_breakpoint.h> |
5 | #include <linux/perf_event.h> |
6 | #include <linux/time64.h> |
7 | #include <sys/syscall.h> |
8 | #include <sys/ioctl.h> |
9 | #include <sys/time.h> |
10 | #include <pthread.h> |
11 | #include <stddef.h> |
12 | #include <stdlib.h> |
13 | #include <unistd.h> |
14 | #include <stdio.h> |
15 | #include <errno.h> |
16 | #include "bench.h" |
17 | #include "futex.h" |
18 | |
/*
 * Tunables for "perf bench breakpoint thread".  Each field is bound to a
 * command-line switch in thread_options[] and starts out as 1.
 */
struct {
	unsigned int nbreakpoints;	/* -b: breakpoints opened before timing starts */
	unsigned int nparallel;		/* -p: concurrent breakpoint_thread() workers */
	unsigned int nthreads;		/* -t: threads created and joined per batch */
} thread_params = {
	.nbreakpoints	= 1,
	.nparallel	= 1,
	.nthreads	= 1,
};
28 | |
29 | static const struct option thread_options[] = { |
30 | OPT_UINTEGER('b', "breakpoints" , &thread_params.nbreakpoints, |
31 | "Specify amount of breakpoints" ), |
32 | OPT_UINTEGER('p', "parallelism" , &thread_params.nparallel, "Specify amount of parallelism" ), |
33 | OPT_UINTEGER('t', "threads" , &thread_params.nthreads, "Specify amount of threads" ), |
34 | OPT_END() |
35 | }; |
36 | |
/* NULL-terminated usage synopsis handed to parse_options()/usage_with_options(). */
static const char * const thread_usage[] = {
	"perf bench breakpoint thread <options>",
	NULL
};
41 | |
/* One breakpoint: the perf event fd plus the byte whose address it watches. */
struct breakpoint {
	int fd;		/* result of breakpoint_setup() for &watched */
	char watched;	/* only its address matters; it is the breakpoint target */
};
46 | |
/*
 * Open an inheritable, user-space-only read/write breakpoint of length 1
 * on @addr.
 *
 * The event is opened with pid 0 and cpu -1, no group leader and no flags
 * (per perf_event_open(2): the calling thread, on any CPU).
 *
 * Returns the new event file descriptor, or -errno if the syscall failed.
 */
static int breakpoint_setup(void *addr)
{
	struct perf_event_attr attr = {
		.type		= PERF_TYPE_BREAKPOINT,
		.size		= sizeof(attr),
		.inherit	= 1,
		.exclude_kernel	= 1,
		.exclude_hv	= 1,
		.bp_addr	= (unsigned long)addr,
		.bp_type	= HW_BREAKPOINT_RW,
		.bp_len		= HW_BREAKPOINT_LEN_1,
	};
	int event_fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);

	return event_fd < 0 ? -errno : event_fd;
}
67 | |
/*
 * Thread body for a "passive" thread: waits on the flag that @arg points
 * to (an unsigned int) until it becomes non-zero, then returns NULL.
 *
 * The flag is re-read after every return from futex_wait(), so the thread
 * only leaves once the flag is observed as non-zero.
 */
static void *passive_thread(void *arg)
{
	unsigned int *done = (unsigned int *)arg;

	/* 0 is passed as the value the flag is expected to hold while waiting. */
	while (!__atomic_load_n(done, __ATOMIC_RELAXED))
		futex_wait(done, 0, NULL, 0);
	return NULL;
}
76 | |
/*
 * Thread body for an "active" thread: busy-polls the unsigned int flag
 * that @arg points to and returns NULL as soon as it reads non-zero.
 */
static void *active_thread(void *arg)
{
	unsigned int *stop_flag = arg;

	for (;;) {
		if (__atomic_load_n(stop_flag, __ATOMIC_RELAXED))
			break;
	}
	return NULL;
}
84 | |
85 | static void *breakpoint_thread(void *arg) |
86 | { |
87 | unsigned int i, done; |
88 | int *repeat = (int *)arg; |
89 | pthread_t *threads; |
90 | |
91 | threads = calloc(thread_params.nthreads, sizeof(threads[0])); |
92 | if (!threads) |
93 | exit((perror("calloc" ), EXIT_FAILURE)); |
94 | |
95 | while (__atomic_fetch_sub(repeat, 1, __ATOMIC_RELAXED) > 0) { |
96 | done = 0; |
97 | for (i = 0; i < thread_params.nthreads; i++) { |
98 | if (pthread_create(&threads[i], NULL, passive_thread, &done)) |
99 | exit((perror("pthread_create" ), EXIT_FAILURE)); |
100 | } |
101 | __atomic_store_n(&done, 1, __ATOMIC_RELAXED); |
102 | futex_wake(uaddr: &done, nr_wake: thread_params.nthreads, opflags: 0); |
103 | for (i = 0; i < thread_params.nthreads; i++) |
104 | pthread_join(threads[i], NULL); |
105 | } |
106 | free(threads); |
107 | return NULL; |
108 | } |
109 | |
110 | // The benchmark creates nbreakpoints inheritable breakpoints, |
111 | // then starts nparallel threads which create and join bench_repeat batches of nthreads threads. |
112 | int bench_breakpoint_thread(int argc, const char **argv) |
113 | { |
114 | unsigned int i, result_usec; |
115 | int repeat = bench_repeat; |
116 | struct breakpoint *breakpoints; |
117 | pthread_t *parallel; |
118 | struct timeval start, stop, diff; |
119 | |
120 | if (parse_options(argc, argv, thread_options, thread_usage, 0)) { |
121 | usage_with_options(thread_usage, thread_options); |
122 | exit(EXIT_FAILURE); |
123 | } |
124 | breakpoints = calloc(thread_params.nbreakpoints, sizeof(breakpoints[0])); |
125 | parallel = calloc(thread_params.nparallel, sizeof(parallel[0])); |
126 | if (!breakpoints || !parallel) |
127 | exit((perror("calloc" ), EXIT_FAILURE)); |
128 | |
129 | for (i = 0; i < thread_params.nbreakpoints; i++) { |
130 | breakpoints[i].fd = breakpoint_setup(addr: &breakpoints[i].watched); |
131 | |
132 | if (breakpoints[i].fd < 0) { |
133 | if (breakpoints[i].fd == -ENODEV) { |
134 | printf("Skipping perf bench breakpoint thread: No hardware support\n" ); |
135 | return 0; |
136 | } |
137 | exit((perror("perf_event_open" ), EXIT_FAILURE)); |
138 | } |
139 | } |
140 | gettimeofday(&start, NULL); |
141 | for (i = 0; i < thread_params.nparallel; i++) { |
142 | if (pthread_create(¶llel[i], NULL, breakpoint_thread, &repeat)) |
143 | exit((perror("pthread_create" ), EXIT_FAILURE)); |
144 | } |
145 | for (i = 0; i < thread_params.nparallel; i++) |
146 | pthread_join(parallel[i], NULL); |
147 | gettimeofday(&stop, NULL); |
148 | timersub(&stop, &start, &diff); |
149 | for (i = 0; i < thread_params.nbreakpoints; i++) |
150 | close(breakpoints[i].fd); |
151 | free(parallel); |
152 | free(breakpoints); |
153 | switch (bench_format) { |
154 | case BENCH_FORMAT_DEFAULT: |
155 | printf("# Created/joined %d threads with %d breakpoints and %d parallelism\n" , |
156 | bench_repeat, thread_params.nbreakpoints, thread_params.nparallel); |
157 | printf(" %14s: %lu.%03lu [sec]\n\n" , "Total time" , |
158 | (long)diff.tv_sec, (long)(diff.tv_usec / USEC_PER_MSEC)); |
159 | result_usec = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; |
160 | printf(" %14lf usecs/op\n" , |
161 | (double)result_usec / bench_repeat / thread_params.nthreads); |
162 | printf(" %14lf usecs/op/cpu\n" , |
163 | (double)result_usec / bench_repeat / |
164 | thread_params.nthreads * thread_params.nparallel); |
165 | break; |
166 | case BENCH_FORMAT_SIMPLE: |
167 | printf("%lu.%03lu\n" , (long)diff.tv_sec, (long)(diff.tv_usec / USEC_PER_MSEC)); |
168 | break; |
169 | default: |
170 | fprintf(stderr, "Unknown format: %d\n" , bench_format); |
171 | exit(EXIT_FAILURE); |
172 | } |
173 | return 0; |
174 | } |
175 | |
/*
 * Tunables for "perf bench breakpoint enable".  Each field is bound to a
 * command-line switch in enable_options[] and starts out as 0.
 */
struct {
	unsigned int npassive;	/* -p: threads running passive_thread() */
	unsigned int nactive;	/* -a: threads running active_thread() */
} enable_params = {
	.npassive	= 0,
	.nactive	= 0,
};
183 | |
184 | static const struct option enable_options[] = { |
185 | OPT_UINTEGER('p', "passive" , &enable_params.npassive, "Specify amount of passive threads" ), |
186 | OPT_UINTEGER('a', "active" , &enable_params.nactive, "Specify amount of active threads" ), |
187 | OPT_END() |
188 | }; |
189 | |
/* NULL-terminated usage synopsis handed to parse_options()/usage_with_options(). */
static const char * const enable_usage[] = {
	"perf bench breakpoint enable <options>",
	NULL
};
194 | |
195 | // The benchmark creates an inheritable breakpoint, |
196 | // then starts npassive threads that block and nactive threads that actively spin |
197 | // and then disables and enables the breakpoint bench_repeat times. |
198 | int bench_breakpoint_enable(int argc, const char **argv) |
199 | { |
200 | unsigned int i, nthreads, result_usec, done = 0; |
201 | char watched; |
202 | int fd; |
203 | pthread_t *threads; |
204 | struct timeval start, stop, diff; |
205 | |
206 | if (parse_options(argc, argv, enable_options, enable_usage, 0)) { |
207 | usage_with_options(enable_usage, enable_options); |
208 | exit(EXIT_FAILURE); |
209 | } |
210 | fd = breakpoint_setup(addr: &watched); |
211 | |
212 | if (fd < 0) { |
213 | if (fd == -ENODEV) { |
214 | printf("Skipping perf bench breakpoint enable: No hardware support\n" ); |
215 | return 0; |
216 | } |
217 | exit((perror("perf_event_open" ), EXIT_FAILURE)); |
218 | } |
219 | nthreads = enable_params.npassive + enable_params.nactive; |
220 | threads = calloc(nthreads, sizeof(threads[0])); |
221 | if (!threads) |
222 | exit((perror("calloc" ), EXIT_FAILURE)); |
223 | |
224 | for (i = 0; i < nthreads; i++) { |
225 | if (pthread_create(&threads[i], NULL, |
226 | i < enable_params.npassive ? passive_thread : active_thread, &done)) |
227 | exit((perror("pthread_create" ), EXIT_FAILURE)); |
228 | } |
229 | usleep(10000); // let the threads block |
230 | gettimeofday(&start, NULL); |
231 | for (i = 0; i < bench_repeat; i++) { |
232 | if (ioctl(fd, PERF_EVENT_IOC_DISABLE, 0)) |
233 | exit((perror("ioctl(PERF_EVENT_IOC_DISABLE)" ), EXIT_FAILURE)); |
234 | if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0)) |
235 | exit((perror("ioctl(PERF_EVENT_IOC_ENABLE)" ), EXIT_FAILURE)); |
236 | } |
237 | gettimeofday(&stop, NULL); |
238 | timersub(&stop, &start, &diff); |
239 | __atomic_store_n(&done, 1, __ATOMIC_RELAXED); |
240 | futex_wake(uaddr: &done, nr_wake: enable_params.npassive, opflags: 0); |
241 | for (i = 0; i < nthreads; i++) |
242 | pthread_join(threads[i], NULL); |
243 | free(threads); |
244 | close(fd); |
245 | switch (bench_format) { |
246 | case BENCH_FORMAT_DEFAULT: |
247 | printf("# Enabled/disabled breakpoint %d time with %d passive and %d active threads\n" , |
248 | bench_repeat, enable_params.npassive, enable_params.nactive); |
249 | printf(" %14s: %lu.%03lu [sec]\n\n" , "Total time" , |
250 | (long)diff.tv_sec, (long)(diff.tv_usec / USEC_PER_MSEC)); |
251 | result_usec = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; |
252 | printf(" %14lf usecs/op\n" , (double)result_usec / bench_repeat); |
253 | break; |
254 | case BENCH_FORMAT_SIMPLE: |
255 | printf("%lu.%03lu\n" , (long)diff.tv_sec, (long)(diff.tv_usec / USEC_PER_MSEC)); |
256 | break; |
257 | default: |
258 | fprintf(stderr, "Unknown format: %d\n" , bench_format); |
259 | exit(EXIT_FAILURE); |
260 | } |
261 | return 0; |
262 | } |
263 | |