1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Copyright (C) 2013 Davidlohr Bueso <davidlohr@hp.com> |
4 | * |
5 | * futex-wake: Block a bunch of threads on a futex and wake'em up, N at a time. |
6 | * |
7 | * This program is particularly useful to measure the latency of nthread wakeups |
8 | * in non-error situations: all waiters are queued and all wake calls wakeup |
9 | * one or more tasks, and thus the waitqueue is never empty. |
10 | */ |
11 | |
/* For the CPU_*() macros */
13 | #include <string.h> |
14 | #include <pthread.h> |
15 | |
16 | #include <signal.h> |
17 | #include "../util/mutex.h" |
18 | #include "../util/stat.h" |
19 | #include <subcmd/parse-options.h> |
20 | #include <linux/compiler.h> |
21 | #include <linux/kernel.h> |
22 | #include <linux/time64.h> |
23 | #include <errno.h> |
24 | #include <perf/cpumap.h> |
25 | #include "bench.h" |
26 | #include "futex.h" |
27 | |
28 | #include <err.h> |
29 | #include <stdlib.h> |
30 | #include <sys/time.h> |
31 | #include <sys/mman.h> |
32 | |
33 | /* all threads will block on the same futex */ |
34 | static u_int32_t futex1 = 0; |
35 | |
36 | static pthread_t *worker; |
37 | static bool done = false; |
38 | static struct mutex thread_lock; |
39 | static struct cond thread_parent, thread_worker; |
40 | static struct stats waketime_stats, wakeup_stats; |
41 | static unsigned int threads_starting; |
42 | static int futex_flag = 0; |
43 | |
44 | static struct bench_futex_parameters params = { |
45 | /* |
46 | * How many wakeups to do at a time. |
47 | * Default to 1 in order to make the kernel work more. |
48 | */ |
49 | .nwakes = 1, |
50 | }; |
51 | |
52 | static const struct option options[] = { |
53 | OPT_UINTEGER('t', "threads" , ¶ms.nthreads, "Specify amount of threads" ), |
54 | OPT_UINTEGER('w', "nwakes" , ¶ms.nwakes, "Specify amount of threads to wake at once" ), |
55 | OPT_BOOLEAN( 's', "silent" , ¶ms.silent, "Silent mode: do not display data/details" ), |
56 | OPT_BOOLEAN( 'S', "shared" , ¶ms.fshared, "Use shared futexes instead of private ones" ), |
57 | OPT_BOOLEAN( 'm', "mlockall" , ¶ms.mlockall, "Lock all current and future memory" ), |
58 | |
59 | OPT_END() |
60 | }; |
61 | |
/*
 * NULL-terminated usage strings handed to parse_options() and
 * usage_with_options().
 */
static const char * const bench_futex_wake_usage[] = {
	"perf bench futex wake <options>",
	NULL,
};
66 | |
67 | static void *workerfn(void *arg __maybe_unused) |
68 | { |
69 | mutex_lock(mtx: &thread_lock); |
70 | threads_starting--; |
71 | if (!threads_starting) |
72 | cond_signal(cnd: &thread_parent); |
73 | cond_wait(cnd: &thread_worker, mtx: &thread_lock); |
74 | mutex_unlock(mtx: &thread_lock); |
75 | |
76 | while (1) { |
77 | if (futex_wait(uaddr: &futex1, val: 0, NULL, opflags: futex_flag) != EINTR) |
78 | break; |
79 | } |
80 | |
81 | pthread_exit(NULL); |
82 | return NULL; |
83 | } |
84 | |
85 | static void print_summary(void) |
86 | { |
87 | double waketime_avg = avg_stats(stats: &waketime_stats); |
88 | double waketime_stddev = stddev_stats(stats: &waketime_stats); |
89 | unsigned int wakeup_avg = avg_stats(stats: &wakeup_stats); |
90 | |
91 | printf("Wokeup %d of %d threads in %.4f ms (+-%.2f%%)\n" , |
92 | wakeup_avg, |
93 | params.nthreads, |
94 | waketime_avg / USEC_PER_MSEC, |
95 | rel_stddev_stats(stddev: waketime_stddev, avg: waketime_avg)); |
96 | } |
97 | |
98 | static void block_threads(pthread_t *w, struct perf_cpu_map *cpu) |
99 | { |
100 | cpu_set_t *cpuset; |
101 | unsigned int i; |
102 | size_t size; |
103 | int nrcpus = perf_cpu_map__nr(cpu); |
104 | threads_starting = params.nthreads; |
105 | |
106 | cpuset = CPU_ALLOC(nrcpus); |
107 | BUG_ON(!cpuset); |
108 | size = CPU_ALLOC_SIZE(nrcpus); |
109 | |
110 | /* create and block all threads */ |
111 | for (i = 0; i < params.nthreads; i++) { |
112 | pthread_attr_t thread_attr; |
113 | |
114 | pthread_attr_init(&thread_attr); |
115 | CPU_ZERO_S(size, cpuset); |
116 | CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, size, cpuset); |
117 | |
118 | if (pthread_attr_setaffinity_np(&thread_attr, size, cpuset)) { |
119 | CPU_FREE(cpuset); |
120 | err(EXIT_FAILURE, "pthread_attr_setaffinity_np" ); |
121 | } |
122 | |
123 | if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) { |
124 | CPU_FREE(cpuset); |
125 | err(EXIT_FAILURE, "pthread_create" ); |
126 | } |
127 | pthread_attr_destroy(&thread_attr); |
128 | } |
129 | CPU_FREE(cpuset); |
130 | } |
131 | |
132 | static void toggle_done(int sig __maybe_unused, |
133 | siginfo_t *info __maybe_unused, |
134 | void *uc __maybe_unused) |
135 | { |
136 | done = true; |
137 | } |
138 | |
139 | int bench_futex_wake(int argc, const char **argv) |
140 | { |
141 | int ret = 0; |
142 | unsigned int i, j; |
143 | struct sigaction act; |
144 | struct perf_cpu_map *cpu; |
145 | |
146 | argc = parse_options(argc, argv, options, bench_futex_wake_usage, 0); |
147 | if (argc) { |
148 | usage_with_options(bench_futex_wake_usage, options); |
149 | exit(EXIT_FAILURE); |
150 | } |
151 | |
152 | cpu = perf_cpu_map__new_online_cpus(); |
153 | if (!cpu) |
154 | err(EXIT_FAILURE, "calloc" ); |
155 | |
156 | memset(&act, 0, sizeof(act)); |
157 | sigfillset(&act.sa_mask); |
158 | act.sa_sigaction = toggle_done; |
159 | sigaction(SIGINT, &act, NULL); |
160 | |
161 | if (params.mlockall) { |
162 | if (mlockall(MCL_CURRENT | MCL_FUTURE)) |
163 | err(EXIT_FAILURE, "mlockall" ); |
164 | } |
165 | |
166 | if (!params.nthreads) |
167 | params.nthreads = perf_cpu_map__nr(cpu); |
168 | |
169 | worker = calloc(params.nthreads, sizeof(*worker)); |
170 | if (!worker) |
171 | err(EXIT_FAILURE, "calloc" ); |
172 | |
173 | if (!params.fshared) |
174 | futex_flag = FUTEX_PRIVATE_FLAG; |
175 | |
176 | printf("Run summary [PID %d]: blocking on %d threads (at [%s] futex %p), " |
177 | "waking up %d at a time.\n\n" , |
178 | getpid(), params.nthreads, params.fshared ? "shared" :"private" , |
179 | &futex1, params.nwakes); |
180 | |
181 | init_stats(stats: &wakeup_stats); |
182 | init_stats(stats: &waketime_stats); |
183 | mutex_init(mtx: &thread_lock); |
184 | cond_init(cnd: &thread_parent); |
185 | cond_init(cnd: &thread_worker); |
186 | |
187 | for (j = 0; j < bench_repeat && !done; j++) { |
188 | unsigned int nwoken = 0; |
189 | struct timeval start, end, runtime; |
190 | |
191 | /* create, launch & block all threads */ |
192 | block_threads(worker, cpu); |
193 | |
194 | /* make sure all threads are already blocked */ |
195 | mutex_lock(mtx: &thread_lock); |
196 | while (threads_starting) |
197 | cond_wait(cnd: &thread_parent, mtx: &thread_lock); |
198 | cond_broadcast(cnd: &thread_worker); |
199 | mutex_unlock(mtx: &thread_lock); |
200 | |
201 | usleep(100000); |
202 | |
203 | /* Ok, all threads are patiently blocked, start waking folks up */ |
204 | gettimeofday(&start, NULL); |
205 | while (nwoken != params.nthreads) |
206 | nwoken += futex_wake(uaddr: &futex1, |
207 | nr_wake: params.nwakes, opflags: futex_flag); |
208 | gettimeofday(&end, NULL); |
209 | timersub(&end, &start, &runtime); |
210 | |
211 | update_stats(stats: &wakeup_stats, val: nwoken); |
212 | update_stats(stats: &waketime_stats, val: runtime.tv_usec); |
213 | |
214 | if (!params.silent) { |
215 | printf("[Run %d]: Wokeup %d of %d threads in %.4f ms\n" , |
216 | j + 1, nwoken, params.nthreads, |
217 | runtime.tv_usec / (double)USEC_PER_MSEC); |
218 | } |
219 | |
220 | for (i = 0; i < params.nthreads; i++) { |
221 | ret = pthread_join(worker[i], NULL); |
222 | if (ret) |
223 | err(EXIT_FAILURE, "pthread_join" ); |
224 | } |
225 | |
226 | } |
227 | |
228 | /* cleanup & report results */ |
229 | cond_destroy(cnd: &thread_parent); |
230 | cond_destroy(cnd: &thread_worker); |
231 | mutex_destroy(mtx: &thread_lock); |
232 | |
233 | print_summary(); |
234 | |
235 | free(worker); |
236 | perf_cpu_map__put(cpu); |
237 | return ret; |
238 | } |
239 | |