1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Copyright (C) 2018 Davidlohr Bueso. |
4 | * |
5 | * Benchmark the various operations allowed for epoll_ctl(2). |
6 | * The idea is to concurrently stress a single epoll instance |
7 | */ |
8 | #ifdef HAVE_EVENTFD_SUPPORT |
9 | /* For the CPU_*() macros */ |
10 | #include <string.h> |
11 | #include <pthread.h> |
12 | |
13 | #include <errno.h> |
14 | #include <inttypes.h> |
15 | #include <signal.h> |
16 | #include <stdlib.h> |
17 | #include <unistd.h> |
18 | #include <linux/compiler.h> |
19 | #include <linux/kernel.h> |
20 | #include <sys/time.h> |
21 | #include <sys/resource.h> |
22 | #include <sys/epoll.h> |
23 | #include <sys/eventfd.h> |
24 | #include <perf/cpumap.h> |
25 | |
26 | #include "../util/mutex.h" |
27 | #include "../util/stat.h" |
28 | #include <subcmd/parse-options.h> |
29 | #include "bench.h" |
30 | |
31 | #include <err.h> |
32 | |
/* printf() wrapper that stays silent unless verbose mode (-v) is enabled. */
#define printinfo(fmt, arg...)			\
	do {					\
		if (__verbose)			\
			printf(fmt, ## arg);	\
	} while (0)
35 | |
/* Number of worker threads; 0 means "one per online CPU" (resolved in bench_epoll_ctl()). */
static unsigned int nthreads;
/* Benchmark run time, in seconds. */
static unsigned int nsecs = 8;
/* Set by toggle_done() to make the workers leave their loop. */
static bool done;
/* Enables printinfo() output. */
static bool __verbose;
/* Workers perform random operations on random fds instead of add/mod/del sweeps. */
static bool randomize;
39 | |
40 | /* |
41 | * epoll related shared variables. |
42 | */ |
43 | |
44 | /* Maximum nesting depth of epoll sets; nest_epollfd() clamps 'nested' to this value */ |
45 | #define EPOLL_MAXNESTS 4 |
46 | |
/*
 * epoll_ctl(2) operations exercised by the benchmark. The values are
 * used as indices into the per-operation counter and statistics arrays.
 */
enum {
	OP_EPOLL_ADD = 0,
	OP_EPOLL_MOD = 1,
	OP_EPOLL_DEL = 2,
	EPOLL_NR_OPS = 3,	/* number of operations, not an operation itself */
};
53 | |
/* The single, main epoll instance every worker operates on. */
static int epollfd;
/* Extra epoll instances chained below epollfd; allocated by nest_epollfd(). */
static int *epollfdp;
/* When set, worker threads are not pinned to CPUs. */
static bool noaffinity;
/* Requested nesting depth of the epoll hierarchy (0: no nesting). */
static unsigned int nested;

/* Number of file descriptors each thread owns and operates on. */
static unsigned int nfds = 64;
61 | |
/* Start-up handshake between the parent and the workers, plus the per-operation statistics. */
62 | static struct mutex thread_lock; /* held while touching threads_starting and while waiting on the conditions below */ |
63 | static unsigned int threads_starting; /* workers that have not checked in yet */ |
64 | static struct stats all_stats[EPOLL_NR_OPS]; /* per-operation counts, fed with every worker's totals */ |
65 | static struct cond thread_parent, thread_worker; /* the parent waits on the former, the workers on the latter */ |
66 | |
/* Per-worker state; bench_epoll_ctl() allocates one entry per thread. */
67 | struct worker { |
68 | int tid; /* worker index, assigned in do_threads() */ |
69 | pthread_t thread; /* thread handle, joined by bench_epoll_ctl() */ |
70 | unsigned long ops[EPOLL_NR_OPS]; /* successful epoll_ctl() calls, per operation */ |
71 | int *fdmap; /* nfds eventfd descriptors owned by this worker */ |
72 | }; |
73 | |
/* Command-line options; each entry stores its value in the file-scope variable it points at. */
74 | static const struct option options[] = { |
75 | OPT_UINTEGER('t', "threads" , &nthreads, "Specify amount of threads" ), |
76 | OPT_UINTEGER('r', "runtime" , &nsecs, "Specify runtime (in seconds)" ), |
77 | OPT_UINTEGER('f', "nfds" , &nfds, "Specify amount of file descriptors to monitor for each thread" ), |
78 | OPT_BOOLEAN( 'n', "noaffinity" , &noaffinity, "Disables CPU affinity" ), |
79 | OPT_UINTEGER( 'N', "nested" , &nested, "Nesting level epoll hierarchy (default is 0, no nesting)" ), |
80 | OPT_BOOLEAN( 'R', "randomize" , &randomize, "Perform random operations on random fds" ), |
81 | OPT_BOOLEAN( 'v', "verbose" , &__verbose, "Verbose mode" ), |
82 | OPT_END() |
83 | }; |
84 | |
/* Usage text handed to parse_options()/usage_with_options(); NULL-terminated. */
static const char * const bench_epoll_ctl_usage[] = {
	[0] = "perf bench epoll ctl <options>",
	[1] = NULL,
};
89 | |
90 | static void toggle_done(int sig __maybe_unused, |
91 | siginfo_t *info __maybe_unused, |
92 | void *uc __maybe_unused) |
93 | { |
94 | /* inform all threads that we're done for the day */ |
95 | done = true; |
96 | gettimeofday(&bench__end, NULL); |
97 | timersub(&bench__end, &bench__start, &bench__runtime); |
98 | } |
99 | |
100 | static void nest_epollfd(void) |
101 | { |
102 | unsigned int i; |
103 | struct epoll_event ev; |
104 | |
105 | if (nested > EPOLL_MAXNESTS) |
106 | nested = EPOLL_MAXNESTS; |
107 | printinfo("Nesting level(s): %d\n" , nested); |
108 | |
109 | epollfdp = calloc(nested, sizeof(int)); |
110 | if (!epollfdp) |
111 | err(EXIT_FAILURE, "calloc" ); |
112 | |
113 | for (i = 0; i < nested; i++) { |
114 | epollfdp[i] = epoll_create(1); |
115 | if (epollfd < 0) |
116 | err(EXIT_FAILURE, "epoll_create" ); |
117 | } |
118 | |
119 | ev.events = EPOLLHUP; /* anything */ |
120 | ev.data.u64 = i; /* any number */ |
121 | |
122 | for (i = nested - 1; i; i--) { |
123 | if (epoll_ctl(epollfdp[i - 1], EPOLL_CTL_ADD, |
124 | epollfdp[i], &ev) < 0) |
125 | err(EXIT_FAILURE, "epoll_ctl" ); |
126 | } |
127 | |
128 | if (epoll_ctl(epollfd, EPOLL_CTL_ADD, *epollfdp, &ev) < 0) |
129 | err(EXIT_FAILURE, "epoll_ctl" ); |
130 | } |
131 | |
132 | static inline void do_epoll_op(struct worker *w, int op, int fd) |
133 | { |
134 | int error; |
135 | struct epoll_event ev; |
136 | |
137 | ev.events = EPOLLIN; |
138 | ev.data.u64 = fd; |
139 | |
140 | switch (op) { |
141 | case OP_EPOLL_ADD: |
142 | error = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev); |
143 | break; |
144 | case OP_EPOLL_MOD: |
145 | ev.events = EPOLLOUT; |
146 | error = epoll_ctl(epollfd, EPOLL_CTL_MOD, fd, &ev); |
147 | break; |
148 | case OP_EPOLL_DEL: |
149 | error = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, NULL); |
150 | break; |
151 | default: |
152 | error = 1; |
153 | break; |
154 | } |
155 | |
156 | if (!error) |
157 | w->ops[op]++; |
158 | } |
159 | |
160 | static inline void do_random_epoll_op(struct worker *w) |
161 | { |
162 | unsigned long rnd1 = random(), rnd2 = random(); |
163 | int op, fd; |
164 | |
165 | fd = w->fdmap[rnd1 % nfds]; |
166 | op = rnd2 % EPOLL_NR_OPS; |
167 | |
168 | do_epoll_op(w, op, fd); |
169 | } |
170 | |
171 | static void *workerfn(void *arg) |
172 | { |
173 | unsigned int i; |
174 | struct worker *w = (struct worker *) arg; |
175 | struct timespec ts = { .tv_sec = 0, |
176 | .tv_nsec = 250 }; |
177 | |
178 | mutex_lock(&thread_lock); |
179 | threads_starting--; |
180 | if (!threads_starting) |
181 | cond_signal(&thread_parent); |
182 | cond_wait(&thread_worker, &thread_lock); |
183 | mutex_unlock(&thread_lock); |
184 | |
185 | /* Let 'em loose */ |
186 | do { |
187 | /* random */ |
188 | if (randomize) { |
189 | do_random_epoll_op(w); |
190 | } else { |
191 | for (i = 0; i < nfds; i++) { |
192 | do_epoll_op(w, OP_EPOLL_ADD, w->fdmap[i]); |
193 | do_epoll_op(w, OP_EPOLL_MOD, w->fdmap[i]); |
194 | do_epoll_op(w, OP_EPOLL_DEL, w->fdmap[i]); |
195 | } |
196 | } |
197 | |
198 | nanosleep(&ts, NULL); |
199 | } while (!done); |
200 | |
201 | return NULL; |
202 | } |
203 | |
204 | static void init_fdmaps(struct worker *w, int pct) |
205 | { |
206 | unsigned int i; |
207 | int inc; |
208 | struct epoll_event ev; |
209 | |
210 | if (!pct) |
211 | return; |
212 | |
213 | inc = 100/pct; |
214 | for (i = 0; i < nfds; i+=inc) { |
215 | ev.data.fd = w->fdmap[i]; |
216 | ev.events = EPOLLIN; |
217 | |
218 | if (epoll_ctl(epollfd, EPOLL_CTL_ADD, w->fdmap[i], &ev) < 0) |
219 | err(EXIT_FAILURE, "epoll_ct" ); |
220 | } |
221 | } |
222 | |
223 | static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) |
224 | { |
225 | pthread_attr_t thread_attr, *attrp = NULL; |
226 | cpu_set_t *cpuset; |
227 | unsigned int i, j; |
228 | int ret = 0; |
229 | int nrcpus; |
230 | size_t size; |
231 | |
232 | if (!noaffinity) |
233 | pthread_attr_init(&thread_attr); |
234 | |
235 | nrcpus = perf_cpu_map__nr(cpu); |
236 | cpuset = CPU_ALLOC(nrcpus); |
237 | BUG_ON(!cpuset); |
238 | size = CPU_ALLOC_SIZE(nrcpus); |
239 | |
240 | for (i = 0; i < nthreads; i++) { |
241 | struct worker *w = &worker[i]; |
242 | |
243 | w->tid = i; |
244 | w->fdmap = calloc(nfds, sizeof(int)); |
245 | if (!w->fdmap) |
246 | return 1; |
247 | |
248 | for (j = 0; j < nfds; j++) { |
249 | w->fdmap[j] = eventfd(0, EFD_NONBLOCK); |
250 | if (w->fdmap[j] < 0) |
251 | err(EXIT_FAILURE, "eventfd" ); |
252 | } |
253 | |
254 | /* |
255 | * Lets add 50% of the fdmap to the epoll instance, and |
256 | * do it before any threads are started; otherwise there is |
257 | * an initial bias of the call failing (mod and del ops). |
258 | */ |
259 | if (randomize) |
260 | init_fdmaps(w, 50); |
261 | |
262 | if (!noaffinity) { |
263 | CPU_ZERO_S(size, cpuset); |
264 | CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, |
265 | size, cpuset); |
266 | |
267 | ret = pthread_attr_setaffinity_np(&thread_attr, size, cpuset); |
268 | if (ret) { |
269 | CPU_FREE(cpuset); |
270 | err(EXIT_FAILURE, "pthread_attr_setaffinity_np" ); |
271 | } |
272 | |
273 | attrp = &thread_attr; |
274 | } |
275 | |
276 | ret = pthread_create(&w->thread, attrp, workerfn, |
277 | (void *)(struct worker *) w); |
278 | if (ret) { |
279 | CPU_FREE(cpuset); |
280 | err(EXIT_FAILURE, "pthread_create" ); |
281 | } |
282 | } |
283 | |
284 | CPU_FREE(cpuset); |
285 | if (!noaffinity) |
286 | pthread_attr_destroy(&thread_attr); |
287 | |
288 | return ret; |
289 | } |
290 | |
291 | static void print_summary(void) |
292 | { |
293 | int i; |
294 | unsigned long avg[EPOLL_NR_OPS]; |
295 | double stddev[EPOLL_NR_OPS]; |
296 | |
297 | for (i = 0; i < EPOLL_NR_OPS; i++) { |
298 | avg[i] = avg_stats(&all_stats[i]); |
299 | stddev[i] = stddev_stats(&all_stats[i]); |
300 | } |
301 | |
302 | printf("\nAveraged %ld ADD operations (+- %.2f%%)\n" , |
303 | avg[OP_EPOLL_ADD], rel_stddev_stats(stddev[OP_EPOLL_ADD], |
304 | avg[OP_EPOLL_ADD])); |
305 | printf("Averaged %ld MOD operations (+- %.2f%%)\n" , |
306 | avg[OP_EPOLL_MOD], rel_stddev_stats(stddev[OP_EPOLL_MOD], |
307 | avg[OP_EPOLL_MOD])); |
308 | printf("Averaged %ld DEL operations (+- %.2f%%)\n" , |
309 | avg[OP_EPOLL_DEL], rel_stddev_stats(stddev[OP_EPOLL_DEL], |
310 | avg[OP_EPOLL_DEL])); |
311 | } |
312 | |
313 | int bench_epoll_ctl(int argc, const char **argv) |
314 | { |
315 | int j, ret = 0; |
316 | struct sigaction act; |
317 | struct worker *worker = NULL; |
318 | struct perf_cpu_map *cpu; |
319 | struct rlimit rl, prevrl; |
320 | unsigned int i; |
321 | |
322 | argc = parse_options(argc, argv, options, bench_epoll_ctl_usage, 0); |
323 | if (argc) { |
324 | usage_with_options(bench_epoll_ctl_usage, options); |
325 | exit(EXIT_FAILURE); |
326 | } |
327 | |
328 | memset(&act, 0, sizeof(act)); |
329 | sigfillset(&act.sa_mask); |
330 | act.sa_sigaction = toggle_done; |
331 | sigaction(SIGINT, &act, NULL); |
332 | |
333 | cpu = perf_cpu_map__new_online_cpus(); |
334 | if (!cpu) |
335 | goto errmem; |
336 | |
337 | /* a single, main epoll instance */ |
338 | epollfd = epoll_create(1); |
339 | if (epollfd < 0) |
340 | err(EXIT_FAILURE, "epoll_create" ); |
341 | |
342 | /* |
343 | * Deal with nested epolls, if any. |
344 | */ |
345 | if (nested) |
346 | nest_epollfd(); |
347 | |
348 | /* default to the number of CPUs */ |
349 | if (!nthreads) |
350 | nthreads = perf_cpu_map__nr(cpu); |
351 | |
352 | worker = calloc(nthreads, sizeof(*worker)); |
353 | if (!worker) |
354 | goto errmem; |
355 | |
356 | if (getrlimit(RLIMIT_NOFILE, &prevrl)) |
357 | err(EXIT_FAILURE, "getrlimit" ); |
358 | rl.rlim_cur = rl.rlim_max = nfds * nthreads * 2 + 50; |
359 | printinfo("Setting RLIMIT_NOFILE rlimit from %" PRIu64 " to: %" PRIu64 "\n" , |
360 | (uint64_t)prevrl.rlim_max, (uint64_t)rl.rlim_max); |
361 | if (setrlimit(RLIMIT_NOFILE, &rl) < 0) |
362 | err(EXIT_FAILURE, "setrlimit" ); |
363 | |
364 | printf("Run summary [PID %d]: %d threads doing epoll_ctl ops " |
365 | "%d file-descriptors for %d secs.\n\n" , |
366 | getpid(), nthreads, nfds, nsecs); |
367 | |
368 | for (i = 0; i < EPOLL_NR_OPS; i++) |
369 | init_stats(&all_stats[i]); |
370 | |
371 | mutex_init(&thread_lock); |
372 | cond_init(&thread_parent); |
373 | cond_init(&thread_worker); |
374 | |
375 | threads_starting = nthreads; |
376 | |
377 | gettimeofday(&bench__start, NULL); |
378 | |
379 | do_threads(worker, cpu); |
380 | |
381 | mutex_lock(&thread_lock); |
382 | while (threads_starting) |
383 | cond_wait(&thread_parent, &thread_lock); |
384 | cond_broadcast(&thread_worker); |
385 | mutex_unlock(&thread_lock); |
386 | |
387 | sleep(nsecs); |
388 | toggle_done(0, NULL, NULL); |
389 | printinfo("main thread: toggling done\n" ); |
390 | |
391 | for (i = 0; i < nthreads; i++) { |
392 | ret = pthread_join(worker[i].thread, NULL); |
393 | if (ret) |
394 | err(EXIT_FAILURE, "pthread_join" ); |
395 | } |
396 | |
397 | /* cleanup & report results */ |
398 | cond_destroy(&thread_parent); |
399 | cond_destroy(&thread_worker); |
400 | mutex_destroy(&thread_lock); |
401 | |
402 | for (i = 0; i < nthreads; i++) { |
403 | unsigned long t[EPOLL_NR_OPS]; |
404 | |
405 | for (j = 0; j < EPOLL_NR_OPS; j++) { |
406 | t[j] = worker[i].ops[j]; |
407 | update_stats(&all_stats[j], t[j]); |
408 | } |
409 | |
410 | if (nfds == 1) |
411 | printf("[thread %2d] fdmap: %p [ add: %04ld; mod: %04ld; del: %04lds ops ]\n" , |
412 | worker[i].tid, &worker[i].fdmap[0], |
413 | t[OP_EPOLL_ADD], t[OP_EPOLL_MOD], t[OP_EPOLL_DEL]); |
414 | else |
415 | printf("[thread %2d] fdmap: %p ... %p [ add: %04ld ops; mod: %04ld ops; del: %04ld ops ]\n" , |
416 | worker[i].tid, &worker[i].fdmap[0], |
417 | &worker[i].fdmap[nfds-1], |
418 | t[OP_EPOLL_ADD], t[OP_EPOLL_MOD], t[OP_EPOLL_DEL]); |
419 | } |
420 | |
421 | print_summary(); |
422 | |
423 | close(epollfd); |
424 | perf_cpu_map__put(cpu); |
425 | for (i = 0; i < nthreads; i++) |
426 | free(worker[i].fdmap); |
427 | |
428 | free(worker); |
429 | return ret; |
430 | errmem: |
431 | err(EXIT_FAILURE, "calloc" ); |
432 | } |
433 | #endif // HAVE_EVENTFD_SUPPORT |
434 | |