1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * Context switch microbenchmark. |
4 | * |
5 | * Copyright (C) 2015 Anton Blanchard <anton@au.ibm.com>, IBM |
6 | */ |
7 | |
8 | #define _GNU_SOURCE |
9 | #include <errno.h> |
10 | #include <sched.h> |
11 | #include <string.h> |
12 | #include <stdio.h> |
13 | #include <unistd.h> |
14 | #include <stdlib.h> |
15 | #include <getopt.h> |
16 | #include <signal.h> |
17 | #include <assert.h> |
18 | #include <pthread.h> |
19 | #include <limits.h> |
20 | #include <sys/time.h> |
21 | #include <sys/syscall.h> |
22 | #include <sys/sysinfo.h> |
23 | #include <sys/types.h> |
24 | #include <sys/shm.h> |
25 | #include <linux/futex.h> |
26 | #ifdef __powerpc__ |
27 | #include <altivec.h> |
28 | #endif |
29 | #include "utils.h" |
30 | |
31 | static unsigned int timeout = 30; |
32 | |
33 | static int touch_vdso; |
34 | struct timeval tv; |
35 | |
36 | static int touch_fp = 1; |
37 | double fp; |
38 | |
39 | static int touch_vector = 1; |
40 | vector int a, b, c; |
41 | |
#ifdef __powerpc__
/* Cleared by --no-altivec; touch() calls altivec_touch_fn() while set. */
static int touch_altivec = 1;

/*
 * Add the global vectors a and b into c, compiled with the "no-vsx"
 * target attribute (presumably so the add is emitted as an Altivec
 * rather than a VSX instruction).
 *
 * Note: LTO (Link Time Optimisation) doesn't play well with this function
 * attribute. Be very careful enabling LTO for this test.
 */
static void __attribute__((__target__("no-vsx"))) altivec_touch_fn(void)
{
	c = a + b;
}
#endif
54 | |
55 | static void touch(void) |
56 | { |
57 | if (touch_vdso) |
58 | gettimeofday(&tv, NULL); |
59 | |
60 | if (touch_fp) |
61 | fp += 0.1; |
62 | |
63 | #ifdef __powerpc__ |
64 | if (touch_altivec) |
65 | altivec_touch_fn(); |
66 | #endif |
67 | |
68 | if (touch_vector) |
69 | c = a + b; |
70 | |
71 | asm volatile("# %0 %1 %2" : : "r" (&tv), "r" (&fp), "r" (&c)); |
72 | } |
73 | |
/*
 * Create a thread running fn(arg) whose affinity is restricted to @cpu.
 *
 * The thread is never joined. Any failure prints a diagnostic and
 * terminates the program with exit status 1.
 */
static void start_thread_on(void *(*fn)(void *), void *arg, unsigned long cpu)
{
	int rc;
	pthread_t tid;
	cpu_set_t cpuset;
	pthread_attr_t attr;

	/*
	 * A fixed-size cpu_set_t only holds CPU_SETSIZE bits; CPU_SET() on a
	 * larger index would leave the mask empty, so reject it up front.
	 */
	if (cpu >= CPU_SETSIZE) {
		fprintf(stderr, "CPU %lu is out of range\n", cpu);
		exit(1);
	}

	CPU_ZERO(&cpuset);
	CPU_SET(cpu, &cpuset);

	rc = pthread_attr_init(&attr);
	if (rc) {
		errno = rc;
		perror("pthread_attr_init");
		exit(1);
	}

	rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
	if (rc) {
		errno = rc;
		perror("pthread_attr_setaffinity_np");
		exit(1);
	}

	rc = pthread_create(&tid, &attr, fn, arg);
	if (rc) {
		errno = rc;
		perror("pthread_create");
		exit(1);
	}

	/* The attribute object is no longer needed once the thread exists. */
	pthread_attr_destroy(&attr);
}
105 | |
/*
 * Fork a child that pins itself to @cpu, runs fn(arg) and then exits
 * with status 0. The parent returns right after the fork.
 *
 * A fork failure in the parent, or an allocation/affinity failure in the
 * child, prints a diagnostic and exits that process with status 1.
 */
static void start_process_on(void *(*fn)(void *), void *arg, unsigned long cpu)
{
	pid_t pid;
	int nprocs;
	size_t ncpus;
	size_t size;
	cpu_set_t *cpuset;

	/*
	 * Flush first so output still buffered in the parent is not copied
	 * into the child and emitted a second time when the child exits.
	 */
	fflush(stdout);

	pid = fork();
	if (pid == -1) {
		perror("fork");
		exit(1);
	}

	if (pid)
		return;

	/*
	 * Size the set so that it can always hold bit @cpu: the processor
	 * count alone may be smaller than the requested CPU number, in which
	 * case CPU_SET_S() would silently set nothing.
	 */
	nprocs = get_nprocs();
	ncpus = nprocs > 0 ? (size_t)nprocs : 1;
	if (cpu != ULONG_MAX && cpu >= ncpus)
		ncpus = cpu + 1;

	size = CPU_ALLOC_SIZE(ncpus);
	cpuset = CPU_ALLOC(ncpus);
	if (!cpuset) {
		perror("malloc");
		exit(1);
	}
	CPU_ZERO_S(size, cpuset);
	CPU_SET_S(cpu, size, cpuset);

	if (sched_setaffinity(0, size, cpuset)) {
		perror("sched_setaffinity");
		CPU_FREE(cpuset);
		exit(1);
	}

	CPU_FREE(cpuset);
	fn(arg);

	exit(0);
}
142 | |
143 | static unsigned long iterations; |
144 | static unsigned long iterations_prev; |
145 | |
146 | static void sigalrm_handler(int junk) |
147 | { |
148 | unsigned long i = iterations; |
149 | |
150 | printf("%ld\n" , i - iterations_prev); |
151 | iterations_prev = i; |
152 | |
153 | if (--timeout == 0) |
154 | kill(0, SIGUSR1); |
155 | |
156 | alarm(1); |
157 | } |
158 | |
/*
 * SIGUSR1 handler: end the receiving process with a success status.
 * sigalrm_handler() raises SIGUSR1 on the process group once the timeout
 * has run out.
 */
static void sigusr1_handler(int junk)
{
	(void)junk;

	exit(EXIT_SUCCESS);
}
163 | |
/* The three entry points that make up one benchmark variant. */
struct actions {
	/* Called once from main() with the two CPU numbers, before any start. */
	void (*setup)(int, int);
	/* Body started on the first CPU. */
	void *(*thread1)(void *);
	/* Body started on the second CPU. */
	void *(*thread2)(void *);
};
169 | |
/* Indices into the descriptor pair filled in by pipe(). */
#define READ 0
#define WRITE 1

/* pipe_fd1: written by pipe_thread2, read by pipe_thread1. */
static int pipe_fd1[2];
/* pipe_fd2: written by pipe_thread1, read by pipe_thread2. */
static int pipe_fd2[2];

/*
 * Create both pipes used by the pipe test. The CPU arguments are unused.
 * On failure the reason is reported and the program exits with status 1.
 */
static void pipe_setup(int cpu1, int cpu2)
{
	(void)cpu1;
	(void)cpu2;

	if (pipe(pipe_fd1) || pipe(pipe_fd2)) {
		perror("pipe");
		exit(1);
	}
}
181 | |
182 | static void *pipe_thread1(void *arg) |
183 | { |
184 | signal(SIGALRM, sigalrm_handler); |
185 | alarm(1); |
186 | |
187 | while (1) { |
188 | assert(read(pipe_fd1[READ], &c, 1) == 1); |
189 | touch(); |
190 | |
191 | assert(write(pipe_fd2[WRITE], &c, 1) == 1); |
192 | touch(); |
193 | |
194 | iterations += 2; |
195 | } |
196 | |
197 | return NULL; |
198 | } |
199 | |
200 | static void *pipe_thread2(void *arg) |
201 | { |
202 | while (1) { |
203 | assert(write(pipe_fd1[WRITE], &c, 1) == 1); |
204 | touch(); |
205 | |
206 | assert(read(pipe_fd2[READ], &c, 1) == 1); |
207 | touch(); |
208 | } |
209 | |
210 | return NULL; |
211 | } |
212 | |
213 | static struct actions pipe_actions = { |
214 | .setup = pipe_setup, |
215 | .thread1 = pipe_thread1, |
216 | .thread2 = pipe_thread2, |
217 | }; |
218 | |
/*
 * Setup hook for the yield test: it only checks that both CPU numbers
 * are equal, and otherwise prints an error and exits with status 1.
 */
static void yield_setup(int cpu1, int cpu2)
{
	if (cpu1 == cpu2)
		return;

	fprintf(stderr, "Both threads must be on the same CPU for yield test\n");
	exit(1);
}
226 | |
227 | static void *yield_thread1(void *arg) |
228 | { |
229 | signal(SIGALRM, sigalrm_handler); |
230 | alarm(1); |
231 | |
232 | while (1) { |
233 | sched_yield(); |
234 | touch(); |
235 | |
236 | iterations += 2; |
237 | } |
238 | |
239 | return NULL; |
240 | } |
241 | |
/*
 * Peer side of the yield test: loops forever doing sched_yield()
 * followed by touch(). It keeps no count of its own.
 */
static void *yield_thread2(void *arg)
{
	(void)arg;

	for (;;) {
		sched_yield();
		touch();
	}

	return NULL;
}
251 | |
252 | static struct actions yield_actions = { |
253 | .setup = yield_setup, |
254 | .thread1 = yield_thread1, |
255 | .thread2 = yield_thread2, |
256 | }; |
257 | |
/*
 * Thin wrapper that issues the raw futex system call with the given
 * arguments and hands back whatever syscall() returned.
 */
static long sys_futex(void *addr1, int op, int val1, struct timespec *timeout,
		      void *addr2, int val3)
{
	long ret;

	ret = syscall(SYS_futex, addr1, op, val1, timeout, addr2, val3);

	return ret;
}
263 | |
/*
 * Sequentially consistent strong compare-and-swap on *p.
 *
 * If *p equals @expected it is replaced by @desired. The return value is
 * the value *p held when the operation ran, i.e. @expected on success.
 */
static unsigned long cmpxchg(unsigned long *p, unsigned long expected,
			     unsigned long desired)
{
	unsigned long observed = expected;

	if (__atomic_compare_exchange_n(p, &observed, desired, 0,
					__ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
		return expected;

	/* On failure the builtin stored the current value in observed. */
	return observed;
}
273 | |
/*
 * Sequentially consistent atomic exchange: store @val into *p and return
 * the value that was there before.
 */
static unsigned long xchg(unsigned long *p, unsigned long val)
{
	unsigned long previous;

	__atomic_exchange(p, &val, &previous, __ATOMIC_SEQ_CST);

	return previous;
}
278 | |
/* Set by --process: run the two sides as processes rather than threads. */
static int processes;

/*
 * Acquire the futex-based lock at @m. Always returns 0.
 *
 * The lock word takes the values 0, 1 and 2 (this looks like the usual
 * unlocked / locked / locked-with-waiters scheme). The fast path swaps
 * 0 -> 1. Otherwise the word is forced to 2 and the caller waits on the
 * futex until an exchange reports that the word had been 0.
 *
 * NOTE(review): the futex system call operates on a 32-bit word, while
 * the lock word here is an unsigned long -- confirm this behaves as
 * intended on 64-bit big-endian targets.
 */
static int mutex_lock(unsigned long *m)
{
	unsigned long old;
	int op = FUTEX_WAIT;

	/* Without --process the lock word is not shared between processes. */
	if (!processes)
		op |= FUTEX_PRIVATE_FLAG;

	old = cmpxchg(m, 0, 1);
	if (!old)
		return 0;

	if (old == 1)
		old = xchg(m, 2);

	while (old) {
		/* Sleeps only while the word still reads 2. */
		sys_futex(m, op, 2, NULL, NULL, 0);
		old = xchg(m, 2);
	}

	return 0;
}
302 | |
303 | static int mutex_unlock(unsigned long *m) |
304 | { |
305 | int flags = FUTEX_WAKE; |
306 | if (!processes) |
307 | flags |= FUTEX_PRIVATE_FLAG; |
308 | |
309 | if (*m == 2) |
310 | *m = 0; |
311 | else if (xchg(m, 0) == 1) |
312 | return 0; |
313 | |
314 | sys_futex(addr1: m, op: flags, val1: 1, NULL, NULL, val3: 0); |
315 | |
316 | return 0; |
317 | } |
318 | |
319 | static unsigned long *m1, *m2; |
320 | |
321 | static void futex_setup(int cpu1, int cpu2) |
322 | { |
323 | if (!processes) { |
324 | static unsigned long _m1, _m2; |
325 | m1 = &_m1; |
326 | m2 = &_m2; |
327 | } else { |
328 | int shmid; |
329 | void *shmaddr; |
330 | |
331 | shmid = shmget(IPC_PRIVATE, getpagesize(), SHM_R | SHM_W); |
332 | if (shmid < 0) { |
333 | perror("shmget" ); |
334 | exit(1); |
335 | } |
336 | |
337 | shmaddr = shmat(shmid, NULL, 0); |
338 | if (shmaddr == (char *)-1) { |
339 | perror("shmat" ); |
340 | shmctl(shmid, IPC_RMID, NULL); |
341 | exit(1); |
342 | } |
343 | |
344 | shmctl(shmid, IPC_RMID, NULL); |
345 | |
346 | m1 = shmaddr; |
347 | m2 = shmaddr + sizeof(*m1); |
348 | } |
349 | |
350 | *m1 = 0; |
351 | *m2 = 0; |
352 | |
353 | mutex_lock(m1); |
354 | mutex_lock(m2); |
355 | } |
356 | |
357 | static void *futex_thread1(void *arg) |
358 | { |
359 | signal(SIGALRM, sigalrm_handler); |
360 | alarm(1); |
361 | |
362 | while (1) { |
363 | mutex_lock(m2); |
364 | mutex_unlock(lock: m1); |
365 | |
366 | iterations += 2; |
367 | } |
368 | |
369 | return NULL; |
370 | } |
371 | |
372 | static void *futex_thread2(void *arg) |
373 | { |
374 | while (1) { |
375 | mutex_unlock(lock: m2); |
376 | mutex_lock(m1); |
377 | } |
378 | |
379 | return NULL; |
380 | } |
381 | |
382 | static struct actions futex_actions = { |
383 | .setup = futex_setup, |
384 | .thread1 = futex_thread1, |
385 | .thread2 = futex_thread2, |
386 | }; |
387 | |
388 | static struct option options[] = { |
389 | { "test" , required_argument, 0, 't' }, |
390 | { "process" , no_argument, &processes, 1 }, |
391 | { "timeout" , required_argument, 0, 's' }, |
392 | { "vdso" , no_argument, &touch_vdso, 1 }, |
393 | { "no-fp" , no_argument, &touch_fp, 0 }, |
394 | #ifdef __powerpc__ |
395 | { "no-altivec" , no_argument, &touch_altivec, 0 }, |
396 | #endif |
397 | { "no-vector" , no_argument, &touch_vector, 0 }, |
398 | { 0, }, |
399 | }; |
400 | |
/* Write the command line summary to stderr. */
static void usage(void)
{
	fputs("Usage: context_switch2 <options> CPU1 CPU2\n\n"
	      "\t\t--test=X\tpipe, futex or yield (default)\n"
	      "\t\t--process\tUse processes (default threads)\n"
	      "\t\t--timeout=X\tDuration in seconds to run (default 30)\n"
	      "\t\t--vdso\t\ttouch VDSO\n"
	      "\t\t--no-fp\t\tDon't touch FP\n", stderr);
#ifdef __powerpc__
	fputs("\t\t--no-altivec\tDon't touch altivec\n", stderr);
#endif
	fputs("\t\t--no-vector\tDon't touch vector\n", stderr);
}
414 | |
415 | int main(int argc, char *argv[]) |
416 | { |
417 | signed char c; |
418 | struct actions *actions = &yield_actions; |
419 | int cpu1; |
420 | int cpu2; |
421 | static void (*start_fn)(void *(*fn)(void *), void *arg, unsigned long cpu); |
422 | |
423 | while (1) { |
424 | int option_index = 0; |
425 | |
426 | c = getopt_long(argc, argv, "" , options, &option_index); |
427 | |
428 | if (c == -1) |
429 | break; |
430 | |
431 | switch (c) { |
432 | case 0: |
433 | if (options[option_index].flag != 0) |
434 | break; |
435 | |
436 | usage(); |
437 | exit(1); |
438 | break; |
439 | |
440 | case 't': |
441 | if (!strcmp(optarg, "pipe" )) { |
442 | actions = &pipe_actions; |
443 | } else if (!strcmp(optarg, "yield" )) { |
444 | actions = &yield_actions; |
445 | } else if (!strcmp(optarg, "futex" )) { |
446 | actions = &futex_actions; |
447 | } else { |
448 | usage(); |
449 | exit(1); |
450 | } |
451 | break; |
452 | |
453 | case 's': |
454 | timeout = atoi(optarg); |
455 | break; |
456 | |
457 | default: |
458 | usage(); |
459 | exit(1); |
460 | } |
461 | } |
462 | |
463 | if (processes) |
464 | start_fn = start_process_on; |
465 | else |
466 | start_fn = start_thread_on; |
467 | |
468 | if (((argc - optind) != 2)) { |
469 | cpu1 = cpu2 = pick_online_cpu(); |
470 | } else { |
471 | cpu1 = atoi(argv[optind++]); |
472 | cpu2 = atoi(argv[optind++]); |
473 | } |
474 | |
475 | printf("Using %s with " , processes ? "processes" : "threads" ); |
476 | |
477 | if (actions == &pipe_actions) |
478 | printf("pipe" ); |
479 | else if (actions == &yield_actions) |
480 | printf("yield" ); |
481 | else |
482 | printf("futex" ); |
483 | |
484 | if (!have_hwcap(PPC_FEATURE_HAS_ALTIVEC)) |
485 | touch_altivec = 0; |
486 | |
487 | if (!have_hwcap(PPC_FEATURE_HAS_VSX)) |
488 | touch_vector = 0; |
489 | |
490 | printf(" on cpus %d/%d touching FP:%s altivec:%s vector:%s vdso:%s\n" , |
491 | cpu1, cpu2, touch_fp ? "yes" : "no" , touch_altivec ? "yes" : "no" , |
492 | touch_vector ? "yes" : "no" , touch_vdso ? "yes" : "no" ); |
493 | |
494 | /* Create a new process group so we can signal everyone for exit */ |
495 | setpgid(getpid(), getpid()); |
496 | |
497 | signal(SIGUSR1, sigusr1_handler); |
498 | |
499 | actions->setup(cpu1, cpu2); |
500 | |
501 | start_fn(actions->thread1, NULL, cpu1); |
502 | start_fn(actions->thread2, NULL, cpu2); |
503 | |
504 | while (1) |
505 | sleep(3600); |
506 | |
507 | return 0; |
508 | } |
509 | |