1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Benchmark find_next_bit and related bit operations. |
4 | * |
5 | * Copyright 2020 Google LLC. |
6 | */ |
7 | #include <stdlib.h> |
8 | #include "bench.h" |
9 | #include "../util/stat.h" |
10 | #include <linux/bitmap.h> |
11 | #include <linux/bitops.h> |
12 | #include <linux/time64.h> |
13 | #include <subcmd/parse-options.h> |
14 | |
15 | static unsigned int outer_iterations = 5; |
16 | static unsigned int inner_iterations = 100000; |
17 | |
18 | static const struct option options[] = { |
19 | OPT_UINTEGER('i', "outer-iterations" , &outer_iterations, |
20 | "Number of outer iterations used" ), |
21 | OPT_UINTEGER('j', "inner-iterations" , &inner_iterations, |
22 | "Number of inner iterations used" ), |
23 | OPT_END() |
24 | }; |
25 | |
26 | static const char *const bench_usage[] = { |
27 | "perf bench mem find_bit <options>" , |
28 | NULL |
29 | }; |
30 | |
31 | static unsigned int accumulator; |
32 | static unsigned int use_of_val; |
33 | |
34 | static noinline void workload(int val) |
35 | { |
36 | use_of_val += val; |
37 | accumulator++; |
38 | } |
39 | |
40 | #if (defined(__i386__) || defined(__x86_64__)) && defined(__GCC_ASM_FLAG_OUTPUTS__) |
41 | static bool asm_test_bit(long nr, const unsigned long *addr) |
42 | { |
43 | bool oldbit; |
44 | |
45 | asm volatile("bt %2,%1" |
46 | : "=@ccc" (oldbit) |
47 | : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory" ); |
48 | |
49 | return oldbit; |
50 | } |
51 | #else |
52 | #define asm_test_bit test_bit |
53 | #endif |
54 | |
55 | static int do_for_each_set_bit(unsigned int num_bits) |
56 | { |
57 | unsigned long *to_test = bitmap_zalloc(num_bits); |
58 | struct timeval start, end, diff; |
59 | u64 runtime_us; |
60 | struct stats fb_time_stats, tb_time_stats; |
61 | double time_average, time_stddev; |
62 | unsigned int bit, i, j; |
63 | unsigned int set_bits, skip; |
64 | |
65 | init_stats(stats: &fb_time_stats); |
66 | init_stats(stats: &tb_time_stats); |
67 | |
68 | for (set_bits = 1; set_bits <= num_bits; set_bits <<= 1) { |
69 | bitmap_zero(dst: to_test, nbits: num_bits); |
70 | skip = num_bits / set_bits; |
71 | for (i = 0; i < num_bits; i += skip) |
72 | __set_bit(i, to_test); |
73 | |
74 | for (i = 0; i < outer_iterations; i++) { |
75 | #ifndef NDEBUG |
76 | unsigned int old = accumulator; |
77 | #endif |
78 | |
79 | gettimeofday(&start, NULL); |
80 | for (j = 0; j < inner_iterations; j++) { |
81 | for_each_set_bit(bit, to_test, num_bits) |
82 | workload(val: bit); |
83 | } |
84 | gettimeofday(&end, NULL); |
85 | assert(old + (inner_iterations * set_bits) == accumulator); |
86 | timersub(&end, &start, &diff); |
87 | runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; |
88 | update_stats(stats: &fb_time_stats, val: runtime_us); |
89 | |
90 | #ifndef NDEBUG |
91 | old = accumulator; |
92 | #endif |
93 | gettimeofday(&start, NULL); |
94 | for (j = 0; j < inner_iterations; j++) { |
95 | for (bit = 0; bit < num_bits; bit++) { |
96 | if (asm_test_bit(nr: bit, addr: to_test)) |
97 | workload(val: bit); |
98 | } |
99 | } |
100 | gettimeofday(&end, NULL); |
101 | assert(old + (inner_iterations * set_bits) == accumulator); |
102 | timersub(&end, &start, &diff); |
103 | runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec; |
104 | update_stats(stats: &tb_time_stats, val: runtime_us); |
105 | } |
106 | |
107 | printf("%d operations %d bits set of %d bits\n" , |
108 | inner_iterations, set_bits, num_bits); |
109 | time_average = avg_stats(stats: &fb_time_stats); |
110 | time_stddev = stddev_stats(stats: &fb_time_stats); |
111 | printf(" Average for_each_set_bit took: %.3f usec (+- %.3f usec)\n" , |
112 | time_average, time_stddev); |
113 | time_average = avg_stats(stats: &tb_time_stats); |
114 | time_stddev = stddev_stats(stats: &tb_time_stats); |
115 | printf(" Average test_bit loop took: %.3f usec (+- %.3f usec)\n" , |
116 | time_average, time_stddev); |
117 | |
118 | if (use_of_val == accumulator) /* Try to avoid compiler tricks. */ |
119 | printf("\n" ); |
120 | } |
121 | bitmap_free(bitmap: to_test); |
122 | return 0; |
123 | } |
124 | |
125 | int bench_mem_find_bit(int argc, const char **argv) |
126 | { |
127 | int err = 0, i; |
128 | |
129 | argc = parse_options(argc, argv, options, bench_usage, 0); |
130 | if (argc) { |
131 | usage_with_options(bench_usage, options); |
132 | exit(EXIT_FAILURE); |
133 | } |
134 | |
135 | for (i = 1; i <= 2048; i <<= 1) |
136 | do_for_each_set_bit(num_bits: i); |
137 | |
138 | return err; |
139 | } |
140 | |