1/*
2 * Strictly speaking, this is not a test. But it can report during test
 * runs so relative performance can be measured.
4 */
5#define _GNU_SOURCE
6#include <assert.h>
7#include <err.h>
8#include <limits.h>
9#include <sched.h>
10#include <stdbool.h>
11#include <stddef.h>
12#include <stdio.h>
13#include <stdlib.h>
14#include <time.h>
15#include <unistd.h>
16#include <linux/filter.h>
17#include <linux/seccomp.h>
18#include <sys/param.h>
19#include <sys/prctl.h>
20#include <sys/syscall.h>
21#include <sys/types.h>
22
23#include "../kselftest.h"
24
/*
 * Time @samples back-to-back getpid() syscalls using clock @clk_id.
 *
 * Prints the finish and start timestamps together with the elapsed time,
 * and returns the total elapsed time in nanoseconds. Aborts via assert()
 * if the clock cannot be read or the syscall returns an unexpected pid.
 */
unsigned long long timing(clockid_t clk_id, unsigned long long samples)
{
	struct timespec start, finish;
	unsigned long long i;
	pid_t pid, ret;
	int rc;

	pid = getpid();

	/*
	 * Calls with side effects are kept outside of assert() so that they
	 * still happen when the file is built with NDEBUG.
	 */
	rc = clock_gettime(clk_id, &start);
	assert(rc == 0);
	for (i = 0; i < samples; i++) {
		ret = syscall(__NR_getpid);
		assert(pid == ret);
	}
	rc = clock_gettime(clk_id, &finish);
	assert(rc == 0);

	/*
	 * The nanosecond delta may be negative; adding it to the unsigned
	 * total wraps around to the correct value as long as finish >= start.
	 */
	i = finish.tv_sec - start.tv_sec;
	i *= 1000000000ULL;
	i += finish.tv_nsec - start.tv_nsec;

	/* Cast explicitly: the width of time_t and tv_nsec varies by ABI. */
	ksft_print_msg("%lld.%09ld - %lld.%09ld = %llu (%.1fs)\n",
		       (long long)finish.tv_sec, (long)finish.tv_nsec,
		       (long long)start.tv_sec, (long)start.tv_nsec,
		       i, (double)i / 1000000000.0);

	return i;
}
50
/*
 * Work out how many getpid() syscalls to benchmark.
 *
 * Issues syscalls in batches of `step` until at least one second of
 * CLOCK_MONOTONIC time has passed, then returns the number of syscalls
 * issued multiplied by the target duration in seconds.
 */
unsigned long long calibrate(void)
{
	struct timespec start, finish;
	unsigned long long i, samples, step = 9973;
	pid_t pid, ret;
	int seconds = 15;
	int rc;

	ksft_print_msg("Calibrating sample size for %d seconds worth of syscalls ...\n", seconds);

	samples = 0;
	pid = getpid();

	/*
	 * Calls with side effects are kept outside of assert() so that they
	 * still happen when the file is built with NDEBUG.
	 */
	rc = clock_gettime(CLOCK_MONOTONIC, &start);
	assert(rc == 0);
	do {
		for (i = 0; i < step; i++) {
			ret = syscall(__NR_getpid);
			assert(pid == ret);
		}
		rc = clock_gettime(CLOCK_MONOTONIC, &finish);
		assert(rc == 0);

		samples += step;

		/* Reuse i to hold the nanoseconds elapsed since start. */
		i = finish.tv_sec - start.tv_sec;
		i *= 1000000000ULL;
		i += finish.tv_nsec - start.tv_nsec;
	} while (i < 1000000000ULL);

	return samples * seconds;
}
78
/*
 * Report whether two measurements are approximately equal.
 *
 * The benchmark is noisy, so the tolerance is 10% of the smaller value,
 * with a floor of 2 units: the larger value must not exceed the smaller
 * one by more than that tolerance.
 */
bool approx(int i_one, int i_two)
{
	const double first = i_one;
	const double second = i_two;
	double first_limit = first * 0.1;
	double second_limit = second * 0.1;

	/* Upper bound each value tolerates: itself plus max(10%, 2). */
	first_limit = first + MAX(first_limit, 2.0);
	second_limit = second + MAX(second_limit, 2.0);

	if (first == second)
		return true;

	/* The larger value is checked against the smaller value's bound. */
	if (first > second)
		return first <= second_limit;

	return second <= first_limit;
}
98
/* Return true when the first value is less than or equal to the second. */
bool le(int i_one, int i_two)
{
	return i_one <= i_two;
}
105
/*
 * Evaluate one benchmark expectation and report it as a kselftest result.
 *
 * @name_one, @name_eval and @name_two label the left operand, the relation
 * and the right operand in the output. @one and @two are the measurements
 * and @eval is the predicate applied to them. When @skip is set the test
 * is reported as skipped and nothing is evaluated.
 *
 * Returns 0 when the expectation held (or was skipped), 1 otherwise.
 */
long compare(const char *name_one, const char *name_eval, const char *name_two,
	     unsigned long long one, bool (*eval)(int, int), unsigned long long two,
	     bool skip)
{
	bool good = false;

	if (skip) {
		ksft_test_result_skip("%s %s %s\n", name_one, name_eval,
				      name_two);
		return 0;
	}

	ksft_print_msg("\t%s %s %s (%lld %s %lld): ", name_one, name_eval, name_two,
		       (long long)one, name_eval, (long long)two);

	/*
	 * Values above INT_MAX cannot be handed to the int-based predicate;
	 * they are reported as a miscalculation and count as a failure.
	 */
	if (one > INT_MAX) {
		ksft_print_msg("Miscalculation! Measurement went negative: %lld\n", (long long)one);
	} else if (two > INT_MAX) {
		ksft_print_msg("Miscalculation! Measurement went negative: %lld\n", (long long)two);
	} else {
		good = eval((int)one, (int)two);
		printf("%s\n", good ? "✔️" : "❌");
	}

	ksft_test_result(good, "%s %s %s\n", name_one, name_eval, name_two);

	return good ? 0 : 1;
}
139
/*
 * Pin to a single CPU so the benchmark won't bounce around the system.
 *
 * Tries each configured CPU from the highest index down and stops at the
 * first one sched_setaffinity() accepts. Failure is not fatal: a warning
 * is printed to stderr and the process keeps its current affinity.
 */
void affinity(void)
{
	long cpu;
	long ncores = sysconf(_SC_NPROCESSORS_CONF);
	cpu_set_t *setp;
	size_t setsz;

	/* sysconf() reports failure as -1; do not size an allocation from it. */
	if (ncores < 1) {
		fprintf(stderr, "Could not determine CPU count -- calibration may not work well\n");
		return;
	}

	setp = CPU_ALLOC(ncores);
	if (!setp) {
		fprintf(stderr, "Could not allocate CPU set -- calibration may not work well\n");
		return;
	}
	setsz = CPU_ALLOC_SIZE(ncores);

	/*
	 * Totally unscientific way to avoid CPUs that might be busier:
	 * choose the highest CPU instead of the lowest.
	 */
	for (cpu = ncores - 1; cpu >= 0; cpu--) {
		CPU_ZERO_S(setsz, setp);
		CPU_SET_S(cpu, setsz, setp);
		if (sched_setaffinity(getpid(), setsz, setp) == -1)
			continue;
		printf("Pinned to CPU %ld of %ld\n", cpu + 1, ncores);
		goto out;
	}
	fprintf(stderr, "Could not set CPU affinity -- calibration may not work well\n");

out:
	CPU_FREE(setp);
}
165
166int main(int argc, char *argv[])
167{
168 struct sock_filter bitmap_filter[] = {
169 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, nr)),
170 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
171 };
172 struct sock_fprog bitmap_prog = {
173 .len = (unsigned short)ARRAY_SIZE(bitmap_filter),
174 .filter = bitmap_filter,
175 };
176 struct sock_filter filter[] = {
177 BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, args[0])),
178 BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
179 };
180 struct sock_fprog prog = {
181 .len = (unsigned short)ARRAY_SIZE(filter),
182 .filter = filter,
183 };
184
185 long ret, bits;
186 unsigned long long samples, calc;
187 unsigned long long native, filter1, filter2, bitmap1, bitmap2;
188 unsigned long long entry, per_filter1, per_filter2;
189 bool skip = false;
190
191 setbuf(stdout, NULL);
192
193 ksft_print_header();
194 ksft_set_plan(plan: 7);
195
196 ksft_print_msg(msg: "Running on:\n");
197 ksft_print_msg(msg: "");
198 system("uname -a");
199
200 ksft_print_msg(msg: "Current BPF sysctl settings:\n");
201 /* Avoid using "sysctl" which may not be installed. */
202 ksft_print_msg(msg: "");
203 system("grep -H . /proc/sys/net/core/bpf_jit_enable");
204 ksft_print_msg(msg: "");
205 system("grep -H . /proc/sys/net/core/bpf_jit_harden");
206
207 affinity();
208
209 if (argc > 1)
210 samples = strtoull(argv[1], NULL, 0);
211 else
212 samples = calibrate();
213
214 ksft_print_msg(msg: "Benchmarking %llu syscalls...\n", samples);
215
216 /* Native call */
217 native = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
218 ksft_print_msg(msg: "getpid native: %llu ns\n", native);
219
220 ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
221 assert(ret == 0);
222
223 /* One filter resulting in a bitmap */
224 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bitmap_prog);
225 assert(ret == 0);
226
227 bitmap1 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
228 ksft_print_msg(msg: "getpid RET_ALLOW 1 filter (bitmap): %llu ns\n", bitmap1);
229
230 /* Second filter resulting in a bitmap */
231 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bitmap_prog);
232 assert(ret == 0);
233
234 bitmap2 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
235 ksft_print_msg(msg: "getpid RET_ALLOW 2 filters (bitmap): %llu ns\n", bitmap2);
236
237 /* Third filter, can no longer be converted to bitmap */
238 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
239 assert(ret == 0);
240
241 filter1 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
242 ksft_print_msg(msg: "getpid RET_ALLOW 3 filters (full): %llu ns\n", filter1);
243
244 /* Fourth filter, can not be converted to bitmap because of filter 3 */
245 ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bitmap_prog);
246 assert(ret == 0);
247
248 filter2 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
249 ksft_print_msg(msg: "getpid RET_ALLOW 4 filters (full): %llu ns\n", filter2);
250
251 /* Estimations */
252#define ESTIMATE(fmt, var, what) do { \
253 var = (what); \
254 ksft_print_msg("Estimated " fmt ": %llu ns\n", var); \
255 if (var > INT_MAX) { \
256 skip = true; \
257 ret |= 1; \
258 } \
259 } while (0)
260
261 ESTIMATE("total seccomp overhead for 1 bitmapped filter", calc,
262 bitmap1 - native);
263 ESTIMATE("total seccomp overhead for 2 bitmapped filters", calc,
264 bitmap2 - native);
265 ESTIMATE("total seccomp overhead for 3 full filters", calc,
266 filter1 - native);
267 ESTIMATE("total seccomp overhead for 4 full filters", calc,
268 filter2 - native);
269 ESTIMATE("seccomp entry overhead", entry,
270 bitmap1 - native - (bitmap2 - bitmap1));
271 ESTIMATE("seccomp per-filter overhead (last 2 diff)", per_filter1,
272 filter2 - filter1);
273 ESTIMATE("seccomp per-filter overhead (filters / 4)", per_filter2,
274 (filter2 - native - entry) / 4);
275
276 ksft_print_msg(msg: "Expectations:\n");
277 ret |= compare(name_one: "native", name_eval: "≤", name_two: "1 bitmap", one: native, eval: le, two: bitmap1,
278 skip);
279 bits = compare(name_one: "native", name_eval: "≤", name_two: "1 filter", one: native, eval: le, two: filter1,
280 skip);
281 if (bits)
282 skip = true;
283
284 ret |= compare(name_one: "per-filter (last 2 diff)", name_eval: "≈", name_two: "per-filter (filters / 4)",
285 one: per_filter1, eval: approx, two: per_filter2, skip);
286
287 bits = compare(name_one: "1 bitmapped", name_eval: "≈", name_two: "2 bitmapped",
288 one: bitmap1 - native, eval: approx, two: bitmap2 - native, skip);
289 if (bits) {
290 ksft_print_msg(msg: "Skipping constant action bitmap expectations: they appear unsupported.\n");
291 skip = true;
292 }
293
294 ret |= compare(name_one: "entry", name_eval: "≈", name_two: "1 bitmapped", one: entry, eval: approx,
295 two: bitmap1 - native, skip);
296 ret |= compare(name_one: "entry", name_eval: "≈", name_two: "2 bitmapped", one: entry, eval: approx,
297 two: bitmap2 - native, skip);
298 ret |= compare(name_one: "native + entry + (per filter * 4)", name_eval: "≈", name_two: "4 filters total",
299 one: entry + (per_filter1 * 4) + native, eval: approx, two: filter2,
300 skip);
301
302 if (ret)
303 ksft_print_msg(msg: "Saw unexpected benchmark result. Try running again with more samples?\n");
304
305 ksft_finished();
306}
307

source code of linux/tools/testing/selftests/seccomp/seccomp_benchmark.c