1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Copyright (c) 2016 Facebook |
4 | */ |
5 | #define _GNU_SOURCE |
6 | #include <linux/types.h> |
7 | #include <stdio.h> |
8 | #include <unistd.h> |
9 | #include <linux/bpf.h> |
10 | #include <errno.h> |
11 | #include <string.h> |
12 | #include <assert.h> |
13 | #include <sched.h> |
14 | #include <sys/wait.h> |
15 | #include <sys/stat.h> |
16 | #include <fcntl.h> |
17 | #include <stdlib.h> |
18 | #include <time.h> |
19 | |
20 | #include <bpf/bpf.h> |
21 | #include "bpf_util.h" |
22 | |
/* Clamp helper used when sizing the repeated-lookup key ranges below. */
#define min(a, b) ((a) < (b) ? (a) : (b))
/* Fallback offsetof for toolchains whose headers do not provide it. */
#ifndef offsetof
# define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER)
#endif
/*
 * Kernel-style container_of: map a pointer to an embedded member back to
 * its containing structure (GCC/Clang statement expression + typeof).
 */
#define container_of(ptr, type, member) ({ \
	const typeof( ((type *)0)->member ) *__mptr = (ptr); \
	(type *)( (char *)__mptr - offsetof(type,member) );})
30 | |
static int nr_cpus;			/* number of possible CPUs, set in main() */
static unsigned long long *dist_keys;	/* key trace loaded from the dist file */
static unsigned int dist_key_counts;	/* number of entries in dist_keys */
34 | |
/* Minimal doubly-linked circular list, modeled on the kernel's list_head. */
struct list_head {
	struct list_head *next, *prev;
};
38 | |
39 | static inline void INIT_LIST_HEAD(struct list_head *list) |
40 | { |
41 | list->next = list; |
42 | list->prev = list; |
43 | } |
44 | |
45 | static inline void __list_add(struct list_head *new, |
46 | struct list_head *prev, |
47 | struct list_head *next) |
48 | { |
49 | next->prev = new; |
50 | new->next = next; |
51 | new->prev = prev; |
52 | prev->next = new; |
53 | } |
54 | |
55 | static inline void list_add(struct list_head *new, struct list_head *head) |
56 | { |
57 | __list_add(new, prev: head, next: head->next); |
58 | } |
59 | |
60 | static inline void __list_del(struct list_head *prev, struct list_head *next) |
61 | { |
62 | next->prev = prev; |
63 | prev->next = next; |
64 | } |
65 | |
66 | static inline void __list_del_entry(struct list_head *entry) |
67 | { |
68 | __list_del(prev: entry->prev, next: entry->next); |
69 | } |
70 | |
/* Move @list to the front of the list headed at @head (mark most-recent). */
static inline void list_move(struct list_head *list, struct list_head *head)
{
	__list_del_entry(list);
	list_add(list, head);
}
76 | |
/* Convert a list_head pointer back to its containing entry. */
#define list_entry(ptr, type, member) \
	container_of(ptr, type, member)

/* Last (least-recently-used) entry of the list headed at @ptr. */
#define list_last_entry(ptr, type, member) \
	list_entry((ptr)->prev, type, member)
82 | |
/* One slot of the perfect-LRU reference; key mirrors the BPF map key. */
struct pfect_lru_node {
	struct list_head list;
	unsigned long long key;
};
87 | |
/*
 * Userspace "perfect LRU" reference implementation: nodes come from the
 * preallocated free_nodes[] pool and are threaded onto `list` in recency
 * order, while map_fd holds a hash map from key to node pointer.
 */
struct pfect_lru {
	struct list_head list;		/* MRU at head, LRU at tail */
	struct pfect_lru_node *free_nodes;	/* preallocated node pool */
	unsigned int cur_size;		/* nodes taken from the pool so far */
	unsigned int lru_size;		/* capacity of the LRU */
	unsigned int nr_unique;		/* distinct keys ever inserted */
	unsigned int nr_misses;		/* lookups that missed */
	unsigned int total;		/* total lookups performed */
	int map_fd;			/* BPF hash map: key -> node ptr */
};
98 | |
99 | static void pfect_lru_init(struct pfect_lru *lru, unsigned int lru_size, |
100 | unsigned int nr_possible_elems) |
101 | { |
102 | lru->map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, |
103 | sizeof(unsigned long long), |
104 | sizeof(struct pfect_lru_node *), |
105 | nr_possible_elems, NULL); |
106 | assert(lru->map_fd != -1); |
107 | |
108 | lru->free_nodes = malloc(size: lru_size * sizeof(struct pfect_lru_node)); |
109 | assert(lru->free_nodes); |
110 | |
111 | INIT_LIST_HEAD(list: &lru->list); |
112 | lru->cur_size = 0; |
113 | lru->lru_size = lru_size; |
114 | lru->nr_unique = lru->nr_misses = lru->total = 0; |
115 | } |
116 | |
117 | static void pfect_lru_destroy(struct pfect_lru *lru) |
118 | { |
119 | close(fd: lru->map_fd); |
120 | free(ptr: lru->free_nodes); |
121 | } |
122 | |
123 | static int pfect_lru_lookup_or_insert(struct pfect_lru *lru, |
124 | unsigned long long key) |
125 | { |
126 | struct pfect_lru_node *node = NULL; |
127 | int seen = 0; |
128 | |
129 | lru->total++; |
130 | if (!bpf_map_lookup_elem(lru->map_fd, &key, &node)) { |
131 | if (node) { |
132 | list_move(list: &node->list, head: &lru->list); |
133 | return 1; |
134 | } |
135 | seen = 1; |
136 | } |
137 | |
138 | if (lru->cur_size < lru->lru_size) { |
139 | node = &lru->free_nodes[lru->cur_size++]; |
140 | INIT_LIST_HEAD(list: &node->list); |
141 | } else { |
142 | struct pfect_lru_node *null_node = NULL; |
143 | |
144 | node = list_last_entry(&lru->list, |
145 | struct pfect_lru_node, |
146 | list); |
147 | bpf_map_update_elem(lru->map_fd, &node->key, &null_node, BPF_EXIST); |
148 | } |
149 | |
150 | node->key = key; |
151 | list_move(list: &node->list, head: &lru->list); |
152 | |
153 | lru->nr_misses++; |
154 | if (seen) { |
155 | assert(!bpf_map_update_elem(lru->map_fd, &key, &node, BPF_EXIST)); |
156 | } else { |
157 | lru->nr_unique++; |
158 | assert(!bpf_map_update_elem(lru->map_fd, &key, &node, BPF_NOEXIST)); |
159 | } |
160 | |
161 | return seen; |
162 | } |
163 | |
/*
 * Load newline-separated decimal keys from @dist_file into a freshly
 * malloc()ed array (*keys, ownership passes to the caller) and return
 * the number of keys parsed.  Aborts via assert() on I/O failure.
 */
static unsigned int read_keys(const char *dist_file,
			      unsigned long long **keys)
{
	struct stat fst;
	unsigned long long *retkeys;
	unsigned int counts = 0;
	int dist_fd;
	char *b, *l;
	int i;

	dist_fd = open(dist_file, 0);
	assert(dist_fd != -1);

	assert(fstat(dist_fd, &fst) == 0);
	/* +1 so the buffer can be NUL-terminated for strtok() below */
	b = malloc(fst.st_size + 1);
	assert(b);

	assert(read(dist_fd, b, fst.st_size) == fst.st_size);
	close(dist_fd);
	b[fst.st_size] = '\0';
	for (i = 0; i < fst.st_size; i++) {
		if (b[i] == '\n')
			counts++;
	}
	counts++; /* in case the last line has no \n */

	retkeys = malloc(counts * sizeof(unsigned long long));
	assert(retkeys);

	counts = 0;
	for (l = strtok(b, "\n"); l; l = strtok(NULL, "\n"))
		retkeys[counts++] = strtoull(l, NULL, 10);
	free(b);

	*keys = retkeys;

	return counts;
}
201 | |
202 | static int create_map(int map_type, int map_flags, unsigned int size) |
203 | { |
204 | LIBBPF_OPTS(bpf_map_create_opts, opts, |
205 | .map_flags = map_flags, |
206 | ); |
207 | int map_fd; |
208 | |
209 | map_fd = bpf_map_create(map_type, NULL, sizeof(unsigned long long), |
210 | sizeof(unsigned long long), size, &opts); |
211 | |
212 | if (map_fd == -1) |
213 | perror(s: "bpf_create_map" ); |
214 | |
215 | return map_fd; |
216 | } |
217 | |
218 | static int sched_next_online(int pid, int next_to_try) |
219 | { |
220 | cpu_set_t cpuset; |
221 | |
222 | if (next_to_try == nr_cpus) |
223 | return -1; |
224 | |
225 | while (next_to_try < nr_cpus) { |
226 | CPU_ZERO(&cpuset); |
227 | CPU_SET(next_to_try++, &cpuset); |
228 | if (!sched_setaffinity(pid: pid, cpusetsize: sizeof(cpuset), cpuset: &cpuset)) |
229 | break; |
230 | } |
231 | |
232 | return next_to_try; |
233 | } |
234 | |
/*
 * Fork @tasks child processes, pin each to its own CPU, run fn(i, data)
 * in each, and assert that every child exits with status 0.
 */
static void run_parallel(unsigned int tasks, void (*fn)(int i, void *data),
			 void *data)
{
	int next_sched_cpu = 0;
	pid_t pid[tasks];
	int i;

	for (i = 0; i < tasks; i++) {
		pid[i] = fork();
		if (pid[i] == 0) {
			next_sched_cpu = sched_next_online(0, next_sched_cpu);
			fn(i, data);
			exit(0);
		} else if (pid[i] == -1) {
			printf("couldn't spawn #%d process\n", i);
			exit(1);
		}
		/* It is mostly redundant and just allows the parent
		 * process to update next_sched_cpu for the next child
		 * process
		 */
		next_sched_cpu = sched_next_online(pid[i], next_sched_cpu);
	}
	for (i = 0; i < tasks; i++) {
		int status;

		assert(waitpid(pid[i], &status, 0) == pid[i]);
		assert(status == 0);
	}
}
265 | |
266 | static void do_test_lru_dist(int task, void *data) |
267 | { |
268 | unsigned int nr_misses = 0; |
269 | struct pfect_lru pfect_lru; |
270 | unsigned long long key, value = 1234; |
271 | unsigned int i; |
272 | |
273 | unsigned int lru_map_fd = ((unsigned int *)data)[0]; |
274 | unsigned int lru_size = ((unsigned int *)data)[1]; |
275 | unsigned long long key_offset = task * dist_key_counts; |
276 | |
277 | pfect_lru_init(lru: &pfect_lru, lru_size, nr_possible_elems: dist_key_counts); |
278 | |
279 | for (i = 0; i < dist_key_counts; i++) { |
280 | key = dist_keys[i] + key_offset; |
281 | |
282 | pfect_lru_lookup_or_insert(lru: &pfect_lru, key); |
283 | |
284 | if (!bpf_map_lookup_elem(lru_map_fd, &key, &value)) |
285 | continue; |
286 | |
287 | if (bpf_map_update_elem(lru_map_fd, &key, &value, BPF_NOEXIST)) { |
288 | printf(format: "bpf_map_update_elem(lru_map_fd, %llu): errno:%d\n" , |
289 | key, errno); |
290 | assert(0); |
291 | } |
292 | |
293 | nr_misses++; |
294 | } |
295 | |
296 | printf(format: " task:%d BPF LRU: nr_unique:%u(/%u) nr_misses:%u(/%u)\n" , |
297 | task, pfect_lru.nr_unique, dist_key_counts, nr_misses, |
298 | dist_key_counts); |
299 | printf(format: " task:%d Perfect LRU: nr_unique:%u(/%u) nr_misses:%u(/%u)\n" , |
300 | task, pfect_lru.nr_unique, pfect_lru.total, |
301 | pfect_lru.nr_misses, pfect_lru.total); |
302 | |
303 | pfect_lru_destroy(lru: &pfect_lru); |
304 | close(fd: lru_map_fd); |
305 | } |
306 | |
307 | static void test_parallel_lru_dist(int map_type, int map_flags, |
308 | int nr_tasks, unsigned int lru_size) |
309 | { |
310 | int child_data[2]; |
311 | int lru_map_fd; |
312 | |
313 | printf(format: "%s (map_type:%d map_flags:0x%X):\n" , __func__, map_type, |
314 | map_flags); |
315 | |
316 | if (map_flags & BPF_F_NO_COMMON_LRU) |
317 | lru_map_fd = create_map(map_type, map_flags, |
318 | size: nr_cpus * lru_size); |
319 | else |
320 | lru_map_fd = create_map(map_type, map_flags, |
321 | size: nr_tasks * lru_size); |
322 | assert(lru_map_fd != -1); |
323 | |
324 | child_data[0] = lru_map_fd; |
325 | child_data[1] = lru_size; |
326 | |
327 | run_parallel(tasks: nr_tasks, fn: do_test_lru_dist, data: child_data); |
328 | |
329 | close(fd: lru_map_fd); |
330 | } |
331 | |
332 | static void test_lru_loss0(int map_type, int map_flags) |
333 | { |
334 | unsigned long long key, value[nr_cpus]; |
335 | unsigned int old_unused_losses = 0; |
336 | unsigned int new_unused_losses = 0; |
337 | unsigned int used_losses = 0; |
338 | int map_fd; |
339 | |
340 | printf(format: "%s (map_type:%d map_flags:0x%X): " , __func__, map_type, |
341 | map_flags); |
342 | |
343 | assert(sched_next_online(0, 0) != -1); |
344 | |
345 | if (map_flags & BPF_F_NO_COMMON_LRU) |
346 | map_fd = create_map(map_type, map_flags, size: 900 * nr_cpus); |
347 | else |
348 | map_fd = create_map(map_type, map_flags, size: 900); |
349 | |
350 | assert(map_fd != -1); |
351 | |
352 | value[0] = 1234; |
353 | |
354 | for (key = 1; key <= 1000; key++) { |
355 | int start_key, end_key; |
356 | |
357 | assert(bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST) == 0); |
358 | |
359 | start_key = 101; |
360 | end_key = min(key, 900); |
361 | |
362 | while (start_key <= end_key) { |
363 | bpf_map_lookup_elem(map_fd, &start_key, value); |
364 | start_key++; |
365 | } |
366 | } |
367 | |
368 | for (key = 1; key <= 1000; key++) { |
369 | if (bpf_map_lookup_elem(map_fd, &key, value)) { |
370 | if (key <= 100) |
371 | old_unused_losses++; |
372 | else if (key <= 900) |
373 | used_losses++; |
374 | else |
375 | new_unused_losses++; |
376 | } |
377 | } |
378 | |
379 | close(fd: map_fd); |
380 | |
381 | printf(format: "older-elem-losses:%d(/100) active-elem-losses:%d(/800) " |
382 | "newer-elem-losses:%d(/100)\n" , |
383 | old_unused_losses, used_losses, new_unused_losses); |
384 | } |
385 | |
386 | static void test_lru_loss1(int map_type, int map_flags) |
387 | { |
388 | unsigned long long key, value[nr_cpus]; |
389 | int map_fd; |
390 | unsigned int nr_losses = 0; |
391 | |
392 | printf(format: "%s (map_type:%d map_flags:0x%X): " , __func__, map_type, |
393 | map_flags); |
394 | |
395 | assert(sched_next_online(0, 0) != -1); |
396 | |
397 | if (map_flags & BPF_F_NO_COMMON_LRU) |
398 | map_fd = create_map(map_type, map_flags, size: 1000 * nr_cpus); |
399 | else |
400 | map_fd = create_map(map_type, map_flags, size: 1000); |
401 | |
402 | assert(map_fd != -1); |
403 | |
404 | value[0] = 1234; |
405 | |
406 | for (key = 1; key <= 1000; key++) |
407 | assert(!bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST)); |
408 | |
409 | for (key = 1; key <= 1000; key++) { |
410 | if (bpf_map_lookup_elem(map_fd, &key, value)) |
411 | nr_losses++; |
412 | } |
413 | |
414 | close(fd: map_fd); |
415 | |
416 | printf(format: "nr_losses:%d(/1000)\n" , nr_losses); |
417 | } |
418 | |
419 | static void do_test_parallel_lru_loss(int task, void *data) |
420 | { |
421 | const unsigned int nr_stable_elems = 1000; |
422 | const unsigned int nr_repeats = 100000; |
423 | |
424 | int map_fd = *(int *)data; |
425 | unsigned long long stable_base; |
426 | unsigned long long key, value[nr_cpus]; |
427 | unsigned long long next_ins_key; |
428 | unsigned int nr_losses = 0; |
429 | unsigned int i; |
430 | |
431 | stable_base = task * nr_repeats * 2 + 1; |
432 | next_ins_key = stable_base; |
433 | value[0] = 1234; |
434 | for (i = 0; i < nr_stable_elems; i++) { |
435 | assert(bpf_map_update_elem(map_fd, &next_ins_key, value, |
436 | BPF_NOEXIST) == 0); |
437 | next_ins_key++; |
438 | } |
439 | |
440 | for (i = 0; i < nr_repeats; i++) { |
441 | int rn; |
442 | |
443 | rn = rand(); |
444 | |
445 | if (rn % 10) { |
446 | key = rn % nr_stable_elems + stable_base; |
447 | bpf_map_lookup_elem(map_fd, &key, value); |
448 | } else { |
449 | bpf_map_update_elem(map_fd, &next_ins_key, value, |
450 | BPF_NOEXIST); |
451 | next_ins_key++; |
452 | } |
453 | } |
454 | |
455 | key = stable_base; |
456 | for (i = 0; i < nr_stable_elems; i++) { |
457 | if (bpf_map_lookup_elem(map_fd, &key, value)) |
458 | nr_losses++; |
459 | key++; |
460 | } |
461 | |
462 | printf(format: " task:%d nr_losses:%u\n" , task, nr_losses); |
463 | } |
464 | |
465 | static void test_parallel_lru_loss(int map_type, int map_flags, int nr_tasks) |
466 | { |
467 | int map_fd; |
468 | |
469 | printf(format: "%s (map_type:%d map_flags:0x%X):\n" , __func__, map_type, |
470 | map_flags); |
471 | |
472 | /* Give 20% more than the active working set */ |
473 | if (map_flags & BPF_F_NO_COMMON_LRU) |
474 | map_fd = create_map(map_type, map_flags, |
475 | size: nr_cpus * (1000 + 200)); |
476 | else |
477 | map_fd = create_map(map_type, map_flags, |
478 | size: nr_tasks * (1000 + 200)); |
479 | |
480 | assert(map_fd != -1); |
481 | |
482 | run_parallel(tasks: nr_tasks, fn: do_test_parallel_lru_loss, data: &map_fd); |
483 | |
484 | close(fd: map_fd); |
485 | } |
486 | |
487 | int main(int argc, char **argv) |
488 | { |
489 | int map_flags[] = {0, BPF_F_NO_COMMON_LRU}; |
490 | const char *dist_file; |
491 | int nr_tasks = 1; |
492 | int lru_size; |
493 | int f; |
494 | |
495 | if (argc < 4) { |
496 | printf(format: "Usage: %s <dist-file> <lru-size> <nr-tasks>\n" , |
497 | argv[0]); |
498 | return -1; |
499 | } |
500 | |
501 | dist_file = argv[1]; |
502 | lru_size = atoi(nptr: argv[2]); |
503 | nr_tasks = atoi(nptr: argv[3]); |
504 | |
505 | setbuf(stdout, NULL); |
506 | |
507 | srand(seed: time(NULL)); |
508 | |
509 | nr_cpus = bpf_num_possible_cpus(); |
510 | assert(nr_cpus != -1); |
511 | printf(format: "nr_cpus:%d\n\n" , nr_cpus); |
512 | |
513 | nr_tasks = min(nr_tasks, nr_cpus); |
514 | |
515 | dist_key_counts = read_keys(dist_file, keys: &dist_keys); |
516 | if (!dist_key_counts) { |
517 | printf(format: "%s has no key\n" , dist_file); |
518 | return -1; |
519 | } |
520 | |
521 | for (f = 0; f < ARRAY_SIZE(map_flags); f++) { |
522 | test_lru_loss0(map_type: BPF_MAP_TYPE_LRU_HASH, map_flags: map_flags[f]); |
523 | test_lru_loss1(map_type: BPF_MAP_TYPE_LRU_HASH, map_flags: map_flags[f]); |
524 | test_parallel_lru_loss(map_type: BPF_MAP_TYPE_LRU_HASH, map_flags: map_flags[f], |
525 | nr_tasks); |
526 | test_parallel_lru_dist(map_type: BPF_MAP_TYPE_LRU_HASH, map_flags: map_flags[f], |
527 | nr_tasks, lru_size); |
528 | printf(format: "\n" ); |
529 | } |
530 | |
531 | free(ptr: dist_keys); |
532 | |
533 | return 0; |
534 | } |
535 | |