1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Copyright 2013 Google Inc. |
4 | * Author: Willem de Bruijn (willemb@google.com) |
5 | * |
6 | * A basic test of packet socket fanout behavior. |
7 | * |
8 | * Control: |
9 | * - create fanout fails as expected with illegal flag combinations |
10 | * - join fanout fails as expected with diverging types or flags |
11 | * |
12 | * Datapath: |
13 | * Open a pair of packet sockets and a pair of INET sockets, send a known |
14 | * number of packets across the two INET sockets and count the number of |
15 | * packets enqueued onto the two packet sockets. |
16 | * |
17 | * The test currently runs for |
18 | * - PACKET_FANOUT_HASH |
19 | * - PACKET_FANOUT_HASH with PACKET_FANOUT_FLAG_ROLLOVER |
20 | * - PACKET_FANOUT_LB |
21 | * - PACKET_FANOUT_CPU |
22 | * - PACKET_FANOUT_ROLLOVER |
23 | * - PACKET_FANOUT_CBPF |
24 | * - PACKET_FANOUT_EBPF |
25 | * |
26 | * Todo: |
27 | * - functionality: PACKET_FANOUT_FLAG_DEFRAG |
28 | */ |
29 | |
30 | #define _GNU_SOURCE /* for sched_setaffinity */ |
31 | |
32 | #include <arpa/inet.h> |
33 | #include <errno.h> |
34 | #include <fcntl.h> |
35 | #include <linux/unistd.h> /* for __NR_bpf */ |
36 | #include <linux/filter.h> |
37 | #include <linux/bpf.h> |
38 | #include <linux/if_packet.h> |
39 | #include <net/if.h> |
40 | #include <net/ethernet.h> |
41 | #include <netinet/ip.h> |
42 | #include <netinet/udp.h> |
43 | #include <poll.h> |
44 | #include <sched.h> |
45 | #include <stdint.h> |
46 | #include <stdio.h> |
47 | #include <stdlib.h> |
48 | #include <string.h> |
49 | #include <sys/mman.h> |
50 | #include <sys/socket.h> |
51 | #include <sys/stat.h> |
52 | #include <sys/types.h> |
53 | #include <unistd.h> |
54 | |
55 | #include "psock_lib.h" |
56 | #include "../kselftest.h" |
57 | |
/* Number of frames (and of blocks) requested for each packet socket RX ring */
#define RING_NUM_FRAMES 20

/*
 * When non-zero, sock_fanout_open() configures fanout through
 * struct fanout_args and passes this value as max_num_members; when zero
 * it uses the packed integer form of PACKET_FANOUT instead.
 */
static uint32_t cfg_max_num_members;
61 | |
62 | /* Open a socket in a given fanout mode. |
63 | * @return -1 if mode is bad, a valid socket otherwise */ |
64 | static int sock_fanout_open(uint16_t typeflags, uint16_t group_id) |
65 | { |
66 | struct sockaddr_ll addr = {0}; |
67 | struct fanout_args args; |
68 | int fd, val, err; |
69 | |
70 | fd = socket(PF_PACKET, SOCK_RAW, 0); |
71 | if (fd < 0) { |
72 | perror("socket packet" ); |
73 | exit(1); |
74 | } |
75 | |
76 | pair_udp_setfilter(fd); |
77 | |
78 | addr.sll_family = AF_PACKET; |
79 | addr.sll_protocol = htons(ETH_P_IP); |
80 | addr.sll_ifindex = if_nametoindex("lo" ); |
81 | if (addr.sll_ifindex == 0) { |
82 | perror("if_nametoindex" ); |
83 | exit(1); |
84 | } |
85 | if (bind(fd, (void *) &addr, sizeof(addr))) { |
86 | perror("bind packet" ); |
87 | exit(1); |
88 | } |
89 | |
90 | if (cfg_max_num_members) { |
91 | args.id = group_id; |
92 | args.type_flags = typeflags; |
93 | args.max_num_members = cfg_max_num_members; |
94 | err = setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args, |
95 | sizeof(args)); |
96 | } else { |
97 | val = (((int) typeflags) << 16) | group_id; |
98 | err = setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, |
99 | sizeof(val)); |
100 | } |
101 | if (err) { |
102 | if (close(fd)) { |
103 | perror("close packet" ); |
104 | exit(1); |
105 | } |
106 | return -1; |
107 | } |
108 | |
109 | return fd; |
110 | } |
111 | |
/*
 * Attach a classic BPF program as the fanout data of @fd.
 *
 * The two-instruction program loads the packet byte at offset 80 into the
 * accumulator and returns it. Terminates the test if the program cannot
 * be attached.
 */
static void sock_fanout_set_cbpf(int fd)
{
	struct sock_filter insns[] = {
		BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 80),	/* ldb [80] */
		BPF_STMT(BPF_RET | BPF_A, 0),		/* ret A */
	};
	struct sock_fprog fprog = {
		.len = ARRAY_SIZE(insns),
		.filter = insns,
	};
	int err;

	err = setsockopt(fd, SOL_PACKET, PACKET_FANOUT_DATA, &fprog,
			 sizeof(fprog));
	if (!err)
		return;

	perror("fanout data cbpf");
	exit(1);
}
129 | |
/*
 * Read back the fanout configuration of @fd.
 *
 * @fd:        packet socket to query with getsockopt(PACKET_FANOUT)
 * @typeflags: out, upper 16 bits of the returned option value
 * @group_id:  out, lower 16 bits of the returned option value
 *
 * Terminates the test if getsockopt fails.
 */
static void sock_fanout_getopts(int fd, uint16_t *typeflags, uint16_t *group_id)
{
	/* Unsigned so that the shift below is well defined for any value */
	unsigned int sockopt;
	socklen_t sockopt_len = sizeof(sockopt);

	if (getsockopt(fd, SOL_PACKET, PACKET_FANOUT,
		       &sockopt, &sockopt_len)) {
		perror("failed to getsockopt");
		exit(1);
	}
	*typeflags = sockopt >> 16;
	/* The group id occupies exactly the low 16 bits */
	*group_id = sockopt & 0xffff;
}
143 | |
144 | static void sock_fanout_set_ebpf(int fd) |
145 | { |
146 | static char log_buf[65536]; |
147 | |
148 | const int len_off = __builtin_offsetof(struct __sk_buff, len); |
149 | struct bpf_insn prog[] = { |
150 | { BPF_ALU64 | BPF_MOV | BPF_X, 6, 1, 0, 0 }, |
151 | { BPF_LDX | BPF_W | BPF_MEM, 0, 6, len_off, 0 }, |
152 | { BPF_JMP | BPF_JGE | BPF_K, 0, 0, 1, DATA_LEN }, |
153 | { BPF_JMP | BPF_JA | BPF_K, 0, 0, 4, 0 }, |
154 | { BPF_LD | BPF_B | BPF_ABS, 0, 0, 0, 0x50 }, |
155 | { BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 2, DATA_CHAR }, |
156 | { BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 1, DATA_CHAR_1 }, |
157 | { BPF_ALU | BPF_MOV | BPF_K, 0, 0, 0, 0 }, |
158 | { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 } |
159 | }; |
160 | union bpf_attr attr; |
161 | int pfd; |
162 | |
163 | memset(&attr, 0, sizeof(attr)); |
164 | attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; |
165 | attr.insns = (unsigned long) prog; |
166 | attr.insn_cnt = ARRAY_SIZE(prog); |
167 | attr.license = (unsigned long) "GPL" ; |
168 | attr.log_buf = (unsigned long) log_buf, |
169 | attr.log_size = sizeof(log_buf), |
170 | attr.log_level = 1, |
171 | |
172 | pfd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); |
173 | if (pfd < 0) { |
174 | perror("bpf" ); |
175 | fprintf(stderr, "bpf verifier:\n%s\n" , log_buf); |
176 | exit(1); |
177 | } |
178 | |
179 | if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT_DATA, &pfd, sizeof(pfd))) { |
180 | perror("fanout data ebpf" ); |
181 | exit(1); |
182 | } |
183 | |
184 | if (close(pfd)) { |
185 | perror("close ebpf" ); |
186 | exit(1); |
187 | } |
188 | } |
189 | |
190 | static char *sock_fanout_open_ring(int fd) |
191 | { |
192 | struct tpacket_req req = { |
193 | .tp_block_size = getpagesize(), |
194 | .tp_frame_size = getpagesize(), |
195 | .tp_block_nr = RING_NUM_FRAMES, |
196 | .tp_frame_nr = RING_NUM_FRAMES, |
197 | }; |
198 | char *ring; |
199 | int val = TPACKET_V2; |
200 | |
201 | if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, (void *) &val, |
202 | sizeof(val))) { |
203 | perror("packetsock ring setsockopt version" ); |
204 | exit(1); |
205 | } |
206 | if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, (void *) &req, |
207 | sizeof(req))) { |
208 | perror("packetsock ring setsockopt" ); |
209 | exit(1); |
210 | } |
211 | |
212 | ring = mmap(0, req.tp_block_size * req.tp_block_nr, |
213 | PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); |
214 | if (ring == MAP_FAILED) { |
215 | perror("packetsock ring mmap" ); |
216 | exit(1); |
217 | } |
218 | |
219 | return ring; |
220 | } |
221 | |
222 | static int sock_fanout_read_ring(int fd, void *ring) |
223 | { |
224 | struct tpacket2_hdr * = ring; |
225 | int count = 0; |
226 | |
227 | while (count < RING_NUM_FRAMES && header->tp_status & TP_STATUS_USER) { |
228 | count++; |
229 | header = ring + (count * getpagesize()); |
230 | } |
231 | |
232 | return count; |
233 | } |
234 | |
/*
 * Compare the fill level of both packet socket rings against @expect.
 *
 * @fds:    the two packet sockets
 * @rings:  the mapped ring of each socket, in the same order as @fds
 * @expect: the two expected frame counts
 *
 * The counts may match @expect in either order, since which socket
 * receives which share is not fixed.
 *
 * Return: 0 if the counts match, 1 otherwise (a warning is printed).
 */
static int sock_fanout_read(int fds[], char *rings[], const int expect[])
{
	int ret[2];

	ret[0] = sock_fanout_read_ring(fds[0], rings[0]);
	ret[1] = sock_fanout_read_ring(fds[1], rings[1]);

	fprintf(stderr, "info: count=%d,%d, expect=%d,%d\n",
		ret[0], ret[1], expect[0], expect[1]);

	if ((!(ret[0] == expect[0] && ret[1] == expect[1])) &&
	    (!(ret[0] == expect[1] && ret[1] == expect[0]))) {
		fprintf(stderr, "warning: incorrect queue lengths\n");
		return 1;
	}

	return 0;
}
253 | |
/*
 * Test illegal mode + flag combination: PACKET_FANOUT_ROLLOVER as the mode
 * together with PACKET_FANOUT_FLAG_ROLLOVER must be rejected.
 * Terminates the test if the socket opens anyway.
 */
static void test_control_single(void)
{
	fprintf(stderr, "test: control single socket\n");

	if (sock_fanout_open(PACKET_FANOUT_ROLLOVER |
			     PACKET_FANOUT_FLAG_ROLLOVER, 0) != -1) {
		fprintf(stderr, "ERROR: opened socket with dual rollover\n");
		exit(1);
	}
}
265 | |
/*
 * Test joining an existing group with diverging modes or flags.
 *
 * Creates group 0 in PACKET_FANOUT_HASH mode, then checks that joining it
 * with an extra DEFRAG flag, an extra ROLLOVER flag or a different mode
 * fails, and that joining with identical settings succeeds.
 * Terminates the test on any unexpected result.
 */
static void test_control_group(void)
{
	int fds[2];

	fprintf(stderr, "test: control multiple sockets\n");

	fds[0] = sock_fanout_open(PACKET_FANOUT_HASH, 0);
	if (fds[0] == -1) {
		fprintf(stderr, "ERROR: failed to open HASH socket\n");
		exit(1);
	}
	if (sock_fanout_open(PACKET_FANOUT_HASH |
			     PACKET_FANOUT_FLAG_DEFRAG, 0) != -1) {
		fprintf(stderr, "ERROR: joined group with wrong flag defrag\n");
		exit(1);
	}
	if (sock_fanout_open(PACKET_FANOUT_HASH |
			     PACKET_FANOUT_FLAG_ROLLOVER, 0) != -1) {
		fprintf(stderr, "ERROR: joined group with wrong flag ro\n");
		exit(1);
	}
	if (sock_fanout_open(PACKET_FANOUT_CPU, 0) != -1) {
		fprintf(stderr, "ERROR: joined group with wrong mode\n");
		exit(1);
	}
	fds[1] = sock_fanout_open(PACKET_FANOUT_HASH, 0);
	if (fds[1] == -1) {
		fprintf(stderr, "ERROR: failed to join group\n");
		exit(1);
	}
	if (close(fds[1]) || close(fds[0])) {
		fprintf(stderr, "ERROR: closing sockets\n");
		exit(1);
	}
}
302 | |
303 | /* Test illegal max_num_members values */ |
304 | static void test_control_group_max_num_members(void) |
305 | { |
306 | int fds[3]; |
307 | |
308 | fprintf(stderr, "test: control multiple sockets, max_num_members\n" ); |
309 | |
310 | /* expected failure on greater than PACKET_FANOUT_MAX */ |
311 | cfg_max_num_members = (1 << 16) + 1; |
312 | if (sock_fanout_open(PACKET_FANOUT_HASH, group_id: 0) != -1) { |
313 | fprintf(stderr, "ERROR: max_num_members > PACKET_FANOUT_MAX\n" ); |
314 | exit(1); |
315 | } |
316 | |
317 | cfg_max_num_members = 256; |
318 | fds[0] = sock_fanout_open(PACKET_FANOUT_HASH, group_id: 0); |
319 | if (fds[0] == -1) { |
320 | fprintf(stderr, "ERROR: failed open\n" ); |
321 | exit(1); |
322 | } |
323 | |
324 | /* expected failure on joining group with different max_num_members */ |
325 | cfg_max_num_members = 257; |
326 | if (sock_fanout_open(PACKET_FANOUT_HASH, group_id: 0) != -1) { |
327 | fprintf(stderr, "ERROR: set different max_num_members\n" ); |
328 | exit(1); |
329 | } |
330 | |
331 | /* success on joining group with same max_num_members */ |
332 | cfg_max_num_members = 256; |
333 | fds[1] = sock_fanout_open(PACKET_FANOUT_HASH, group_id: 0); |
334 | if (fds[1] == -1) { |
335 | fprintf(stderr, "ERROR: failed to join group\n" ); |
336 | exit(1); |
337 | } |
338 | |
339 | /* success on joining group with max_num_members unspecified */ |
340 | cfg_max_num_members = 0; |
341 | fds[2] = sock_fanout_open(PACKET_FANOUT_HASH, group_id: 0); |
342 | if (fds[2] == -1) { |
343 | fprintf(stderr, "ERROR: failed to join group\n" ); |
344 | exit(1); |
345 | } |
346 | |
347 | if (close(fds[2]) || close(fds[1]) || close(fds[0])) { |
348 | fprintf(stderr, "ERROR: closing sockets\n" ); |
349 | exit(1); |
350 | } |
351 | } |
352 | |
/*
 * Test creating unique fanout group ids.
 *
 * Creates a group with PACKET_FANOUT_FLAG_UNIQUEID, reads back the id that
 * was assigned, and checks that the group can be joined by id with the
 * same mode but not with a different one. Also checks that a second
 * unique-id group can be created, and that requesting a unique id while
 * naming an existing group id fails.
 * Terminates the test on any unexpected result.
 */
static void test_unique_fanout_group_ids(void)
{
	int fds[3];
	uint16_t typeflags, first_group_id, second_group_id;

	fprintf(stderr, "test: unique ids\n");

	fds[0] = sock_fanout_open(PACKET_FANOUT_HASH |
				  PACKET_FANOUT_FLAG_UNIQUEID, 0);
	if (fds[0] == -1) {
		fprintf(stderr, "ERROR: failed to create a unique id group.\n");
		exit(1);
	}

	/* The value read back is expected to carry the mode only */
	sock_fanout_getopts(fds[0], &typeflags, &first_group_id);
	if (typeflags != PACKET_FANOUT_HASH) {
		fprintf(stderr, "ERROR: unexpected typeflags %x\n", typeflags);
		exit(1);
	}

	if (sock_fanout_open(PACKET_FANOUT_CPU, first_group_id) != -1) {
		fprintf(stderr, "ERROR: joined group with wrong type.\n");
		exit(1);
	}

	fds[1] = sock_fanout_open(PACKET_FANOUT_HASH, first_group_id);
	if (fds[1] == -1) {
		fprintf(stderr,
			"ERROR: failed to join previously created group.\n");
		exit(1);
	}

	fds[2] = sock_fanout_open(PACKET_FANOUT_HASH |
				  PACKET_FANOUT_FLAG_UNIQUEID, 0);
	if (fds[2] == -1) {
		fprintf(stderr,
			"ERROR: failed to create a second unique id group.\n");
		exit(1);
	}

	sock_fanout_getopts(fds[2], &typeflags, &second_group_id);
	if (sock_fanout_open(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_UNIQUEID,
			     second_group_id) != -1) {
		fprintf(stderr,
			"ERROR: specified a group id when requesting unique id\n");
		exit(1);
	}

	if (close(fds[0]) || close(fds[1]) || close(fds[2])) {
		fprintf(stderr, "ERROR: closing sockets\n");
		exit(1);
	}
}
407 | |
408 | static int test_datapath(uint16_t typeflags, int port_off, |
409 | const int expect1[], const int expect2[]) |
410 | { |
411 | const int expect0[] = { 0, 0 }; |
412 | char *rings[2]; |
413 | uint8_t type = typeflags & 0xFF; |
414 | int fds[2], fds_udp[2][2], ret; |
415 | |
416 | fprintf(stderr, "\ntest: datapath 0x%hx ports %hu,%hu\n" , |
417 | typeflags, (uint16_t)PORT_BASE, |
418 | (uint16_t)(PORT_BASE + port_off)); |
419 | |
420 | fds[0] = sock_fanout_open(typeflags, group_id: 0); |
421 | fds[1] = sock_fanout_open(typeflags, group_id: 0); |
422 | if (fds[0] == -1 || fds[1] == -1) { |
423 | fprintf(stderr, "ERROR: failed open\n" ); |
424 | exit(1); |
425 | } |
426 | if (type == PACKET_FANOUT_CBPF) |
427 | sock_fanout_set_cbpf(fd: fds[0]); |
428 | else if (type == PACKET_FANOUT_EBPF) |
429 | sock_fanout_set_ebpf(fd: fds[0]); |
430 | |
431 | rings[0] = sock_fanout_open_ring(fd: fds[0]); |
432 | rings[1] = sock_fanout_open_ring(fd: fds[1]); |
433 | pair_udp_open(fds: fds_udp[0], PORT_BASE); |
434 | pair_udp_open(fds: fds_udp[1], PORT_BASE + port_off); |
435 | sock_fanout_read(fds, rings, expect: expect0); |
436 | |
437 | /* Send data, but not enough to overflow a queue */ |
438 | pair_udp_send(fds: fds_udp[0], num: 15); |
439 | pair_udp_send_char(fds: fds_udp[1], num: 5, DATA_CHAR_1); |
440 | ret = sock_fanout_read(fds, rings, expect: expect1); |
441 | |
442 | /* Send more data, overflow the queue */ |
443 | pair_udp_send_char(fds: fds_udp[0], num: 15, DATA_CHAR_1); |
444 | /* TODO: ensure consistent order between expect1 and expect2 */ |
445 | ret |= sock_fanout_read(fds, rings, expect: expect2); |
446 | |
447 | if (munmap(rings[1], RING_NUM_FRAMES * getpagesize()) || |
448 | munmap(rings[0], RING_NUM_FRAMES * getpagesize())) { |
449 | fprintf(stderr, "close rings\n" ); |
450 | exit(1); |
451 | } |
452 | if (close(fds_udp[1][1]) || close(fds_udp[1][0]) || |
453 | close(fds_udp[0][1]) || close(fds_udp[0][0]) || |
454 | close(fds[1]) || close(fds[0])) { |
455 | fprintf(stderr, "close datapath\n" ); |
456 | exit(1); |
457 | } |
458 | |
459 | return ret; |
460 | } |
461 | |
/*
 * Pin the calling process to a single CPU.
 *
 * @cpuid: index of the CPU to run on
 *
 * Return: 0 on success, 1 if sched_setaffinity() failed with EINVAL
 * (presumably because the CPU is not available; callers use this to skip
 * work). Any other error terminates the test.
 */
static int set_cpuaffinity(int cpuid)
{
	cpu_set_t cpus;

	CPU_ZERO(&cpus);
	CPU_SET(cpuid, &cpus);

	if (!sched_setaffinity(0, sizeof(cpus), &cpus))
		return 0;

	if (errno == EINVAL)
		return 1;

	fprintf(stderr, "setaffinity %d\n", cpuid);
	exit(1);
}
478 | |
479 | int main(int argc, char **argv) |
480 | { |
481 | const int expect_hash[2][2] = { { 15, 5 }, { 20, 5 } }; |
482 | const int expect_hash_rb[2][2] = { { 15, 5 }, { 20, 15 } }; |
483 | const int expect_lb[2][2] = { { 10, 10 }, { 18, 17 } }; |
484 | const int expect_rb[2][2] = { { 15, 5 }, { 20, 15 } }; |
485 | const int expect_cpu0[2][2] = { { 20, 0 }, { 20, 0 } }; |
486 | const int expect_cpu1[2][2] = { { 0, 20 }, { 0, 20 } }; |
487 | const int expect_bpf[2][2] = { { 15, 5 }, { 15, 20 } }; |
488 | const int expect_uniqueid[2][2] = { { 20, 20}, { 20, 20 } }; |
489 | int port_off = 2, tries = 20, ret; |
490 | |
491 | test_control_single(); |
492 | test_control_group(); |
493 | test_control_group_max_num_members(); |
494 | test_unique_fanout_group_ids(); |
495 | |
496 | /* PACKET_FANOUT_MAX */ |
497 | cfg_max_num_members = 1 << 16; |
498 | /* find a set of ports that do not collide onto the same socket */ |
499 | ret = test_datapath(PACKET_FANOUT_HASH, port_off, |
500 | expect1: expect_hash[0], expect2: expect_hash[1]); |
501 | while (ret) { |
502 | fprintf(stderr, "info: trying alternate ports (%d)\n" , tries); |
503 | ret = test_datapath(PACKET_FANOUT_HASH, port_off: ++port_off, |
504 | expect1: expect_hash[0], expect2: expect_hash[1]); |
505 | if (!--tries) { |
506 | fprintf(stderr, "too many collisions\n" ); |
507 | return 1; |
508 | } |
509 | } |
510 | |
511 | ret |= test_datapath(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_ROLLOVER, |
512 | port_off, expect1: expect_hash_rb[0], expect2: expect_hash_rb[1]); |
513 | ret |= test_datapath(PACKET_FANOUT_LB, |
514 | port_off, expect1: expect_lb[0], expect2: expect_lb[1]); |
515 | ret |= test_datapath(PACKET_FANOUT_ROLLOVER, |
516 | port_off, expect1: expect_rb[0], expect2: expect_rb[1]); |
517 | |
518 | ret |= test_datapath(PACKET_FANOUT_CBPF, |
519 | port_off, expect1: expect_bpf[0], expect2: expect_bpf[1]); |
520 | ret |= test_datapath(PACKET_FANOUT_EBPF, |
521 | port_off, expect1: expect_bpf[0], expect2: expect_bpf[1]); |
522 | |
523 | set_cpuaffinity(0); |
524 | ret |= test_datapath(PACKET_FANOUT_CPU, port_off, |
525 | expect1: expect_cpu0[0], expect2: expect_cpu0[1]); |
526 | if (!set_cpuaffinity(1)) |
527 | /* TODO: test that choice alternates with previous */ |
528 | ret |= test_datapath(PACKET_FANOUT_CPU, port_off, |
529 | expect1: expect_cpu1[0], expect2: expect_cpu1[1]); |
530 | |
531 | ret |= test_datapath(PACKET_FANOUT_FLAG_UNIQUEID, port_off, |
532 | expect1: expect_uniqueid[0], expect2: expect_uniqueid[1]); |
533 | |
534 | if (ret) |
535 | return 1; |
536 | |
537 | printf("OK. All tests passed\n" ); |
538 | return 0; |
539 | } |
540 | |