1 | /* Evaluate MSG_ZEROCOPY |
2 | * |
3 | * Send traffic between two processes over one of the supported |
4 | * protocols and modes: |
5 | * |
6 | * PF_INET/PF_INET6 |
7 | * - SOCK_STREAM |
8 | * - SOCK_DGRAM |
9 | * - SOCK_DGRAM with UDP_CORK |
10 | * - SOCK_RAW |
11 | * - SOCK_RAW with IP_HDRINCL |
12 | * |
13 | * PF_PACKET |
14 | * - SOCK_DGRAM |
15 | * - SOCK_RAW |
16 | * |
17 | * PF_RDS |
18 | * - SOCK_SEQPACKET |
19 | * |
20 | * Start this program on two connected hosts, one in send mode and |
21 | * the other with option '-r' to put it in receiver mode. |
22 | * |
23 | * If zerocopy mode ('-z') is enabled, the sender will verify that |
24 | * the kernel queues completions on the error queue for all zerocopy |
25 | * transfers. |
26 | */ |
27 | |
28 | #define _GNU_SOURCE |
29 | |
30 | #include <arpa/inet.h> |
31 | #include <error.h> |
32 | #include <errno.h> |
33 | #include <limits.h> |
34 | #include <linux/errqueue.h> |
35 | #include <linux/if_packet.h> |
36 | #include <linux/ipv6.h> |
37 | #include <linux/socket.h> |
38 | #include <linux/sockios.h> |
39 | #include <net/ethernet.h> |
40 | #include <net/if.h> |
41 | #include <netinet/ip.h> |
42 | #include <netinet/ip6.h> |
43 | #include <netinet/tcp.h> |
44 | #include <netinet/udp.h> |
45 | #include <poll.h> |
46 | #include <sched.h> |
47 | #include <stdbool.h> |
48 | #include <stdio.h> |
49 | #include <stdint.h> |
50 | #include <stdlib.h> |
51 | #include <string.h> |
52 | #include <sys/ioctl.h> |
53 | #include <sys/socket.h> |
54 | #include <sys/stat.h> |
55 | #include <sys/time.h> |
56 | #include <sys/types.h> |
57 | #include <sys/wait.h> |
58 | #include <unistd.h> |
59 | #include <linux/rds.h> |
60 | |
/* The constants below are only defined here when the included headers do
 * not already provide them.
 */

/* sock_extended_err.ee_origin value expected on zerocopy notifications */
#ifndef SO_EE_ORIGIN_ZEROCOPY
#define SO_EE_ORIGIN_ZEROCOPY 5
#endif

/* SOL_SOCKET option set on the tx socket when '-z' is passed */
#ifndef SO_ZEROCOPY
#define SO_ZEROCOPY 60
#endif

/* sock_extended_err.ee_code bit tested to decide whether data was copied */
#ifndef SO_EE_CODE_ZEROCOPY_COPIED
#define SO_EE_CODE_ZEROCOPY_COPIED 1
#endif

/* sendmsg() flag added to zerocopy sends */
#ifndef MSG_ZEROCOPY
#define MSG_ZEROCOPY 0x4000000
#endif
76 | |
/* Run configuration, filled in by parse_opts() */
static int cfg_cork;			/* -c: sends per corked datagram, 0 = no corking */
static bool cfg_cork_mixed;		/* -m: alternate copy and zerocopy sends when corking */
/* -C: cpu handed to do_setcpu().
 * NOTE(review): the default of -1 is passed straight to CPU_SET(); the
 * original comment claimed "pin to last cpu" - confirm that this is what
 * actually happens.
 */
static int cfg_cpu = -1;
static int cfg_family = PF_UNSPEC;	/* -4 / -6: PF_INET or PF_INET6 */
static int cfg_ifindex = 1;		/* -i: interface index used for PF_PACKET sends */
static int cfg_payload_len;		/* -s: payload bytes per message */
static int cfg_port = 8000;		/* -p: port stored in the socket addresses */
static bool cfg_rx;			/* -r: run as receiver instead of sender */
static int cfg_runtime_ms = 4200;	/* -t: 200 ms plus the given number of seconds */
static int cfg_verbose;			/* -v: incremented once per occurrence */
static int cfg_waittime_ms = 500;	/* poll() timeout and wait for late completions */
static bool cfg_zerocopy;		/* -z: send with MSG_ZEROCOPY */

/* Addresses built by setup_sockaddr(); cfg_alen is the sockaddr size
 * matching cfg_family.
 */
static socklen_t cfg_alen;
static struct sockaddr_storage cfg_dst_addr;
static struct sockaddr_storage cfg_src_addr;

/* Payload buffer (filled with a repeating 'a'..'z' pattern in do_test())
 * and the running counters reported at the end of the test.
 */
static char payload[IP_MAXPACKET];
static long packets, bytes, completions, expected_completions;
/* -1 until the first notification is seen, then 1 (zerocopy) or 0 (copied) */
static int zerocopied = -1;
/* first id the next errqueue notification is expected to start at */
static uint32_t next_completion;
98 | |
/* Return the time reported by gettimeofday(), converted to milliseconds. */
static unsigned long gettimeofday_ms(void)
{
	struct timeval now;
	unsigned long msec;

	gettimeofday(&now, NULL);

	msec = now.tv_sec * 1000;
	msec += now.tv_usec / 1000;

	return msec;
}
106 | |
/* Compute a 16-bit ones'-complement checksum.
 *
 * @start:     first 16-bit word to sum
 * @num_words: number of 16-bit words to sum
 *
 * Returns the bitwise complement of the folded sum, truncated to 16 bits.
 */
static uint16_t get_ip_csum(const uint16_t *start, int num_words)
{
	const uint16_t *word = start;
	unsigned long acc = 0;
	int remaining;

	for (remaining = num_words; remaining > 0; remaining--)
		acc += *word++;

	/* fold everything above bit 15 back into the low 16 bits */
	for (; acc >> 16; )
		acc = (acc >> 16) + (acc & 0xFFFF);

	return ~acc;
}
120 | |
121 | static int do_setcpu(int cpu) |
122 | { |
123 | cpu_set_t mask; |
124 | |
125 | CPU_ZERO(&mask); |
126 | CPU_SET(cpu, &mask); |
127 | if (sched_setaffinity(0, sizeof(mask), &mask)) |
128 | fprintf(stderr, "cpu: unable to pin, may increase variance.\n" ); |
129 | else if (cfg_verbose) |
130 | fprintf(stderr, "cpu: %u\n" , cpu); |
131 | |
132 | return 0; |
133 | } |
134 | |
/* Set an integer socket option; any failure terminates the program.
 *
 * @fd:      socket to configure
 * @level:   option level passed to setsockopt()
 * @optname: option name passed to setsockopt()
 * @val:     integer option value
 */
static void do_setsockopt(int fd, int level, int optname, int val)
{
	int rc = setsockopt(fd, level, optname, &val, sizeof(val));

	if (rc == 0)
		return;

	error(1, errno, "setsockopt %d.%d: %d", level, optname, val);
}
140 | |
141 | static int do_poll(int fd, int events) |
142 | { |
143 | struct pollfd pfd; |
144 | int ret; |
145 | |
146 | pfd.events = events; |
147 | pfd.revents = 0; |
148 | pfd.fd = fd; |
149 | |
150 | ret = poll(&pfd, 1, cfg_waittime_ms); |
151 | if (ret == -1) |
152 | error(1, errno, "poll" ); |
153 | |
154 | return ret && (pfd.revents & events); |
155 | } |
156 | |
/* Accept one connection and close the listening socket.
 *
 * @fd: listening socket; it is closed before returning
 *
 * Returns the accepted socket. Any failure terminates the program.
 */
static int do_accept(int fd)
{
	int conn_fd;

	conn_fd = accept(fd, NULL, NULL);
	if (conn_fd == -1)
		error(1, errno, "accept");

	/* the listener is not needed once the connection exists */
	if (close(fd))
		error(1, errno, "close listen sock");

	return conn_fd;
}
169 | |
/* Write an RDS zerocopy cookie control message into msg->msg_control.
 *
 * @msg:    message whose already attached control buffer receives the cmsg.
 *          The buffer is written without a size check, so it must hold at
 *          least CMSG_LEN(sizeof(cookie)) bytes.
 * @cookie: value copied into the cmsg payload
 *
 * Terminates the program if no control buffer is attached.
 */
static void add_zcopy_cookie(struct msghdr *msg, uint32_t cookie)
{
	struct cmsghdr *cm;

	/* No syscall failed here, so errno is unrelated and is not reported */
	if (!msg->msg_control)
		error(1, 0, "NULL cookie");

	cm = (void *)msg->msg_control;
	cm->cmsg_len = CMSG_LEN(sizeof(cookie));
	cm->cmsg_level = SOL_RDS;
	cm->cmsg_type = RDS_CMSG_ZCOPY_COOKIE;
	memcpy(CMSG_DATA(cm), &cookie, sizeof(cookie));
}
182 | |
183 | static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain) |
184 | { |
185 | int ret, len, i, flags; |
186 | static uint32_t cookie; |
187 | char ckbuf[CMSG_SPACE(sizeof(cookie))]; |
188 | |
189 | len = 0; |
190 | for (i = 0; i < msg->msg_iovlen; i++) |
191 | len += msg->msg_iov[i].iov_len; |
192 | |
193 | flags = MSG_DONTWAIT; |
194 | if (do_zerocopy) { |
195 | flags |= MSG_ZEROCOPY; |
196 | if (domain == PF_RDS) { |
197 | memset(&msg->msg_control, 0, sizeof(msg->msg_control)); |
198 | msg->msg_controllen = CMSG_SPACE(sizeof(cookie)); |
199 | msg->msg_control = (struct cmsghdr *)ckbuf; |
200 | add_zcopy_cookie(msg, cookie: ++cookie); |
201 | } |
202 | } |
203 | |
204 | ret = sendmsg(fd, msg, flags); |
205 | if (ret == -1 && errno == EAGAIN) |
206 | return false; |
207 | if (ret == -1) |
208 | error(1, errno, "send" ); |
209 | if (cfg_verbose && ret != len) |
210 | fprintf(stderr, "send: ret=%u != %u\n" , ret, len); |
211 | |
212 | if (len) { |
213 | packets++; |
214 | bytes += ret; |
215 | if (do_zerocopy && ret) |
216 | expected_completions++; |
217 | } |
218 | if (do_zerocopy && domain == PF_RDS) { |
219 | msg->msg_control = NULL; |
220 | msg->msg_controllen = 0; |
221 | } |
222 | |
223 | return true; |
224 | } |
225 | |
226 | static void do_sendmsg_corked(int fd, struct msghdr *msg) |
227 | { |
228 | bool do_zerocopy = cfg_zerocopy; |
229 | int i, payload_len, ; |
230 | |
231 | /* split up the packet. for non-multiple, make first buffer longer */ |
232 | payload_len = cfg_payload_len / cfg_cork; |
233 | extra_len = cfg_payload_len - (cfg_cork * payload_len); |
234 | |
235 | do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, val: 1); |
236 | |
237 | for (i = 0; i < cfg_cork; i++) { |
238 | |
239 | /* in mixed-frags mode, alternate zerocopy and copy frags |
240 | * start with non-zerocopy, to ensure attach later works |
241 | */ |
242 | if (cfg_cork_mixed) |
243 | do_zerocopy = (i & 1); |
244 | |
245 | msg->msg_iov[0].iov_len = payload_len + extra_len; |
246 | extra_len = 0; |
247 | |
248 | do_sendmsg(fd, msg, do_zerocopy, |
249 | domain: (cfg_dst_addr.ss_family == AF_INET ? |
250 | PF_INET : PF_INET6)); |
251 | } |
252 | |
253 | do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, val: 0); |
254 | } |
255 | |
256 | static int setup_iph(struct iphdr *iph, uint16_t payload_len) |
257 | { |
258 | struct sockaddr_in *daddr = (void *) &cfg_dst_addr; |
259 | struct sockaddr_in *saddr = (void *) &cfg_src_addr; |
260 | |
261 | memset(iph, 0, sizeof(*iph)); |
262 | |
263 | iph->version = 4; |
264 | iph->tos = 0; |
265 | iph->ihl = 5; |
266 | iph->ttl = 2; |
267 | iph->saddr = saddr->sin_addr.s_addr; |
268 | iph->daddr = daddr->sin_addr.s_addr; |
269 | iph->protocol = IPPROTO_EGP; |
270 | iph->tot_len = htons(sizeof(*iph) + payload_len); |
271 | iph->check = get_ip_csum(start: (void *) iph, num_words: iph->ihl << 1); |
272 | |
273 | return sizeof(*iph); |
274 | } |
275 | |
276 | static int setup_ip6h(struct ipv6hdr *ip6h, uint16_t payload_len) |
277 | { |
278 | struct sockaddr_in6 *daddr = (void *) &cfg_dst_addr; |
279 | struct sockaddr_in6 *saddr = (void *) &cfg_src_addr; |
280 | |
281 | memset(ip6h, 0, sizeof(*ip6h)); |
282 | |
283 | ip6h->version = 6; |
284 | ip6h->payload_len = htons(payload_len); |
285 | ip6h->nexthdr = IPPROTO_EGP; |
286 | ip6h->hop_limit = 2; |
287 | ip6h->saddr = saddr->sin6_addr; |
288 | ip6h->daddr = daddr->sin6_addr; |
289 | |
290 | return sizeof(*ip6h); |
291 | } |
292 | |
293 | |
294 | static void setup_sockaddr(int domain, const char *str_addr, |
295 | struct sockaddr_storage *sockaddr) |
296 | { |
297 | struct sockaddr_in6 *addr6 = (void *) sockaddr; |
298 | struct sockaddr_in *addr4 = (void *) sockaddr; |
299 | |
300 | switch (domain) { |
301 | case PF_INET: |
302 | memset(addr4, 0, sizeof(*addr4)); |
303 | addr4->sin_family = AF_INET; |
304 | addr4->sin_port = htons(cfg_port); |
305 | if (str_addr && |
306 | inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1) |
307 | error(1, 0, "ipv4 parse error: %s" , str_addr); |
308 | break; |
309 | case PF_INET6: |
310 | memset(addr6, 0, sizeof(*addr6)); |
311 | addr6->sin6_family = AF_INET6; |
312 | addr6->sin6_port = htons(cfg_port); |
313 | if (str_addr && |
314 | inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1) |
315 | error(1, 0, "ipv6 parse error: %s" , str_addr); |
316 | break; |
317 | default: |
318 | error(1, 0, "illegal domain" ); |
319 | } |
320 | } |
321 | |
322 | static int do_setup_tx(int domain, int type, int protocol) |
323 | { |
324 | int fd; |
325 | |
326 | fd = socket(domain, type, protocol); |
327 | if (fd == -1) |
328 | error(1, errno, "socket t" ); |
329 | |
330 | do_setsockopt(fd, SOL_SOCKET, SO_SNDBUF, val: 1 << 21); |
331 | if (cfg_zerocopy) |
332 | do_setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, val: 1); |
333 | |
334 | if (domain != PF_PACKET && domain != PF_RDS) |
335 | if (connect(fd, (void *) &cfg_dst_addr, cfg_alen)) |
336 | error(1, errno, "connect" ); |
337 | |
338 | if (domain == PF_RDS) { |
339 | if (bind(fd, (void *) &cfg_src_addr, cfg_alen)) |
340 | error(1, errno, "bind" ); |
341 | } |
342 | |
343 | return fd; |
344 | } |
345 | |
346 | static uint32_t do_process_zerocopy_cookies(struct rds_zcopy_cookies *ck) |
347 | { |
348 | int i; |
349 | |
350 | if (ck->num > RDS_MAX_ZCOOKIES) |
351 | error(1, 0, "Returned %d cookies, max expected %d\n" , |
352 | ck->num, RDS_MAX_ZCOOKIES); |
353 | for (i = 0; i < ck->num; i++) |
354 | if (cfg_verbose >= 2) |
355 | fprintf(stderr, "%d\n" , ck->cookies[i]); |
356 | return ck->num; |
357 | } |
358 | |
359 | static bool do_recvmsg_completion(int fd) |
360 | { |
361 | char cmsgbuf[CMSG_SPACE(sizeof(struct rds_zcopy_cookies))]; |
362 | struct rds_zcopy_cookies *ck; |
363 | struct cmsghdr *cmsg; |
364 | struct msghdr msg; |
365 | bool ret = false; |
366 | |
367 | memset(&msg, 0, sizeof(msg)); |
368 | msg.msg_control = cmsgbuf; |
369 | msg.msg_controllen = sizeof(cmsgbuf); |
370 | |
371 | if (recvmsg(fd, &msg, MSG_DONTWAIT)) |
372 | return ret; |
373 | |
374 | if (msg.msg_flags & MSG_CTRUNC) |
375 | error(1, errno, "recvmsg notification: truncated" ); |
376 | |
377 | for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) { |
378 | if (cmsg->cmsg_level == SOL_RDS && |
379 | cmsg->cmsg_type == RDS_CMSG_ZCOPY_COMPLETION) { |
380 | |
381 | ck = (struct rds_zcopy_cookies *)CMSG_DATA(cmsg); |
382 | completions += do_process_zerocopy_cookies(ck); |
383 | ret = true; |
384 | break; |
385 | } |
386 | error(0, 0, "ignoring cmsg at level %d type %d\n" , |
387 | cmsg->cmsg_level, cmsg->cmsg_type); |
388 | } |
389 | return ret; |
390 | } |
391 | |
392 | static bool do_recv_completion(int fd, int domain) |
393 | { |
394 | struct sock_extended_err *serr; |
395 | struct msghdr msg = {}; |
396 | struct cmsghdr *cm; |
397 | uint32_t hi, lo, range; |
398 | int ret, zerocopy; |
399 | char control[100]; |
400 | |
401 | if (domain == PF_RDS) |
402 | return do_recvmsg_completion(fd); |
403 | |
404 | msg.msg_control = control; |
405 | msg.msg_controllen = sizeof(control); |
406 | |
407 | ret = recvmsg(fd, &msg, MSG_ERRQUEUE); |
408 | if (ret == -1 && errno == EAGAIN) |
409 | return false; |
410 | if (ret == -1) |
411 | error(1, errno, "recvmsg notification" ); |
412 | if (msg.msg_flags & MSG_CTRUNC) |
413 | error(1, errno, "recvmsg notification: truncated" ); |
414 | |
415 | cm = CMSG_FIRSTHDR(&msg); |
416 | if (!cm) |
417 | error(1, 0, "cmsg: no cmsg" ); |
418 | if (!((cm->cmsg_level == SOL_IP && cm->cmsg_type == IP_RECVERR) || |
419 | (cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_RECVERR) || |
420 | (cm->cmsg_level == SOL_PACKET && cm->cmsg_type == PACKET_TX_TIMESTAMP))) |
421 | error(1, 0, "serr: wrong type: %d.%d" , |
422 | cm->cmsg_level, cm->cmsg_type); |
423 | |
424 | serr = (void *) CMSG_DATA(cm); |
425 | |
426 | if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) |
427 | error(1, 0, "serr: wrong origin: %u" , serr->ee_origin); |
428 | if (serr->ee_errno != 0) |
429 | error(1, 0, "serr: wrong error code: %u" , serr->ee_errno); |
430 | |
431 | hi = serr->ee_data; |
432 | lo = serr->ee_info; |
433 | range = hi - lo + 1; |
434 | |
435 | /* Detect notification gaps. These should not happen often, if at all. |
436 | * Gaps can occur due to drops, reordering and retransmissions. |
437 | */ |
438 | if (lo != next_completion) |
439 | fprintf(stderr, "gap: %u..%u does not append to %u\n" , |
440 | lo, hi, next_completion); |
441 | next_completion = hi + 1; |
442 | |
443 | zerocopy = !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED); |
444 | if (zerocopied == -1) |
445 | zerocopied = zerocopy; |
446 | else if (zerocopied != zerocopy) { |
447 | fprintf(stderr, "serr: inconsistent\n" ); |
448 | zerocopied = zerocopy; |
449 | } |
450 | |
451 | if (cfg_verbose >= 2) |
452 | fprintf(stderr, "completed: %u (h=%u l=%u)\n" , |
453 | range, hi, lo); |
454 | |
455 | completions += range; |
456 | return true; |
457 | } |
458 | |
/* Drain the notification queue: keep reading completions until
 * do_recv_completion() reports that none is left.
 */
static void do_recv_completions(int fd, int domain)
{
	for (;;) {
		if (!do_recv_completion(fd, domain))
			break;
	}
}
464 | |
465 | /* Wait for all remaining completions on the errqueue */ |
466 | static void do_recv_remaining_completions(int fd, int domain) |
467 | { |
468 | int64_t tstop = gettimeofday_ms() + cfg_waittime_ms; |
469 | |
470 | while (completions < expected_completions && |
471 | gettimeofday_ms() < tstop) { |
472 | if (do_poll(fd, events: domain == PF_RDS ? POLLIN : POLLERR)) |
473 | do_recv_completions(fd, domain); |
474 | } |
475 | |
476 | if (completions < expected_completions) |
477 | fprintf(stderr, "missing notifications: %lu < %lu\n" , |
478 | completions, expected_completions); |
479 | } |
480 | |
481 | static void do_tx(int domain, int type, int protocol) |
482 | { |
483 | struct iovec iov[3] = { {0} }; |
484 | struct sockaddr_ll laddr; |
485 | struct msghdr msg = {0}; |
486 | struct ethhdr eth; |
487 | union { |
488 | struct ipv6hdr ip6h; |
489 | struct iphdr iph; |
490 | } nh; |
491 | uint64_t tstop; |
492 | int fd; |
493 | |
494 | fd = do_setup_tx(domain, type, protocol); |
495 | |
496 | if (domain == PF_PACKET) { |
497 | uint16_t proto = cfg_family == PF_INET ? ETH_P_IP : ETH_P_IPV6; |
498 | |
499 | /* sock_raw passes ll header as data */ |
500 | if (type == SOCK_RAW) { |
501 | memset(eth.h_dest, 0x06, ETH_ALEN); |
502 | memset(eth.h_source, 0x02, ETH_ALEN); |
503 | eth.h_proto = htons(proto); |
504 | iov[0].iov_base = ð |
505 | iov[0].iov_len = sizeof(eth); |
506 | msg.msg_iovlen++; |
507 | } |
508 | |
509 | /* both sock_raw and sock_dgram expect name */ |
510 | memset(&laddr, 0, sizeof(laddr)); |
511 | laddr.sll_family = AF_PACKET; |
512 | laddr.sll_ifindex = cfg_ifindex; |
513 | laddr.sll_protocol = htons(proto); |
514 | laddr.sll_halen = ETH_ALEN; |
515 | |
516 | memset(laddr.sll_addr, 0x06, ETH_ALEN); |
517 | |
518 | msg.msg_name = &laddr; |
519 | msg.msg_namelen = sizeof(laddr); |
520 | } |
521 | |
522 | /* packet and raw sockets with hdrincl must pass network header */ |
523 | if (domain == PF_PACKET || protocol == IPPROTO_RAW) { |
524 | if (cfg_family == PF_INET) |
525 | iov[1].iov_len = setup_iph(iph: &nh.iph, payload_len: cfg_payload_len); |
526 | else |
527 | iov[1].iov_len = setup_ip6h(ip6h: &nh.ip6h, payload_len: cfg_payload_len); |
528 | |
529 | iov[1].iov_base = (void *) &nh; |
530 | msg.msg_iovlen++; |
531 | } |
532 | |
533 | if (domain == PF_RDS) { |
534 | msg.msg_name = &cfg_dst_addr; |
535 | msg.msg_namelen = (cfg_dst_addr.ss_family == AF_INET ? |
536 | sizeof(struct sockaddr_in) : |
537 | sizeof(struct sockaddr_in6)); |
538 | } |
539 | |
540 | iov[2].iov_base = payload; |
541 | iov[2].iov_len = cfg_payload_len; |
542 | msg.msg_iovlen++; |
543 | msg.msg_iov = &iov[3 - msg.msg_iovlen]; |
544 | |
545 | tstop = gettimeofday_ms() + cfg_runtime_ms; |
546 | do { |
547 | if (cfg_cork) |
548 | do_sendmsg_corked(fd, msg: &msg); |
549 | else |
550 | do_sendmsg(fd, msg: &msg, do_zerocopy: cfg_zerocopy, domain); |
551 | |
552 | while (!do_poll(fd, POLLOUT)) { |
553 | if (cfg_zerocopy) |
554 | do_recv_completions(fd, domain); |
555 | } |
556 | |
557 | } while (gettimeofday_ms() < tstop); |
558 | |
559 | if (cfg_zerocopy) |
560 | do_recv_remaining_completions(fd, domain); |
561 | |
562 | if (close(fd)) |
563 | error(1, errno, "close" ); |
564 | |
565 | fprintf(stderr, "tx=%lu (%lu MB) txc=%lu zc=%c\n" , |
566 | packets, bytes >> 20, completions, |
567 | zerocopied == 1 ? 'y' : 'n'); |
568 | } |
569 | |
570 | static int do_setup_rx(int domain, int type, int protocol) |
571 | { |
572 | int fd; |
573 | |
574 | /* If tx over PF_PACKET, rx over PF_INET(6)/SOCK_RAW, |
575 | * to recv the only copy of the packet, not a clone |
576 | */ |
577 | if (domain == PF_PACKET) |
578 | error(1, 0, "Use PF_INET/SOCK_RAW to read" ); |
579 | |
580 | if (type == SOCK_RAW && protocol == IPPROTO_RAW) |
581 | error(1, 0, "IPPROTO_RAW: not supported on Rx" ); |
582 | |
583 | fd = socket(domain, type, protocol); |
584 | if (fd == -1) |
585 | error(1, errno, "socket r" ); |
586 | |
587 | do_setsockopt(fd, SOL_SOCKET, SO_RCVBUF, val: 1 << 21); |
588 | do_setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, val: 1 << 16); |
589 | do_setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, val: 1); |
590 | |
591 | if (bind(fd, (void *) &cfg_dst_addr, cfg_alen)) |
592 | error(1, errno, "bind" ); |
593 | |
594 | if (type == SOCK_STREAM) { |
595 | if (listen(fd, 1)) |
596 | error(1, errno, "listen" ); |
597 | fd = do_accept(fd); |
598 | } |
599 | |
600 | return fd; |
601 | } |
602 | |
603 | /* Flush all outstanding bytes for the tcp receive queue */ |
604 | static void do_flush_tcp(int fd) |
605 | { |
606 | int ret; |
607 | |
608 | /* MSG_TRUNC flushes up to len bytes */ |
609 | ret = recv(fd, NULL, 1 << 21, MSG_TRUNC | MSG_DONTWAIT); |
610 | if (ret == -1 && errno == EAGAIN) |
611 | return; |
612 | if (ret == -1) |
613 | error(1, errno, "flush" ); |
614 | if (!ret) |
615 | return; |
616 | |
617 | packets++; |
618 | bytes += ret; |
619 | } |
620 | |
621 | /* Flush all outstanding datagrams. Verify first few bytes of each. */ |
622 | static void do_flush_datagram(int fd, int type) |
623 | { |
624 | int ret, off = 0; |
625 | char buf[64]; |
626 | |
627 | /* MSG_TRUNC will return full datagram length */ |
628 | ret = recv(fd, buf, sizeof(buf), MSG_DONTWAIT | MSG_TRUNC); |
629 | if (ret == -1 && errno == EAGAIN) |
630 | return; |
631 | |
632 | /* raw ipv4 return with header, raw ipv6 without */ |
633 | if (cfg_family == PF_INET && type == SOCK_RAW) { |
634 | off += sizeof(struct iphdr); |
635 | ret -= sizeof(struct iphdr); |
636 | } |
637 | |
638 | if (ret == -1) |
639 | error(1, errno, "recv" ); |
640 | if (ret != cfg_payload_len) |
641 | error(1, 0, "recv: ret=%u != %u" , ret, cfg_payload_len); |
642 | if (ret > sizeof(buf) - off) |
643 | ret = sizeof(buf) - off; |
644 | if (memcmp(buf + off, payload, ret)) |
645 | error(1, 0, "recv: data mismatch" ); |
646 | |
647 | packets++; |
648 | bytes += cfg_payload_len; |
649 | } |
650 | |
651 | static void do_rx(int domain, int type, int protocol) |
652 | { |
653 | const int cfg_receiver_wait_ms = 400; |
654 | uint64_t tstop; |
655 | int fd; |
656 | |
657 | fd = do_setup_rx(domain, type, protocol); |
658 | |
659 | tstop = gettimeofday_ms() + cfg_runtime_ms + cfg_receiver_wait_ms; |
660 | do { |
661 | if (type == SOCK_STREAM) |
662 | do_flush_tcp(fd); |
663 | else |
664 | do_flush_datagram(fd, type); |
665 | |
666 | do_poll(fd, POLLIN); |
667 | |
668 | } while (gettimeofday_ms() < tstop); |
669 | |
670 | if (close(fd)) |
671 | error(1, errno, "close" ); |
672 | |
673 | fprintf(stderr, "rx=%lu (%lu MB)\n" , packets, bytes >> 20); |
674 | } |
675 | |
676 | static void do_test(int domain, int type, int protocol) |
677 | { |
678 | int i; |
679 | |
680 | if (cfg_cork && (domain == PF_PACKET || type != SOCK_DGRAM)) |
681 | error(1, 0, "can only cork udp sockets" ); |
682 | |
683 | do_setcpu(cpu: cfg_cpu); |
684 | |
685 | for (i = 0; i < IP_MAXPACKET; i++) |
686 | payload[i] = 'a' + (i % 26); |
687 | |
688 | if (cfg_rx) |
689 | do_rx(domain, type, protocol); |
690 | else |
691 | do_tx(domain, type, protocol); |
692 | } |
693 | |
694 | static void usage(const char *filepath) |
695 | { |
696 | error(1, 0, "Usage: %s [options] <test>" , filepath); |
697 | } |
698 | |
699 | static void parse_opts(int argc, char **argv) |
700 | { |
701 | const int max_payload_len = sizeof(payload) - |
702 | sizeof(struct ipv6hdr) - |
703 | sizeof(struct tcphdr) - |
704 | 40 /* max tcp options */; |
705 | int c; |
706 | char *daddr = NULL, *saddr = NULL; |
707 | char *cfg_test; |
708 | |
709 | cfg_payload_len = max_payload_len; |
710 | |
711 | while ((c = getopt(argc, argv, "46c:C:D:i:mp:rs:S:t:vz" )) != -1) { |
712 | switch (c) { |
713 | case '4': |
714 | if (cfg_family != PF_UNSPEC) |
715 | error(1, 0, "Pass one of -4 or -6" ); |
716 | cfg_family = PF_INET; |
717 | cfg_alen = sizeof(struct sockaddr_in); |
718 | break; |
719 | case '6': |
720 | if (cfg_family != PF_UNSPEC) |
721 | error(1, 0, "Pass one of -4 or -6" ); |
722 | cfg_family = PF_INET6; |
723 | cfg_alen = sizeof(struct sockaddr_in6); |
724 | break; |
725 | case 'c': |
726 | cfg_cork = strtol(optarg, NULL, 0); |
727 | break; |
728 | case 'C': |
729 | cfg_cpu = strtol(optarg, NULL, 0); |
730 | break; |
731 | case 'D': |
732 | daddr = optarg; |
733 | break; |
734 | case 'i': |
735 | cfg_ifindex = if_nametoindex(optarg); |
736 | if (cfg_ifindex == 0) |
737 | error(1, errno, "invalid iface: %s" , optarg); |
738 | break; |
739 | case 'm': |
740 | cfg_cork_mixed = true; |
741 | break; |
742 | case 'p': |
743 | cfg_port = strtoul(optarg, NULL, 0); |
744 | break; |
745 | case 'r': |
746 | cfg_rx = true; |
747 | break; |
748 | case 's': |
749 | cfg_payload_len = strtoul(optarg, NULL, 0); |
750 | break; |
751 | case 'S': |
752 | saddr = optarg; |
753 | break; |
754 | case 't': |
755 | cfg_runtime_ms = 200 + strtoul(optarg, NULL, 10) * 1000; |
756 | break; |
757 | case 'v': |
758 | cfg_verbose++; |
759 | break; |
760 | case 'z': |
761 | cfg_zerocopy = true; |
762 | break; |
763 | } |
764 | } |
765 | |
766 | cfg_test = argv[argc - 1]; |
767 | if (strcmp(cfg_test, "rds" ) == 0) { |
768 | if (!daddr) |
769 | error(1, 0, "-D <server addr> required for PF_RDS\n" ); |
770 | if (!cfg_rx && !saddr) |
771 | error(1, 0, "-S <client addr> required for PF_RDS\n" ); |
772 | } |
773 | setup_sockaddr(domain: cfg_family, str_addr: daddr, sockaddr: &cfg_dst_addr); |
774 | setup_sockaddr(domain: cfg_family, str_addr: saddr, sockaddr: &cfg_src_addr); |
775 | |
776 | if (cfg_payload_len > max_payload_len) |
777 | error(1, 0, "-s: payload exceeds max (%d)" , max_payload_len); |
778 | if (cfg_cork_mixed && (!cfg_zerocopy || !cfg_cork)) |
779 | error(1, 0, "-m: cork_mixed requires corking and zerocopy" ); |
780 | |
781 | if (optind != argc - 1) |
782 | usage(filepath: argv[0]); |
783 | } |
784 | |
785 | int main(int argc, char **argv) |
786 | { |
787 | const char *cfg_test; |
788 | |
789 | parse_opts(argc, argv); |
790 | |
791 | cfg_test = argv[argc - 1]; |
792 | |
793 | if (!strcmp(cfg_test, "packet" )) |
794 | do_test(PF_PACKET, type: SOCK_RAW, protocol: 0); |
795 | else if (!strcmp(cfg_test, "packet_dgram" )) |
796 | do_test(PF_PACKET, type: SOCK_DGRAM, protocol: 0); |
797 | else if (!strcmp(cfg_test, "raw" )) |
798 | do_test(domain: cfg_family, type: SOCK_RAW, IPPROTO_EGP); |
799 | else if (!strcmp(cfg_test, "raw_hdrincl" )) |
800 | do_test(domain: cfg_family, type: SOCK_RAW, IPPROTO_RAW); |
801 | else if (!strcmp(cfg_test, "tcp" )) |
802 | do_test(domain: cfg_family, type: SOCK_STREAM, protocol: 0); |
803 | else if (!strcmp(cfg_test, "udp" )) |
804 | do_test(domain: cfg_family, type: SOCK_DGRAM, protocol: 0); |
805 | else if (!strcmp(cfg_test, "rds" )) |
806 | do_test(PF_RDS, type: SOCK_SEQPACKET, protocol: 0); |
807 | else |
808 | error(1, 0, "unknown cfg_test %s" , cfg_test); |
809 | |
810 | return 0; |
811 | } |
812 | |