1 | /* SPDX-License-Identifier: MIT */ |
2 | /* based on linux-kernel/tools/testing/selftests/net/msg_zerocopy.c */ |
3 | #include <assert.h> |
4 | #include <errno.h> |
5 | #include <error.h> |
6 | #include <fcntl.h> |
7 | #include <limits.h> |
8 | #include <stdbool.h> |
9 | #include <stdint.h> |
10 | #include <stdio.h> |
11 | #include <stdlib.h> |
12 | #include <string.h> |
13 | #include <unistd.h> |
14 | |
15 | #include <arpa/inet.h> |
16 | #include <linux/errqueue.h> |
17 | #include <linux/if_packet.h> |
18 | #include <linux/io_uring.h> |
19 | #include <linux/ipv6.h> |
20 | #include <linux/socket.h> |
21 | #include <linux/sockios.h> |
22 | #include <net/ethernet.h> |
23 | #include <net/if.h> |
24 | #include <netinet/in.h> |
25 | #include <netinet/ip.h> |
26 | #include <netinet/ip6.h> |
27 | #include <netinet/tcp.h> |
28 | #include <netinet/udp.h> |
29 | #include <sys/ioctl.h> |
30 | #include <sys/mman.h> |
31 | #include <sys/resource.h> |
32 | #include <sys/socket.h> |
33 | #include <sys/stat.h> |
34 | #include <sys/time.h> |
35 | #include <sys/types.h> |
36 | #include <sys/un.h> |
37 | #include <sys/wait.h> |
38 | |
39 | #include <io_uring/mini_liburing.h> |
40 | |
/*
 * Values stored in sqe->user_data by do_tx() so that each completion can be
 * matched back to the kind of request that produced it.
 */
#define NOTIF_TAG 0xfffffffULL	/* not referenced by the code visible in this file */
#define NONZC_TAG 0		/* request prepared with io_uring_prep_send() */
#define ZC_TAG 1		/* request prepared with io_uring_prep_sendzc() */
44 | |
/* Send modes selectable with -m; stored in cfg_mode and acted on in do_tx(). */
enum {
	/* every request is a plain io_uring_prep_send() */
	MODE_NONZC = 0,
	/* every request is an io_uring_prep_sendzc() on the plain payload pointer */
	MODE_ZC = 1,
	/* as MODE_ZC, plus IORING_RECVSEND_FIXED_BUF with the registered buffer */
	MODE_ZC_FIXED = 2,
	/* each request independently picks one of the three modes above (rand() % 3) */
	MODE_MIXED = 3,
};
51 | |
/* Run-time configuration; the defaults below are overridden by parse_opts(). */
static bool cfg_cork = false;		/* -c: set UDP_CORK around each submitted batch */
static int cfg_mode = MODE_ZC_FIXED;	/* -m: one of the MODE_* values */
static int cfg_nr_reqs = 8;		/* -n: requests queued per submit */
static int cfg_family = PF_UNSPEC;	/* -4 / -6: PF_INET or PF_INET6 */
static int cfg_payload_len;		/* -s: bytes per send; parse_opts() defaults it to the maximum */
static int cfg_port = 8000;		/* -p: destination port */
static int cfg_runtime_ms = 4200;	/* -t: given in seconds, stored as seconds * 1000 + 200 */

/* Destination address built by parse_opts() and its length for connect(). */
static socklen_t cfg_alen;
static struct sockaddr_storage cfg_dst_addr;

/*
 * Transmit buffer: filled with a repeating 'a'..'z' pattern by do_test() and
 * registered with the ring as the single fixed buffer in do_tx().
 */
static char payload[IP_MAXPACKET] __attribute__((aligned(4096)));
64 | |
/*
 * Current time of day as reported by gettimeofday(), expressed in
 * milliseconds (sub-millisecond remainder is truncated).
 */
static unsigned long gettimeofday_ms(void)
{
	struct timeval now;

	gettimeofday(&now, NULL);

	return (now.tv_usec / 1000) + (now.tv_sec * 1000);
}
72 | |
/*
 * Set an integer-valued socket option on fd.
 *
 * On failure the program is terminated via error() with exit status 1 and a
 * message naming the level, option and value.
 */
static void do_setsockopt(int fd, int level, int optname, int val)
{
	int rc = setsockopt(fd, level, optname, &val, sizeof(val));

	if (rc != 0)
		error(1, errno, "setsockopt %d.%d: %d", level, optname, val);
}
78 | |
79 | static int do_setup_tx(int domain, int type, int protocol) |
80 | { |
81 | int fd; |
82 | |
83 | fd = socket(domain, type, protocol); |
84 | if (fd == -1) |
85 | error(1, errno, "socket t" ); |
86 | |
87 | do_setsockopt(fd, SOL_SOCKET, SO_SNDBUF, val: 1 << 21); |
88 | |
89 | if (connect(fd, (void *) &cfg_dst_addr, cfg_alen)) |
90 | error(1, errno, "connect" ); |
91 | return fd; |
92 | } |
93 | |
94 | static void do_tx(int domain, int type, int protocol) |
95 | { |
96 | struct io_uring_sqe *sqe; |
97 | struct io_uring_cqe *cqe; |
98 | unsigned long packets = 0, bytes = 0; |
99 | struct io_uring ring; |
100 | struct iovec iov; |
101 | uint64_t tstop; |
102 | int i, fd, ret; |
103 | int compl_cqes = 0; |
104 | |
105 | fd = do_setup_tx(domain, type, protocol); |
106 | |
107 | ret = io_uring_queue_init(512, &ring, 0); |
108 | if (ret) |
109 | error(1, ret, "io_uring: queue init" ); |
110 | |
111 | iov.iov_base = payload; |
112 | iov.iov_len = cfg_payload_len; |
113 | |
114 | ret = io_uring_register_buffers(&ring, &iov, 1); |
115 | if (ret) |
116 | error(1, ret, "io_uring: buffer registration" ); |
117 | |
118 | tstop = gettimeofday_ms() + cfg_runtime_ms; |
119 | do { |
120 | if (cfg_cork) |
121 | do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, val: 1); |
122 | |
123 | for (i = 0; i < cfg_nr_reqs; i++) { |
124 | unsigned zc_flags = 0; |
125 | unsigned buf_idx = 0; |
126 | unsigned mode = cfg_mode; |
127 | unsigned msg_flags = MSG_WAITALL; |
128 | |
129 | if (cfg_mode == MODE_MIXED) |
130 | mode = rand() % 3; |
131 | |
132 | sqe = io_uring_get_sqe(&ring); |
133 | |
134 | if (mode == MODE_NONZC) { |
135 | io_uring_prep_send(sqe, fd, payload, |
136 | cfg_payload_len, msg_flags); |
137 | sqe->user_data = NONZC_TAG; |
138 | } else { |
139 | io_uring_prep_sendzc(sqe, fd, payload, |
140 | cfg_payload_len, |
141 | msg_flags, zc_flags); |
142 | if (mode == MODE_ZC_FIXED) { |
143 | sqe->ioprio |= IORING_RECVSEND_FIXED_BUF; |
144 | sqe->buf_index = buf_idx; |
145 | } |
146 | sqe->user_data = ZC_TAG; |
147 | } |
148 | } |
149 | |
150 | ret = io_uring_submit(&ring); |
151 | if (ret != cfg_nr_reqs) |
152 | error(1, ret, "submit" ); |
153 | |
154 | if (cfg_cork) |
155 | do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, val: 0); |
156 | for (i = 0; i < cfg_nr_reqs; i++) { |
157 | ret = io_uring_wait_cqe(&ring, &cqe); |
158 | if (ret) |
159 | error(1, ret, "wait cqe" ); |
160 | |
161 | if (cqe->user_data != NONZC_TAG && |
162 | cqe->user_data != ZC_TAG) |
163 | error(1, -EINVAL, "invalid cqe->user_data" ); |
164 | |
165 | if (cqe->flags & IORING_CQE_F_NOTIF) { |
166 | if (cqe->flags & IORING_CQE_F_MORE) |
167 | error(1, -EINVAL, "invalid notif flags" ); |
168 | if (compl_cqes <= 0) |
169 | error(1, -EINVAL, "notification mismatch" ); |
170 | compl_cqes--; |
171 | i--; |
172 | io_uring_cqe_seen(&ring); |
173 | continue; |
174 | } |
175 | if (cqe->flags & IORING_CQE_F_MORE) { |
176 | if (cqe->user_data != ZC_TAG) |
177 | error(1, cqe->res, "unexpected F_MORE" ); |
178 | compl_cqes++; |
179 | } |
180 | if (cqe->res >= 0) { |
181 | packets++; |
182 | bytes += cqe->res; |
183 | } else if (cqe->res != -EAGAIN) { |
184 | error(1, cqe->res, "send failed" ); |
185 | } |
186 | io_uring_cqe_seen(&ring); |
187 | } |
188 | } while (gettimeofday_ms() < tstop); |
189 | |
190 | while (compl_cqes) { |
191 | ret = io_uring_wait_cqe(&ring, &cqe); |
192 | if (ret) |
193 | error(1, ret, "wait cqe" ); |
194 | if (cqe->flags & IORING_CQE_F_MORE) |
195 | error(1, -EINVAL, "invalid notif flags" ); |
196 | if (!(cqe->flags & IORING_CQE_F_NOTIF)) |
197 | error(1, -EINVAL, "missing notif flag" ); |
198 | |
199 | io_uring_cqe_seen(&ring); |
200 | compl_cqes--; |
201 | } |
202 | |
203 | fprintf(stderr, "tx=%lu (MB=%lu), tx/s=%lu (MB/s=%lu)\n" , |
204 | packets, bytes >> 20, |
205 | packets / (cfg_runtime_ms / 1000), |
206 | (bytes >> 20) / (cfg_runtime_ms / 1000)); |
207 | |
208 | if (close(fd)) |
209 | error(1, errno, "close" ); |
210 | } |
211 | |
212 | static void do_test(int domain, int type, int protocol) |
213 | { |
214 | int i; |
215 | |
216 | for (i = 0; i < IP_MAXPACKET; i++) |
217 | payload[i] = 'a' + (i % 26); |
218 | do_tx(domain, type, protocol); |
219 | } |
220 | |
/*
 * Print the command-line synopsis for program name @filepath and terminate
 * the process with exit status 1 (via error()); does not return.
 */
static void usage(const char *filepath)
{
	error(1, 0,
	      "Usage: %s (-4|-6) (udp|tcp) -D<dst_ip> "
	      "[-s<payload size>] [-t<time s>] "
	      "[-n<batch>] [-p<port>] [-m<mode>]",
	      filepath);
}
226 | |
227 | static void parse_opts(int argc, char **argv) |
228 | { |
229 | const int max_payload_len = sizeof(payload) - |
230 | sizeof(struct ipv6hdr) - |
231 | sizeof(struct tcphdr) - |
232 | 40 /* max tcp options */; |
233 | struct sockaddr_in6 *addr6 = (void *) &cfg_dst_addr; |
234 | struct sockaddr_in *addr4 = (void *) &cfg_dst_addr; |
235 | char *daddr = NULL; |
236 | int c; |
237 | |
238 | if (argc <= 1) |
239 | usage(filepath: argv[0]); |
240 | cfg_payload_len = max_payload_len; |
241 | |
242 | while ((c = getopt(argc, argv, "46D:p:s:t:n:c:m:" )) != -1) { |
243 | switch (c) { |
244 | case '4': |
245 | if (cfg_family != PF_UNSPEC) |
246 | error(1, 0, "Pass one of -4 or -6" ); |
247 | cfg_family = PF_INET; |
248 | cfg_alen = sizeof(struct sockaddr_in); |
249 | break; |
250 | case '6': |
251 | if (cfg_family != PF_UNSPEC) |
252 | error(1, 0, "Pass one of -4 or -6" ); |
253 | cfg_family = PF_INET6; |
254 | cfg_alen = sizeof(struct sockaddr_in6); |
255 | break; |
256 | case 'D': |
257 | daddr = optarg; |
258 | break; |
259 | case 'p': |
260 | cfg_port = strtoul(optarg, NULL, 0); |
261 | break; |
262 | case 's': |
263 | cfg_payload_len = strtoul(optarg, NULL, 0); |
264 | break; |
265 | case 't': |
266 | cfg_runtime_ms = 200 + strtoul(optarg, NULL, 10) * 1000; |
267 | break; |
268 | case 'n': |
269 | cfg_nr_reqs = strtoul(optarg, NULL, 0); |
270 | break; |
271 | case 'c': |
272 | cfg_cork = strtol(optarg, NULL, 0); |
273 | break; |
274 | case 'm': |
275 | cfg_mode = strtol(optarg, NULL, 0); |
276 | break; |
277 | } |
278 | } |
279 | |
280 | switch (cfg_family) { |
281 | case PF_INET: |
282 | memset(addr4, 0, sizeof(*addr4)); |
283 | addr4->sin_family = AF_INET; |
284 | addr4->sin_port = htons(cfg_port); |
285 | if (daddr && |
286 | inet_pton(AF_INET, daddr, &(addr4->sin_addr)) != 1) |
287 | error(1, 0, "ipv4 parse error: %s" , daddr); |
288 | break; |
289 | case PF_INET6: |
290 | memset(addr6, 0, sizeof(*addr6)); |
291 | addr6->sin6_family = AF_INET6; |
292 | addr6->sin6_port = htons(cfg_port); |
293 | if (daddr && |
294 | inet_pton(AF_INET6, daddr, &(addr6->sin6_addr)) != 1) |
295 | error(1, 0, "ipv6 parse error: %s" , daddr); |
296 | break; |
297 | default: |
298 | error(1, 0, "illegal domain" ); |
299 | } |
300 | |
301 | if (cfg_payload_len > max_payload_len) |
302 | error(1, 0, "-s: payload exceeds max (%d)" , max_payload_len); |
303 | if (optind != argc - 1) |
304 | usage(filepath: argv[0]); |
305 | } |
306 | |
307 | int main(int argc, char **argv) |
308 | { |
309 | const char *cfg_test = argv[argc - 1]; |
310 | |
311 | parse_opts(argc, argv); |
312 | |
313 | if (!strcmp(cfg_test, "tcp" )) |
314 | do_test(domain: cfg_family, type: SOCK_STREAM, protocol: 0); |
315 | else if (!strcmp(cfg_test, "udp" )) |
316 | do_test(domain: cfg_family, type: SOCK_DGRAM, protocol: 0); |
317 | else |
318 | error(1, 0, "unknown cfg_test %s" , cfg_test); |
319 | return 0; |
320 | } |
321 | |