1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * vsock_perf - benchmark utility for vsock. |
4 | * |
5 | * Copyright (C) 2022 SberDevices. |
6 | * |
7 | * Author: Arseniy Krasnov <AVKrasnov@sberdevices.ru> |
8 | */ |
9 | #include <getopt.h> |
10 | #include <stdio.h> |
11 | #include <stdlib.h> |
12 | #include <stdbool.h> |
13 | #include <string.h> |
14 | #include <errno.h> |
15 | #include <unistd.h> |
16 | #include <time.h> |
17 | #include <stdint.h> |
18 | #include <poll.h> |
19 | #include <sys/socket.h> |
20 | #include <linux/vm_sockets.h> |
21 | #include <sys/mman.h> |
22 | |
23 | #include "msg_zerocopy_common.h" |
24 | |
/* Defaults for the command-line tunables; also printed by usage(). */
#define DEFAULT_BUF_SIZE_BYTES	(128 * 1024)
#define DEFAULT_TO_SEND_BYTES	(64 * 1024)
#define DEFAULT_VSOCK_BUF_BYTES	(256 * 1024)
#define DEFAULT_RCVLOWAT_BYTES	1
#define DEFAULT_PORT		1234

/* Unit conversion factors, typed unsigned long long. */
#define BYTES_PER_GB		(1ULL << 30)
#define NSEC_PER_SEC		(1000ULL * 1000 * 1000)

/* Run-time settings; main() overrides them from the command line. */
static unsigned int port = DEFAULT_PORT;			/* --port */
static unsigned long buf_size_bytes = DEFAULT_BUF_SIZE_BYTES;	/* --buf-size */
static unsigned long vsock_buf_bytes = DEFAULT_VSOCK_BUF_BYTES;	/* --vsk-size */
static bool zerocopy;						/* --zerocopy */
38 | |
/*
 * Report a failed call and terminate.
 *
 * @s: prefix handed to perror(), normally the name of the call that failed.
 *
 * Does not return: the process exits with EXIT_FAILURE.
 */
static void error(const char *s)
{
	perror(s);

	exit(EXIT_FAILURE);
}
44 | |
45 | static time_t current_nsec(void) |
46 | { |
47 | struct timespec ts; |
48 | |
49 | if (clock_gettime(CLOCK_REALTIME, &ts)) |
50 | error(s: "clock_gettime" ); |
51 | |
52 | return (ts.tv_sec * NSEC_PER_SEC) + ts.tv_nsec; |
53 | } |
54 | |
/*
 * Parse a size written as a number with an optional binary-unit suffix.
 * Adapted from lib/cmdline.c.
 *
 * @ptr: NUL-terminated string. The number is parsed by strtoull() with
 *       base 0, so decimal, octal ("0...") and hex ("0x...") are accepted.
 *
 * The first character after the number may be K, M, G, T, P or E (either
 * case) and scales the value by 2^10, 2^20, ... 2^60. Any other trailing
 * text is ignored, and a string with no leading number yields 0.
 *
 * Returns the scaled value. A result that does not fit in an unsigned long
 * saturates to the largest unsigned long instead of silently wrapping
 * around (e.g. "32E" used to come back as 0).
 */
static unsigned long memparse(const char *ptr)
{
	const unsigned long long ullong_max = ~0ULL;
	const unsigned long ulong_max = ~0UL;
	unsigned long long ret;
	unsigned int shift;
	char *endptr;

	ret = strtoull(ptr, &endptr, 0);

	switch (*endptr) {
	case 'E':
	case 'e':
		shift = 60;
		break;
	case 'P':
	case 'p':
		shift = 50;
		break;
	case 'T':
	case 't':
		shift = 40;
		break;
	case 'G':
	case 'g':
		shift = 30;
		break;
	case 'M':
	case 'm':
		shift = 20;
		break;
	case 'K':
	case 'k':
		shift = 10;
		break;
	default:
		shift = 0;
		break;
	}

	/* Scale only when no significant bits would be shifted out. */
	if (ret > (ullong_max >> shift))
		ret = ullong_max;
	else
		ret <<= shift;

	/* unsigned long may be narrower than unsigned long long. */
	if (ret > ulong_max)
		return ulong_max;

	return (unsigned long)ret;
}
88 | |
89 | static void vsock_increase_buf_size(int fd) |
90 | { |
91 | if (setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_MAX_SIZE, |
92 | &vsock_buf_bytes, sizeof(vsock_buf_bytes))) |
93 | error(s: "setsockopt(SO_VM_SOCKETS_BUFFER_MAX_SIZE)" ); |
94 | |
95 | if (setsockopt(fd, AF_VSOCK, SO_VM_SOCKETS_BUFFER_SIZE, |
96 | &vsock_buf_bytes, sizeof(vsock_buf_bytes))) |
97 | error(s: "setsockopt(SO_VM_SOCKETS_BUFFER_SIZE)" ); |
98 | } |
99 | |
/*
 * Open a stream vsock socket and connect it to @cid:@port.
 *
 * Returns the connected descriptor, or -1 after printing the reason with
 * perror() when socket() or connect() fails. The descriptor is closed on
 * a failed connect().
 */
static int vsock_connect(unsigned int cid, unsigned int port)
{
	union {
		struct sockaddr sa;
		struct sockaddr_vm svm;
	} peer = {
		.svm = {
			.svm_family = AF_VSOCK,
			.svm_cid = cid,
			.svm_port = port,
		},
	};
	int sock = socket(AF_VSOCK, SOCK_STREAM, 0);

	if (sock < 0) {
		perror("socket");
		return -1;
	}

	if (connect(sock, &peer.sa, sizeof(peer.svm)) >= 0)
		return sock;

	perror("connect");
	close(sock);

	return -1;
}
129 | |
/*
 * Convert an amount of data and the time it took into a throughput figure.
 *
 * @bits:     number of bits transferred.
 * @ns_delta: duration of the transfer in nanoseconds.
 *
 * Bits per nanosecond is numerically the same as 10^9 bits per second, so
 * the plain quotient already is the rate in Gbit/s. It is computed in double
 * and rounded to float once.
 *
 * Returns 0 when @ns_delta is not positive, rather than dividing by zero
 * and handing "inf"/"nan" to the caller's printf().
 */
static float get_gbps(unsigned long bits, time_t ns_delta)
{
	if (ns_delta <= 0)
		return 0.0f;

	return (float)((double)bits / (double)ns_delta);
}
135 | |
136 | static void run_receiver(unsigned long rcvlowat_bytes) |
137 | { |
138 | unsigned int read_cnt; |
139 | time_t rx_begin_ns; |
140 | time_t in_read_ns; |
141 | size_t total_recv; |
142 | int client_fd; |
143 | char *data; |
144 | int fd; |
145 | union { |
146 | struct sockaddr sa; |
147 | struct sockaddr_vm svm; |
148 | } addr = { |
149 | .svm = { |
150 | .svm_family = AF_VSOCK, |
151 | .svm_port = port, |
152 | .svm_cid = VMADDR_CID_ANY, |
153 | }, |
154 | }; |
155 | union { |
156 | struct sockaddr sa; |
157 | struct sockaddr_vm svm; |
158 | } clientaddr; |
159 | |
160 | socklen_t clientaddr_len = sizeof(clientaddr.svm); |
161 | |
162 | printf("Run as receiver\n" ); |
163 | printf("Listen port %u\n" , port); |
164 | printf("RX buffer %lu bytes\n" , buf_size_bytes); |
165 | printf("vsock buffer %lu bytes\n" , vsock_buf_bytes); |
166 | printf("SO_RCVLOWAT %lu bytes\n" , rcvlowat_bytes); |
167 | |
168 | fd = socket(AF_VSOCK, SOCK_STREAM, 0); |
169 | |
170 | if (fd < 0) |
171 | error(s: "socket" ); |
172 | |
173 | if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) |
174 | error(s: "bind" ); |
175 | |
176 | if (listen(fd, 1) < 0) |
177 | error(s: "listen" ); |
178 | |
179 | client_fd = accept(fd, &clientaddr.sa, &clientaddr_len); |
180 | |
181 | if (client_fd < 0) |
182 | error(s: "accept" ); |
183 | |
184 | vsock_increase_buf_size(fd: client_fd); |
185 | |
186 | if (setsockopt(client_fd, SOL_SOCKET, SO_RCVLOWAT, |
187 | &rcvlowat_bytes, |
188 | sizeof(rcvlowat_bytes))) |
189 | error(s: "setsockopt(SO_RCVLOWAT)" ); |
190 | |
191 | data = malloc(buf_size_bytes); |
192 | |
193 | if (!data) { |
194 | fprintf(stderr, "'malloc()' failed\n" ); |
195 | exit(EXIT_FAILURE); |
196 | } |
197 | |
198 | read_cnt = 0; |
199 | in_read_ns = 0; |
200 | total_recv = 0; |
201 | rx_begin_ns = current_nsec(); |
202 | |
203 | while (1) { |
204 | struct pollfd fds = { 0 }; |
205 | |
206 | fds.fd = client_fd; |
207 | fds.events = POLLIN | POLLERR | |
208 | POLLHUP | POLLRDHUP; |
209 | |
210 | if (poll(&fds, 1, -1) < 0) |
211 | error(s: "poll" ); |
212 | |
213 | if (fds.revents & POLLERR) { |
214 | fprintf(stderr, "'poll()' error\n" ); |
215 | exit(EXIT_FAILURE); |
216 | } |
217 | |
218 | if (fds.revents & POLLIN) { |
219 | ssize_t bytes_read; |
220 | time_t t; |
221 | |
222 | t = current_nsec(); |
223 | bytes_read = read(fds.fd, data, buf_size_bytes); |
224 | in_read_ns += (current_nsec() - t); |
225 | read_cnt++; |
226 | |
227 | if (!bytes_read) |
228 | break; |
229 | |
230 | if (bytes_read < 0) { |
231 | perror("read" ); |
232 | exit(EXIT_FAILURE); |
233 | } |
234 | |
235 | total_recv += bytes_read; |
236 | } |
237 | |
238 | if (fds.revents & (POLLHUP | POLLRDHUP)) |
239 | break; |
240 | } |
241 | |
242 | printf("total bytes received: %zu\n" , total_recv); |
243 | printf("rx performance: %f Gbits/s\n" , |
244 | get_gbps(total_recv * 8, current_nsec() - rx_begin_ns)); |
245 | printf("total time in 'read()': %f sec\n" , (float)in_read_ns / NSEC_PER_SEC); |
246 | printf("average time in 'read()': %f ns\n" , (float)in_read_ns / read_cnt); |
247 | printf("POLLIN wakeups: %i\n" , read_cnt); |
248 | |
249 | free(data); |
250 | close(client_fd); |
251 | close(fd); |
252 | } |
253 | |
254 | static void run_sender(int peer_cid, unsigned long to_send_bytes) |
255 | { |
256 | time_t tx_begin_ns; |
257 | time_t tx_total_ns; |
258 | size_t total_send; |
259 | time_t time_in_send; |
260 | void *data; |
261 | int fd; |
262 | |
263 | if (zerocopy) |
264 | printf("Run as sender MSG_ZEROCOPY\n" ); |
265 | else |
266 | printf("Run as sender\n" ); |
267 | |
268 | printf("Connect to %i:%u\n" , peer_cid, port); |
269 | printf("Send %lu bytes\n" , to_send_bytes); |
270 | printf("TX buffer %lu bytes\n" , buf_size_bytes); |
271 | |
272 | fd = vsock_connect(cid: peer_cid, port); |
273 | |
274 | if (fd < 0) |
275 | exit(EXIT_FAILURE); |
276 | |
277 | if (zerocopy) { |
278 | enable_so_zerocopy(fd); |
279 | |
280 | data = mmap(NULL, buf_size_bytes, PROT_READ | PROT_WRITE, |
281 | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); |
282 | if (data == MAP_FAILED) { |
283 | perror("mmap" ); |
284 | exit(EXIT_FAILURE); |
285 | } |
286 | } else { |
287 | data = malloc(buf_size_bytes); |
288 | |
289 | if (!data) { |
290 | fprintf(stderr, "'malloc()' failed\n" ); |
291 | exit(EXIT_FAILURE); |
292 | } |
293 | } |
294 | |
295 | memset(data, 0, buf_size_bytes); |
296 | total_send = 0; |
297 | time_in_send = 0; |
298 | tx_begin_ns = current_nsec(); |
299 | |
300 | while (total_send < to_send_bytes) { |
301 | ssize_t sent; |
302 | size_t rest_bytes; |
303 | time_t before; |
304 | |
305 | rest_bytes = to_send_bytes - total_send; |
306 | |
307 | before = current_nsec(); |
308 | sent = send(fd, data, (rest_bytes > buf_size_bytes) ? |
309 | buf_size_bytes : rest_bytes, |
310 | zerocopy ? MSG_ZEROCOPY : 0); |
311 | time_in_send += (current_nsec() - before); |
312 | |
313 | if (sent <= 0) |
314 | error(s: "write" ); |
315 | |
316 | total_send += sent; |
317 | |
318 | if (zerocopy) { |
319 | struct pollfd fds = { 0 }; |
320 | |
321 | fds.fd = fd; |
322 | |
323 | if (poll(&fds, 1, -1) < 0) { |
324 | perror("poll" ); |
325 | exit(EXIT_FAILURE); |
326 | } |
327 | |
328 | if (!(fds.revents & POLLERR)) { |
329 | fprintf(stderr, "POLLERR expected\n" ); |
330 | exit(EXIT_FAILURE); |
331 | } |
332 | |
333 | vsock_recv_completion(fd, NULL); |
334 | } |
335 | } |
336 | |
337 | tx_total_ns = current_nsec() - tx_begin_ns; |
338 | |
339 | printf("total bytes sent: %zu\n" , total_send); |
340 | printf("tx performance: %f Gbits/s\n" , |
341 | get_gbps(total_send * 8, time_in_send)); |
342 | printf("total time in tx loop: %f sec\n" , |
343 | (float)tx_total_ns / NSEC_PER_SEC); |
344 | printf("time in 'send()': %f sec\n" , |
345 | (float)time_in_send / NSEC_PER_SEC); |
346 | |
347 | close(fd); |
348 | |
349 | if (zerocopy) |
350 | munmap(data, buf_size_bytes); |
351 | else |
352 | free(data); |
353 | } |
354 | |
/* No short options: every switch is a long option. */
static const char optstring[] = "";

/*
 * Long options for getopt_long(). Fields are { name, has_arg, flag, val };
 * 'flag' is NULL throughout, so getopt_long() returns 'val', which main()
 * switches on.
 */
static const struct option longopts[] = {
	{ "help",     no_argument,       NULL, 'H' },
	{ "sender",   required_argument, NULL, 'S' },
	{ "port",     required_argument, NULL, 'P' },
	{ "bytes",    required_argument, NULL, 'M' },
	{ "buf-size", required_argument, NULL, 'B' },
	{ "vsk-size", required_argument, NULL, 'V' },
	{ "rcvlowat", required_argument, NULL, 'R' },
	{ "zerocopy", no_argument,       NULL, 'Z' },
	{ NULL,       0,                 NULL, 0 },	/* terminator */
};
399 | |
400 | static void usage(void) |
401 | { |
402 | printf("Usage: ./vsock_perf [--help] [options]\n" |
403 | "\n" |
404 | "This is benchmarking utility, to test vsock performance.\n" |
405 | "It runs in two modes: sender or receiver. In sender mode, it\n" |
406 | "connects to the specified CID and starts data transmission.\n" |
407 | "\n" |
408 | "Options:\n" |
409 | " --help This message\n" |
410 | " --sender <cid> Sender mode (receiver default)\n" |
411 | " <cid> of the receiver to connect to\n" |
412 | " --zerocopy Enable zerocopy (for sender mode only)\n" |
413 | " --port <port> Port (default %d)\n" |
414 | " --bytes <bytes>KMG Bytes to send (default %d)\n" |
415 | " --buf-size <bytes>KMG Data buffer size (default %d). In sender mode\n" |
416 | " it is the buffer size, passed to 'write()'. In\n" |
417 | " receiver mode it is the buffer size passed to 'read()'.\n" |
418 | " --vsk-size <bytes>KMG Socket buffer size (default %d)\n" |
419 | " --rcvlowat <bytes>KMG SO_RCVLOWAT value (default %d)\n" |
420 | "\n" , DEFAULT_PORT, DEFAULT_TO_SEND_BYTES, |
421 | DEFAULT_BUF_SIZE_BYTES, DEFAULT_VSOCK_BUF_BYTES, |
422 | DEFAULT_RCVLOWAT_BYTES); |
423 | exit(EXIT_FAILURE); |
424 | } |
425 | |
/*
 * Strictly parse @arg as a base-10 long.
 *
 * @arg: NUL-terminated option argument.
 *
 * The whole string must be a number. An empty string, trailing characters
 * or a value outside the range of long are all treated as a command-line
 * error and end in usage(), which does not return.
 *
 * Returns the parsed value.
 */
static long strtolx(const char *arg)
{
	long value;
	char *end;

	/* strtol() reports overflow only through errno. */
	errno = 0;
	value = strtol(arg, &end, 10);

	/* end == arg: no digits were consumed (e.g. an empty argument). */
	if (end == arg || *end != '\0' || errno == ERANGE)
		usage();

	return value;
}
438 | |
439 | int main(int argc, char **argv) |
440 | { |
441 | unsigned long to_send_bytes = DEFAULT_TO_SEND_BYTES; |
442 | unsigned long rcvlowat_bytes = DEFAULT_RCVLOWAT_BYTES; |
443 | int peer_cid = -1; |
444 | bool sender = false; |
445 | |
446 | while (1) { |
447 | int opt = getopt_long(argc, argv, optstring, longopts, NULL); |
448 | |
449 | if (opt == -1) |
450 | break; |
451 | |
452 | switch (opt) { |
453 | case 'V': /* Peer buffer size. */ |
454 | vsock_buf_bytes = memparse(optarg); |
455 | break; |
456 | case 'R': /* SO_RCVLOWAT value. */ |
457 | rcvlowat_bytes = memparse(optarg); |
458 | break; |
459 | case 'P': /* Port to connect to. */ |
460 | port = strtolx(optarg); |
461 | break; |
462 | case 'M': /* Bytes to send. */ |
463 | to_send_bytes = memparse(optarg); |
464 | break; |
465 | case 'B': /* Size of rx/tx buffer. */ |
466 | buf_size_bytes = memparse(optarg); |
467 | break; |
468 | case 'S': /* Sender mode. CID to connect to. */ |
469 | peer_cid = strtolx(optarg); |
470 | sender = true; |
471 | break; |
472 | case 'H': /* Help. */ |
473 | usage(); |
474 | break; |
475 | case 'Z': /* Zerocopy. */ |
476 | zerocopy = true; |
477 | break; |
478 | default: |
479 | usage(); |
480 | } |
481 | } |
482 | |
483 | if (!sender) |
484 | run_receiver(rcvlowat_bytes); |
485 | else |
486 | run_sender(peer_cid, to_send_bytes); |
487 | |
488 | return 0; |
489 | } |
490 | |