1 | // SPDX-License-Identifier: GPL-2.0 |
2 | #define _GNU_SOURCE |
3 | #include <stdio.h> |
4 | #include <stdlib.h> |
5 | #include <string.h> |
6 | #include <sys/syscall.h> |
7 | #include <time.h> |
8 | #include <signal.h> |
9 | #include <setjmp.h> |
10 | #include <sys/mman.h> |
11 | #include <sys/utsname.h> |
12 | #include <sys/wait.h> |
13 | #include <sys/stat.h> |
14 | #include <fcntl.h> |
15 | #include <inttypes.h> |
16 | #include <sched.h> |
17 | |
18 | #include <sys/uio.h> |
19 | #include <linux/io_uring.h> |
20 | #include "../kselftest.h" |
21 | |
22 | #ifndef __x86_64__ |
23 | # error This test is 64-bit only |
24 | #endif |
25 | |
/* LAM modes; the values mirror the definitions used by the kernel */
#define LAM_NONE		0
#define LAM_U57_BITS		6

/* Tag field used by LAM_U57: six bits starting at bit 57 */
#define LAM_U57_MASK		(0x3fULL << 57)

/* arch_prctl() sub-commands used by the LAM tests */
#define ARCH_GET_UNTAG_MASK	0x4001
#define ARCH_ENABLE_TAGGED_ADDR	0x4002
#define ARCH_GET_MAX_TAG_BITS	0x4003
#define ARCH_FORCE_TAGGED_SVA	0x4004

/* One bit per test group; combined into the value given to -t */
#define FUNC_MALLOC		0x1
#define FUNC_BITS		0x2
#define FUNC_MMAP		0x4
#define FUNC_SYSCALL		0x8
#define FUNC_URING		0x10
#define FUNC_INHERITE		0x20
#define FUNC_PASID		0x40

/* Every test group selected */
#define TEST_MASK		0x7f

/* Fixed addresses passed to mmap() by the MMAP cases */
#define LOW_ADDR		(0x1UL << 30)
#define HIGH_ADDR		(0x3UL << 48)

/* Size of the buffer allocated by the malloc test */
#define MALLOC_LEN		32

#define PAGE_SIZE		(4 << 10)

/* Stack size for the threads created with clone() */
#define STACK_SIZE		65536

/* Compiler-level memory barrier (emits no instruction) */
#define barrier() ({						\
	__asm__ __volatile__("" : : : "memory");		\
})

/* io_uring test: queue depth and size of each iovec buffer */
#define URING_QUEUE_SZ		1
#define URING_BLOCK_SZ		2048

/* PASID test: identifiers of the three steps that can be sequenced */
#define LAM_CMD_BIT		0x1
#define PAS_CMD_BIT		0x2
#define SVA_CMD_BIT		0x4

/* Pack three step identifiers four bits apart; cmd1 sits in the lowest nibble */
#define PAS_CMD(cmd1, cmd2, cmd3) (((cmd3) << 8) | ((cmd2) << 4) | ((cmd1) << 0))
70 | |
/* Description of a single test case executed by run_test() */
struct testcases {
	/* non-zero: enable LAM after the tested action instead of before it */
	unsigned int later;
	/* value test_func must return: 0 pass, 1 other error, 2 SIGSEGV */
	int expected;
	/* LAM mode (tag bits) the case works with */
	unsigned long lam;
	/* address handed to mmap() by the MMAP cases */
	uint64_t addr;
	/* packed PAS_CMD() step sequence for the PASID cases */
	uint64_t cmd;
	/* handler implementing the case */
	int (*test_func)(struct testcases *test);
	/* text reported with the result */
	const char *msg;
};
80 | |
/*
 * Per-request context stored in the io_uring user_data field: the file
 * being read, its size, and one iovec for every URING_BLOCK_SZ block.
 */
struct file_io {
	int file_fd;		/* descriptor of the source file */
	off_t file_sz;		/* size of the source file in bytes */
	struct iovec iovecs[];	/* destination buffers, one per block */
};
87 | |
/*
 * User-space view of one io_uring ring (submission or completion).
 * The pointers are filled in by mmap_io_uring() from the ring mapping.
 */
struct io_uring_queue {
	unsigned int *head;
	unsigned int *tail;
	unsigned int *ring_mask;
	unsigned int *ring_entries;
	unsigned int *flags;
	unsigned int *array;
	/* entry storage: cqes for the completion ring, sqes for submission */
	union {
		struct io_uring_cqe *cqes;
		struct io_uring_sqe *sqes;
	} queue;
	/* length in bytes of the ring mapping */
	size_t ring_sz;
};
101 | |
102 | struct io_ring { |
103 | int ring_fd; |
104 | struct io_uring_queue sq_ring; |
105 | struct io_uring_queue cq_ring; |
106 | }; |
107 | |
/* Number of cases started by run_test(); reported as the test plan */
int tests_cnt;
/* Jump target used by segv_handler() to get out of a faulting access */
jmp_buf segv_env;
110 | |
111 | static void segv_handler(int sig) |
112 | { |
113 | ksft_print_msg(msg: "Get segmentation fault(%d)." , sig); |
114 | |
115 | siglongjmp(segv_env, 1); |
116 | } |
117 | |
/* Check the LAM feature bit in CPUID.(EAX=07H, ECX=01H):EAX.[bit 26] */
static inline int cpu_has_lam(void)
{
	unsigned int eax, ebx, ecx, edx;

	__cpuid_count(0x7, 1, eax, ebx, ecx, edx);

	/* non-zero (the raw bit) when the feature is reported */
	return (eax & (1 << 26));
}
126 | |
/* Check the 5-level paging bit in CPUID.(EAX=07H, ECX=00H):ECX.[bit 16] */
static inline int cpu_has_la57(void)
{
	unsigned int eax, ebx, ecx, edx;

	__cpuid_count(0x7, 0, eax, ebx, ecx, edx);

	/* non-zero (the raw bit) when the feature is reported */
	return (ecx & (1 << 16));
}
136 | |
137 | /* |
138 | * Set tagged address and read back untag mask. |
139 | * check if the untagged mask is expected. |
140 | * |
141 | * @return: |
142 | * 0: Set LAM mode successfully |
143 | * others: failed to set LAM |
144 | */ |
145 | static int set_lam(unsigned long lam) |
146 | { |
147 | int ret = 0; |
148 | uint64_t ptr = 0; |
149 | |
150 | if (lam != LAM_U57_BITS && lam != LAM_NONE) |
151 | return -1; |
152 | |
153 | /* Skip check return */ |
154 | syscall(SYS_arch_prctl, ARCH_ENABLE_TAGGED_ADDR, lam); |
155 | |
156 | /* Get untagged mask */ |
157 | syscall(SYS_arch_prctl, ARCH_GET_UNTAG_MASK, &ptr); |
158 | |
159 | /* Check mask returned is expected */ |
160 | if (lam == LAM_U57_BITS) |
161 | ret = (ptr != ~(LAM_U57_MASK)); |
162 | else if (lam == LAM_NONE) |
163 | ret = (ptr != -1ULL); |
164 | |
165 | return ret; |
166 | } |
167 | |
168 | static unsigned long get_default_tag_bits(void) |
169 | { |
170 | pid_t pid; |
171 | int lam = LAM_NONE; |
172 | int ret = 0; |
173 | |
174 | pid = fork(); |
175 | if (pid < 0) { |
176 | perror("Fork failed." ); |
177 | } else if (pid == 0) { |
178 | /* Set LAM mode in child process */ |
179 | if (set_lam(LAM_U57_BITS) == 0) |
180 | lam = LAM_U57_BITS; |
181 | else |
182 | lam = LAM_NONE; |
183 | exit(lam); |
184 | } else { |
185 | wait(&ret); |
186 | lam = WEXITSTATUS(ret); |
187 | } |
188 | |
189 | return lam; |
190 | } |
191 | |
192 | /* |
193 | * Set tagged address and read back untag mask. |
194 | * check if the untag mask is expected. |
195 | */ |
196 | static int get_lam(void) |
197 | { |
198 | uint64_t ptr = 0; |
199 | int ret = -1; |
200 | /* Get untagged mask */ |
201 | if (syscall(SYS_arch_prctl, ARCH_GET_UNTAG_MASK, &ptr) == -1) |
202 | return -1; |
203 | |
204 | /* Check mask returned is expected */ |
205 | if (ptr == ~(LAM_U57_MASK)) |
206 | ret = LAM_U57_BITS; |
207 | else if (ptr == -1ULL) |
208 | ret = LAM_NONE; |
209 | |
210 | |
211 | return ret; |
212 | } |
213 | |
214 | /* According to LAM mode, set metadata in high bits */ |
215 | static uint64_t set_metadata(uint64_t src, unsigned long lam) |
216 | { |
217 | uint64_t metadata; |
218 | |
219 | srand(time(NULL)); |
220 | |
221 | switch (lam) { |
222 | case LAM_U57_BITS: /* Set metadata in bits 62:57 */ |
223 | /* Get a random non-zero value as metadata */ |
224 | metadata = (rand() % ((1UL << LAM_U57_BITS) - 1) + 1) << 57; |
225 | metadata |= (src & ~(LAM_U57_MASK)); |
226 | break; |
227 | default: |
228 | metadata = src; |
229 | break; |
230 | } |
231 | |
232 | return metadata; |
233 | } |
234 | |
/*
 * Write through a tagged alias of @src and check that the plain pointer
 * observes the same bytes, i.e. both refer to the same memory.
 *
 * @return:
 * 0: both pointers show the same string (or no tag was applied)
 * 1: the contents differ
 */
static int handle_lam_test(void *src, unsigned int lam)
{
	char *plain = src;
	char *tagged;

	strcpy(plain, "USER POINTER");

	tagged = (char *)set_metadata((uint64_t)src, lam);
	if (tagged == plain)
		return 0;

	/* This store is the access that faults when tags are not ignored */
	strcpy(tagged, "METADATA POINTER");

	return strcmp(plain, tagged) != 0;
}
258 | |
259 | |
260 | int handle_max_bits(struct testcases *test) |
261 | { |
262 | unsigned long exp_bits = get_default_tag_bits(); |
263 | unsigned long bits = 0; |
264 | |
265 | if (exp_bits != LAM_NONE) |
266 | exp_bits = LAM_U57_BITS; |
267 | |
268 | /* Get LAM max tag bits */ |
269 | if (syscall(SYS_arch_prctl, ARCH_GET_MAX_TAG_BITS, &bits) == -1) |
270 | return 1; |
271 | |
272 | return (exp_bits != bits); |
273 | } |
274 | |
275 | /* |
276 | * Test lam feature through dereference pointer get from malloc. |
277 | * @return 0: Pass test. 1: Get failure during test 2: Get SIGSEGV |
278 | */ |
279 | static int handle_malloc(struct testcases *test) |
280 | { |
281 | char *ptr = NULL; |
282 | int ret = 0; |
283 | |
284 | if (test->later == 0 && test->lam != 0) |
285 | if (set_lam(test->lam) == -1) |
286 | return 1; |
287 | |
288 | ptr = (char *)malloc(MALLOC_LEN); |
289 | if (ptr == NULL) { |
290 | perror("malloc() failure\n" ); |
291 | return 1; |
292 | } |
293 | |
294 | /* Set signal handler */ |
295 | if (sigsetjmp(segv_env, 1) == 0) { |
296 | signal(SIGSEGV, segv_handler); |
297 | ret = handle_lam_test(src: ptr, lam: test->lam); |
298 | } else { |
299 | ret = 2; |
300 | } |
301 | |
302 | if (test->later != 0 && test->lam != 0) |
303 | if (set_lam(test->lam) == -1 && ret == 0) |
304 | ret = 1; |
305 | |
306 | free(ptr); |
307 | |
308 | return ret; |
309 | } |
310 | |
311 | static int handle_mmap(struct testcases *test) |
312 | { |
313 | void *ptr; |
314 | unsigned int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED; |
315 | int ret = 0; |
316 | |
317 | if (test->later == 0 && test->lam != 0) |
318 | if (set_lam(test->lam) != 0) |
319 | return 1; |
320 | |
321 | ptr = mmap((void *)test->addr, PAGE_SIZE, PROT_READ | PROT_WRITE, |
322 | flags, -1, 0); |
323 | if (ptr == MAP_FAILED) { |
324 | if (test->addr == HIGH_ADDR) |
325 | if (!cpu_has_la57()) |
326 | return 3; /* unsupport LA57 */ |
327 | return 1; |
328 | } |
329 | |
330 | if (test->later != 0 && test->lam != 0) |
331 | if (set_lam(test->lam) != 0) |
332 | ret = 1; |
333 | |
334 | if (ret == 0) { |
335 | if (sigsetjmp(segv_env, 1) == 0) { |
336 | signal(SIGSEGV, segv_handler); |
337 | ret = handle_lam_test(src: ptr, lam: test->lam); |
338 | } else { |
339 | ret = 2; |
340 | } |
341 | } |
342 | |
343 | munmap(ptr, PAGE_SIZE); |
344 | return ret; |
345 | } |
346 | |
347 | static int handle_syscall(struct testcases *test) |
348 | { |
349 | struct utsname unme, *pu; |
350 | int ret = 0; |
351 | |
352 | if (test->later == 0 && test->lam != 0) |
353 | if (set_lam(test->lam) != 0) |
354 | return 1; |
355 | |
356 | if (sigsetjmp(segv_env, 1) == 0) { |
357 | signal(SIGSEGV, segv_handler); |
358 | pu = (struct utsname *)set_metadata(src: (uint64_t)&unme, lam: test->lam); |
359 | ret = uname(pu); |
360 | if (ret < 0) |
361 | ret = 1; |
362 | } else { |
363 | ret = 2; |
364 | } |
365 | |
366 | if (test->later != 0 && test->lam != 0) |
367 | if (set_lam(test->lam) != -1 && ret == 0) |
368 | ret = 1; |
369 | |
370 | return ret; |
371 | } |
372 | |
/* Raw io_uring_setup(2) wrapper; returns the ring fd or -1 on error */
int sys_uring_setup(unsigned int entries, struct io_uring_params *p)
{
	long fd = syscall(__NR_io_uring_setup, entries, p);

	return (int)fd;
}
377 | |
/*
 * Raw io_uring_enter(2) wrapper without a signal mask.
 * @to:  number of entries to submit
 * @min: number of completions to wait for
 */
int sys_uring_enter(int fd, unsigned int to, unsigned int min, unsigned int flags)
{
	long rc = syscall(__NR_io_uring_enter, fd, to, min, flags, NULL, 0);

	return (int)rc;
}
382 | |
383 | /* Init submission queue and completion queue */ |
384 | int mmap_io_uring(struct io_uring_params p, struct io_ring *s) |
385 | { |
386 | struct io_uring_queue *sring = &s->sq_ring; |
387 | struct io_uring_queue *cring = &s->cq_ring; |
388 | |
389 | sring->ring_sz = p.sq_off.array + p.sq_entries * sizeof(unsigned int); |
390 | cring->ring_sz = p.cq_off.cqes + p.cq_entries * sizeof(struct io_uring_cqe); |
391 | |
392 | if (p.features & IORING_FEAT_SINGLE_MMAP) { |
393 | if (cring->ring_sz > sring->ring_sz) |
394 | sring->ring_sz = cring->ring_sz; |
395 | |
396 | cring->ring_sz = sring->ring_sz; |
397 | } |
398 | |
399 | void *sq_ptr = mmap(0, sring->ring_sz, PROT_READ | PROT_WRITE, |
400 | MAP_SHARED | MAP_POPULATE, s->ring_fd, |
401 | IORING_OFF_SQ_RING); |
402 | |
403 | if (sq_ptr == MAP_FAILED) { |
404 | perror("sub-queue!" ); |
405 | return 1; |
406 | } |
407 | |
408 | void *cq_ptr = sq_ptr; |
409 | |
410 | if (!(p.features & IORING_FEAT_SINGLE_MMAP)) { |
411 | cq_ptr = mmap(0, cring->ring_sz, PROT_READ | PROT_WRITE, |
412 | MAP_SHARED | MAP_POPULATE, s->ring_fd, |
413 | IORING_OFF_CQ_RING); |
414 | if (cq_ptr == MAP_FAILED) { |
415 | perror("cpl-queue!" ); |
416 | munmap(sq_ptr, sring->ring_sz); |
417 | return 1; |
418 | } |
419 | } |
420 | |
421 | sring->head = sq_ptr + p.sq_off.head; |
422 | sring->tail = sq_ptr + p.sq_off.tail; |
423 | sring->ring_mask = sq_ptr + p.sq_off.ring_mask; |
424 | sring->ring_entries = sq_ptr + p.sq_off.ring_entries; |
425 | sring->flags = sq_ptr + p.sq_off.flags; |
426 | sring->array = sq_ptr + p.sq_off.array; |
427 | |
428 | /* Map a queue as mem map */ |
429 | s->sq_ring.queue.sqes = mmap(0, p.sq_entries * sizeof(struct io_uring_sqe), |
430 | PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, |
431 | s->ring_fd, IORING_OFF_SQES); |
432 | if (s->sq_ring.queue.sqes == MAP_FAILED) { |
433 | munmap(sq_ptr, sring->ring_sz); |
434 | if (sq_ptr != cq_ptr) { |
435 | ksft_print_msg(msg: "failed to mmap uring queue!" ); |
436 | munmap(cq_ptr, cring->ring_sz); |
437 | return 1; |
438 | } |
439 | } |
440 | |
441 | cring->head = cq_ptr + p.cq_off.head; |
442 | cring->tail = cq_ptr + p.cq_off.tail; |
443 | cring->ring_mask = cq_ptr + p.cq_off.ring_mask; |
444 | cring->ring_entries = cq_ptr + p.cq_off.ring_entries; |
445 | cring->queue.cqes = cq_ptr + p.cq_off.cqes; |
446 | |
447 | return 0; |
448 | } |
449 | |
450 | /* Init io_uring queues */ |
451 | int setup_io_uring(struct io_ring *s) |
452 | { |
453 | struct io_uring_params para; |
454 | |
455 | memset(¶, 0, sizeof(para)); |
456 | s->ring_fd = sys_uring_setup(URING_QUEUE_SZ, p: ¶); |
457 | if (s->ring_fd < 0) |
458 | return 1; |
459 | |
460 | return mmap_io_uring(p: para, s); |
461 | } |
462 | |
463 | /* |
464 | * Get data from completion queue. the data buffer saved the file data |
465 | * return 0: success; others: error; |
466 | */ |
467 | int handle_uring_cq(struct io_ring *s) |
468 | { |
469 | struct file_io *fi = NULL; |
470 | struct io_uring_queue *cring = &s->cq_ring; |
471 | struct io_uring_cqe *cqe; |
472 | unsigned int head; |
473 | off_t len = 0; |
474 | |
475 | head = *cring->head; |
476 | |
477 | do { |
478 | barrier(); |
479 | if (head == *cring->tail) |
480 | break; |
481 | /* Get the entry */ |
482 | cqe = &cring->queue.cqes[head & *s->cq_ring.ring_mask]; |
483 | fi = (struct file_io *)cqe->user_data; |
484 | if (cqe->res < 0) |
485 | break; |
486 | |
487 | int blocks = (int)(fi->file_sz + URING_BLOCK_SZ - 1) / URING_BLOCK_SZ; |
488 | |
489 | for (int i = 0; i < blocks; i++) |
490 | len += fi->iovecs[i].iov_len; |
491 | |
492 | head++; |
493 | } while (1); |
494 | |
495 | *cring->head = head; |
496 | barrier(); |
497 | |
498 | return (len != fi->file_sz); |
499 | } |
500 | |
501 | /* |
502 | * Submit squeue. specify via IORING_OP_READV. |
503 | * the buffer need to be set metadata according to LAM mode |
504 | */ |
505 | int handle_uring_sq(struct io_ring *ring, struct file_io *fi, unsigned long lam) |
506 | { |
507 | int file_fd = fi->file_fd; |
508 | struct io_uring_queue *sring = &ring->sq_ring; |
509 | unsigned int index = 0, cur_block = 0, tail = 0, next_tail = 0; |
510 | struct io_uring_sqe *sqe; |
511 | |
512 | off_t remain = fi->file_sz; |
513 | int blocks = (int)(remain + URING_BLOCK_SZ - 1) / URING_BLOCK_SZ; |
514 | |
515 | while (remain) { |
516 | off_t bytes = remain; |
517 | void *buf; |
518 | |
519 | if (bytes > URING_BLOCK_SZ) |
520 | bytes = URING_BLOCK_SZ; |
521 | |
522 | fi->iovecs[cur_block].iov_len = bytes; |
523 | |
524 | if (posix_memalign(&buf, URING_BLOCK_SZ, URING_BLOCK_SZ)) |
525 | return 1; |
526 | |
527 | fi->iovecs[cur_block].iov_base = (void *)set_metadata(src: (uint64_t)buf, lam); |
528 | remain -= bytes; |
529 | cur_block++; |
530 | } |
531 | |
532 | next_tail = *sring->tail; |
533 | tail = next_tail; |
534 | next_tail++; |
535 | |
536 | barrier(); |
537 | |
538 | index = tail & *ring->sq_ring.ring_mask; |
539 | |
540 | sqe = &ring->sq_ring.queue.sqes[index]; |
541 | sqe->fd = file_fd; |
542 | sqe->flags = 0; |
543 | sqe->opcode = IORING_OP_READV; |
544 | sqe->addr = (unsigned long)fi->iovecs; |
545 | sqe->len = blocks; |
546 | sqe->off = 0; |
547 | sqe->user_data = (uint64_t)fi; |
548 | |
549 | sring->array[index] = index; |
550 | tail = next_tail; |
551 | |
552 | if (*sring->tail != tail) { |
553 | *sring->tail = tail; |
554 | barrier(); |
555 | } |
556 | |
557 | if (sys_uring_enter(fd: ring->ring_fd, to: 1, min: 1, IORING_ENTER_GETEVENTS) < 0) |
558 | return 1; |
559 | |
560 | return 0; |
561 | } |
562 | |
563 | /* |
564 | * Test LAM in async I/O and io_uring, read current binery through io_uring |
565 | * Set metadata in pointers to iovecs buffer. |
566 | */ |
567 | int do_uring(unsigned long lam) |
568 | { |
569 | struct io_ring *ring; |
570 | struct file_io *fi; |
571 | struct stat st; |
572 | int ret = 1; |
573 | char path[PATH_MAX] = {0}; |
574 | |
575 | /* get current process path */ |
576 | if (readlink("/proc/self/exe" , path, PATH_MAX - 1) <= 0) |
577 | return 1; |
578 | |
579 | int file_fd = open(path, O_RDONLY); |
580 | |
581 | if (file_fd < 0) |
582 | return 1; |
583 | |
584 | if (fstat(file_fd, &st) < 0) |
585 | return 1; |
586 | |
587 | off_t file_sz = st.st_size; |
588 | |
589 | int blocks = (int)(file_sz + URING_BLOCK_SZ - 1) / URING_BLOCK_SZ; |
590 | |
591 | fi = malloc(sizeof(*fi) + sizeof(struct iovec) * blocks); |
592 | if (!fi) |
593 | return 1; |
594 | |
595 | fi->file_sz = file_sz; |
596 | fi->file_fd = file_fd; |
597 | |
598 | ring = malloc(sizeof(*ring)); |
599 | if (!ring) |
600 | return 1; |
601 | |
602 | memset(ring, 0, sizeof(struct io_ring)); |
603 | |
604 | if (setup_io_uring(ring)) |
605 | goto out; |
606 | |
607 | if (handle_uring_sq(ring, fi, lam)) |
608 | goto out; |
609 | |
610 | ret = handle_uring_cq(s: ring); |
611 | |
612 | out: |
613 | free(ring); |
614 | |
615 | for (int i = 0; i < blocks; i++) { |
616 | if (fi->iovecs[i].iov_base) { |
617 | uint64_t addr = ((uint64_t)fi->iovecs[i].iov_base); |
618 | |
619 | switch (lam) { |
620 | case LAM_U57_BITS: /* Clear bits 62:57 */ |
621 | addr = (addr & ~(LAM_U57_MASK)); |
622 | break; |
623 | } |
624 | free((void *)addr); |
625 | fi->iovecs[i].iov_base = NULL; |
626 | } |
627 | } |
628 | |
629 | free(fi); |
630 | |
631 | return ret; |
632 | } |
633 | |
634 | int handle_uring(struct testcases *test) |
635 | { |
636 | int ret = 0; |
637 | |
638 | if (test->later == 0 && test->lam != 0) |
639 | if (set_lam(test->lam) != 0) |
640 | return 1; |
641 | |
642 | if (sigsetjmp(segv_env, 1) == 0) { |
643 | signal(SIGSEGV, segv_handler); |
644 | ret = do_uring(lam: test->lam); |
645 | } else { |
646 | ret = 2; |
647 | } |
648 | |
649 | return ret; |
650 | } |
651 | |
652 | static int fork_test(struct testcases *test) |
653 | { |
654 | int ret, child_ret; |
655 | pid_t pid; |
656 | |
657 | pid = fork(); |
658 | if (pid < 0) { |
659 | perror("Fork failed." ); |
660 | ret = 1; |
661 | } else if (pid == 0) { |
662 | ret = test->test_func(test); |
663 | exit(ret); |
664 | } else { |
665 | wait(&child_ret); |
666 | ret = WEXITSTATUS(child_ret); |
667 | } |
668 | |
669 | return ret; |
670 | } |
671 | |
672 | static int handle_execve(struct testcases *test) |
673 | { |
674 | int ret, child_ret; |
675 | int lam = test->lam; |
676 | pid_t pid; |
677 | |
678 | pid = fork(); |
679 | if (pid < 0) { |
680 | perror("Fork failed." ); |
681 | ret = 1; |
682 | } else if (pid == 0) { |
683 | char path[PATH_MAX] = {0}; |
684 | |
685 | /* Set LAM mode in parent process */ |
686 | if (set_lam(lam) != 0) |
687 | return 1; |
688 | |
689 | /* Get current binary's path and the binary was run by execve */ |
690 | if (readlink("/proc/self/exe" , path, PATH_MAX - 1) <= 0) |
691 | exit(-1); |
692 | |
693 | /* run binary to get LAM mode and return to parent process */ |
694 | if (execlp(path, path, "-t 0x0" , NULL) < 0) { |
695 | perror("error on exec" ); |
696 | exit(-1); |
697 | } |
698 | } else { |
699 | wait(&child_ret); |
700 | ret = WEXITSTATUS(child_ret); |
701 | if (ret != LAM_NONE) |
702 | return 1; |
703 | } |
704 | |
705 | return 0; |
706 | } |
707 | |
708 | static int handle_inheritance(struct testcases *test) |
709 | { |
710 | int ret, child_ret; |
711 | int lam = test->lam; |
712 | pid_t pid; |
713 | |
714 | /* Set LAM mode in parent process */ |
715 | if (set_lam(lam) != 0) |
716 | return 1; |
717 | |
718 | pid = fork(); |
719 | if (pid < 0) { |
720 | perror("Fork failed." ); |
721 | return 1; |
722 | } else if (pid == 0) { |
723 | /* Set LAM mode in parent process */ |
724 | int child_lam = get_lam(); |
725 | |
726 | exit(child_lam); |
727 | } else { |
728 | wait(&child_ret); |
729 | ret = WEXITSTATUS(child_ret); |
730 | |
731 | if (lam != ret) |
732 | return 1; |
733 | } |
734 | |
735 | return 0; |
736 | } |
737 | |
/* clone() entry point: the thread's exit status is its observed LAM mode */
static int thread_fn_get_lam(void *arg)
{
	int mode = get_lam();

	return mode;
}
742 | |
743 | static int thread_fn_set_lam(void *arg) |
744 | { |
745 | struct testcases *test = arg; |
746 | |
747 | return set_lam(test->lam); |
748 | } |
749 | |
750 | static int handle_thread(struct testcases *test) |
751 | { |
752 | char stack[STACK_SIZE]; |
753 | int ret, child_ret; |
754 | int lam = 0; |
755 | pid_t pid; |
756 | |
757 | /* Set LAM mode in parent process */ |
758 | if (!test->later) { |
759 | lam = test->lam; |
760 | if (set_lam(lam) != 0) |
761 | return 1; |
762 | } |
763 | |
764 | pid = clone(thread_fn_get_lam, stack + STACK_SIZE, |
765 | SIGCHLD | CLONE_FILES | CLONE_FS | CLONE_VM, NULL); |
766 | if (pid < 0) { |
767 | perror("Clone failed." ); |
768 | return 1; |
769 | } |
770 | |
771 | waitpid(pid, &child_ret, 0); |
772 | ret = WEXITSTATUS(child_ret); |
773 | |
774 | if (lam != ret) |
775 | return 1; |
776 | |
777 | if (test->later) { |
778 | if (set_lam(test->lam) != 0) |
779 | return 1; |
780 | } |
781 | |
782 | return 0; |
783 | } |
784 | |
785 | static int handle_thread_enable(struct testcases *test) |
786 | { |
787 | char stack[STACK_SIZE]; |
788 | int ret, child_ret; |
789 | int lam = test->lam; |
790 | pid_t pid; |
791 | |
792 | pid = clone(thread_fn_set_lam, stack + STACK_SIZE, |
793 | SIGCHLD | CLONE_FILES | CLONE_FS | CLONE_VM, test); |
794 | if (pid < 0) { |
795 | perror("Clone failed." ); |
796 | return 1; |
797 | } |
798 | |
799 | waitpid(pid, &child_ret, 0); |
800 | ret = WEXITSTATUS(child_ret); |
801 | |
802 | if (lam != ret) |
803 | return 1; |
804 | |
805 | return 0; |
806 | } |
807 | static void run_test(struct testcases *test, int count) |
808 | { |
809 | int i, ret = 0; |
810 | |
811 | for (i = 0; i < count; i++) { |
812 | struct testcases *t = test + i; |
813 | |
814 | /* fork a process to run test case */ |
815 | tests_cnt++; |
816 | ret = fork_test(test: t); |
817 | |
818 | /* return 3 is not support LA57, the case should be skipped */ |
819 | if (ret == 3) { |
820 | ksft_test_result_skip(msg: t->msg); |
821 | continue; |
822 | } |
823 | |
824 | if (ret != 0) |
825 | ret = (t->expected == ret); |
826 | else |
827 | ret = !(t->expected); |
828 | |
829 | ksft_test_result(ret, t->msg); |
830 | } |
831 | } |
832 | |
833 | static struct testcases uring_cases[] = { |
834 | { |
835 | .later = 0, |
836 | .lam = LAM_U57_BITS, |
837 | .test_func = handle_uring, |
838 | .msg = "URING: LAM_U57. Dereferencing pointer with metadata\n" , |
839 | }, |
840 | { |
841 | .later = 1, |
842 | .expected = 1, |
843 | .lam = LAM_U57_BITS, |
844 | .test_func = handle_uring, |
845 | .msg = "URING:[Negative] Disable LAM. Dereferencing pointer with metadata.\n" , |
846 | }, |
847 | }; |
848 | |
849 | static struct testcases malloc_cases[] = { |
850 | { |
851 | .later = 0, |
852 | .lam = LAM_U57_BITS, |
853 | .test_func = handle_malloc, |
854 | .msg = "MALLOC: LAM_U57. Dereferencing pointer with metadata\n" , |
855 | }, |
856 | { |
857 | .later = 1, |
858 | .expected = 2, |
859 | .lam = LAM_U57_BITS, |
860 | .test_func = handle_malloc, |
861 | .msg = "MALLOC:[Negative] Disable LAM. Dereferencing pointer with metadata.\n" , |
862 | }, |
863 | }; |
864 | |
865 | static struct testcases bits_cases[] = { |
866 | { |
867 | .test_func = handle_max_bits, |
868 | .msg = "BITS: Check default tag bits\n" , |
869 | }, |
870 | }; |
871 | |
872 | static struct testcases syscall_cases[] = { |
873 | { |
874 | .later = 0, |
875 | .lam = LAM_U57_BITS, |
876 | .test_func = handle_syscall, |
877 | .msg = "SYSCALL: LAM_U57. syscall with metadata\n" , |
878 | }, |
879 | { |
880 | .later = 1, |
881 | .expected = 1, |
882 | .lam = LAM_U57_BITS, |
883 | .test_func = handle_syscall, |
884 | .msg = "SYSCALL:[Negative] Disable LAM. Dereferencing pointer with metadata.\n" , |
885 | }, |
886 | }; |
887 | |
888 | static struct testcases mmap_cases[] = { |
889 | { |
890 | .later = 1, |
891 | .expected = 0, |
892 | .lam = LAM_U57_BITS, |
893 | .addr = HIGH_ADDR, |
894 | .test_func = handle_mmap, |
895 | .msg = "MMAP: First mmap high address, then set LAM_U57.\n" , |
896 | }, |
897 | { |
898 | .later = 0, |
899 | .expected = 0, |
900 | .lam = LAM_U57_BITS, |
901 | .addr = HIGH_ADDR, |
902 | .test_func = handle_mmap, |
903 | .msg = "MMAP: First LAM_U57, then High address.\n" , |
904 | }, |
905 | { |
906 | .later = 0, |
907 | .expected = 0, |
908 | .lam = LAM_U57_BITS, |
909 | .addr = LOW_ADDR, |
910 | .test_func = handle_mmap, |
911 | .msg = "MMAP: First LAM_U57, then Low address.\n" , |
912 | }, |
913 | }; |
914 | |
915 | static struct testcases inheritance_cases[] = { |
916 | { |
917 | .expected = 0, |
918 | .lam = LAM_U57_BITS, |
919 | .test_func = handle_inheritance, |
920 | .msg = "FORK: LAM_U57, child process should get LAM mode same as parent\n" , |
921 | }, |
922 | { |
923 | .expected = 0, |
924 | .lam = LAM_U57_BITS, |
925 | .test_func = handle_thread, |
926 | .msg = "THREAD: LAM_U57, child thread should get LAM mode same as parent\n" , |
927 | }, |
928 | { |
929 | .expected = 1, |
930 | .lam = LAM_U57_BITS, |
931 | .test_func = handle_thread_enable, |
932 | .msg = "THREAD: [NEGATIVE] Enable LAM in child.\n" , |
933 | }, |
934 | { |
935 | .expected = 1, |
936 | .later = 1, |
937 | .lam = LAM_U57_BITS, |
938 | .test_func = handle_thread, |
939 | .msg = "THREAD: [NEGATIVE] Enable LAM in parent after thread created.\n" , |
940 | }, |
941 | { |
942 | .expected = 0, |
943 | .lam = LAM_U57_BITS, |
944 | .test_func = handle_execve, |
945 | .msg = "EXECVE: LAM_U57, child process should get disabled LAM mode\n" , |
946 | }, |
947 | }; |
948 | |
949 | static void cmd_help(void) |
950 | { |
951 | printf("usage: lam [-h] [-t test list]\n" ); |
952 | printf("\t-t test list: run tests specified in the test list, default:0x%x\n" , TEST_MASK); |
953 | printf("\t\t0x1:malloc; 0x2:max_bits; 0x4:mmap; 0x8:syscall; 0x10:io_uring; 0x20:inherit;\n" ); |
954 | printf("\t-h: help\n" ); |
955 | } |
956 | |
/* Return 1 when stat() succeeds on @fileName, 0 otherwise */
uint8_t file_Exists(const char *fileName)
{
	struct stat sb;

	if (stat(fileName, &sb) != 0)
		return 0;

	return 1;
}
966 | |
/*
 * Shell commands, run in order by Dsa_Init_Sysfs(), that configure work
 * queue wq0.1 of dsa0 through sysfs.
 */
const char *dsa_configs[] = {
	/* work queue attributes */
	"echo 1 > /sys/bus/dsa/devices/dsa0/wq0.1/group_id",
	"echo shared > /sys/bus/dsa/devices/dsa0/wq0.1/mode",
	"echo 10 > /sys/bus/dsa/devices/dsa0/wq0.1/priority",
	"echo 16 > /sys/bus/dsa/devices/dsa0/wq0.1/size",
	"echo 15 > /sys/bus/dsa/devices/dsa0/wq0.1/threshold",
	"echo user > /sys/bus/dsa/devices/dsa0/wq0.1/type",
	"echo MyApp1 > /sys/bus/dsa/devices/dsa0/wq0.1/name",
	/* engine assignment and device bind */
	"echo 1 > /sys/bus/dsa/devices/dsa0/engine0.1/group_id",
	"echo dsa0 > /sys/bus/dsa/drivers/idxd/bind",
	/* bind files and devices, generated a device file in /dev */
	"echo wq0.1 > /sys/bus/dsa/drivers/user/bind",
};
981 | |
/* Character device of the configured DSA work queue */
const char *dsaDeviceFile = "/dev/dsa/wq0.1";
/* sysfs attribute read by Check_DSA_Kernel_Setting() */
const char *dsaPasidEnable = "/sys/bus/dsa/devices/dsa0/pasid_enabled";
986 | |
987 | /* |
988 | * DSA depends on kernel cmdline "intel_iommu=on,sm_on" |
989 | * return pasid_enabled (0: disable 1:enable) |
990 | */ |
991 | int Check_DSA_Kernel_Setting(void) |
992 | { |
993 | char command[256] = "" ; |
994 | char buf[256] = "" ; |
995 | char *ptr; |
996 | int rv = -1; |
997 | |
998 | snprintf(buf: command, size: sizeof(command) - 1, fmt: "cat %s" , dsaPasidEnable); |
999 | |
1000 | FILE *cmd = popen(command, "r" ); |
1001 | |
1002 | if (cmd) { |
1003 | while (fgets(buf, sizeof(buf) - 1, cmd) != NULL); |
1004 | |
1005 | pclose(cmd); |
1006 | rv = strtol(buf, &ptr, 16); |
1007 | } |
1008 | |
1009 | return rv; |
1010 | } |
1011 | |
1012 | /* |
1013 | * Config DSA's sysfs files as shared DSA's WQ. |
1014 | * Generated a device file /dev/dsa/wq0.1 |
1015 | * Return: 0 OK; 1 Failed; 3 Skip(SVA disabled). |
1016 | */ |
1017 | int Dsa_Init_Sysfs(void) |
1018 | { |
1019 | uint len = ARRAY_SIZE(dsa_configs); |
1020 | const char **p = dsa_configs; |
1021 | |
1022 | if (file_Exists(fileName: dsaDeviceFile) == 1) |
1023 | return 0; |
1024 | |
1025 | /* check the idxd driver */ |
1026 | if (file_Exists(fileName: dsaPasidEnable) != 1) { |
1027 | printf("Please make sure idxd driver was loaded\n" ); |
1028 | return 3; |
1029 | } |
1030 | |
1031 | /* Check SVA feature */ |
1032 | if (Check_DSA_Kernel_Setting() != 1) { |
1033 | printf("Please enable SVA.(Add intel_iommu=on,sm_on in kernel cmdline)\n" ); |
1034 | return 3; |
1035 | } |
1036 | |
1037 | /* Check the idxd device file on /dev/dsa/ */ |
1038 | for (int i = 0; i < len; i++) { |
1039 | if (system(p[i])) |
1040 | return 1; |
1041 | } |
1042 | |
1043 | /* After config, /dev/dsa/wq0.1 should be generated */ |
1044 | return (file_Exists(fileName: dsaDeviceFile) != 1); |
1045 | } |
1046 | |
1047 | /* |
1048 | * Open DSA device file, triger API: iommu_sva_alloc_pasid |
1049 | */ |
1050 | void *allocate_dsa_pasid(void) |
1051 | { |
1052 | int fd; |
1053 | void *wq; |
1054 | |
1055 | fd = open(dsaDeviceFile, O_RDWR); |
1056 | if (fd < 0) { |
1057 | perror("open" ); |
1058 | return MAP_FAILED; |
1059 | } |
1060 | |
1061 | wq = mmap(NULL, 0x1000, PROT_WRITE, |
1062 | MAP_SHARED | MAP_POPULATE, fd, 0); |
1063 | if (wq == MAP_FAILED) |
1064 | perror("mmap" ); |
1065 | |
1066 | return wq; |
1067 | } |
1068 | |
1069 | int set_force_svm(void) |
1070 | { |
1071 | int ret = 0; |
1072 | |
1073 | ret = syscall(SYS_arch_prctl, ARCH_FORCE_TAGGED_SVA); |
1074 | |
1075 | return ret; |
1076 | } |
1077 | |
1078 | int handle_pasid(struct testcases *test) |
1079 | { |
1080 | uint tmp = test->cmd; |
1081 | uint runed = 0x0; |
1082 | int ret = 0; |
1083 | void *wq = NULL; |
1084 | |
1085 | ret = Dsa_Init_Sysfs(); |
1086 | if (ret != 0) |
1087 | return ret; |
1088 | |
1089 | for (int i = 0; i < 3; i++) { |
1090 | int err = 0; |
1091 | |
1092 | if (tmp & 0x1) { |
1093 | /* run set lam mode*/ |
1094 | if ((runed & 0x1) == 0) { |
1095 | err = set_lam(LAM_U57_BITS); |
1096 | runed = runed | 0x1; |
1097 | } else |
1098 | err = 1; |
1099 | } else if (tmp & 0x4) { |
1100 | /* run force svm */ |
1101 | if ((runed & 0x4) == 0) { |
1102 | err = set_force_svm(); |
1103 | runed = runed | 0x4; |
1104 | } else |
1105 | err = 1; |
1106 | } else if (tmp & 0x2) { |
1107 | /* run allocate pasid */ |
1108 | if ((runed & 0x2) == 0) { |
1109 | runed = runed | 0x2; |
1110 | wq = allocate_dsa_pasid(); |
1111 | if (wq == MAP_FAILED) |
1112 | err = 1; |
1113 | } else |
1114 | err = 1; |
1115 | } |
1116 | |
1117 | ret = ret + err; |
1118 | if (ret > 0) |
1119 | break; |
1120 | |
1121 | tmp = tmp >> 4; |
1122 | } |
1123 | |
1124 | if (wq != MAP_FAILED && wq != NULL) |
1125 | if (munmap(wq, 0x1000)) |
1126 | printf("munmap failed %d\n" , errno); |
1127 | |
1128 | if (runed != 0x7) |
1129 | ret = 1; |
1130 | |
1131 | return (ret != 0); |
1132 | } |
1133 | |
1134 | /* |
1135 | * Pasid test depends on idxd and SVA, kernel should enable iommu and sm. |
1136 | * command line(intel_iommu=on,sm_on) |
1137 | */ |
1138 | static struct testcases pasid_cases[] = { |
1139 | { |
1140 | .expected = 1, |
1141 | .cmd = PAS_CMD(LAM_CMD_BIT, PAS_CMD_BIT, SVA_CMD_BIT), |
1142 | .test_func = handle_pasid, |
1143 | .msg = "PASID: [Negative] Execute LAM, PASID, SVA in sequence\n" , |
1144 | }, |
1145 | { |
1146 | .expected = 0, |
1147 | .cmd = PAS_CMD(LAM_CMD_BIT, SVA_CMD_BIT, PAS_CMD_BIT), |
1148 | .test_func = handle_pasid, |
1149 | .msg = "PASID: Execute LAM, SVA, PASID in sequence\n" , |
1150 | }, |
1151 | { |
1152 | .expected = 1, |
1153 | .cmd = PAS_CMD(PAS_CMD_BIT, LAM_CMD_BIT, SVA_CMD_BIT), |
1154 | .test_func = handle_pasid, |
1155 | .msg = "PASID: [Negative] Execute PASID, LAM, SVA in sequence\n" , |
1156 | }, |
1157 | { |
1158 | .expected = 0, |
1159 | .cmd = PAS_CMD(PAS_CMD_BIT, SVA_CMD_BIT, LAM_CMD_BIT), |
1160 | .test_func = handle_pasid, |
1161 | .msg = "PASID: Execute PASID, SVA, LAM in sequence\n" , |
1162 | }, |
1163 | { |
1164 | .expected = 0, |
1165 | .cmd = PAS_CMD(SVA_CMD_BIT, LAM_CMD_BIT, PAS_CMD_BIT), |
1166 | .test_func = handle_pasid, |
1167 | .msg = "PASID: Execute SVA, LAM, PASID in sequence\n" , |
1168 | }, |
1169 | { |
1170 | .expected = 0, |
1171 | .cmd = PAS_CMD(SVA_CMD_BIT, PAS_CMD_BIT, LAM_CMD_BIT), |
1172 | .test_func = handle_pasid, |
1173 | .msg = "PASID: Execute SVA, PASID, LAM in sequence\n" , |
1174 | }, |
1175 | }; |
1176 | |
1177 | int main(int argc, char **argv) |
1178 | { |
1179 | int c = 0; |
1180 | unsigned int tests = TEST_MASK; |
1181 | |
1182 | tests_cnt = 0; |
1183 | |
1184 | if (!cpu_has_lam()) { |
1185 | ksft_print_msg(msg: "Unsupported LAM feature!\n" ); |
1186 | return -1; |
1187 | } |
1188 | |
1189 | while ((c = getopt(argc, argv, "ht:" )) != -1) { |
1190 | switch (c) { |
1191 | case 't': |
1192 | tests = strtoul(optarg, NULL, 16); |
1193 | if (tests && !(tests & TEST_MASK)) { |
1194 | ksft_print_msg(msg: "Invalid argument!\n" ); |
1195 | return -1; |
1196 | } |
1197 | break; |
1198 | case 'h': |
1199 | cmd_help(); |
1200 | return 0; |
1201 | default: |
1202 | ksft_print_msg(msg: "Invalid argument\n" ); |
1203 | return -1; |
1204 | } |
1205 | } |
1206 | |
1207 | /* |
1208 | * When tests is 0, it is not a real test case; |
1209 | * the option used by test case(execve) to check the lam mode in |
1210 | * process generated by execve, the process read back lam mode and |
1211 | * check with lam mode in parent process. |
1212 | */ |
1213 | if (!tests) |
1214 | return (get_lam()); |
1215 | |
1216 | /* Run test cases */ |
1217 | if (tests & FUNC_MALLOC) |
1218 | run_test(test: malloc_cases, ARRAY_SIZE(malloc_cases)); |
1219 | |
1220 | if (tests & FUNC_BITS) |
1221 | run_test(test: bits_cases, ARRAY_SIZE(bits_cases)); |
1222 | |
1223 | if (tests & FUNC_MMAP) |
1224 | run_test(test: mmap_cases, ARRAY_SIZE(mmap_cases)); |
1225 | |
1226 | if (tests & FUNC_SYSCALL) |
1227 | run_test(test: syscall_cases, ARRAY_SIZE(syscall_cases)); |
1228 | |
1229 | if (tests & FUNC_URING) |
1230 | run_test(test: uring_cases, ARRAY_SIZE(uring_cases)); |
1231 | |
1232 | if (tests & FUNC_INHERITE) |
1233 | run_test(test: inheritance_cases, ARRAY_SIZE(inheritance_cases)); |
1234 | |
1235 | if (tests & FUNC_PASID) |
1236 | run_test(test: pasid_cases, ARRAY_SIZE(pasid_cases)); |
1237 | |
1238 | ksft_set_plan(plan: tests_cnt); |
1239 | |
1240 | return ksft_exit_pass(); |
1241 | } |
1242 | |