1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Copyright 2020, Gustavo Luiz Duarte, IBM Corp. |
4 | * |
5 | * This test starts a transaction and triggers a signal, forcing a pagefault to |
6 | * happen when the kernel signal handling code touches the user signal stack. |
7 | * |
8 | * In order to avoid pre-faulting the signal stack memory and to force the |
9 | * pagefault to happen precisely in the kernel signal handling code, the |
10 | * pagefault handling is done in userspace using the userfaultfd facility. |
11 | * |
12 | * Further pagefaults are triggered by crafting the signal handler's ucontext |
13 | * to point to additional memory regions managed by the userfaultfd, so using |
14 | * the same mechanism used to avoid pre-faulting the signal stack memory. |
15 | * |
 * On failure (bug is present) the kernel crashes or never returns control
 * back to userspace. If the bug is not present, the test completes almost
 * immediately.
18 | */ |
19 | |
20 | #include <stdio.h> |
21 | #include <stdlib.h> |
22 | #include <string.h> |
23 | #include <linux/userfaultfd.h> |
24 | #include <poll.h> |
25 | #include <unistd.h> |
26 | #include <sys/ioctl.h> |
27 | #include <sys/syscall.h> |
28 | #include <fcntl.h> |
29 | #include <sys/mman.h> |
30 | #include <pthread.h> |
31 | #include <signal.h> |
32 | #include <errno.h> |
33 | |
34 | #include "tm.h" |
35 | |
36 | |
/* Size of the userfaultfd-managed region: 10 pages of 64k each */
#define UF_MEM_SIZE	(10 * 64 * 1024)

/* Base address of the region handled by userfaultfd */
static char *uf_mem;
/* Offset into uf_mem of the next chunk that get_uf_mem() will hand out */
static size_t uf_mem_offset;

/*
 * Shadow copy of uf_mem, indexed by the same offsets. When a page of uf_mem
 * faults, the matching page of this buffer is copied in instead of a
 * zero-filled page. This makes the test more reliable and avoids segfaulting
 * on return from the signal handler: since the handler's ucontext is made to
 * point at newly allocated memory, that memory holds the expected content
 * once it is paged in.
 */
static char backing_mem[UF_MEM_SIZE];

/* System page size, as reported by sysconf(_SC_PAGE_SIZE) */
static size_t pagesize;
53 | |
54 | /* |
55 | * Return a chunk of at least 'size' bytes of memory that will be handled by |
56 | * userfaultfd. If 'backing_data' is not NULL, its content will be save to |
57 | * 'backing_mem' and then copied into the faulting pages when the page fault |
58 | * is handled. |
59 | */ |
60 | void *get_uf_mem(size_t size, void *backing_data) |
61 | { |
62 | void *ret; |
63 | |
64 | if (uf_mem_offset + size > UF_MEM_SIZE) { |
65 | fprintf(stderr, "Requesting more uf_mem than expected!\n" ); |
66 | exit(EXIT_FAILURE); |
67 | } |
68 | |
69 | ret = &uf_mem[uf_mem_offset]; |
70 | |
71 | /* Save the data that will be copied into the faulting page */ |
72 | if (backing_data != NULL) |
73 | memcpy(&backing_mem[uf_mem_offset], backing_data, size); |
74 | |
75 | /* Reserve the requested amount of uf_mem */ |
76 | uf_mem_offset += size; |
77 | /* Keep uf_mem_offset aligned to the page size (round up) */ |
78 | uf_mem_offset = (uf_mem_offset + pagesize - 1) & ~(pagesize - 1); |
79 | |
80 | return ret; |
81 | } |
82 | |
83 | void *fault_handler_thread(void *arg) |
84 | { |
85 | struct uffd_msg msg; /* Data read from userfaultfd */ |
86 | long uffd; /* userfaultfd file descriptor */ |
87 | struct uffdio_copy uffdio_copy; |
88 | struct pollfd pollfd; |
89 | ssize_t nread, offset; |
90 | |
91 | uffd = (long) arg; |
92 | |
93 | for (;;) { |
94 | pollfd.fd = uffd; |
95 | pollfd.events = POLLIN; |
96 | if (poll(&pollfd, 1, -1) == -1) { |
97 | perror("poll() failed" ); |
98 | exit(EXIT_FAILURE); |
99 | } |
100 | |
101 | nread = read(uffd, &msg, sizeof(msg)); |
102 | if (nread == 0) { |
103 | fprintf(stderr, "read(): EOF on userfaultfd\n" ); |
104 | exit(EXIT_FAILURE); |
105 | } |
106 | |
107 | if (nread == -1) { |
108 | perror("read() failed" ); |
109 | exit(EXIT_FAILURE); |
110 | } |
111 | |
112 | /* We expect only one kind of event */ |
113 | if (msg.event != UFFD_EVENT_PAGEFAULT) { |
114 | fprintf(stderr, "Unexpected event on userfaultfd\n" ); |
115 | exit(EXIT_FAILURE); |
116 | } |
117 | |
118 | /* |
119 | * We need to handle page faults in units of pages(!). |
120 | * So, round faulting address down to page boundary. |
121 | */ |
122 | uffdio_copy.dst = msg.arg.pagefault.address & ~(pagesize-1); |
123 | |
124 | offset = (char *) uffdio_copy.dst - uf_mem; |
125 | uffdio_copy.src = (unsigned long) &backing_mem[offset]; |
126 | |
127 | uffdio_copy.len = pagesize; |
128 | uffdio_copy.mode = 0; |
129 | uffdio_copy.copy = 0; |
130 | if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy) == -1) { |
131 | perror("ioctl-UFFDIO_COPY failed" ); |
132 | exit(EXIT_FAILURE); |
133 | } |
134 | } |
135 | } |
136 | |
137 | void setup_uf_mem(void) |
138 | { |
139 | long uffd; /* userfaultfd file descriptor */ |
140 | pthread_t thr; |
141 | struct uffdio_api uffdio_api; |
142 | struct uffdio_register uffdio_register; |
143 | int ret; |
144 | |
145 | pagesize = sysconf(_SC_PAGE_SIZE); |
146 | |
147 | /* Create and enable userfaultfd object */ |
148 | uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); |
149 | if (uffd == -1) { |
150 | perror("userfaultfd() failed" ); |
151 | exit(EXIT_FAILURE); |
152 | } |
153 | uffdio_api.api = UFFD_API; |
154 | uffdio_api.features = 0; |
155 | if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) { |
156 | perror("ioctl-UFFDIO_API failed" ); |
157 | exit(EXIT_FAILURE); |
158 | } |
159 | |
160 | /* |
161 | * Create a private anonymous mapping. The memory will be demand-zero |
162 | * paged, that is, not yet allocated. When we actually touch the memory |
163 | * the related page will be allocated via the userfaultfd mechanism. |
164 | */ |
165 | uf_mem = mmap(NULL, UF_MEM_SIZE, PROT_READ | PROT_WRITE, |
166 | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); |
167 | if (uf_mem == MAP_FAILED) { |
168 | perror("mmap() failed" ); |
169 | exit(EXIT_FAILURE); |
170 | } |
171 | |
172 | /* |
173 | * Register the memory range of the mapping we've just mapped to be |
174 | * handled by the userfaultfd object. In 'mode' we request to track |
175 | * missing pages (i.e. pages that have not yet been faulted-in). |
176 | */ |
177 | uffdio_register.range.start = (unsigned long) uf_mem; |
178 | uffdio_register.range.len = UF_MEM_SIZE; |
179 | uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; |
180 | if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) { |
181 | perror("ioctl-UFFDIO_REGISTER" ); |
182 | exit(EXIT_FAILURE); |
183 | } |
184 | |
185 | /* Create a thread that will process the userfaultfd events */ |
186 | ret = pthread_create(&thr, NULL, fault_handler_thread, (void *) uffd); |
187 | if (ret != 0) { |
188 | fprintf(stderr, "pthread_create(): Error. Returned %d\n" , ret); |
189 | exit(EXIT_FAILURE); |
190 | } |
191 | } |
192 | |
193 | /* |
194 | * Assumption: the signal was delivered while userspace was in transactional or |
195 | * suspended state, i.e. uc->uc_link != NULL. |
196 | */ |
197 | void signal_handler(int signo, siginfo_t *si, void *uc) |
198 | { |
199 | ucontext_t *ucp = uc; |
200 | |
201 | /* Skip 'trap' after returning, otherwise we get a SIGTRAP again */ |
202 | ucp->uc_link->uc_mcontext.regs->nip += 4; |
203 | |
204 | ucp->uc_mcontext.v_regs = |
205 | get_uf_mem(sizeof(elf_vrreg_t), ucp->uc_mcontext.v_regs); |
206 | |
207 | ucp->uc_link->uc_mcontext.v_regs = |
208 | get_uf_mem(sizeof(elf_vrreg_t), ucp->uc_link->uc_mcontext.v_regs); |
209 | |
210 | ucp->uc_link = get_uf_mem(sizeof(ucontext_t), ucp->uc_link); |
211 | } |
212 | |
/*
 * Probe for the userfaultfd syscall by invoking it with a flags value of -1.
 *
 * Returns false only when the call fails with ENOSYS; a return value of 0
 * or any other errno is reported as "available".
 */
bool have_userfaultfd(void)
{
	long probe;

	errno = 0;
	probe = syscall(__NR_userfaultfd, -1);
	if (probe == 0)
		return true;

	return errno != ENOSYS;
}
222 | |
223 | int tm_signal_pagefault(void) |
224 | { |
225 | struct sigaction sa; |
226 | stack_t ss; |
227 | |
228 | SKIP_IF(!have_htm()); |
229 | SKIP_IF(htm_is_synthetic()); |
230 | SKIP_IF(!have_userfaultfd()); |
231 | |
232 | setup_uf_mem(); |
233 | |
234 | /* |
235 | * Set an alternative stack that will generate a page fault when the |
236 | * signal is raised. The page fault will be treated via userfaultfd, |
237 | * i.e. via fault_handler_thread. |
238 | */ |
239 | ss.ss_sp = get_uf_mem(SIGSTKSZ, NULL); |
240 | ss.ss_size = SIGSTKSZ; |
241 | ss.ss_flags = 0; |
242 | if (sigaltstack(&ss, NULL) == -1) { |
243 | perror("sigaltstack() failed" ); |
244 | exit(EXIT_FAILURE); |
245 | } |
246 | |
247 | sa.sa_flags = SA_SIGINFO | SA_ONSTACK; |
248 | sa.sa_sigaction = signal_handler; |
249 | if (sigaction(SIGTRAP, &sa, NULL) == -1) { |
250 | perror("sigaction() failed" ); |
251 | exit(EXIT_FAILURE); |
252 | } |
253 | |
254 | /* Trigger a SIGTRAP in transactional state */ |
255 | asm __volatile__( |
256 | "tbegin.;" |
257 | "beq 1f;" |
258 | "trap;" |
259 | "1: ;" |
260 | : : : "memory" ); |
261 | |
262 | /* Trigger a SIGTRAP in suspended state */ |
263 | asm __volatile__( |
264 | "tbegin.;" |
265 | "beq 1f;" |
266 | "tsuspend.;" |
267 | "trap;" |
268 | "tresume.;" |
269 | "1: ;" |
270 | : : : "memory" ); |
271 | |
272 | return EXIT_SUCCESS; |
273 | } |
274 | |
275 | int main(int argc, char **argv) |
276 | { |
277 | /* |
278 | * Depending on kernel config, the TM Bad Thing might not result in a |
279 | * crash, instead the kernel never returns control back to userspace, so |
280 | * set a tight timeout. If the test passes it completes almost |
281 | * immediately. |
282 | */ |
283 | test_harness_set_timeout(2); |
284 | return test_harness(tm_signal_pagefault, "tm_signal_pagefault" ); |
285 | } |
286 | |