1 | // SPDX-License-Identifier: GPL-2.0+ |
2 | |
3 | #include <asm/unistd.h> |
4 | #include <linux/hw_breakpoint.h> |
5 | #include <linux/ptrace.h> |
6 | #include <memory.h> |
7 | #include <stdlib.h> |
8 | #include <sys/wait.h> |
9 | |
10 | #include "utils.h" |
11 | |
12 | /* |
13 | * Child subroutine that performs a load on the address, then traps |
14 | */ |
15 | void same_watch_addr_child(unsigned long *addr); |
16 | |
17 | /* Address of the ld instruction in same_watch_addr_child() */ |
18 | extern char same_watch_addr_load[]; |
19 | |
20 | /* Address of the end trap instruction in same_watch_addr_child() */ |
21 | extern char same_watch_addr_trap[]; |
22 | |
23 | /* |
24 | * Child subroutine that performs a load on the first address, then a load on |
25 | * the second address (with no instructions separating this from the first |
26 | * load), then traps. |
27 | */ |
28 | void perf_then_ptrace_child(unsigned long *first_addr, unsigned long *second_addr); |
29 | |
30 | /* Address of the first ld instruction in perf_then_ptrace_child() */ |
31 | extern char perf_then_ptrace_load1[]; |
32 | |
33 | /* Address of the second ld instruction in perf_then_ptrace_child() */ |
34 | extern char perf_then_ptrace_load2[]; |
35 | |
36 | /* Address of the end trap instruction in perf_then_ptrace_child() */ |
37 | extern char perf_then_ptrace_trap[]; |
38 | |
/*
 * Thin wrapper around the raw ptrace system call.
 *
 * The four arguments are forwarded unchanged to syscall(__NR_ptrace, ...)
 * and the syscall() return value is handed straight back to the caller.
 */
static inline long sys_ptrace(long request, pid_t pid, unsigned long addr, unsigned long data)
{
	long ret = syscall(__NR_ptrace, request, pid, addr, data);

	return ret;
}
43 | |
44 | static long ptrace_traceme(void) |
45 | { |
46 | return sys_ptrace(PTRACE_TRACEME, pid: 0, addr: 0, data: 0); |
47 | } |
48 | |
49 | static long ptrace_getregs(pid_t pid, struct pt_regs *result) |
50 | { |
51 | return sys_ptrace(PTRACE_GETREGS, pid, addr: 0, data: (unsigned long)result); |
52 | } |
53 | |
54 | static long ptrace_setregs(pid_t pid, struct pt_regs *result) |
55 | { |
56 | return sys_ptrace(PTRACE_SETREGS, pid, addr: 0, data: (unsigned long)result); |
57 | } |
58 | |
59 | static long ptrace_cont(pid_t pid, long signal) |
60 | { |
61 | return sys_ptrace(PTRACE_CONT, pid, addr: 0, data: signal); |
62 | } |
63 | |
64 | static long ptrace_singlestep(pid_t pid, long signal) |
65 | { |
66 | return sys_ptrace(PTRACE_SINGLESTEP, pid, addr: 0, data: signal); |
67 | } |
68 | |
69 | static long ppc_ptrace_gethwdbginfo(pid_t pid, struct ppc_debug_info *dbginfo) |
70 | { |
71 | return sys_ptrace(request: PPC_PTRACE_GETHWDBGINFO, pid, addr: 0, data: (unsigned long)dbginfo); |
72 | } |
73 | |
74 | static long ppc_ptrace_sethwdbg(pid_t pid, struct ppc_hw_breakpoint *bp_info) |
75 | { |
76 | return sys_ptrace(request: PPC_PTRACE_SETHWDEBUG, pid, addr: 0, data: (unsigned long)bp_info); |
77 | } |
78 | |
79 | static long ppc_ptrace_delhwdbg(pid_t pid, int bp_id) |
80 | { |
81 | return sys_ptrace(request: PPC_PTRACE_DELHWDEBUG, pid, addr: 0L, data: bp_id); |
82 | } |
83 | |
84 | static long ptrace_getreg_pc(pid_t pid, void **pc) |
85 | { |
86 | struct pt_regs regs; |
87 | long err; |
88 | |
89 | err = ptrace_getregs(pid, result: ®s); |
90 | if (err) |
91 | return err; |
92 | |
93 | *pc = (void *)regs.nip; |
94 | |
95 | return 0; |
96 | } |
97 | |
98 | static long ptrace_setreg_pc(pid_t pid, void *pc) |
99 | { |
100 | struct pt_regs regs; |
101 | long err; |
102 | |
103 | err = ptrace_getregs(pid, result: ®s); |
104 | if (err) |
105 | return err; |
106 | |
107 | regs.nip = (unsigned long)pc; |
108 | |
109 | err = ptrace_setregs(pid, result: ®s); |
110 | if (err) |
111 | return err; |
112 | |
113 | return 0; |
114 | } |
115 | |
/*
 * Thin wrapper around the raw perf_event_open system call.
 *
 * All arguments are forwarded unchanged to syscall(__NR_perf_event_open,
 * ...); the result is returned as an int. The caller in this file treats
 * a negative value as failure and otherwise uses it as a file descriptor.
 */
static int perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu,
			   int group_fd, unsigned long flags)
{
	long fd = syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);

	return (int)fd;
}
121 | |
122 | static void perf_user_event_attr_set(struct perf_event_attr *attr, void *addr, u64 len) |
123 | { |
124 | memset(attr, 0, sizeof(struct perf_event_attr)); |
125 | |
126 | attr->type = PERF_TYPE_BREAKPOINT; |
127 | attr->size = sizeof(struct perf_event_attr); |
128 | attr->bp_type = HW_BREAKPOINT_R; |
129 | attr->bp_addr = (u64)addr; |
130 | attr->bp_len = len; |
131 | attr->exclude_kernel = 1; |
132 | attr->exclude_hv = 1; |
133 | } |
134 | |
135 | static int perf_watchpoint_open(pid_t child_pid, void *addr, u64 len) |
136 | { |
137 | struct perf_event_attr attr; |
138 | |
139 | perf_user_event_attr_set(attr: &attr, addr, len); |
140 | return perf_event_open(attr: &attr, pid: child_pid, cpu: -1, group_fd: -1, flags: 0); |
141 | } |
142 | |
143 | static int perf_read_counter(int perf_fd, u64 *count) |
144 | { |
145 | /* |
146 | * A perf counter is retrieved by the read() syscall. It contains |
147 | * the current count as 8 bytes that are interpreted as a u64 |
148 | */ |
149 | ssize_t len = read(perf_fd, count, sizeof(*count)); |
150 | |
151 | if (len != sizeof(*count)) |
152 | return -1; |
153 | |
154 | return 0; |
155 | } |
156 | |
157 | static void ppc_ptrace_init_breakpoint(struct ppc_hw_breakpoint *info, |
158 | int type, void *addr, int len) |
159 | { |
160 | info->version = 1; |
161 | info->trigger_type = type; |
162 | info->condition_mode = PPC_BREAKPOINT_CONDITION_NONE; |
163 | info->addr = (u64)addr; |
164 | info->addr2 = (u64)addr + len; |
165 | info->condition_value = 0; |
166 | if (!len) |
167 | info->addr_mode = PPC_BREAKPOINT_MODE_EXACT; |
168 | else |
169 | info->addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE; |
170 | } |
171 | |
172 | /* |
173 | * Checks if we can place at least 2 watchpoints on the child process |
174 | */ |
175 | static int check_watchpoints(pid_t pid) |
176 | { |
177 | struct ppc_debug_info dbginfo; |
178 | |
179 | FAIL_IF_MSG(ppc_ptrace_gethwdbginfo(pid, dbginfo: &dbginfo), "PPC_PTRACE_GETHWDBGINFO failed" ); |
180 | SKIP_IF_MSG(dbginfo.num_data_bps <= 1, "Not enough data watchpoints (need at least 2)" ); |
181 | |
182 | return 0; |
183 | } |
184 | |
/*
 * Wrapper around a plain fork() call that sets up the child for
 * ptrace-ing. Both the parent and child return from this, though
 * the child is stopped until ptrace_cont(pid) is run by the parent.
 *
 * The fork() result is stored in *@pid, so the caller tells parent and
 * child apart the same way as with fork() itself. Returns 0 on success;
 * failures are reported through the FAIL_IF_MSG / FAIL_IF_EXIT_MSG macros.
 */
static int ptrace_fork_child(pid_t *pid)
{
	int status;

	*pid = fork();
	FAIL_IF_MSG(*pid < 0, "Failed to fork child");

	if (*pid == 0) {
		/* Child: request tracing, then stop until the parent resumes us */
		FAIL_IF_EXIT_MSG(ptrace_traceme(), "PTRACE_TRACEME failed");
		FAIL_IF_EXIT_MSG(raise(SIGSTOP), "Child failed to raise SIGSTOP");
		return 0;
	}

	/* Parent: synchronise on child SIGSTOP */
	FAIL_IF_MSG(waitpid(*pid, &status, 0) == -1, "Failed to wait for child");
	FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped");

	return 0;
}
210 | |
211 | /* |
212 | * Tests the interaction between ptrace and perf watching the same data. |
213 | * |
214 | * We expect ptrace to take 'priority', as it is has before-execute |
215 | * semantics. |
216 | * |
217 | * The perf counter should not be incremented yet because perf has after-execute |
218 | * semantics. E.g., if ptrace changes the child PC, we don't even execute the |
219 | * instruction at all. |
220 | * |
221 | * When the child is stopped for ptrace, we test both continue and single step. |
222 | * Both should increment the perf counter. We also test changing the PC somewhere |
223 | * different and stepping, which should not increment the perf counter. |
224 | */ |
225 | int same_watch_addr_test(void) |
226 | { |
227 | struct ppc_hw_breakpoint bp_info; /* ptrace breakpoint info */ |
228 | int bp_id; /* Breakpoint handle of ptrace watchpoint */ |
229 | int perf_fd; /* File descriptor of perf performance counter */ |
230 | u64 perf_count; /* Most recently fetched perf performance counter value */ |
231 | pid_t pid; /* PID of child process */ |
232 | void *pc; /* Most recently fetched child PC value */ |
233 | int status; /* Stop status of child after waitpid */ |
234 | unsigned long value; /* Dummy value to be read/written to by child */ |
235 | int err; |
236 | |
237 | err = ptrace_fork_child(pid: &pid); |
238 | if (err) |
239 | return err; |
240 | |
241 | if (!pid) { |
242 | same_watch_addr_child(addr: &value); |
243 | exit(1); |
244 | } |
245 | |
246 | err = check_watchpoints(pid); |
247 | if (err) |
248 | return err; |
249 | |
250 | /* Place a perf watchpoint counter on value */ |
251 | perf_fd = perf_watchpoint_open(child_pid: pid, addr: &value, len: sizeof(value)); |
252 | FAIL_IF_MSG(perf_fd < 0, "Failed to open perf performance counter" ); |
253 | |
254 | /* Place a ptrace watchpoint on value */ |
255 | ppc_ptrace_init_breakpoint(info: &bp_info, type: PPC_BREAKPOINT_TRIGGER_READ, addr: &value, len: sizeof(value)); |
256 | bp_id = ppc_ptrace_sethwdbg(pid, bp_info: &bp_info); |
257 | FAIL_IF_MSG(bp_id < 0, "Failed to set ptrace watchpoint" ); |
258 | |
259 | /* Let the child run. It should stop on the ptrace watchpoint */ |
260 | FAIL_IF_MSG(ptrace_cont(pid, signal: 0), "Failed to continue child" ); |
261 | |
262 | FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child" ); |
263 | FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped" ); |
264 | FAIL_IF_MSG(ptrace_getreg_pc(pid, pc: &pc), "Failed to get child PC" ); |
265 | FAIL_IF_MSG(pc != same_watch_addr_load, "Child did not stop on load instruction" ); |
266 | |
267 | /* |
268 | * We stopped before executing the load, so perf should not have |
269 | * recorded any events yet |
270 | */ |
271 | FAIL_IF_MSG(perf_read_counter(perf_fd, count: &perf_count), "Failed to read perf counter" ); |
272 | FAIL_IF_MSG(perf_count != 0, "perf recorded unexpected event" ); |
273 | |
274 | /* Single stepping over the load should increment the perf counter */ |
275 | FAIL_IF_MSG(ptrace_singlestep(pid, signal: 0), "Failed to single step child" ); |
276 | |
277 | FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child" ); |
278 | FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped" ); |
279 | FAIL_IF_MSG(ptrace_getreg_pc(pid, pc: &pc), "Failed to get child PC" ); |
280 | FAIL_IF_MSG(pc != same_watch_addr_load + 4, "Failed to single step load instruction" ); |
281 | FAIL_IF_MSG(perf_read_counter(perf_fd, count: &perf_count), "Failed to read perf counter" ); |
282 | FAIL_IF_MSG(perf_count != 1, "perf counter did not increment" ); |
283 | |
284 | /* |
285 | * Set up a ptrace watchpoint on the value again and trigger it. |
286 | * The perf counter should not have incremented because we do not |
287 | * execute the load yet. |
288 | */ |
289 | FAIL_IF_MSG(ppc_ptrace_delhwdbg(pid, bp_id), "Failed to remove old ptrace watchpoint" ); |
290 | bp_id = ppc_ptrace_sethwdbg(pid, bp_info: &bp_info); |
291 | FAIL_IF_MSG(bp_id < 0, "Failed to set ptrace watchpoint" ); |
292 | FAIL_IF_MSG(ptrace_setreg_pc(pid, pc: same_watch_addr_load), "Failed to set child PC" ); |
293 | FAIL_IF_MSG(ptrace_cont(pid, signal: 0), "Failed to continue child" ); |
294 | |
295 | FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child" ); |
296 | FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped" ); |
297 | FAIL_IF_MSG(ptrace_getreg_pc(pid, pc: &pc), "Failed to get child PC" ); |
298 | FAIL_IF_MSG(pc != same_watch_addr_load, "Child did not stop on load trap" ); |
299 | FAIL_IF_MSG(perf_read_counter(perf_fd, count: &perf_count), "Failed to read perf counter" ); |
300 | FAIL_IF_MSG(perf_count != 1, "perf counter should not have changed" ); |
301 | |
302 | /* Continuing over the load should increment the perf counter */ |
303 | FAIL_IF_MSG(ptrace_cont(pid, signal: 0), "Failed to continue child" ); |
304 | |
305 | FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child" ); |
306 | FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped" ); |
307 | FAIL_IF_MSG(ptrace_getreg_pc(pid, pc: &pc), "Failed to get child PC" ); |
308 | FAIL_IF_MSG(pc != same_watch_addr_trap, "Child did not stop on end trap" ); |
309 | FAIL_IF_MSG(perf_read_counter(perf_fd, count: &perf_count), "Failed to read perf counter" ); |
310 | FAIL_IF_MSG(perf_count != 2, "perf counter did not increment" ); |
311 | |
312 | /* |
313 | * If we set the child PC back to the load instruction, then continue, |
314 | * we should reach the end trap (because ptrace is one-shot) and have |
315 | * another perf event. |
316 | */ |
317 | FAIL_IF_MSG(ptrace_setreg_pc(pid, pc: same_watch_addr_load), "Failed to set child PC" ); |
318 | FAIL_IF_MSG(ptrace_cont(pid, signal: 0), "Failed to continue child" ); |
319 | |
320 | FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child" ); |
321 | FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped" ); |
322 | FAIL_IF_MSG(ptrace_getreg_pc(pid, pc: &pc), "Failed to get child PC" ); |
323 | FAIL_IF_MSG(pc != same_watch_addr_trap, "Child did not stop on end trap" ); |
324 | FAIL_IF_MSG(perf_read_counter(perf_fd, count: &perf_count), "Failed to read perf counter" ); |
325 | FAIL_IF_MSG(perf_count != 3, "perf counter did not increment" ); |
326 | |
327 | /* |
328 | * If we set the child PC back to the load instruction, set a ptrace |
329 | * watchpoint on the load, then continue, we should immediately get |
330 | * the ptrace trap without incrementing the perf counter |
331 | */ |
332 | FAIL_IF_MSG(ppc_ptrace_delhwdbg(pid, bp_id), "Failed to remove old ptrace watchpoint" ); |
333 | bp_id = ppc_ptrace_sethwdbg(pid, bp_info: &bp_info); |
334 | FAIL_IF_MSG(bp_id < 0, "Failed to set ptrace watchpoint" ); |
335 | FAIL_IF_MSG(ptrace_setreg_pc(pid, pc: same_watch_addr_load), "Failed to set child PC" ); |
336 | FAIL_IF_MSG(ptrace_cont(pid, signal: 0), "Failed to continue child" ); |
337 | |
338 | FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child" ); |
339 | FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped" ); |
340 | FAIL_IF_MSG(ptrace_getreg_pc(pid, pc: &pc), "Failed to get child PC" ); |
341 | FAIL_IF_MSG(pc != same_watch_addr_load, "Child did not stop on load instruction" ); |
342 | FAIL_IF_MSG(perf_read_counter(perf_fd, count: &perf_count), "Failed to read perf counter" ); |
343 | FAIL_IF_MSG(perf_count != 3, "perf counter should not have changed" ); |
344 | |
345 | /* |
346 | * If we change the PC while stopped on the load instruction, we should |
347 | * not increment the perf counter (because ptrace is before-execute, |
348 | * perf is after-execute). |
349 | */ |
350 | FAIL_IF_MSG(ptrace_setreg_pc(pid, pc: same_watch_addr_load + 4), "Failed to set child PC" ); |
351 | FAIL_IF_MSG(ptrace_cont(pid, signal: 0), "Failed to continue child" ); |
352 | |
353 | FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child" ); |
354 | FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped" ); |
355 | FAIL_IF_MSG(ptrace_getreg_pc(pid, pc: &pc), "Failed to get child PC" ); |
356 | FAIL_IF_MSG(pc != same_watch_addr_trap, "Child did not stop on end trap" ); |
357 | FAIL_IF_MSG(perf_read_counter(perf_fd, count: &perf_count), "Failed to read perf counter" ); |
358 | FAIL_IF_MSG(perf_count != 3, "perf counter should not have changed" ); |
359 | |
360 | /* Clean up child */ |
361 | FAIL_IF_MSG(kill(pid, SIGKILL) != 0, "Failed to kill child" ); |
362 | |
363 | return 0; |
364 | } |
365 | |
366 | /* |
367 | * Tests the interaction between ptrace and perf when: |
368 | * 1. perf watches a value |
369 | * 2. ptrace watches a different value |
370 | * 3. The perf value is read, then the ptrace value is read immediately after |
371 | * |
372 | * A breakpoint implementation may accidentally misattribute/skip one of |
373 | * the ptrace or perf handlers, as interrupt based work is done after perf |
374 | * and before ptrace. |
375 | * |
376 | * We expect the perf counter to increment before the ptrace watchpoint |
377 | * triggers. |
378 | */ |
379 | int perf_then_ptrace_test(void) |
380 | { |
381 | struct ppc_hw_breakpoint bp_info; /* ptrace breakpoint info */ |
382 | int bp_id; /* Breakpoint handle of ptrace watchpoint */ |
383 | int perf_fd; /* File descriptor of perf performance counter */ |
384 | u64 perf_count; /* Most recently fetched perf performance counter value */ |
385 | pid_t pid; /* PID of child process */ |
386 | void *pc; /* Most recently fetched child PC value */ |
387 | int status; /* Stop status of child after waitpid */ |
388 | unsigned long perf_value; /* Dummy value to be watched by perf */ |
389 | unsigned long ptrace_value; /* Dummy value to be watched by ptrace */ |
390 | int err; |
391 | |
392 | err = ptrace_fork_child(pid: &pid); |
393 | if (err) |
394 | return err; |
395 | |
396 | /* |
397 | * If we are the child, run a subroutine that reads the perf value, |
398 | * then reads the ptrace value with consecutive load instructions |
399 | */ |
400 | if (!pid) { |
401 | perf_then_ptrace_child(first_addr: &perf_value, second_addr: &ptrace_value); |
402 | exit(0); |
403 | } |
404 | |
405 | err = check_watchpoints(pid); |
406 | if (err) |
407 | return err; |
408 | |
409 | /* Place a perf watchpoint counter */ |
410 | perf_fd = perf_watchpoint_open(child_pid: pid, addr: &perf_value, len: sizeof(perf_value)); |
411 | FAIL_IF_MSG(perf_fd < 0, "Failed to open perf performance counter" ); |
412 | |
413 | /* Place a ptrace watchpoint */ |
414 | ppc_ptrace_init_breakpoint(info: &bp_info, type: PPC_BREAKPOINT_TRIGGER_READ, |
415 | addr: &ptrace_value, len: sizeof(ptrace_value)); |
416 | bp_id = ppc_ptrace_sethwdbg(pid, bp_info: &bp_info); |
417 | FAIL_IF_MSG(bp_id < 0, "Failed to set ptrace watchpoint" ); |
418 | |
419 | /* Let the child run. It should stop on the ptrace watchpoint */ |
420 | FAIL_IF_MSG(ptrace_cont(pid, signal: 0), "Failed to continue child" ); |
421 | |
422 | FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child" ); |
423 | FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped" ); |
424 | FAIL_IF_MSG(ptrace_getreg_pc(pid, pc: &pc), "Failed to get child PC" ); |
425 | FAIL_IF_MSG(pc != perf_then_ptrace_load2, "Child did not stop on ptrace load" ); |
426 | |
427 | /* perf should have recorded the first load */ |
428 | FAIL_IF_MSG(perf_read_counter(perf_fd, count: &perf_count), "Failed to read perf counter" ); |
429 | FAIL_IF_MSG(perf_count != 1, "perf counter did not increment" ); |
430 | |
431 | /* Clean up child */ |
432 | FAIL_IF_MSG(kill(pid, SIGKILL) != 0, "Failed to kill child" ); |
433 | |
434 | return 0; |
435 | } |
436 | |
437 | int main(int argc, char *argv[]) |
438 | { |
439 | int err = 0; |
440 | |
441 | err |= test_harness(same_watch_addr_test, "same_watch_addr" ); |
442 | err |= test_harness(perf_then_ptrace_test, "perf_then_ptrace" ); |
443 | |
444 | return err; |
445 | } |
446 | |