1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | #include <linux/sched.h> |
3 | #include <linux/sched/task.h> |
4 | #include <linux/sched/task_stack.h> |
5 | #include <linux/interrupt.h> |
6 | #include <asm/sections.h> |
7 | #include <asm/ptrace.h> |
8 | #include <asm/bitops.h> |
9 | #include <asm/stacktrace.h> |
10 | #include <asm/unwind.h> |
11 | |
/*
 * Size in bytes of a frame-pointer stack frame header: two machine words,
 * the saved frame pointer at bp[0] and the return address at bp[1].
 */
#define FRAME_HEADER_SIZE (sizeof(long) * 2)
13 | |
14 | unsigned long unwind_get_return_address(struct unwind_state *state) |
15 | { |
16 | if (unwind_done(state)) |
17 | return 0; |
18 | |
19 | return __kernel_text_address(addr: state->ip) ? state->ip : 0; |
20 | } |
21 | EXPORT_SYMBOL_GPL(unwind_get_return_address); |
22 | |
23 | unsigned long *unwind_get_return_address_ptr(struct unwind_state *state) |
24 | { |
25 | if (unwind_done(state)) |
26 | return NULL; |
27 | |
28 | return state->regs ? &state->regs->ip : state->bp + 1; |
29 | } |
30 | |
31 | static void unwind_dump(struct unwind_state *state) |
32 | { |
33 | static bool dumped_before = false; |
34 | bool prev_zero, zero = false; |
35 | unsigned long word, *sp; |
36 | struct stack_info stack_info = {0}; |
37 | unsigned long visit_mask = 0; |
38 | |
39 | if (dumped_before) |
40 | return; |
41 | |
42 | dumped_before = true; |
43 | |
44 | printk_deferred("unwind stack type:%d next_sp:%p mask:0x%lx graph_idx:%d\n" , |
45 | state->stack_info.type, state->stack_info.next_sp, |
46 | state->stack_mask, state->graph_idx); |
47 | |
48 | for (sp = PTR_ALIGN(state->orig_sp, sizeof(long)); sp; |
49 | sp = PTR_ALIGN(stack_info.next_sp, sizeof(long))) { |
50 | if (get_stack_info(stack: sp, task: state->task, info: &stack_info, visit_mask: &visit_mask)) |
51 | break; |
52 | |
53 | for (; sp < stack_info.end; sp++) { |
54 | |
55 | word = READ_ONCE_NOCHECK(*sp); |
56 | |
57 | prev_zero = zero; |
58 | zero = word == 0; |
59 | |
60 | if (zero) { |
61 | if (!prev_zero) |
62 | printk_deferred("%p: %0*x ...\n" , |
63 | sp, BITS_PER_LONG/4, 0); |
64 | continue; |
65 | } |
66 | |
67 | printk_deferred("%p: %0*lx (%pB)\n" , |
68 | sp, BITS_PER_LONG/4, word, (void *)word); |
69 | } |
70 | } |
71 | } |
72 | |
73 | static bool in_entry_code(unsigned long ip) |
74 | { |
75 | char *addr = (char *)ip; |
76 | |
77 | return addr >= __entry_text_start && addr < __entry_text_end; |
78 | } |
79 | |
80 | static inline unsigned long *last_frame(struct unwind_state *state) |
81 | { |
82 | return (unsigned long *)task_pt_regs(state->task) - 2; |
83 | } |
84 | |
85 | static bool is_last_frame(struct unwind_state *state) |
86 | { |
87 | return state->bp == last_frame(state); |
88 | } |
89 | |
/*
 * Number of words last_aligned_frame() subtracts from last_frame() to find
 * the last frame when the stack was realigned: 3 on 32-bit, 1 otherwise.
 */
#ifndef CONFIG_X86_32
#define GCC_REALIGN_WORDS 1
#else
#define GCC_REALIGN_WORDS 3
#endif
95 | |
96 | static inline unsigned long *last_aligned_frame(struct unwind_state *state) |
97 | { |
98 | return last_frame(state) - GCC_REALIGN_WORDS; |
99 | } |
100 | |
101 | static bool is_last_aligned_frame(struct unwind_state *state) |
102 | { |
103 | unsigned long *last_bp = last_frame(state); |
104 | unsigned long *aligned_bp = last_aligned_frame(state); |
105 | |
106 | /* |
107 | * GCC can occasionally decide to realign the stack pointer and change |
108 | * the offset of the stack frame in the prologue of a function called |
109 | * by head/entry code. Examples: |
110 | * |
111 | * <start_secondary>: |
112 | * push %edi |
113 | * lea 0x8(%esp),%edi |
114 | * and $0xfffffff8,%esp |
115 | * pushl -0x4(%edi) |
116 | * push %ebp |
117 | * mov %esp,%ebp |
118 | * |
119 | * <x86_64_start_kernel>: |
120 | * lea 0x8(%rsp),%r10 |
121 | * and $0xfffffffffffffff0,%rsp |
122 | * pushq -0x8(%r10) |
123 | * push %rbp |
124 | * mov %rsp,%rbp |
125 | * |
126 | * After aligning the stack, it pushes a duplicate copy of the return |
127 | * address before pushing the frame pointer. |
128 | */ |
129 | return (state->bp == aligned_bp && *(aligned_bp + 1) == *(last_bp + 1)); |
130 | } |
131 | |
132 | static bool is_last_ftrace_frame(struct unwind_state *state) |
133 | { |
134 | unsigned long *last_bp = last_frame(state); |
135 | unsigned long *last_ftrace_bp = last_bp - 3; |
136 | |
137 | /* |
138 | * When unwinding from an ftrace handler of a function called by entry |
139 | * code, the stack layout of the last frame is: |
140 | * |
141 | * bp |
142 | * parent ret addr |
143 | * bp |
144 | * function ret addr |
145 | * parent ret addr |
146 | * pt_regs |
147 | * ----------------- |
148 | */ |
149 | return (state->bp == last_ftrace_bp && |
150 | *state->bp == *(state->bp + 2) && |
151 | *(state->bp + 1) == *(state->bp + 4)); |
152 | } |
153 | |
/*
 * True when the current frame matches any of the recognised last-frame
 * layouts: plain, stack-realigned, or ftrace.
 */
static bool is_last_task_frame(struct unwind_state *state)
{
	if (is_last_frame(state))
		return true;

	if (is_last_aligned_frame(state))
		return true;

	return is_last_ftrace_frame(state);
}
159 | |
/*
 * This determines if the frame pointer actually contains an encoded pointer to
 * pt_regs on the stack.  See ENCODE_FRAME_POINTER.
 *
 * Returns the decoded pt_regs pointer, or NULL when @bp is not encoded.
 */
#ifdef CONFIG_X86_64
/* 64-bit: an encoded pointer has bit 0 set; decoding clears it. */
static struct pt_regs *decode_frame_pointer(unsigned long *bp)
{
	unsigned long encoded = (unsigned long)bp;

	return (encoded & 0x1) ? (struct pt_regs *)(encoded & ~0x1) : NULL;
}
#else
/* Otherwise: an encoded pointer has bit 31 clear; decoding sets it. */
static struct pt_regs *decode_frame_pointer(unsigned long *bp)
{
	unsigned long encoded = (unsigned long)bp;

	return (encoded & 0x80000000) ? NULL :
		(struct pt_regs *)(encoded | 0x80000000);
}
#endif
185 | |
186 | /* |
187 | * While walking the stack, KMSAN may stomp on stale locals from other |
188 | * functions that were marked as uninitialized upon function exit, and |
189 | * now hold the call frame information for the current function (e.g. the frame |
190 | * pointer). Because KMSAN does not specifically mark call frames as |
191 | * initialized, false positive reports are possible. To prevent such reports, |
192 | * we mark the functions scanning the stack (here and below) with |
193 | * __no_kmsan_checks. |
194 | */ |
195 | __no_kmsan_checks |
196 | static bool update_stack_state(struct unwind_state *state, |
197 | unsigned long *next_bp) |
198 | { |
199 | struct stack_info *info = &state->stack_info; |
200 | enum stack_type prev_type = info->type; |
201 | struct pt_regs *regs; |
202 | unsigned long *frame, *prev_frame_end, *addr_p, addr; |
203 | size_t len; |
204 | |
205 | if (state->regs) |
206 | prev_frame_end = (void *)state->regs + sizeof(*state->regs); |
207 | else |
208 | prev_frame_end = (void *)state->bp + FRAME_HEADER_SIZE; |
209 | |
210 | /* Is the next frame pointer an encoded pointer to pt_regs? */ |
211 | regs = decode_frame_pointer(bp: next_bp); |
212 | if (regs) { |
213 | frame = (unsigned long *)regs; |
214 | len = sizeof(*regs); |
215 | state->got_irq = true; |
216 | } else { |
217 | frame = next_bp; |
218 | len = FRAME_HEADER_SIZE; |
219 | } |
220 | |
221 | /* |
222 | * If the next bp isn't on the current stack, switch to the next one. |
223 | * |
224 | * We may have to traverse multiple stacks to deal with the possibility |
225 | * that info->next_sp could point to an empty stack and the next bp |
226 | * could be on a subsequent stack. |
227 | */ |
228 | while (!on_stack(info, addr: frame, len)) |
229 | if (get_stack_info(stack: info->next_sp, task: state->task, info, |
230 | visit_mask: &state->stack_mask)) |
231 | return false; |
232 | |
233 | /* Make sure it only unwinds up and doesn't overlap the prev frame: */ |
234 | if (state->orig_sp && state->stack_info.type == prev_type && |
235 | frame < prev_frame_end) |
236 | return false; |
237 | |
238 | /* Move state to the next frame: */ |
239 | if (regs) { |
240 | state->regs = regs; |
241 | state->bp = NULL; |
242 | } else { |
243 | state->bp = next_bp; |
244 | state->regs = NULL; |
245 | } |
246 | |
247 | /* Save the return address: */ |
248 | if (state->regs && user_mode(regs: state->regs)) |
249 | state->ip = 0; |
250 | else { |
251 | addr_p = unwind_get_return_address_ptr(state); |
252 | addr = READ_ONCE_TASK_STACK(state->task, *addr_p); |
253 | state->ip = unwind_recover_ret_addr(state, addr, addr_p); |
254 | } |
255 | |
256 | /* Save the original stack pointer for unwind_dump(): */ |
257 | if (!state->orig_sp) |
258 | state->orig_sp = frame; |
259 | |
260 | return true; |
261 | } |
262 | |
263 | __no_kmsan_checks |
264 | bool unwind_next_frame(struct unwind_state *state) |
265 | { |
266 | struct pt_regs *regs; |
267 | unsigned long *next_bp; |
268 | |
269 | if (unwind_done(state)) |
270 | return false; |
271 | |
272 | /* Have we reached the end? */ |
273 | if (state->regs && user_mode(regs: state->regs)) |
274 | goto the_end; |
275 | |
276 | if (is_last_task_frame(state)) { |
277 | regs = task_pt_regs(state->task); |
278 | |
279 | /* |
280 | * kthreads (other than the boot CPU's idle thread) have some |
281 | * partial regs at the end of their stack which were placed |
282 | * there by copy_thread(). But the regs don't have any |
283 | * useful information, so we can skip them. |
284 | * |
285 | * This user_mode() check is slightly broader than a PF_KTHREAD |
286 | * check because it also catches the awkward situation where a |
287 | * newly forked kthread transitions into a user task by calling |
288 | * kernel_execve(), which eventually clears PF_KTHREAD. |
289 | */ |
290 | if (!user_mode(regs)) |
291 | goto the_end; |
292 | |
293 | /* |
294 | * We're almost at the end, but not quite: there's still the |
295 | * syscall regs frame. Entry code doesn't encode the regs |
296 | * pointer for syscalls, so we have to set it manually. |
297 | */ |
298 | state->regs = regs; |
299 | state->bp = NULL; |
300 | state->ip = 0; |
301 | return true; |
302 | } |
303 | |
304 | /* Get the next frame pointer: */ |
305 | if (state->next_bp) { |
306 | next_bp = state->next_bp; |
307 | state->next_bp = NULL; |
308 | } else if (state->regs) { |
309 | next_bp = (unsigned long *)state->regs->bp; |
310 | } else { |
311 | next_bp = (unsigned long *)READ_ONCE_TASK_STACK(state->task, *state->bp); |
312 | } |
313 | |
314 | /* Move to the next frame if it's safe: */ |
315 | if (!update_stack_state(state, next_bp)) |
316 | goto bad_address; |
317 | |
318 | return true; |
319 | |
320 | bad_address: |
321 | state->error = true; |
322 | |
323 | /* |
324 | * When unwinding a non-current task, the task might actually be |
325 | * running on another CPU, in which case it could be modifying its |
326 | * stack while we're reading it. This is generally not a problem and |
327 | * can be ignored as long as the caller understands that unwinding |
328 | * another task will not always succeed. |
329 | */ |
330 | if (state->task != current) |
331 | goto the_end; |
332 | |
333 | /* |
334 | * Don't warn if the unwinder got lost due to an interrupt in entry |
335 | * code or in the C handler before the first frame pointer got set up: |
336 | */ |
337 | if (state->got_irq && in_entry_code(ip: state->ip)) |
338 | goto the_end; |
339 | if (state->regs && |
340 | state->regs->sp >= (unsigned long)last_aligned_frame(state) && |
341 | state->regs->sp < (unsigned long)task_pt_regs(state->task)) |
342 | goto the_end; |
343 | |
344 | /* |
345 | * There are some known frame pointer issues on 32-bit. Disable |
346 | * unwinder warnings on 32-bit until it gets objtool support. |
347 | */ |
348 | if (IS_ENABLED(CONFIG_X86_32)) |
349 | goto the_end; |
350 | |
351 | if (state->task != current) |
352 | goto the_end; |
353 | |
354 | if (state->regs) { |
355 | printk_deferred_once(KERN_WARNING |
356 | "WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n" , |
357 | state->regs, state->task->comm, |
358 | state->task->pid, next_bp); |
359 | unwind_dump(state); |
360 | } else { |
361 | printk_deferred_once(KERN_WARNING |
362 | "WARNING: kernel stack frame pointer at %p in %s:%d has bad value %p\n" , |
363 | state->bp, state->task->comm, |
364 | state->task->pid, next_bp); |
365 | unwind_dump(state); |
366 | } |
367 | the_end: |
368 | state->stack_info.type = STACK_TYPE_UNKNOWN; |
369 | return false; |
370 | } |
371 | EXPORT_SYMBOL_GPL(unwind_next_frame); |
372 | |
373 | void __unwind_start(struct unwind_state *state, struct task_struct *task, |
374 | struct pt_regs *regs, unsigned long *first_frame) |
375 | { |
376 | unsigned long *bp; |
377 | |
378 | memset(state, 0, sizeof(*state)); |
379 | state->task = task; |
380 | state->got_irq = (regs); |
381 | |
382 | /* Don't even attempt to start from user mode regs: */ |
383 | if (regs && user_mode(regs)) { |
384 | state->stack_info.type = STACK_TYPE_UNKNOWN; |
385 | return; |
386 | } |
387 | |
388 | bp = get_frame_pointer(task, regs); |
389 | |
390 | /* |
391 | * If we crash with IP==0, the last successfully executed instruction |
392 | * was probably an indirect function call with a NULL function pointer. |
393 | * That means that SP points into the middle of an incomplete frame: |
394 | * *SP is a return pointer, and *(SP-sizeof(unsigned long)) is where we |
395 | * would have written a frame pointer if we hadn't crashed. |
396 | * Pretend that the frame is complete and that BP points to it, but save |
397 | * the real BP so that we can use it when looking for the next frame. |
398 | */ |
399 | if (regs && regs->ip == 0 && (unsigned long *)regs->sp >= first_frame) { |
400 | state->next_bp = bp; |
401 | bp = ((unsigned long *)regs->sp) - 1; |
402 | } |
403 | |
404 | /* Initialize stack info and make sure the frame data is accessible: */ |
405 | get_stack_info(stack: bp, task: state->task, info: &state->stack_info, |
406 | visit_mask: &state->stack_mask); |
407 | update_stack_state(state, next_bp: bp); |
408 | |
409 | /* |
410 | * The caller can provide the address of the first frame directly |
411 | * (first_frame) or indirectly (regs->sp) to indicate which stack frame |
412 | * to start unwinding at. Skip ahead until we reach it. |
413 | */ |
414 | while (!unwind_done(state) && |
415 | (!on_stack(info: &state->stack_info, addr: first_frame, len: sizeof(long)) || |
416 | (state->next_bp == NULL && state->bp < first_frame))) |
417 | unwind_next_frame(state); |
418 | } |
419 | EXPORT_SYMBOL_GPL(__unwind_start); |
420 | |