1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * PowerPC version |
4 | * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) |
5 | * |
6 | * Derived from "arch/i386/mm/fault.c" |
7 | * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds |
8 | * |
9 | * Modified by Cort Dougan and Paul Mackerras. |
10 | * |
11 | * Modified for PPC64 by Dave Engebretsen (engebret@ibm.com) |
12 | */ |
13 | |
14 | #include <linux/signal.h> |
15 | #include <linux/sched.h> |
16 | #include <linux/sched/task_stack.h> |
17 | #include <linux/kernel.h> |
18 | #include <linux/errno.h> |
19 | #include <linux/string.h> |
20 | #include <linux/types.h> |
21 | #include <linux/pagemap.h> |
22 | #include <linux/ptrace.h> |
23 | #include <linux/mman.h> |
24 | #include <linux/mm.h> |
25 | #include <linux/interrupt.h> |
26 | #include <linux/highmem.h> |
27 | #include <linux/extable.h> |
28 | #include <linux/kprobes.h> |
29 | #include <linux/kdebug.h> |
30 | #include <linux/perf_event.h> |
31 | #include <linux/ratelimit.h> |
32 | #include <linux/context_tracking.h> |
33 | #include <linux/hugetlb.h> |
34 | #include <linux/uaccess.h> |
35 | #include <linux/kfence.h> |
36 | #include <linux/pkeys.h> |
37 | |
38 | #include <asm/firmware.h> |
39 | #include <asm/interrupt.h> |
40 | #include <asm/page.h> |
41 | #include <asm/mmu.h> |
42 | #include <asm/mmu_context.h> |
43 | #include <asm/siginfo.h> |
44 | #include <asm/debug.h> |
45 | #include <asm/kup.h> |
46 | #include <asm/inst.h> |
47 | |
48 | |
49 | /* |
50 | * do_page_fault error handling helpers |
51 | */ |
52 | |
53 | static int |
54 | __bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code) |
55 | { |
56 | /* |
57 | * If we are in kernel mode, bail out with a SEGV, this will |
58 | * be caught by the assembly which will restore the non-volatile |
59 | * registers before calling bad_page_fault() |
60 | */ |
61 | if (!user_mode(regs)) |
62 | return SIGSEGV; |
63 | |
64 | _exception(SIGSEGV, regs, si_code, address); |
65 | |
66 | return 0; |
67 | } |
68 | |
69 | static noinline int bad_area_nosemaphore(struct pt_regs *regs, unsigned long address) |
70 | { |
71 | return __bad_area_nosemaphore(regs, address, SEGV_MAPERR); |
72 | } |
73 | |
74 | static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code) |
75 | { |
76 | struct mm_struct *mm = current->mm; |
77 | |
78 | /* |
79 | * Something tried to access memory that isn't in our memory map.. |
80 | * Fix it, but check if it's kernel or user first.. |
81 | */ |
82 | mmap_read_unlock(mm); |
83 | |
84 | return __bad_area_nosemaphore(regs, address, si_code); |
85 | } |
86 | |
87 | static noinline int bad_access_pkey(struct pt_regs *regs, unsigned long address, |
88 | struct vm_area_struct *vma) |
89 | { |
90 | struct mm_struct *mm = current->mm; |
91 | int pkey; |
92 | |
93 | /* |
94 | * We don't try to fetch the pkey from page table because reading |
95 | * page table without locking doesn't guarantee stable pte value. |
96 | * Hence the pkey value that we return to userspace can be different |
97 | * from the pkey that actually caused access error. |
98 | * |
99 | * It does *not* guarantee that the VMA we find here |
100 | * was the one that we faulted on. |
101 | * |
102 | * 1. T1 : mprotect_key(foo, PAGE_SIZE, pkey=4); |
	 * 2. T1 : set AMR to deny access to pkey=4, touches page
104 | * 3. T1 : faults... |
105 | * 4. T2: mprotect_key(foo, PAGE_SIZE, pkey=5); |
106 | * 5. T1 : enters fault handler, takes mmap_lock, etc... |
107 | * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really |
108 | * faulted on a pte with its pkey=4. |
109 | */ |
110 | pkey = vma_pkey(vma); |
111 | |
112 | mmap_read_unlock(mm); |
113 | |
114 | /* |
115 | * If we are in kernel mode, bail out with a SEGV, this will |
116 | * be caught by the assembly which will restore the non-volatile |
117 | * registers before calling bad_page_fault() |
118 | */ |
119 | if (!user_mode(regs)) |
120 | return SIGSEGV; |
121 | |
122 | _exception_pkey(regs, address, pkey); |
123 | |
124 | return 0; |
125 | } |
126 | |
127 | static noinline int bad_access(struct pt_regs *regs, unsigned long address) |
128 | { |
129 | return __bad_area(regs, address, SEGV_ACCERR); |
130 | } |
131 | |
132 | static int do_sigbus(struct pt_regs *regs, unsigned long address, |
133 | vm_fault_t fault) |
134 | { |
135 | if (!user_mode(regs)) |
136 | return SIGBUS; |
137 | |
138 | current->thread.trap_nr = BUS_ADRERR; |
139 | #ifdef CONFIG_MEMORY_FAILURE |
140 | if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) { |
141 | unsigned int lsb = 0; /* shutup gcc */ |
142 | |
143 | pr_err("MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n" , |
144 | current->comm, current->pid, address); |
145 | |
146 | if (fault & VM_FAULT_HWPOISON_LARGE) |
147 | lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault)); |
148 | if (fault & VM_FAULT_HWPOISON) |
149 | lsb = PAGE_SHIFT; |
150 | |
151 | force_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb); |
152 | return 0; |
153 | } |
154 | |
155 | #endif |
	force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
157 | return 0; |
158 | } |
159 | |
160 | static int mm_fault_error(struct pt_regs *regs, unsigned long addr, |
161 | vm_fault_t fault) |
162 | { |
163 | /* |
164 | * Kernel page fault interrupted by SIGKILL. We have no reason to |
165 | * continue processing. |
166 | */ |
167 | if (fatal_signal_pending(current) && !user_mode(regs)) |
168 | return SIGKILL; |
169 | |
170 | /* Out of memory */ |
171 | if (fault & VM_FAULT_OOM) { |
172 | /* |
173 | * We ran out of memory, or some other thing happened to us that |
174 | * made us unable to handle the page fault gracefully. |
175 | */ |
176 | if (!user_mode(regs)) |
177 | return SIGSEGV; |
178 | pagefault_out_of_memory(); |
179 | } else { |
180 | if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON| |
181 | VM_FAULT_HWPOISON_LARGE)) |
			return do_sigbus(regs, addr, fault);
		else if (fault & VM_FAULT_SIGSEGV)
			return bad_area_nosemaphore(regs, addr);
185 | else |
186 | BUG(); |
187 | } |
188 | return 0; |
189 | } |
190 | |
191 | /* Is this a bad kernel fault ? */ |
192 | static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code, |
193 | unsigned long address, bool is_write) |
194 | { |
195 | int is_exec = TRAP(regs) == INTERRUPT_INST_STORAGE; |
196 | |
197 | if (is_exec) { |
198 | pr_crit_ratelimited("kernel tried to execute %s page (%lx) - exploit attempt? (uid: %d)\n" , |
199 | address >= TASK_SIZE ? "exec-protected" : "user" , |
200 | address, |
201 | from_kuid(&init_user_ns, current_uid())); |
202 | |
203 | // Kernel exec fault is always bad |
204 | return true; |
205 | } |
206 | |
207 | // Kernel fault on kernel address is bad |
208 | if (address >= TASK_SIZE) |
209 | return true; |
210 | |
211 | // Read/write fault blocked by KUAP is bad, it can never succeed. |
212 | if (bad_kuap_fault(regs, address, is_write)) { |
213 | pr_crit_ratelimited("Kernel attempted to %s user page (%lx) - exploit attempt? (uid: %d)\n" , |
214 | is_write ? "write" : "read" , address, |
215 | from_kuid(&init_user_ns, current_uid())); |
216 | |
217 | // Fault on user outside of certain regions (eg. copy_tofrom_user()) is bad |
		if (!search_exception_tables(regs->nip))
219 | return true; |
220 | |
221 | // Read/write fault in a valid region (the exception table search passed |
222 | // above), but blocked by KUAP is bad, it can never succeed. |
		return WARN(true, "Bug: %s fault blocked by KUAP!", is_write ? "Write" : "Read");
224 | } |
225 | |
226 | // What's left? Kernel fault on user and allowed by KUAP in the faulting context. |
227 | return false; |
228 | } |
229 | |
230 | static bool access_pkey_error(bool is_write, bool is_exec, bool is_pkey, |
231 | struct vm_area_struct *vma) |
232 | { |
233 | /* |
234 | * Make sure to check the VMA so that we do not perform |
235 | * faults just to hit a pkey fault as soon as we fill in a |
236 | * page. Only called for current mm, hence foreign == 0 |
237 | */ |
	if (!arch_vma_access_permitted(vma, is_write, is_exec, 0))
239 | return true; |
240 | |
241 | return false; |
242 | } |
243 | |
244 | static bool access_error(bool is_write, bool is_exec, struct vm_area_struct *vma) |
245 | { |
246 | /* |
247 | * Allow execution from readable areas if the MMU does not |
248 | * provide separate controls over reading and executing. |
249 | * |
250 | * Note: That code used to not be enabled for 4xx/BookE. |
251 | * It is now as I/D cache coherency for these is done at |
252 | * set_pte_at() time and I see no reason why the test |
253 | * below wouldn't be valid on those processors. This -may- |
254 | * break programs compiled with a really old ABI though. |
255 | */ |
256 | if (is_exec) { |
257 | return !(vma->vm_flags & VM_EXEC) && |
258 | (cpu_has_feature(CPU_FTR_NOEXECUTE) || |
259 | !(vma->vm_flags & (VM_READ | VM_WRITE))); |
260 | } |
261 | |
262 | if (is_write) { |
263 | if (unlikely(!(vma->vm_flags & VM_WRITE))) |
264 | return true; |
265 | return false; |
266 | } |
267 | |
268 | /* |
269 | * VM_READ, VM_WRITE and VM_EXEC may imply read permissions, as |
270 | * defined in protection_map[]. In that case Read faults can only be |
271 | * caused by a PROT_NONE mapping. However a non exec access on a |
272 | * VM_EXEC only mapping is invalid anyway, so report it as such. |
273 | */ |
274 | if (unlikely(!vma_is_accessible(vma))) |
275 | return true; |
276 | |
277 | if ((vma->vm_flags & VM_ACCESS_FLAGS) == VM_EXEC) |
278 | return true; |
279 | |
280 | /* |
281 | * We should ideally do the vma pkey access check here. But in the |
282 | * fault path, handle_mm_fault() also does the same check. To avoid |
283 | * these multiple checks, we skip it here and handle access error due |
284 | * to pkeys later. |
285 | */ |
286 | return false; |
287 | } |
288 | |
289 | #ifdef CONFIG_PPC_SMLPAR |
290 | static inline void cmo_account_page_fault(void) |
291 | { |
292 | if (firmware_has_feature(FW_FEATURE_CMO)) { |
293 | u32 page_ins; |
294 | |
295 | preempt_disable(); |
296 | page_ins = be32_to_cpu(get_lppaca()->page_ins); |
297 | page_ins += 1 << PAGE_FACTOR; |
298 | get_lppaca()->page_ins = cpu_to_be32(page_ins); |
299 | preempt_enable(); |
300 | } |
301 | } |
302 | #else |
303 | static inline void cmo_account_page_fault(void) { } |
304 | #endif /* CONFIG_PPC_SMLPAR */ |
305 | |
306 | static void sanity_check_fault(bool is_write, bool is_user, |
307 | unsigned long error_code, unsigned long address) |
308 | { |
309 | /* |
310 | * Userspace trying to access kernel address, we get PROTFAULT for that. |
311 | */ |
312 | if (is_user && address >= TASK_SIZE) { |
313 | if ((long)address == -1) |
314 | return; |
315 | |
316 | pr_crit_ratelimited("%s[%d]: User access of kernel address (%lx) - exploit attempt? (uid: %d)\n" , |
317 | current->comm, current->pid, address, |
318 | from_kuid(&init_user_ns, current_uid())); |
319 | return; |
320 | } |
321 | |
322 | if (!IS_ENABLED(CONFIG_PPC_BOOK3S)) |
323 | return; |
324 | |
325 | /* |
326 | * For hash translation mode, we should never get a |
327 | * PROTFAULT. Any update to pte to reduce access will result in us |
328 | * removing the hash page table entry, thus resulting in a DSISR_NOHPTE |
329 | * fault instead of DSISR_PROTFAULT. |
330 | * |
331 | * A pte update to relax the access will not result in a hash page table |
332 | * entry invalidate and hence can result in DSISR_PROTFAULT. |
333 | * ptep_set_access_flags() doesn't do a hpte flush. This is why we have |
334 | * the special !is_write in the below conditional. |
335 | * |
	 * For platforms that don't support a coherent icache but do support a
	 * per-page noexec bit, we set things up so that the D/I cache sync is
	 * done via a fault. But that is handled by the low level hash fault
	 * code (hash_page_do_lazy_icache()) and we should not reach here in
	 * that case.
	 *
	 * For an invalid access that can result in PROTFAULT, the vma->vm_flags
	 * check in access_error() handles it, so we fall through to the
	 * bad_area handling correctly.
	 *
	 * For embedded MMUs with per-page exec support but without a coherent
	 * icache we do get PROTFAULT, and the D/I cache sync is handled in
	 * set_pte_at() while taking the noexec/prot fault. Hence this WARN_ON
	 * is conditional on the server MMU.
350 | * |
351 | * For radix, we can get prot fault for autonuma case, because radix |
352 | * page table will have them marked noaccess for user. |
353 | */ |
354 | if (radix_enabled() || is_write) |
355 | return; |
356 | |
357 | WARN_ON_ONCE(error_code & DSISR_PROTFAULT); |
358 | } |
359 | |
360 | /* |
361 | * Define the correct "is_write" bit in error_code based |
362 | * on the processor family |
363 | */ |
364 | #if (defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) |
365 | #define page_fault_is_write(__err) ((__err) & ESR_DST) |
366 | #else |
367 | #define page_fault_is_write(__err) ((__err) & DSISR_ISSTORE) |
368 | #endif |
369 | |
370 | #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) |
371 | #define page_fault_is_bad(__err) (0) |
372 | #elif defined(CONFIG_PPC_8xx) |
373 | #define page_fault_is_bad(__err) ((__err) & DSISR_NOEXEC_OR_G) |
374 | #elif defined(CONFIG_PPC64) |
375 | static int page_fault_is_bad(unsigned long err) |
376 | { |
377 | unsigned long flag = DSISR_BAD_FAULT_64S; |
378 | |
379 | /* |
	 * PAPR+ v2.11 § 14.15.3.4.1 (unreleased)
381 | * If byte 0, bit 3 of pi-attribute-specifier-type in |
382 | * ibm,pi-features property is defined, ignore the DSI error |
383 | * which is caused by the paste instruction on the |
384 | * suspended NX window. |
385 | */ |
386 | if (mmu_has_feature(MMU_FTR_NX_DSI)) |
387 | flag &= ~DSISR_BAD_COPYPASTE; |
388 | |
389 | return err & flag; |
390 | } |
391 | #else |
392 | #define page_fault_is_bad(__err) ((__err) & DSISR_BAD_FAULT_32S) |
393 | #endif |
394 | |
395 | /* |
396 | * For 600- and 800-family processors, the error_code parameter is DSISR |
397 | * for a data fault, SRR1 for an instruction fault. |
398 | * For 400-family processors the error_code parameter is ESR for a data fault, |
399 | * 0 for an instruction fault. |
400 | * For 64-bit processors, the error_code parameter is DSISR for a data access |
401 | * fault, SRR1 & 0x08000000 for an instruction access fault. |
402 | * |
403 | * The return value is 0 if the fault was handled, or the signal |
404 | * number if this is a kernel fault that can't be handled here. |
405 | */ |
406 | static int ___do_page_fault(struct pt_regs *regs, unsigned long address, |
407 | unsigned long error_code) |
408 | { |
	struct vm_area_struct *vma;
410 | struct mm_struct *mm = current->mm; |
411 | unsigned int flags = FAULT_FLAG_DEFAULT; |
412 | int is_exec = TRAP(regs) == INTERRUPT_INST_STORAGE; |
413 | int is_user = user_mode(regs); |
414 | int is_write = page_fault_is_write(error_code); |
415 | vm_fault_t fault, major = 0; |
	bool kprobe_fault = kprobe_page_fault(regs, 11);
417 | |
418 | if (unlikely(debugger_fault_handler(regs) || kprobe_fault)) |
419 | return 0; |
420 | |
421 | if (unlikely(page_fault_is_bad(error_code))) { |
422 | if (is_user) { |
423 | _exception(SIGBUS, regs, BUS_OBJERR, address); |
424 | return 0; |
425 | } |
426 | return SIGBUS; |
427 | } |
428 | |
429 | /* Additional sanity check(s) */ |
430 | sanity_check_fault(is_write, is_user, error_code, address); |
431 | |
432 | /* |
433 | * The kernel should never take an execute fault nor should it |
434 | * take a page fault to a kernel address or a page fault to a user |
435 | * address outside of dedicated places |
436 | */ |
437 | if (unlikely(!is_user && bad_kernel_fault(regs, error_code, address, is_write))) { |
		if (kfence_handle_page_fault(address, is_write, regs))
439 | return 0; |
440 | |
441 | return SIGSEGV; |
442 | } |
443 | |
444 | /* |
445 | * If we're in an interrupt, have no user context or are running |
446 | * in a region with pagefaults disabled then we must not take the fault |
447 | */ |
448 | if (unlikely(faulthandler_disabled() || !mm)) { |
449 | if (is_user) |
450 | printk_ratelimited(KERN_ERR "Page fault in user mode" |
451 | " with faulthandler_disabled()=%d" |
452 | " mm=%p\n" , |
453 | faulthandler_disabled(), mm); |
454 | return bad_area_nosemaphore(regs, address); |
455 | } |
456 | |
457 | interrupt_cond_local_irq_enable(regs); |
458 | |
	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
460 | |
461 | /* |
462 | * We want to do this outside mmap_lock, because reading code around nip |
463 | * can result in fault, which will cause a deadlock when called with |
464 | * mmap_lock held |
465 | */ |
466 | if (is_user) |
467 | flags |= FAULT_FLAG_USER; |
468 | if (is_write) |
469 | flags |= FAULT_FLAG_WRITE; |
470 | if (is_exec) |
471 | flags |= FAULT_FLAG_INSTRUCTION; |
472 | |
473 | if (!(flags & FAULT_FLAG_USER)) |
474 | goto lock_mmap; |
475 | |
476 | vma = lock_vma_under_rcu(mm, address); |
477 | if (!vma) |
478 | goto lock_mmap; |
479 | |
480 | if (unlikely(access_pkey_error(is_write, is_exec, |
481 | (error_code & DSISR_KEYFAULT), vma))) { |
482 | vma_end_read(vma); |
483 | goto lock_mmap; |
484 | } |
485 | |
486 | if (unlikely(access_error(is_write, is_exec, vma))) { |
487 | vma_end_read(vma); |
488 | goto lock_mmap; |
489 | } |
490 | |
	fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
492 | if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED))) |
493 | vma_end_read(vma); |
494 | |
495 | if (!(fault & VM_FAULT_RETRY)) { |
496 | count_vm_vma_lock_event(VMA_LOCK_SUCCESS); |
497 | goto done; |
498 | } |
499 | count_vm_vma_lock_event(VMA_LOCK_RETRY); |
500 | if (fault & VM_FAULT_MAJOR) |
501 | flags |= FAULT_FLAG_TRIED; |
502 | |
	if (fault_signal_pending(fault, regs))
504 | return user_mode(regs) ? 0 : SIGBUS; |
505 | |
506 | lock_mmap: |
507 | |
508 | /* When running in the kernel we expect faults to occur only to |
509 | * addresses in user space. All other faults represent errors in the |
510 | * kernel and should generate an OOPS. Unfortunately, in the case of an |
511 | * erroneous fault occurring in a code path which already holds mmap_lock |
512 | * we will deadlock attempting to validate the fault against the |
513 | * address space. Luckily the kernel only validly references user |
514 | * space from well defined areas of code, which are listed in the |
515 | * exceptions table. lock_mm_and_find_vma() handles that logic. |
516 | */ |
517 | retry: |
518 | vma = lock_mm_and_find_vma(mm, address, regs); |
519 | if (unlikely(!vma)) |
520 | return bad_area_nosemaphore(regs, address); |
521 | |
522 | if (unlikely(access_pkey_error(is_write, is_exec, |
523 | (error_code & DSISR_KEYFAULT), vma))) |
524 | return bad_access_pkey(regs, address, vma); |
525 | |
526 | if (unlikely(access_error(is_write, is_exec, vma))) |
527 | return bad_access(regs, address); |
528 | |
529 | /* |
530 | * If for any reason at all we couldn't handle the fault, |
531 | * make sure we exit gracefully rather than endlessly redo |
532 | * the fault. |
533 | */ |
534 | fault = handle_mm_fault(vma, address, flags, regs); |
535 | |
536 | major |= fault & VM_FAULT_MAJOR; |
537 | |
	if (fault_signal_pending(fault, regs))
539 | return user_mode(regs) ? 0 : SIGBUS; |
540 | |
541 | /* The fault is fully completed (including releasing mmap lock) */ |
542 | if (fault & VM_FAULT_COMPLETED) |
543 | goto out; |
544 | |
545 | /* |
546 | * Handle the retry right now, the mmap_lock has been released in that |
547 | * case. |
548 | */ |
549 | if (unlikely(fault & VM_FAULT_RETRY)) { |
550 | flags |= FAULT_FLAG_TRIED; |
551 | goto retry; |
552 | } |
553 | |
554 | mmap_read_unlock(current->mm); |
555 | |
556 | done: |
557 | if (unlikely(fault & VM_FAULT_ERROR)) |
		return mm_fault_error(regs, address, fault);
559 | |
560 | out: |
561 | /* |
562 | * Major/minor page fault accounting. |
563 | */ |
564 | if (major) |
565 | cmo_account_page_fault(); |
566 | |
567 | return 0; |
568 | } |
569 | NOKPROBE_SYMBOL(___do_page_fault); |
570 | |
571 | static __always_inline void __do_page_fault(struct pt_regs *regs) |
572 | { |
573 | long err; |
574 | |
	err = ___do_page_fault(regs, regs->dar, regs->dsisr);
576 | if (unlikely(err)) |
577 | bad_page_fault(regs, err); |
578 | } |
579 | |
580 | DEFINE_INTERRUPT_HANDLER(do_page_fault) |
581 | { |
	__do_page_fault(regs);
583 | } |
584 | |
585 | #ifdef CONFIG_PPC_BOOK3S_64 |
586 | /* Same as do_page_fault but interrupt entry has already run in do_hash_fault */ |
587 | void hash__do_page_fault(struct pt_regs *regs) |
588 | { |
589 | __do_page_fault(regs); |
590 | } |
591 | NOKPROBE_SYMBOL(hash__do_page_fault); |
592 | #endif |
593 | |
594 | /* |
595 | * bad_page_fault is called when we have a bad access from the kernel. |
596 | * It is called from the DSI and ISI handlers in head.S and from some |
597 | * of the procedures in traps.c. |
598 | */ |
599 | static void __bad_page_fault(struct pt_regs *regs, int sig) |
600 | { |
601 | int is_write = page_fault_is_write(regs->dsisr); |
602 | const char *msg; |
603 | |
604 | /* kernel has accessed a bad area */ |
605 | |
	if (regs->dar < PAGE_SIZE)
		msg = "Kernel NULL pointer dereference";
	else
		msg = "Unable to handle kernel data access";
610 | |
611 | switch (TRAP(regs)) { |
612 | case INTERRUPT_DATA_STORAGE: |
613 | case INTERRUPT_H_DATA_STORAGE: |
614 | pr_alert("BUG: %s on %s at 0x%08lx\n" , msg, |
615 | is_write ? "write" : "read" , regs->dar); |
616 | break; |
617 | case INTERRUPT_DATA_SEGMENT: |
618 | pr_alert("BUG: %s at 0x%08lx\n" , msg, regs->dar); |
619 | break; |
620 | case INTERRUPT_INST_STORAGE: |
621 | case INTERRUPT_INST_SEGMENT: |
622 | pr_alert("BUG: Unable to handle kernel instruction fetch%s" , |
623 | regs->nip < PAGE_SIZE ? " (NULL pointer?)\n" : "\n" ); |
624 | break; |
625 | case INTERRUPT_ALIGNMENT: |
626 | pr_alert("BUG: Unable to handle kernel unaligned access at 0x%08lx\n" , |
627 | regs->dar); |
628 | break; |
629 | default: |
630 | pr_alert("BUG: Unable to handle unknown paging fault at 0x%08lx\n" , |
631 | regs->dar); |
632 | break; |
633 | } |
634 | printk(KERN_ALERT "Faulting instruction address: 0x%08lx\n" , |
635 | regs->nip); |
636 | |
637 | if (task_stack_end_corrupted(current)) |
638 | printk(KERN_ALERT "Thread overran stack, or stack corrupted\n" ); |
639 | |
640 | die("Kernel access of bad area" , regs, sig); |
641 | } |
642 | |
643 | void bad_page_fault(struct pt_regs *regs, int sig) |
644 | { |
645 | const struct exception_table_entry *entry; |
646 | |
647 | /* Are we prepared to handle this fault? */ |
	entry = search_exception_tables(instruction_pointer(regs));
649 | if (entry) |
		instruction_pointer_set(regs, extable_fixup(entry));
651 | else |
652 | __bad_page_fault(regs, sig); |
653 | } |
654 | |
655 | #ifdef CONFIG_PPC_BOOK3S_64 |
656 | DEFINE_INTERRUPT_HANDLER(do_bad_page_fault_segv) |
657 | { |
658 | bad_page_fault(regs, SIGSEGV); |
659 | } |
660 | |
661 | /* |
662 | * In radix, segment interrupts indicate the EA is not addressable by the |
663 | * page table geometry, so they are always sent here. |
664 | * |
665 | * In hash, this is called if do_slb_fault returns error. Typically it is |
666 | * because the EA was outside the region allowed by software. |
667 | */ |
668 | DEFINE_INTERRUPT_HANDLER(do_bad_segment_interrupt) |
669 | { |
670 | int err = regs->result; |
671 | |
672 | if (err == -EFAULT) { |
673 | if (user_mode(regs)) |
674 | _exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar); |
675 | else |
676 | bad_page_fault(regs, SIGSEGV); |
677 | } else if (err == -EINVAL) { |
678 | unrecoverable_exception(regs); |
679 | } else { |
680 | BUG(); |
681 | } |
682 | } |
683 | #endif |
684 | |