1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Architecture specific (PPC64) functions for kexec based crash dumps. |
4 | * |
5 | * Copyright (C) 2005, IBM Corp. |
6 | * |
7 | * Created by: Haren Myneni |
8 | */ |
9 | |
10 | #include <linux/kernel.h> |
11 | #include <linux/smp.h> |
12 | #include <linux/reboot.h> |
13 | #include <linux/kexec.h> |
14 | #include <linux/export.h> |
15 | #include <linux/crash_dump.h> |
16 | #include <linux/delay.h> |
17 | #include <linux/irq.h> |
18 | #include <linux/types.h> |
19 | |
20 | #include <asm/processor.h> |
21 | #include <asm/machdep.h> |
22 | #include <asm/kexec.h> |
23 | #include <asm/smp.h> |
24 | #include <asm/setjmp.h> |
25 | #include <asm/debug.h> |
26 | #include <asm/interrupt.h> |
27 | |
28 | /* |
29 | * The primary CPU waits a while for all secondary CPUs to enter. This is to |
30 | * avoid sending an IPI if the secondary CPUs are entering |
31 | * crash_kexec_secondary on their own (eg via a system reset). |
32 | * |
33 | * The secondary timeout has to be longer than the primary. Both timeouts are |
34 | * in milliseconds. |
35 | */ |
36 | #define PRIMARY_TIMEOUT 500 |
37 | #define SECONDARY_TIMEOUT 1000 |
38 | |
39 | #define IPI_TIMEOUT 10000 |
40 | #define REAL_MODE_TIMEOUT 10000 |
41 | |
42 | static int time_to_dump; |
43 | |
44 | /* |
45 | * In case of system reset, secondary CPUs enter crash_kexec_secondary with out |
46 | * having to send an IPI explicitly. So, indicate if the crash is via |
47 | * system reset to avoid sending another IPI. |
48 | */ |
49 | static int is_via_system_reset; |
50 | |
51 | /* |
52 | * crash_wake_offline should be set to 1 by platforms that intend to wake |
53 | * up offline cpus prior to jumping to a kdump kernel. Currently powernv |
54 | * sets it to 1, since we want to avoid things from happening when an |
55 | * offline CPU wakes up due to something like an HMI (malfunction error), |
56 | * which propagates to all threads. |
57 | */ |
58 | int crash_wake_offline; |
59 | |
60 | #define CRASH_HANDLER_MAX 3 |
61 | /* List of shutdown handles */ |
62 | static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX]; |
63 | static DEFINE_SPINLOCK(crash_handlers_lock); |
64 | |
65 | static unsigned long crash_shutdown_buf[JMP_BUF_LEN]; |
66 | static int crash_shutdown_cpu = -1; |
67 | |
68 | static int handle_fault(struct pt_regs *regs) |
69 | { |
70 | if (crash_shutdown_cpu == smp_processor_id()) |
71 | longjmp(crash_shutdown_buf, 1); |
72 | return 0; |
73 | } |
74 | |
75 | #ifdef CONFIG_SMP |
76 | |
77 | static atomic_t cpus_in_crash; |
78 | void crash_ipi_callback(struct pt_regs *regs) |
79 | { |
80 | static cpumask_t cpus_state_saved = CPU_MASK_NONE; |
81 | |
82 | int cpu = smp_processor_id(); |
83 | |
84 | hard_irq_disable(); |
85 | if (!cpumask_test_cpu(cpu, cpumask: &cpus_state_saved)) { |
86 | crash_save_cpu(regs, cpu); |
87 | cpumask_set_cpu(cpu, dstp: &cpus_state_saved); |
88 | } |
89 | |
90 | atomic_inc(v: &cpus_in_crash); |
91 | smp_mb__after_atomic(); |
92 | |
93 | /* |
94 | * Starting the kdump boot. |
95 | * This barrier is needed to make sure that all CPUs are stopped. |
96 | */ |
97 | while (!time_to_dump) |
98 | cpu_relax(); |
99 | |
100 | if (ppc_md.kexec_cpu_down) |
101 | ppc_md.kexec_cpu_down(1, 1); |
102 | |
103 | #ifdef CONFIG_PPC64 |
104 | kexec_smp_wait(); |
105 | #else |
106 | for (;;); /* FIXME */ |
107 | #endif |
108 | |
109 | /* NOTREACHED */ |
110 | } |
111 | |
112 | static void crash_kexec_prepare_cpus(void) |
113 | { |
114 | unsigned int msecs; |
115 | volatile unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ |
116 | volatile int tries = 0; |
117 | int (*old_handler)(struct pt_regs *regs); |
118 | |
119 | printk(KERN_EMERG "Sending IPI to other CPUs\n" ); |
120 | |
121 | if (crash_wake_offline) |
122 | ncpus = num_present_cpus() - 1; |
123 | |
124 | /* |
125 | * If we came in via system reset, secondaries enter via crash_kexec_secondary(). |
126 | * So, wait a while for the secondary CPUs to enter for that case. |
127 | * Else, send IPI to all other CPUs. |
128 | */ |
129 | if (is_via_system_reset) |
130 | mdelay(PRIMARY_TIMEOUT); |
131 | else |
132 | crash_send_ipi(crash_ipi_callback); |
133 | smp_wmb(); |
134 | |
135 | again: |
136 | /* |
137 | * FIXME: Until we will have the way to stop other CPUs reliably, |
138 | * the crash CPU will send an IPI and wait for other CPUs to |
139 | * respond. |
140 | */ |
141 | msecs = IPI_TIMEOUT; |
142 | while ((atomic_read(v: &cpus_in_crash) < ncpus) && (--msecs > 0)) |
143 | mdelay(1); |
144 | |
145 | /* Would it be better to replace the trap vector here? */ |
146 | |
147 | if (atomic_read(v: &cpus_in_crash) >= ncpus) { |
148 | printk(KERN_EMERG "IPI complete\n" ); |
149 | return; |
150 | } |
151 | |
152 | printk(KERN_EMERG "ERROR: %d cpu(s) not responding\n" , |
153 | ncpus - atomic_read(&cpus_in_crash)); |
154 | |
155 | /* |
156 | * If we have a panic timeout set then we can't wait indefinitely |
157 | * for someone to activate system reset. We also give up on the |
158 | * second time through if system reset fail to work. |
159 | */ |
160 | if ((panic_timeout > 0) || (tries > 0)) |
161 | return; |
162 | |
163 | /* |
164 | * A system reset will cause all CPUs to take an 0x100 exception. |
165 | * The primary CPU returns here via setjmp, and the secondary |
166 | * CPUs reexecute the crash_kexec_secondary path. |
167 | */ |
168 | old_handler = __debugger; |
169 | __debugger = handle_fault; |
170 | crash_shutdown_cpu = smp_processor_id(); |
171 | |
172 | if (setjmp(crash_shutdown_buf) == 0) { |
173 | printk(KERN_EMERG "Activate system reset (dumprestart) " |
174 | "to stop other cpu(s)\n" ); |
175 | |
176 | /* |
177 | * A system reset will force all CPUs to execute the |
178 | * crash code again. We need to reset cpus_in_crash so we |
179 | * wait for everyone to do this. |
180 | */ |
181 | atomic_set(v: &cpus_in_crash, i: 0); |
182 | smp_mb(); |
183 | |
184 | while (atomic_read(v: &cpus_in_crash) < ncpus) |
185 | cpu_relax(); |
186 | } |
187 | |
188 | crash_shutdown_cpu = -1; |
189 | __debugger = old_handler; |
190 | |
191 | tries++; |
192 | goto again; |
193 | } |
194 | |
195 | /* |
196 | * This function will be called by secondary cpus. |
197 | */ |
198 | void crash_kexec_secondary(struct pt_regs *regs) |
199 | { |
200 | unsigned long flags; |
201 | int msecs = SECONDARY_TIMEOUT; |
202 | |
203 | local_irq_save(flags); |
204 | |
205 | /* Wait for the primary crash CPU to signal its progress */ |
206 | while (crashing_cpu < 0) { |
207 | if (--msecs < 0) { |
208 | /* No response, kdump image may not have been loaded */ |
209 | local_irq_restore(flags); |
210 | return; |
211 | } |
212 | |
213 | mdelay(1); |
214 | } |
215 | |
216 | crash_ipi_callback(regs); |
217 | } |
218 | |
219 | #else /* ! CONFIG_SMP */ |
220 | |
/*
 * !CONFIG_SMP variant: there are no other running CPUs to stop. On PPC64
 * this only calls smp_release_cpus(); on other builds it does nothing.
 */
static void crash_kexec_prepare_cpus(void)
{
	/*
	 * move the secondaries to us so that we can copy
	 * the new kernel 0-0x100 safely
	 *
	 * do this if kexec in setup.c ?
	 */
#ifdef CONFIG_PPC64
	smp_release_cpus();
#else
	/* FIXME */
#endif
}
235 | |
/* !CONFIG_SMP variant: intentionally empty. */
void crash_kexec_secondary(struct pt_regs *regs)
{
}
239 | #endif /* CONFIG_SMP */ |
240 | |
/*
 * Wait for all the other CPUs to hit real mode, but time out if they don't
 * come in.
 *
 * A single budget of REAL_MODE_TIMEOUT ms is shared by all CPUs that are
 * waited for. CPUs that are not possible are skipped, and the wait for a
 * CPU ends early when that CPU is not online.
 *
 * @cpu: the calling (crashing) CPU, which is not waited for.
 */
#if defined(CONFIG_SMP) && defined(CONFIG_PPC64)
noinstr static void __maybe_unused crash_kexec_wait_realmode(int cpu)
{
	unsigned int msecs = REAL_MODE_TIMEOUT;
	int i;

	for (i = 0; i < nr_cpu_ids && msecs > 0; i++) {
		if (i == cpu)
			continue;

		/*
		 * Check possibility before touching paca_ptrs[i].
		 * NOTE(review): assumes the entry of a CPU that is not
		 * possible need not point at a valid paca - confirm.
		 */
		if (!cpu_possible(i))
			continue;

		while (paca_ptrs[i]->kexec_state < KEXEC_STATE_REAL_MODE) {
			barrier();
			if (!cpu_online(i) || (msecs <= 0))
				break;
			msecs--;
			mdelay(1);
		}
	}
	mb();
}
#else
static inline void crash_kexec_wait_realmode(int cpu) {}
#endif	/* CONFIG_SMP && CONFIG_PPC64 */
266 | |
267 | void crash_kexec_prepare(void) |
268 | { |
269 | /* Avoid hardlocking with irresponsive CPU holding logbuf_lock */ |
270 | printk_deferred_enter(); |
271 | |
272 | /* |
273 | * This function is only called after the system |
274 | * has panicked or is otherwise in a critical state. |
275 | * The minimum amount of code to allow a kexec'd kernel |
276 | * to run successfully needs to happen here. |
277 | * |
278 | * In practice this means stopping other cpus in |
279 | * an SMP system. |
280 | * The kernel is broken so disable interrupts. |
281 | */ |
282 | hard_irq_disable(); |
283 | |
284 | /* |
285 | * Make a note of crashing cpu. Will be used in machine_kexec |
286 | * such that another IPI will not be sent. |
287 | */ |
288 | crashing_cpu = smp_processor_id(); |
289 | |
290 | crash_kexec_prepare_cpus(); |
291 | } |
292 | |
293 | /* |
294 | * Register a function to be called on shutdown. Only use this if you |
295 | * can't reset your device in the second kernel. |
296 | */ |
297 | int crash_shutdown_register(crash_shutdown_t handler) |
298 | { |
299 | unsigned int i, rc; |
300 | |
301 | spin_lock(lock: &crash_handlers_lock); |
302 | for (i = 0 ; i < CRASH_HANDLER_MAX; i++) |
303 | if (!crash_shutdown_handles[i]) { |
304 | /* Insert handle at first empty entry */ |
305 | crash_shutdown_handles[i] = handler; |
306 | rc = 0; |
307 | break; |
308 | } |
309 | |
310 | if (i == CRASH_HANDLER_MAX) { |
311 | printk(KERN_ERR "Crash shutdown handles full, " |
312 | "not registered.\n" ); |
313 | rc = 1; |
314 | } |
315 | |
316 | spin_unlock(lock: &crash_handlers_lock); |
317 | return rc; |
318 | } |
319 | EXPORT_SYMBOL(crash_shutdown_register); |
320 | |
321 | int crash_shutdown_unregister(crash_shutdown_t handler) |
322 | { |
323 | unsigned int i, rc; |
324 | |
325 | spin_lock(lock: &crash_handlers_lock); |
326 | for (i = 0 ; i < CRASH_HANDLER_MAX; i++) |
327 | if (crash_shutdown_handles[i] == handler) |
328 | break; |
329 | |
330 | if (i == CRASH_HANDLER_MAX) { |
331 | printk(KERN_ERR "Crash shutdown handle not found\n" ); |
332 | rc = 1; |
333 | } else { |
334 | /* Shift handles down */ |
335 | for (; i < (CRASH_HANDLER_MAX - 1); i++) |
336 | crash_shutdown_handles[i] = |
337 | crash_shutdown_handles[i+1]; |
338 | /* |
339 | * Reset last entry to NULL now that it has been shifted down, |
340 | * this will allow new handles to be added here. |
341 | */ |
342 | crash_shutdown_handles[i] = NULL; |
343 | rc = 0; |
344 | } |
345 | |
346 | spin_unlock(lock: &crash_handlers_lock); |
347 | return rc; |
348 | } |
349 | EXPORT_SYMBOL(crash_shutdown_unregister); |
350 | |
351 | void default_machine_crash_shutdown(struct pt_regs *regs) |
352 | { |
353 | volatile unsigned int i; |
354 | int (*old_handler)(struct pt_regs *regs); |
355 | |
356 | if (TRAP(regs) == INTERRUPT_SYSTEM_RESET) |
357 | is_via_system_reset = 1; |
358 | |
359 | crash_smp_send_stop(); |
360 | |
361 | crash_save_cpu(regs, cpu: crashing_cpu); |
362 | |
363 | time_to_dump = 1; |
364 | |
365 | crash_kexec_wait_realmode(cpu: crashing_cpu); |
366 | |
367 | machine_kexec_mask_interrupts(); |
368 | |
369 | /* |
370 | * Call registered shutdown routines safely. Swap out |
371 | * __debugger_fault_handler, and replace on exit. |
372 | */ |
373 | old_handler = __debugger_fault_handler; |
374 | __debugger_fault_handler = handle_fault; |
375 | crash_shutdown_cpu = smp_processor_id(); |
376 | for (i = 0; i < CRASH_HANDLER_MAX && crash_shutdown_handles[i]; i++) { |
377 | if (setjmp(crash_shutdown_buf) == 0) { |
378 | /* |
379 | * Insert syncs and delay to ensure |
380 | * instructions in the dangerous region don't |
381 | * leak away from this protected region. |
382 | */ |
383 | asm volatile("sync; isync" ); |
384 | /* dangerous region */ |
385 | crash_shutdown_handles[i](); |
386 | asm volatile("sync; isync" ); |
387 | } |
388 | } |
389 | crash_shutdown_cpu = -1; |
390 | __debugger_fault_handler = old_handler; |
391 | |
392 | if (ppc_md.kexec_cpu_down) |
393 | ppc_md.kexec_cpu_down(1, 0); |
394 | } |
395 | |