1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * crash.c - kernel crash support code. |
4 | * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com> |
5 | */ |
6 | |
7 | #include <linux/buildid.h> |
8 | #include <linux/init.h> |
9 | #include <linux/utsname.h> |
10 | #include <linux/vmalloc.h> |
11 | #include <linux/sizes.h> |
12 | #include <linux/kexec.h> |
13 | #include <linux/memory.h> |
14 | #include <linux/cpuhotplug.h> |
15 | #include <linux/memblock.h> |
17 | #include <linux/kmemleak.h> |
18 | |
19 | #include <asm/page.h> |
20 | #include <asm/sections.h> |
21 | |
22 | #include <crypto/sha1.h> |
23 | |
24 | #include "kallsyms_internal.h" |
25 | #include "kexec_internal.h" |
26 | |
27 | /* Per cpu memory for storing cpu states in case of system crash. */ |
28 | note_buf_t __percpu *crash_notes; |
29 | |
30 | /* vmcoreinfo stuff */ |
31 | unsigned char *vmcoreinfo_data; |
32 | size_t vmcoreinfo_size; |
33 | u32 *vmcoreinfo_note; |
34 | |
35 | /* trusted vmcoreinfo, e.g. we can make a copy in the crash memory */ |
36 | static unsigned char *vmcoreinfo_data_safecopy; |
37 | |
38 | /* Location of the reserved area for the crash kernel */ |
39 | struct resource crashk_res = { |
.name = "Crash kernel",
41 | .start = 0, |
42 | .end = 0, |
43 | .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, |
44 | .desc = IORES_DESC_CRASH_KERNEL |
45 | }; |
46 | struct resource crashk_low_res = { |
.name = "Crash kernel",
48 | .start = 0, |
49 | .end = 0, |
50 | .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM, |
51 | .desc = IORES_DESC_CRASH_KERNEL |
52 | }; |
53 | |
54 | /* |
55 | * parsing the "crashkernel" commandline |
56 | * |
57 | * this code is intended to be called from architecture specific code |
58 | */ |
59 | |
60 | |
61 | /* |
62 | * This function parses command lines in the format |
63 | * |
64 | * crashkernel=ramsize-range:size[,...][@offset] |
65 | * |
66 | * The function returns 0 on success and -EINVAL on failure. |
67 | */ |
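/*
 * Illustrative example (values are hypothetical):
 *
 *   crashkernel=512M-2G:64M,2G-:128M@16M
 *
 * reserves 64M when the amount of system RAM falls in the 512M-2G range,
 * 128M when it is 2G or more, and (optionally, via "@16M") asks for the
 * reservation to start at physical address 16M.
 */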
68 | static int __init parse_crashkernel_mem(char *cmdline, |
69 | unsigned long long system_ram, |
70 | unsigned long long *crash_size, |
71 | unsigned long long *crash_base) |
72 | { |
73 | char *cur = cmdline, *tmp; |
74 | unsigned long long total_mem = system_ram; |
75 | |
76 | /* |
77 | * Firmware sometimes reserves some memory regions for its own use, |
78 | * so the system memory size is less than the actual physical memory |
79 | * size. Work around this by rounding up the total size to 128M, |
80 | * which is enough for most test cases. |
81 | */ |
82 | total_mem = roundup(total_mem, SZ_128M); |
83 | |
84 | /* for each entry of the comma-separated list */ |
85 | do { |
86 | unsigned long long start, end = ULLONG_MAX, size; |
87 | |
88 | /* get the start of the range */ |
start = memparse(cur, &tmp);
90 | if (cur == tmp) { |
91 | pr_warn("crashkernel: Memory value expected\n" ); |
92 | return -EINVAL; |
93 | } |
94 | cur = tmp; |
95 | if (*cur != '-') { |
96 | pr_warn("crashkernel: '-' expected\n" ); |
97 | return -EINVAL; |
98 | } |
99 | cur++; |
100 | |
/* if no ':' is here, then we read the end */
102 | if (*cur != ':') { |
end = memparse(cur, &tmp);
104 | if (cur == tmp) { |
105 | pr_warn("crashkernel: Memory value expected\n" ); |
106 | return -EINVAL; |
107 | } |
108 | cur = tmp; |
109 | if (end <= start) { |
110 | pr_warn("crashkernel: end <= start\n" ); |
111 | return -EINVAL; |
112 | } |
113 | } |
114 | |
115 | if (*cur != ':') { |
116 | pr_warn("crashkernel: ':' expected\n" ); |
117 | return -EINVAL; |
118 | } |
119 | cur++; |
120 | |
size = memparse(cur, &tmp);
122 | if (cur == tmp) { |
123 | pr_warn("Memory value expected\n" ); |
124 | return -EINVAL; |
125 | } |
126 | cur = tmp; |
127 | if (size >= total_mem) { |
128 | pr_warn("crashkernel: invalid size\n" ); |
129 | return -EINVAL; |
130 | } |
131 | |
132 | /* match ? */ |
133 | if (total_mem >= start && total_mem < end) { |
134 | *crash_size = size; |
135 | break; |
136 | } |
137 | } while (*cur++ == ','); |
138 | |
139 | if (*crash_size > 0) { |
140 | while (*cur && *cur != ' ' && *cur != '@') |
141 | cur++; |
142 | if (*cur == '@') { |
143 | cur++; |
*crash_base = memparse(cur, &tmp);
145 | if (cur == tmp) { |
146 | pr_warn("Memory value expected after '@'\n" ); |
147 | return -EINVAL; |
148 | } |
149 | } |
150 | } else |
151 | pr_info("crashkernel size resulted in zero bytes\n" ); |
152 | |
153 | return 0; |
154 | } |
155 | |
156 | /* |
* This function parses "simple" (old) crashkernel command lines like
158 | * |
159 | * crashkernel=size[@offset] |
160 | * |
161 | * It returns 0 on success and -EINVAL on failure. |
162 | */ |
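/*
 * Illustrative example (values are hypothetical):
 *
 *   crashkernel=256M@64M
 *
 * reserves 256M for the crash kernel starting at physical address 64M;
 * the "@64M" part may be omitted to let the kernel choose the base.
 */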
163 | static int __init parse_crashkernel_simple(char *cmdline, |
164 | unsigned long long *crash_size, |
165 | unsigned long long *crash_base) |
166 | { |
167 | char *cur = cmdline; |
168 | |
*crash_size = memparse(cmdline, &cur);
170 | if (cmdline == cur) { |
171 | pr_warn("crashkernel: memory value expected\n" ); |
172 | return -EINVAL; |
173 | } |
174 | |
175 | if (*cur == '@') |
*crash_base = memparse(cur+1, &cur);
177 | else if (*cur != ' ' && *cur != '\0') { |
178 | pr_warn("crashkernel: unrecognized char: %c\n" , *cur); |
179 | return -EINVAL; |
180 | } |
181 | |
182 | return 0; |
183 | } |
184 | |
185 | #define SUFFIX_HIGH 0 |
186 | #define SUFFIX_LOW 1 |
187 | #define SUFFIX_NULL 2 |
188 | static __initdata char *suffix_tbl[] = { |
[SUFFIX_HIGH] = ",high",
[SUFFIX_LOW] = ",low",
191 | [SUFFIX_NULL] = NULL, |
192 | }; |
193 | |
194 | /* |
* This function parses "suffix" crashkernel command lines like
196 | * |
197 | * crashkernel=size,[high|low] |
198 | * |
199 | * It returns 0 on success and -EINVAL on failure. |
200 | */ |
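/*
 * Illustrative example (values are hypothetical):
 *
 *   crashkernel=512M,high
 *
 * asks for 512M to be reserved in high memory (above CRASH_ADDR_LOW_MAX);
 * an optional crashkernel=Y,low entry sizes the companion low-memory
 * reservation, otherwise DEFAULT_CRASH_KERNEL_LOW_SIZE is used.
 */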
201 | static int __init parse_crashkernel_suffix(char *cmdline, |
202 | unsigned long long *crash_size, |
203 | const char *suffix) |
204 | { |
205 | char *cur = cmdline; |
206 | |
*crash_size = memparse(cmdline, &cur);
208 | if (cmdline == cur) { |
209 | pr_warn("crashkernel: memory value expected\n" ); |
210 | return -EINVAL; |
211 | } |
212 | |
213 | /* check with suffix */ |
214 | if (strncmp(cur, suffix, strlen(suffix))) { |
215 | pr_warn("crashkernel: unrecognized char: %c\n" , *cur); |
216 | return -EINVAL; |
217 | } |
218 | cur += strlen(suffix); |
219 | if (*cur != ' ' && *cur != '\0') { |
220 | pr_warn("crashkernel: unrecognized char: %c\n" , *cur); |
221 | return -EINVAL; |
222 | } |
223 | |
224 | return 0; |
225 | } |
226 | |
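/*
 * Scan @cmdline for every occurrence of @name ("crashkernel=") and return
 * a pointer to the last one matching @suffix (",high"/",low"), or, when
 * @suffix is NULL, the last one that carries no known suffix.
 */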
227 | static __init char *get_last_crashkernel(char *cmdline, |
228 | const char *name, |
229 | const char *suffix) |
230 | { |
231 | char *p = cmdline, *ck_cmdline = NULL; |
232 | |
233 | /* find crashkernel and use the last one if there are more */ |
234 | p = strstr(p, name); |
235 | while (p) { |
236 | char *end_p = strchr(p, ' '); |
237 | char *q; |
238 | |
239 | if (!end_p) |
240 | end_p = p + strlen(p); |
241 | |
242 | if (!suffix) { |
243 | int i; |
244 | |
245 | /* skip the one with any known suffix */ |
246 | for (i = 0; suffix_tbl[i]; i++) { |
247 | q = end_p - strlen(suffix_tbl[i]); |
248 | if (!strncmp(q, suffix_tbl[i], |
249 | strlen(suffix_tbl[i]))) |
250 | goto next; |
251 | } |
252 | ck_cmdline = p; |
253 | } else { |
254 | q = end_p - strlen(suffix); |
255 | if (!strncmp(q, suffix, strlen(suffix))) |
256 | ck_cmdline = p; |
257 | } |
258 | next: |
259 | p = strstr(p+1, name); |
260 | } |
261 | |
262 | return ck_cmdline; |
263 | } |
264 | |
265 | static int __init __parse_crashkernel(char *cmdline, |
266 | unsigned long long system_ram, |
267 | unsigned long long *crash_size, |
268 | unsigned long long *crash_base, |
269 | const char *suffix) |
270 | { |
271 | char *first_colon, *first_space; |
272 | char *ck_cmdline; |
char *name = "crashkernel=";
274 | |
275 | BUG_ON(!crash_size || !crash_base); |
276 | *crash_size = 0; |
277 | *crash_base = 0; |
278 | |
279 | ck_cmdline = get_last_crashkernel(cmdline, name, suffix); |
280 | if (!ck_cmdline) |
281 | return -ENOENT; |
282 | |
283 | ck_cmdline += strlen(name); |
284 | |
285 | if (suffix) |
return parse_crashkernel_suffix(ck_cmdline, crash_size,
suffix);
288 | /* |
289 | * if the commandline contains a ':', then that's the extended |
290 | * syntax -- if not, it must be the classic syntax |
291 | */ |
292 | first_colon = strchr(ck_cmdline, ':'); |
293 | first_space = strchr(ck_cmdline, ' '); |
294 | if (first_colon && (!first_space || first_colon < first_space)) |
return parse_crashkernel_mem(ck_cmdline, system_ram,
crash_size, crash_base);
297 | |
return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base);
299 | } |
300 | |
301 | /* |
* This function is the entry point for command line parsing and should be
* called from the arch-specific code.
*
* If crashkernel=,high|low is supported on the architecture, non-NULL values
* should be passed to the 'low_size' and 'high' parameters.
307 | */ |
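/*
 * A minimal usage sketch (illustrative only; the exact call sites live in
 * the architecture code):
 *
 *	unsigned long long crash_size, crash_base, low_size = 0;
 *	bool high = false;
 *
 *	if (parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
 *			      &crash_size, &crash_base, &low_size, &high))
 *		return;
 *	reserve_crashkernel_generic(boot_command_line, crash_size, crash_base,
 *				    low_size, high);
 */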
308 | int __init parse_crashkernel(char *cmdline, |
309 | unsigned long long system_ram, |
310 | unsigned long long *crash_size, |
311 | unsigned long long *crash_base, |
312 | unsigned long long *low_size, |
313 | bool *high) |
314 | { |
315 | int ret; |
316 | |
317 | /* crashkernel=X[@offset] */ |
318 | ret = __parse_crashkernel(cmdline, system_ram, crash_size, |
319 | crash_base, NULL); |
320 | #ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION |
321 | /* |
322 | * If non-NULL 'high' passed in and no normal crashkernel |
323 | * setting detected, try parsing crashkernel=,high|low. |
324 | */ |
325 | if (high && ret == -ENOENT) { |
ret = __parse_crashkernel(cmdline, 0, crash_size,
crash_base, suffix_tbl[SUFFIX_HIGH]);
328 | if (ret || !*crash_size) |
329 | return -EINVAL; |
330 | |
331 | /* |
332 | * crashkernel=Y,low can be specified or not, but invalid value |
333 | * is not allowed. |
334 | */ |
ret = __parse_crashkernel(cmdline, 0, low_size,
crash_base, suffix_tbl[SUFFIX_LOW]);
337 | if (ret == -ENOENT) { |
338 | *low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; |
339 | ret = 0; |
340 | } else if (ret) { |
341 | return ret; |
342 | } |
343 | |
344 | *high = true; |
345 | } |
346 | #endif |
347 | if (!*crash_size) |
348 | ret = -EINVAL; |
349 | |
350 | return ret; |
351 | } |
352 | |
353 | /* |
354 | * Add a dummy early_param handler to mark crashkernel= as a known command line |
355 | * parameter and suppress incorrect warnings in init/main.c. |
356 | */ |
357 | static int __init parse_crashkernel_dummy(char *arg) |
358 | { |
359 | return 0; |
360 | } |
361 | early_param("crashkernel" , parse_crashkernel_dummy); |
362 | |
363 | #ifdef CONFIG_ARCH_HAS_GENERIC_CRASHKERNEL_RESERVATION |
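/*
 * Reserve an extra block of memory below CRASH_ADDR_LOW_MAX when the main
 * crashkernel region ends up entirely in high memory, so the crash kernel
 * still has some low memory available (e.g. for DMA-able buffers).
 */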
364 | static int __init reserve_crashkernel_low(unsigned long long low_size) |
365 | { |
366 | #ifdef CONFIG_64BIT |
367 | unsigned long long low_base; |
368 | |
low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX);
370 | if (!low_base) { |
371 | pr_err("cannot allocate crashkernel low memory (size:0x%llx).\n" , low_size); |
372 | return -ENOMEM; |
373 | } |
374 | |
375 | pr_info("crashkernel low memory reserved: 0x%08llx - 0x%08llx (%lld MB)\n" , |
376 | low_base, low_base + low_size, low_size >> 20); |
377 | |
378 | crashk_low_res.start = low_base; |
379 | crashk_low_res.end = low_base + low_size - 1; |
insert_resource(&iomem_resource, &crashk_low_res);
381 | #endif |
382 | return 0; |
383 | } |
384 | |
385 | void __init reserve_crashkernel_generic(char *cmdline, |
386 | unsigned long long crash_size, |
387 | unsigned long long crash_base, |
388 | unsigned long long crash_low_size, |
389 | bool high) |
390 | { |
391 | unsigned long long search_end = CRASH_ADDR_LOW_MAX, search_base = 0; |
392 | bool fixed_base = false; |
393 | |
394 | /* User specifies base address explicitly. */ |
395 | if (crash_base) { |
396 | fixed_base = true; |
397 | search_base = crash_base; |
398 | search_end = crash_base + crash_size; |
399 | } else if (high) { |
400 | search_base = CRASH_ADDR_LOW_MAX; |
401 | search_end = CRASH_ADDR_HIGH_MAX; |
402 | } |
403 | |
404 | retry: |
crash_base = memblock_phys_alloc_range(crash_size, CRASH_ALIGN,
search_base, search_end);
407 | if (!crash_base) { |
408 | /* |
409 | * For crashkernel=size[KMG]@offset[KMG], print out failure |
410 | * message if can't reserve the specified region. |
411 | */ |
412 | if (fixed_base) { |
413 | pr_warn("crashkernel reservation failed - memory is in use.\n" ); |
414 | return; |
415 | } |
416 | |
417 | /* |
418 | * For crashkernel=size[KMG], if the first attempt was for |
* low memory, fall back to high memory; the minimum required
* low memory will be reserved later.
421 | */ |
422 | if (!high && search_end == CRASH_ADDR_LOW_MAX) { |
423 | search_end = CRASH_ADDR_HIGH_MAX; |
424 | search_base = CRASH_ADDR_LOW_MAX; |
425 | crash_low_size = DEFAULT_CRASH_KERNEL_LOW_SIZE; |
426 | goto retry; |
427 | } |
428 | |
429 | /* |
430 | * For crashkernel=size[KMG],high, if the first attempt was |
431 | * for high memory, fall back to low memory. |
432 | */ |
433 | if (high && search_end == CRASH_ADDR_HIGH_MAX) { |
434 | search_end = CRASH_ADDR_LOW_MAX; |
435 | search_base = 0; |
436 | goto retry; |
437 | } |
438 | pr_warn("cannot allocate crashkernel (size:0x%llx)\n" , |
439 | crash_size); |
440 | return; |
441 | } |
442 | |
443 | if ((crash_base > CRASH_ADDR_LOW_MAX) && |
crash_low_size && reserve_crashkernel_low(crash_low_size)) {
memblock_phys_free(crash_base, crash_size);
446 | return; |
447 | } |
448 | |
449 | pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n" , |
450 | crash_base, crash_base + crash_size, crash_size >> 20); |
451 | |
452 | /* |
453 | * The crashkernel memory will be removed from the kernel linear |
454 | * map. Inform kmemleak so that it won't try to access it. |
455 | */ |
kmemleak_ignore_phys(crash_base);
457 | if (crashk_low_res.end) |
kmemleak_ignore_phys(crashk_low_res.start);
459 | |
460 | crashk_res.start = crash_base; |
461 | crashk_res.end = crash_base + crash_size - 1; |
insert_resource(&iomem_resource, &crashk_res);
463 | } |
464 | #endif |
465 | |
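/*
 * Build the ELF64 core headers (the "elfcorehdr") describing the crash dump:
 * one PT_NOTE program header per possible CPU, one for vmcoreinfo, an
 * optional PT_LOAD entry covering the kernel text mapping, and one PT_LOAD
 * entry per memory range in @mem. The vzalloc'ed buffer and its size are
 * returned through @addr and @sz.
 */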
int crash_prepare_elf64_headers(struct crash_mem *mem, int need_kernel_map,
void **addr, unsigned long *sz)
468 | { |
469 | Elf64_Ehdr *ehdr; |
470 | Elf64_Phdr *phdr; |
471 | unsigned long nr_cpus = num_possible_cpus(), nr_phdr, elf_sz; |
472 | unsigned char *buf; |
473 | unsigned int cpu, i; |
474 | unsigned long long notes_addr; |
475 | unsigned long mstart, mend; |
476 | |
477 | /* extra phdr for vmcoreinfo ELF note */ |
478 | nr_phdr = nr_cpus + 1; |
479 | nr_phdr += mem->nr_ranges; |
480 | |
481 | /* |
482 | * kexec-tools creates an extra PT_LOAD phdr for kernel text mapping |
483 | * area (for example, ffffffff80000000 - ffffffffa0000000 on x86_64). |
* I think this is required by tools like gdb. So the same physical
* memory will be mapped in two ELF headers. One will contain kernel
* text virtual addresses and the other will have __va(physical) addresses.
487 | */ |
488 | |
489 | nr_phdr++; |
490 | elf_sz = sizeof(Elf64_Ehdr) + nr_phdr * sizeof(Elf64_Phdr); |
491 | elf_sz = ALIGN(elf_sz, ELF_CORE_HEADER_ALIGN); |
492 | |
buf = vzalloc(elf_sz);
494 | if (!buf) |
495 | return -ENOMEM; |
496 | |
497 | ehdr = (Elf64_Ehdr *)buf; |
498 | phdr = (Elf64_Phdr *)(ehdr + 1); |
499 | memcpy(ehdr->e_ident, ELFMAG, SELFMAG); |
500 | ehdr->e_ident[EI_CLASS] = ELFCLASS64; |
501 | ehdr->e_ident[EI_DATA] = ELFDATA2LSB; |
502 | ehdr->e_ident[EI_VERSION] = EV_CURRENT; |
503 | ehdr->e_ident[EI_OSABI] = ELF_OSABI; |
504 | memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD); |
505 | ehdr->e_type = ET_CORE; |
506 | ehdr->e_machine = ELF_ARCH; |
507 | ehdr->e_version = EV_CURRENT; |
508 | ehdr->e_phoff = sizeof(Elf64_Ehdr); |
509 | ehdr->e_ehsize = sizeof(Elf64_Ehdr); |
510 | ehdr->e_phentsize = sizeof(Elf64_Phdr); |
511 | |
512 | /* Prepare one phdr of type PT_NOTE for each possible CPU */ |
513 | for_each_possible_cpu(cpu) { |
514 | phdr->p_type = PT_NOTE; |
515 | notes_addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpu)); |
516 | phdr->p_offset = phdr->p_paddr = notes_addr; |
517 | phdr->p_filesz = phdr->p_memsz = sizeof(note_buf_t); |
518 | (ehdr->e_phnum)++; |
519 | phdr++; |
520 | } |
521 | |
522 | /* Prepare one PT_NOTE header for vmcoreinfo */ |
523 | phdr->p_type = PT_NOTE; |
524 | phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note(); |
525 | phdr->p_filesz = phdr->p_memsz = VMCOREINFO_NOTE_SIZE; |
526 | (ehdr->e_phnum)++; |
527 | phdr++; |
528 | |
529 | /* Prepare PT_LOAD type program header for kernel text region */ |
530 | if (need_kernel_map) { |
531 | phdr->p_type = PT_LOAD; |
532 | phdr->p_flags = PF_R|PF_W|PF_X; |
533 | phdr->p_vaddr = (unsigned long) _text; |
534 | phdr->p_filesz = phdr->p_memsz = _end - _text; |
535 | phdr->p_offset = phdr->p_paddr = __pa_symbol(_text); |
536 | ehdr->e_phnum++; |
537 | phdr++; |
538 | } |
539 | |
540 | /* Go through all the ranges in mem->ranges[] and prepare phdr */ |
541 | for (i = 0; i < mem->nr_ranges; i++) { |
542 | mstart = mem->ranges[i].start; |
543 | mend = mem->ranges[i].end; |
544 | |
545 | phdr->p_type = PT_LOAD; |
546 | phdr->p_flags = PF_R|PF_W|PF_X; |
547 | phdr->p_offset = mstart; |
548 | |
549 | phdr->p_paddr = mstart; |
550 | phdr->p_vaddr = (unsigned long) __va(mstart); |
551 | phdr->p_filesz = phdr->p_memsz = mend - mstart + 1; |
552 | phdr->p_align = 0; |
553 | ehdr->e_phnum++; |
554 | pr_debug("Crash PT_LOAD ELF header. phdr=%p vaddr=0x%llx, paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n" , |
555 | phdr, phdr->p_vaddr, phdr->p_paddr, phdr->p_filesz, |
556 | ehdr->e_phnum, phdr->p_offset); |
557 | phdr++; |
558 | } |
559 | |
560 | *addr = buf; |
561 | *sz = elf_sz; |
562 | return 0; |
563 | } |
564 | |
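/*
 * Remove the region [@mstart, @mend] from the ranges tracked in @mem:
 * overlapping ranges are truncated, dropped when fully covered, or split
 * in two (splitting needs a free slot, otherwise -ENOMEM is returned).
 */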
565 | int crash_exclude_mem_range(struct crash_mem *mem, |
566 | unsigned long long mstart, unsigned long long mend) |
567 | { |
568 | int i, j; |
569 | unsigned long long start, end, p_start, p_end; |
570 | struct range temp_range = {0, 0}; |
571 | |
572 | for (i = 0; i < mem->nr_ranges; i++) { |
573 | start = mem->ranges[i].start; |
574 | end = mem->ranges[i].end; |
575 | p_start = mstart; |
576 | p_end = mend; |
577 | |
578 | if (mstart > end || mend < start) |
579 | continue; |
580 | |
581 | /* Truncate any area outside of range */ |
582 | if (mstart < start) |
583 | p_start = start; |
584 | if (mend > end) |
585 | p_end = end; |
586 | |
587 | /* Found completely overlapping range */ |
588 | if (p_start == start && p_end == end) { |
589 | mem->ranges[i].start = 0; |
590 | mem->ranges[i].end = 0; |
591 | if (i < mem->nr_ranges - 1) { |
592 | /* Shift rest of the ranges to left */ |
593 | for (j = i; j < mem->nr_ranges - 1; j++) { |
594 | mem->ranges[j].start = |
595 | mem->ranges[j+1].start; |
596 | mem->ranges[j].end = |
597 | mem->ranges[j+1].end; |
598 | } |
599 | |
600 | /* |
* Continue to check whether there are other overlapping ranges
* from the current position, because the above mem ranges
* were shifted.
604 | */ |
605 | i--; |
606 | mem->nr_ranges--; |
607 | continue; |
608 | } |
609 | mem->nr_ranges--; |
610 | return 0; |
611 | } |
612 | |
613 | if (p_start > start && p_end < end) { |
614 | /* Split original range */ |
615 | mem->ranges[i].end = p_start - 1; |
616 | temp_range.start = p_end + 1; |
617 | temp_range.end = end; |
618 | } else if (p_start != start) |
619 | mem->ranges[i].end = p_start - 1; |
620 | else |
621 | mem->ranges[i].start = p_end + 1; |
622 | break; |
623 | } |
624 | |
625 | /* If a split happened, add the split to array */ |
626 | if (!temp_range.end) |
627 | return 0; |
628 | |
629 | /* Split happened */ |
630 | if (i == mem->max_nr_ranges - 1) |
631 | return -ENOMEM; |
632 | |
633 | /* Location where new range should go */ |
634 | j = i + 1; |
635 | if (j < mem->nr_ranges) { |
636 | /* Move over all ranges one slot towards the end */ |
637 | for (i = mem->nr_ranges - 1; i >= j; i--) |
638 | mem->ranges[i + 1] = mem->ranges[i]; |
639 | } |
640 | |
641 | mem->ranges[j].start = temp_range.start; |
642 | mem->ranges[j].end = temp_range.end; |
643 | mem->nr_ranges++; |
644 | return 0; |
645 | } |
646 | |
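/*
 * Append a single ELF note (header, NUL-terminated name, payload) at @buf
 * and return the Elf_Word-aligned position just past it.
 */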
647 | Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, |
648 | void *data, size_t data_len) |
649 | { |
650 | struct elf_note *note = (struct elf_note *)buf; |
651 | |
652 | note->n_namesz = strlen(name) + 1; |
653 | note->n_descsz = data_len; |
654 | note->n_type = type; |
655 | buf += DIV_ROUND_UP(sizeof(*note), sizeof(Elf_Word)); |
656 | memcpy(buf, name, note->n_namesz); |
657 | buf += DIV_ROUND_UP(note->n_namesz, sizeof(Elf_Word)); |
658 | memcpy(buf, data, data_len); |
659 | buf += DIV_ROUND_UP(data_len, sizeof(Elf_Word)); |
660 | |
661 | return buf; |
662 | } |
663 | |
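/* Terminate the note buffer with an all-zero note header. */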
664 | void final_note(Elf_Word *buf) |
665 | { |
666 | memset(buf, 0, sizeof(struct elf_note)); |
667 | } |
668 | |
669 | static void update_vmcoreinfo_note(void) |
670 | { |
671 | u32 *buf = vmcoreinfo_note; |
672 | |
673 | if (!vmcoreinfo_size) |
674 | return; |
buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
vmcoreinfo_size);
677 | final_note(buf); |
678 | } |
679 | |
680 | void crash_update_vmcoreinfo_safecopy(void *ptr) |
681 | { |
682 | if (ptr) |
683 | memcpy(ptr, vmcoreinfo_data, vmcoreinfo_size); |
684 | |
685 | vmcoreinfo_data_safecopy = ptr; |
686 | } |
687 | |
688 | void crash_save_vmcoreinfo(void) |
689 | { |
690 | if (!vmcoreinfo_note) |
691 | return; |
692 | |
/* Use the safe copy to generate the vmcoreinfo note, if available */
694 | if (vmcoreinfo_data_safecopy) |
695 | vmcoreinfo_data = vmcoreinfo_data_safecopy; |
696 | |
vmcoreinfo_append_str("CRASHTIME=%lld\n", ktime_get_real_seconds());
698 | update_vmcoreinfo_note(); |
699 | } |
700 | |
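/*
 * Append a formatted string to the vmcoreinfo data, clamping it so the
 * total never exceeds VMCOREINFO_BYTES (and warning once on truncation).
 */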
701 | void vmcoreinfo_append_str(const char *fmt, ...) |
702 | { |
703 | va_list args; |
704 | char buf[0x50]; |
705 | size_t r; |
706 | |
707 | va_start(args, fmt); |
r = vscnprintf(buf, sizeof(buf), fmt, args);
709 | va_end(args); |
710 | |
711 | r = min(r, (size_t)VMCOREINFO_BYTES - vmcoreinfo_size); |
712 | |
713 | memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r); |
714 | |
715 | vmcoreinfo_size += r; |
716 | |
717 | WARN_ONCE(vmcoreinfo_size == VMCOREINFO_BYTES, |
718 | "vmcoreinfo data exceeds allocated size, truncating" ); |
719 | } |
720 | |
721 | /* |
722 | * provide an empty default implementation here -- architecture |
723 | * code may override this |
724 | */ |
725 | void __weak arch_crash_save_vmcoreinfo(void) |
726 | {} |
727 | |
728 | phys_addr_t __weak paddr_vmcoreinfo_note(void) |
729 | { |
730 | return __pa(vmcoreinfo_note); |
731 | } |
732 | EXPORT_SYMBOL(paddr_vmcoreinfo_note); |
733 | |
734 | static int __init crash_save_vmcoreinfo_init(void) |
735 | { |
736 | vmcoreinfo_data = (unsigned char *)get_zeroed_page(GFP_KERNEL); |
737 | if (!vmcoreinfo_data) { |
738 | pr_warn("Memory allocation for vmcoreinfo_data failed\n" ); |
739 | return -ENOMEM; |
740 | } |
741 | |
742 | vmcoreinfo_note = alloc_pages_exact(VMCOREINFO_NOTE_SIZE, |
743 | GFP_KERNEL | __GFP_ZERO); |
744 | if (!vmcoreinfo_note) { |
745 | free_page((unsigned long)vmcoreinfo_data); |
746 | vmcoreinfo_data = NULL; |
747 | pr_warn("Memory allocation for vmcoreinfo_note failed\n" ); |
748 | return -ENOMEM; |
749 | } |
750 | |
751 | VMCOREINFO_OSRELEASE(init_uts_ns.name.release); |
752 | VMCOREINFO_BUILD_ID(); |
753 | VMCOREINFO_PAGESIZE(PAGE_SIZE); |
754 | |
755 | VMCOREINFO_SYMBOL(init_uts_ns); |
756 | VMCOREINFO_OFFSET(uts_namespace, name); |
757 | VMCOREINFO_SYMBOL(node_online_map); |
758 | #ifdef CONFIG_MMU |
759 | VMCOREINFO_SYMBOL_ARRAY(swapper_pg_dir); |
760 | #endif |
761 | VMCOREINFO_SYMBOL(_stext); |
762 | VMCOREINFO_SYMBOL(vmap_area_list); |
763 | |
764 | #ifndef CONFIG_NUMA |
765 | VMCOREINFO_SYMBOL(mem_map); |
766 | VMCOREINFO_SYMBOL(contig_page_data); |
767 | #endif |
768 | #ifdef CONFIG_SPARSEMEM |
769 | VMCOREINFO_SYMBOL_ARRAY(mem_section); |
770 | VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS); |
771 | VMCOREINFO_STRUCT_SIZE(mem_section); |
772 | VMCOREINFO_OFFSET(mem_section, section_mem_map); |
773 | VMCOREINFO_NUMBER(SECTION_SIZE_BITS); |
774 | VMCOREINFO_NUMBER(MAX_PHYSMEM_BITS); |
775 | #endif |
776 | VMCOREINFO_STRUCT_SIZE(page); |
777 | VMCOREINFO_STRUCT_SIZE(pglist_data); |
778 | VMCOREINFO_STRUCT_SIZE(zone); |
779 | VMCOREINFO_STRUCT_SIZE(free_area); |
780 | VMCOREINFO_STRUCT_SIZE(list_head); |
781 | VMCOREINFO_SIZE(nodemask_t); |
782 | VMCOREINFO_OFFSET(page, flags); |
783 | VMCOREINFO_OFFSET(page, _refcount); |
784 | VMCOREINFO_OFFSET(page, mapping); |
785 | VMCOREINFO_OFFSET(page, lru); |
786 | VMCOREINFO_OFFSET(page, _mapcount); |
787 | VMCOREINFO_OFFSET(page, private); |
788 | VMCOREINFO_OFFSET(page, compound_head); |
789 | VMCOREINFO_OFFSET(pglist_data, node_zones); |
790 | VMCOREINFO_OFFSET(pglist_data, nr_zones); |
791 | #ifdef CONFIG_FLATMEM |
792 | VMCOREINFO_OFFSET(pglist_data, node_mem_map); |
793 | #endif |
794 | VMCOREINFO_OFFSET(pglist_data, node_start_pfn); |
795 | VMCOREINFO_OFFSET(pglist_data, node_spanned_pages); |
796 | VMCOREINFO_OFFSET(pglist_data, node_id); |
797 | VMCOREINFO_OFFSET(zone, free_area); |
798 | VMCOREINFO_OFFSET(zone, vm_stat); |
799 | VMCOREINFO_OFFSET(zone, spanned_pages); |
800 | VMCOREINFO_OFFSET(free_area, free_list); |
801 | VMCOREINFO_OFFSET(list_head, next); |
802 | VMCOREINFO_OFFSET(list_head, prev); |
803 | VMCOREINFO_OFFSET(vmap_area, va_start); |
804 | VMCOREINFO_OFFSET(vmap_area, list); |
805 | VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER + 1); |
806 | log_buf_vmcoreinfo_setup(); |
807 | VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES); |
808 | VMCOREINFO_NUMBER(NR_FREE_PAGES); |
809 | VMCOREINFO_NUMBER(PG_lru); |
810 | VMCOREINFO_NUMBER(PG_private); |
811 | VMCOREINFO_NUMBER(PG_swapcache); |
812 | VMCOREINFO_NUMBER(PG_swapbacked); |
813 | VMCOREINFO_NUMBER(PG_slab); |
814 | #ifdef CONFIG_MEMORY_FAILURE |
815 | VMCOREINFO_NUMBER(PG_hwpoison); |
816 | #endif |
817 | VMCOREINFO_NUMBER(PG_head_mask); |
818 | #define PAGE_BUDDY_MAPCOUNT_VALUE (~PG_buddy) |
819 | VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE); |
820 | #ifdef CONFIG_HUGETLB_PAGE |
821 | VMCOREINFO_NUMBER(PG_hugetlb); |
822 | #define PAGE_OFFLINE_MAPCOUNT_VALUE (~PG_offline) |
823 | VMCOREINFO_NUMBER(PAGE_OFFLINE_MAPCOUNT_VALUE); |
824 | #endif |
825 | |
826 | #ifdef CONFIG_KALLSYMS |
827 | VMCOREINFO_SYMBOL(kallsyms_names); |
828 | VMCOREINFO_SYMBOL(kallsyms_num_syms); |
829 | VMCOREINFO_SYMBOL(kallsyms_token_table); |
830 | VMCOREINFO_SYMBOL(kallsyms_token_index); |
831 | #ifdef CONFIG_KALLSYMS_BASE_RELATIVE |
832 | VMCOREINFO_SYMBOL(kallsyms_offsets); |
833 | VMCOREINFO_SYMBOL(kallsyms_relative_base); |
834 | #else |
835 | VMCOREINFO_SYMBOL(kallsyms_addresses); |
836 | #endif /* CONFIG_KALLSYMS_BASE_RELATIVE */ |
837 | #endif /* CONFIG_KALLSYMS */ |
838 | |
839 | arch_crash_save_vmcoreinfo(); |
840 | update_vmcoreinfo_note(); |
841 | |
842 | return 0; |
843 | } |
844 | |
845 | subsys_initcall(crash_save_vmcoreinfo_init); |
846 | |
847 | static int __init crash_notes_memory_init(void) |
848 | { |
849 | /* Allocate memory for saving cpu registers. */ |
850 | size_t size, align; |
851 | |
852 | /* |
853 | * crash_notes could be allocated across 2 vmalloc pages when percpu |
* is vmalloc based. vmalloc doesn't guarantee that 2 contiguous vmalloc
* pages are also on 2 contiguous physical pages. In this case the
856 | * 2nd part of crash_notes in 2nd page could be lost since only the |
857 | * starting address and size of crash_notes are exported through sysfs. |
858 | * Here round up the size of crash_notes to the nearest power of two |
859 | * and pass it to __alloc_percpu as align value. This can make sure |
860 | * crash_notes is allocated inside one physical page. |
861 | */ |
862 | size = sizeof(note_buf_t); |
863 | align = min(roundup_pow_of_two(sizeof(note_buf_t)), PAGE_SIZE); |
864 | |
865 | /* |
866 | * Break compile if size is bigger than PAGE_SIZE since crash_notes |
867 | * definitely will be in 2 pages with that. |
868 | */ |
869 | BUILD_BUG_ON(size > PAGE_SIZE); |
870 | |
871 | crash_notes = __alloc_percpu(size, align); |
872 | if (!crash_notes) { |
873 | pr_warn("Memory allocation for saving cpu register states failed\n" ); |
874 | return -ENOMEM; |
875 | } |
876 | return 0; |
877 | } |
878 | subsys_initcall(crash_notes_memory_init); |
879 | |
880 | #ifdef CONFIG_CRASH_HOTPLUG |
881 | #undef pr_fmt |
882 | #define pr_fmt(fmt) "crash hp: " fmt |
883 | |
884 | /* |
* Unlike kexec/kdump loading/unloading/jumping/shrinking, which rarely
* happen, many crash hotplug events can be notified during one short
* period, e.g. when one memory board is hot added and its memory regions
* are onlined. So the mutex __crash_hotplug_lock is used to serialize
* the crash hotplug handling specifically.
890 | */ |
891 | DEFINE_MUTEX(__crash_hotplug_lock); |
892 | #define crash_hotplug_lock() mutex_lock(&__crash_hotplug_lock) |
893 | #define crash_hotplug_unlock() mutex_unlock(&__crash_hotplug_lock) |
894 | |
895 | /* |
* This routine is utilized when the crash_hotplug sysfs node is read.
897 | * It reflects the kernel's ability/permission to update the crash |
898 | * elfcorehdr directly. |
899 | */ |
900 | int crash_check_update_elfcorehdr(void) |
901 | { |
902 | int rc = 0; |
903 | |
904 | crash_hotplug_lock(); |
905 | /* Obtain lock while reading crash information */ |
906 | if (!kexec_trylock()) { |
907 | pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n" ); |
908 | crash_hotplug_unlock(); |
909 | return 0; |
910 | } |
911 | if (kexec_crash_image) { |
912 | if (kexec_crash_image->file_mode) |
913 | rc = 1; |
914 | else |
915 | rc = kexec_crash_image->update_elfcorehdr; |
916 | } |
917 | /* Release lock now that update complete */ |
918 | kexec_unlock(); |
919 | crash_hotplug_unlock(); |
920 | |
921 | return rc; |
922 | } |
923 | |
924 | /* |
925 | * To accurately reflect hot un/plug changes of cpu and memory resources |
* (including onlining and offlining of those resources), the elfcorehdr
927 | * (which is passed to the crash kernel via the elfcorehdr= parameter) |
928 | * must be updated with the new list of CPUs and memories. |
929 | * |
930 | * In order to make changes to elfcorehdr, two conditions are needed: |
931 | * First, the segment containing the elfcorehdr must be large enough |
932 | * to permit a growing number of resources; the elfcorehdr memory size |
933 | * is based on NR_CPUS_DEFAULT and CRASH_MAX_MEMORY_RANGES. |
934 | * Second, purgatory must explicitly exclude the elfcorehdr from the |
935 | * list of segments it checks (since the elfcorehdr changes and thus |
936 | * would require an update to purgatory itself to update the digest). |
937 | */ |
938 | static void crash_handle_hotplug_event(unsigned int hp_action, unsigned int cpu) |
939 | { |
940 | struct kimage *image; |
941 | |
942 | crash_hotplug_lock(); |
943 | /* Obtain lock while changing crash information */ |
944 | if (!kexec_trylock()) { |
945 | pr_info("kexec_trylock() failed, elfcorehdr may be inaccurate\n" ); |
946 | crash_hotplug_unlock(); |
947 | return; |
948 | } |
949 | |
/* Nothing to do if kdump is not loaded */
951 | if (!kexec_crash_image) |
952 | goto out; |
953 | |
954 | image = kexec_crash_image; |
955 | |
956 | /* Check that updating elfcorehdr is permitted */ |
957 | if (!(image->file_mode || image->update_elfcorehdr)) |
958 | goto out; |
959 | |
960 | if (hp_action == KEXEC_CRASH_HP_ADD_CPU || |
961 | hp_action == KEXEC_CRASH_HP_REMOVE_CPU) |
962 | pr_debug("hp_action %u, cpu %u\n" , hp_action, cpu); |
963 | else |
964 | pr_debug("hp_action %u\n" , hp_action); |
965 | |
966 | /* |
967 | * The elfcorehdr_index is set to -1 when the struct kimage |
968 | * is allocated. Find the segment containing the elfcorehdr, |
969 | * if not already found. |
970 | */ |
971 | if (image->elfcorehdr_index < 0) { |
972 | unsigned long mem; |
973 | unsigned char *ptr; |
974 | unsigned int n; |
975 | |
976 | for (n = 0; n < image->nr_segments; n++) { |
977 | mem = image->segment[n].mem; |
978 | ptr = kmap_local_page(pfn_to_page(mem >> PAGE_SHIFT)); |
979 | if (ptr) { |
980 | /* The segment containing elfcorehdr */ |
if (memcmp(ptr, ELFMAG, SELFMAG) == 0)
982 | image->elfcorehdr_index = (int)n; |
983 | kunmap_local(ptr); |
984 | } |
985 | } |
986 | } |
987 | |
988 | if (image->elfcorehdr_index < 0) { |
989 | pr_err("unable to locate elfcorehdr segment" ); |
990 | goto out; |
991 | } |
992 | |
993 | /* Needed in order for the segments to be updated */ |
994 | arch_kexec_unprotect_crashkres(); |
995 | |
996 | /* Differentiate between normal load and hotplug update */ |
997 | image->hp_action = hp_action; |
998 | |
999 | /* Now invoke arch-specific update handler */ |
1000 | arch_crash_handle_hotplug_event(image); |
1001 | |
1002 | /* No longer handling a hotplug event */ |
1003 | image->hp_action = KEXEC_CRASH_HP_NONE; |
1004 | image->elfcorehdr_updated = true; |
1005 | |
1006 | /* Change back to read-only */ |
1007 | arch_kexec_protect_crashkres(); |
1008 | |
/* Errors in the callback are not a reason to roll back state */
1010 | out: |
1011 | /* Release lock now that update complete */ |
1012 | kexec_unlock(); |
1013 | crash_hotplug_unlock(); |
1014 | } |
1015 | |
1016 | static int crash_memhp_notifier(struct notifier_block *nb, unsigned long val, void *v) |
1017 | { |
1018 | switch (val) { |
1019 | case MEM_ONLINE: |
1020 | crash_handle_hotplug_event(KEXEC_CRASH_HP_ADD_MEMORY, |
1021 | KEXEC_CRASH_HP_INVALID_CPU); |
1022 | break; |
1023 | |
1024 | case MEM_OFFLINE: |
1025 | crash_handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_MEMORY, |
1026 | KEXEC_CRASH_HP_INVALID_CPU); |
1027 | break; |
1028 | } |
1029 | return NOTIFY_OK; |
1030 | } |
1031 | |
1032 | static struct notifier_block crash_memhp_nb = { |
1033 | .notifier_call = crash_memhp_notifier, |
1034 | .priority = 0 |
1035 | }; |
1036 | |
1037 | static int crash_cpuhp_online(unsigned int cpu) |
1038 | { |
1039 | crash_handle_hotplug_event(KEXEC_CRASH_HP_ADD_CPU, cpu); |
1040 | return 0; |
1041 | } |
1042 | |
1043 | static int crash_cpuhp_offline(unsigned int cpu) |
1044 | { |
1045 | crash_handle_hotplug_event(KEXEC_CRASH_HP_REMOVE_CPU, cpu); |
1046 | return 0; |
1047 | } |
1048 | |
1049 | static int __init crash_hotplug_init(void) |
1050 | { |
1051 | int result = 0; |
1052 | |
1053 | if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG)) |
register_memory_notifier(&crash_memhp_nb);
1055 | |
1056 | if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) { |
result = cpuhp_setup_state_nocalls(CPUHP_BP_PREPARE_DYN,
"crash/cpuhp", crash_cpuhp_online, crash_cpuhp_offline);
1059 | } |
1060 | |
1061 | return result; |
1062 | } |
1063 | |
1064 | subsys_initcall(crash_hotplug_init); |
1065 | #endif |
1066 | |