1// SPDX-License-Identifier: GPL-2.0-only
2/*
3 * ppc64 code to implement the kexec_file_load syscall
4 *
5 * Copyright (C) 2004 Adam Litke (agl@us.ibm.com)
6 * Copyright (C) 2004 IBM Corp.
7 * Copyright (C) 2004,2005 Milton D Miller II, IBM Corporation
8 * Copyright (C) 2005 R Sharada (sharada@in.ibm.com)
9 * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com)
10 * Copyright (C) 2020 IBM Corporation
11 *
12 * Based on kexec-tools' kexec-ppc64.c, kexec-elf-rel-ppc64.c, fs2dt.c.
13 * Heavily modified for the kernel by
14 * Hari Bathini, IBM Corporation.
15 */
16
17#include <linux/kexec.h>
18#include <linux/of_fdt.h>
19#include <linux/libfdt.h>
20#include <linux/of.h>
21#include <linux/memblock.h>
22#include <linux/slab.h>
23#include <linux/vmalloc.h>
24#include <asm/setup.h>
25#include <asm/drmem.h>
26#include <asm/firmware.h>
27#include <asm/kexec_ranges.h>
28#include <asm/crashdump-ppc64.h>
29#include <asm/mmzone.h>
30#include <asm/iommu.h>
31#include <asm/prom.h>
32#include <asm/plpks.h>
33
34struct umem_info {
35 __be64 *buf; /* data buffer for usable-memory property */
36 u32 size; /* size allocated for the data buffer */
37 u32 max_entries; /* maximum no. of entries */
38 u32 idx; /* index of current entry */
39
40 /* usable memory ranges to look up */
41 unsigned int nr_ranges;
42 const struct range *ranges;
43};
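/*
 * Layout note (illustrative): the buffer collects 64-bit big-endian
 * values. For "linux,usable-memory" it holds plain (base, size) pairs;
 * for "linux,drconf-usable-memory" each LMB contributes a pair-count
 * followed by that many (base, size) pairs, e.g.
 *
 *   <2, 0x10000000, 0x1000000, 0x30000000, 0x100000>
 *
 * where the addresses and sizes are made-up example values.
 */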
44
45const struct kexec_file_ops * const kexec_file_loaders[] = {
46 &kexec_elf64_ops,
47 NULL
48};
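/*
 * Note: only the ELF64 loader is registered here, so kexec_file_load()
 * on ppc64 accepts ELF (vmlinux-style) images. The generic probe code
 * walks this NULL-terminated table (see arch_kexec_kernel_image_probe()
 * at the end of this file).
 */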
49
50/**
51 * get_exclude_memory_ranges - Get exclude memory ranges. This list includes
52 * regions like opal/rtas, tce-table, initrd,
53 * kernel, htab which should be avoided while
54 * setting up kexec load segments.
55 * @mem_ranges: Range list to add the memory ranges to.
56 *
57 * Returns 0 on success, negative errno on error.
58 */
59static int get_exclude_memory_ranges(struct crash_mem **mem_ranges)
60{
61 int ret;
62
63 ret = add_tce_mem_ranges(mem_ranges);
64 if (ret)
65 goto out;
66
67 ret = add_initrd_mem_range(mem_ranges);
68 if (ret)
69 goto out;
70
71 ret = add_htab_mem_range(mem_ranges);
72 if (ret)
73 goto out;
74
75 ret = add_kernel_mem_range(mem_ranges);
76 if (ret)
77 goto out;
78
79 ret = add_rtas_mem_range(mem_ranges);
80 if (ret)
81 goto out;
82
83 ret = add_opal_mem_range(mem_ranges);
84 if (ret)
85 goto out;
86
87 ret = add_reserved_mem_ranges(mem_ranges);
88 if (ret)
89 goto out;
90
91 /* exclude memory ranges should be sorted for easy lookup */
92 sort_memory_ranges(*mem_ranges, true);
93out:
94 if (ret)
95 pr_err("Failed to setup exclude memory ranges\n");
96 return ret;
97}
98
99/**
 * get_reserved_memory_ranges - Get reserved memory ranges. This list includes
101 * memory regions that should be added to the
102 * memory reserve map to ensure the region is
103 * protected from any mischief.
104 * @mem_ranges: Range list to add the memory ranges to.
105 *
106 * Returns 0 on success, negative errno on error.
107 */
108static int get_reserved_memory_ranges(struct crash_mem **mem_ranges)
109{
110 int ret;
111
112 ret = add_rtas_mem_range(mem_ranges);
113 if (ret)
114 goto out;
115
116 ret = add_tce_mem_ranges(mem_ranges);
117 if (ret)
118 goto out;
119
120 ret = add_reserved_mem_ranges(mem_ranges);
121out:
122 if (ret)
123 pr_err("Failed to setup reserved memory ranges\n");
124 return ret;
125}
126
127/**
128 * __locate_mem_hole_top_down - Looks top down for a large enough memory hole
129 * in the memory regions between buf_min & buf_max
130 * for the buffer. If found, sets kbuf->mem.
131 * @kbuf: Buffer contents and memory parameters.
132 * @buf_min: Minimum address for the buffer.
133 * @buf_max: Maximum address for the buffer.
134 *
135 * Returns 0 on success, negative errno on error.
136 */
137static int __locate_mem_hole_top_down(struct kexec_buf *kbuf,
138 u64 buf_min, u64 buf_max)
139{
140 int ret = -EADDRNOTAVAIL;
141 phys_addr_t start, end;
142 u64 i;
143
144 for_each_mem_range_rev(i, &start, &end) {
145 /*
146 * memblock uses [start, end) convention while it is
147 * [start, end] here. Fix the off-by-one to have the
148 * same convention.
149 */
150 end -= 1;
151
152 if (start > buf_max)
153 continue;
154
155 /* Memory hole not found */
156 if (end < buf_min)
157 break;
158
159 /* Adjust memory region based on the given range */
160 if (start < buf_min)
161 start = buf_min;
162 if (end > buf_max)
163 end = buf_max;
164
165 start = ALIGN(start, kbuf->buf_align);
166 if (start < end && (end - start + 1) >= kbuf->memsz) {
167 /* Suitable memory range found. Set kbuf->mem */
168 kbuf->mem = ALIGN_DOWN(end - kbuf->memsz + 1,
169 kbuf->buf_align);
170 ret = 0;
171 break;
172 }
173 }
174
175 return ret;
176}
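/*
 * Worked example with illustrative values: for a memblock region ending
 * at 0x7fffffff, kbuf->memsz = 0x200000 and kbuf->buf_align = 0x10000,
 * the hole is placed at ALIGN_DOWN(0x7fffffff - 0x200000 + 1, 0x10000)
 * = 0x7fe00000, i.e. as high as the region allows.
 */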
177
178/**
179 * locate_mem_hole_top_down_ppc64 - Skip special memory regions to find a
180 * suitable buffer with top down approach.
181 * @kbuf: Buffer contents and memory parameters.
182 * @buf_min: Minimum address for the buffer.
183 * @buf_max: Maximum address for the buffer.
184 * @emem: Exclude memory ranges.
185 *
186 * Returns 0 on success, negative errno on error.
187 */
188static int locate_mem_hole_top_down_ppc64(struct kexec_buf *kbuf,
189 u64 buf_min, u64 buf_max,
190 const struct crash_mem *emem)
191{
192 int i, ret = 0, err = -EADDRNOTAVAIL;
193 u64 start, end, tmin, tmax;
194
195 tmax = buf_max;
196 for (i = (emem->nr_ranges - 1); i >= 0; i--) {
197 start = emem->ranges[i].start;
198 end = emem->ranges[i].end;
199
200 if (start > tmax)
201 continue;
202
203 if (end < tmax) {
204 tmin = (end < buf_min ? buf_min : end + 1);
			ret = __locate_mem_hole_top_down(kbuf, tmin, tmax);
206 if (!ret)
207 return 0;
208 }
209
210 tmax = start - 1;
211
212 if (tmax < buf_min) {
213 ret = err;
214 break;
215 }
216 ret = 0;
217 }
218
219 if (!ret) {
220 tmin = buf_min;
		ret = __locate_mem_hole_top_down(kbuf, tmin, tmax);
222 }
223 return ret;
224}
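/*
 * In effect, the sorted exclude list carves [buf_min, buf_max] into the
 * gaps between excluded regions; the gaps are tried from the highest
 * address downwards until __locate_mem_hole_top_down() finds a fit.
 * locate_mem_hole_bottom_up_ppc64() below mirrors this walk from the
 * low end.
 */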
225
226/**
227 * __locate_mem_hole_bottom_up - Looks bottom up for a large enough memory hole
228 * in the memory regions between buf_min & buf_max
229 * for the buffer. If found, sets kbuf->mem.
230 * @kbuf: Buffer contents and memory parameters.
231 * @buf_min: Minimum address for the buffer.
232 * @buf_max: Maximum address for the buffer.
233 *
234 * Returns 0 on success, negative errno on error.
235 */
236static int __locate_mem_hole_bottom_up(struct kexec_buf *kbuf,
237 u64 buf_min, u64 buf_max)
238{
239 int ret = -EADDRNOTAVAIL;
240 phys_addr_t start, end;
241 u64 i;
242
243 for_each_mem_range(i, &start, &end) {
244 /*
245 * memblock uses [start, end) convention while it is
246 * [start, end] here. Fix the off-by-one to have the
247 * same convention.
248 */
249 end -= 1;
250
251 if (end < buf_min)
252 continue;
253
254 /* Memory hole not found */
255 if (start > buf_max)
256 break;
257
258 /* Adjust memory region based on the given range */
259 if (start < buf_min)
260 start = buf_min;
261 if (end > buf_max)
262 end = buf_max;
263
264 start = ALIGN(start, kbuf->buf_align);
265 if (start < end && (end - start + 1) >= kbuf->memsz) {
266 /* Suitable memory range found. Set kbuf->mem */
267 kbuf->mem = start;
268 ret = 0;
269 break;
270 }
271 }
272
273 return ret;
274}
275
276/**
277 * locate_mem_hole_bottom_up_ppc64 - Skip special memory regions to find a
278 * suitable buffer with bottom up approach.
279 * @kbuf: Buffer contents and memory parameters.
280 * @buf_min: Minimum address for the buffer.
281 * @buf_max: Maximum address for the buffer.
282 * @emem: Exclude memory ranges.
283 *
284 * Returns 0 on success, negative errno on error.
285 */
286static int locate_mem_hole_bottom_up_ppc64(struct kexec_buf *kbuf,
287 u64 buf_min, u64 buf_max,
288 const struct crash_mem *emem)
289{
290 int i, ret = 0, err = -EADDRNOTAVAIL;
291 u64 start, end, tmin, tmax;
292
293 tmin = buf_min;
294 for (i = 0; i < emem->nr_ranges; i++) {
295 start = emem->ranges[i].start;
296 end = emem->ranges[i].end;
297
298 if (end < tmin)
299 continue;
300
301 if (start > tmin) {
302 tmax = (start > buf_max ? buf_max : start - 1);
			ret = __locate_mem_hole_bottom_up(kbuf, tmin, tmax);
304 if (!ret)
305 return 0;
306 }
307
308 tmin = end + 1;
309
310 if (tmin > buf_max) {
311 ret = err;
312 break;
313 }
314 ret = 0;
315 }
316
317 if (!ret) {
318 tmax = buf_max;
		ret = __locate_mem_hole_bottom_up(kbuf, tmin, tmax);
320 }
321 return ret;
322}
323
324#ifdef CONFIG_CRASH_DUMP
325/**
326 * get_usable_memory_ranges - Get usable memory ranges. This list includes
327 * regions like crashkernel, opal/rtas & tce-table,
328 * that kdump kernel could use.
329 * @mem_ranges: Range list to add the memory ranges to.
330 *
331 * Returns 0 on success, negative errno on error.
332 */
333static int get_usable_memory_ranges(struct crash_mem **mem_ranges)
334{
335 int ret;
336
337 /*
	 * Early boot failure observed on guests when low memory (first memory
	 * block?) is not added to usable memory. So, add [0, crashk_res.end]
	 * instead of [crashk_res.start, crashk_res.end] to work around it.
	 * Also, the crashed kernel's memory must be added to the reserve map
	 * to prevent the kdump kernel from using it.
343 */
344 ret = add_mem_range(mem_ranges, 0, crashk_res.end + 1);
345 if (ret)
346 goto out;
347
348 ret = add_rtas_mem_range(mem_ranges);
349 if (ret)
350 goto out;
351
352 ret = add_opal_mem_range(mem_ranges);
353 if (ret)
354 goto out;
355
356 ret = add_tce_mem_ranges(mem_ranges);
357out:
358 if (ret)
359 pr_err("Failed to setup usable memory ranges\n");
360 return ret;
361}
362
363/**
364 * get_crash_memory_ranges - Get crash memory ranges. This list includes
365 * first/crashing kernel's memory regions that
366 * would be exported via an elfcore.
367 * @mem_ranges: Range list to add the memory ranges to.
368 *
369 * Returns 0 on success, negative errno on error.
370 */
371static int get_crash_memory_ranges(struct crash_mem **mem_ranges)
372{
373 phys_addr_t base, end;
374 struct crash_mem *tmem;
375 u64 i;
376 int ret;
377
378 for_each_mem_range(i, &base, &end) {
379 u64 size = end - base;
380
381 /* Skip backup memory region, which needs a separate entry */
382 if (base == BACKUP_SRC_START) {
383 if (size > BACKUP_SRC_SIZE) {
384 base = BACKUP_SRC_END + 1;
385 size -= BACKUP_SRC_SIZE;
386 } else
387 continue;
388 }
389
390 ret = add_mem_range(mem_ranges, base, size);
391 if (ret)
392 goto out;
393
394 /* Try merging adjacent ranges before reallocation attempt */
395 if ((*mem_ranges)->nr_ranges == (*mem_ranges)->max_nr_ranges)
396 sort_memory_ranges(*mem_ranges, true);
397 }
398
399 /* Reallocate memory ranges if there is no space to split ranges */
400 tmem = *mem_ranges;
401 if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) {
402 tmem = realloc_mem_ranges(mem_ranges);
403 if (!tmem)
404 goto out;
405 }
406
407 /* Exclude crashkernel region */
	ret = crash_exclude_mem_range(tmem, crashk_res.start, crashk_res.end);
409 if (ret)
410 goto out;
411
412 /*
413 * FIXME: For now, stay in parity with kexec-tools but if RTAS/OPAL
414 * regions are exported to save their context at the time of
415 * crash, they should actually be backed up just like the
416 * first 64K bytes of memory.
417 */
418 ret = add_rtas_mem_range(mem_ranges);
419 if (ret)
420 goto out;
421
422 ret = add_opal_mem_range(mem_ranges);
423 if (ret)
424 goto out;
425
426 /* create a separate program header for the backup region */
427 ret = add_mem_range(mem_ranges, BACKUP_SRC_START, BACKUP_SRC_SIZE);
428 if (ret)
429 goto out;
430
431 sort_memory_ranges(*mem_ranges, false);
432out:
433 if (ret)
434 pr_err("Failed to setup crash memory ranges\n");
435 return ret;
436}
437
438/**
439 * check_realloc_usable_mem - Reallocate buffer if it can't accommodate entries
440 * @um_info: Usable memory buffer and ranges info.
441 * @cnt: No. of entries to accommodate.
442 *
443 * Frees up the old buffer if memory reallocation fails.
444 *
445 * Returns buffer on success, NULL on error.
446 */
447static __be64 *check_realloc_usable_mem(struct umem_info *um_info, int cnt)
448{
449 u32 new_size;
450 __be64 *tbuf;
451
452 if ((um_info->idx + cnt) <= um_info->max_entries)
453 return um_info->buf;
454
455 new_size = um_info->size + MEM_RANGE_CHUNK_SZ;
	tbuf = krealloc(um_info->buf, new_size, GFP_KERNEL);
457 if (tbuf) {
458 um_info->buf = tbuf;
459 um_info->size = new_size;
460 um_info->max_entries = (um_info->size / sizeof(u64));
461 }
462
463 return tbuf;
464}
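/*
 * Usage note: callers pass the number of __be64 slots they are about to
 * fill (e.g. 2 for one base/size pair). On a successful grow, krealloc()
 * preserves the existing contents; on failure the callers bail out and
 * the old buffer is freed later by update_usable_mem_fdt().
 */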
465
466/**
467 * add_usable_mem - Add the usable memory ranges within the given memory range
468 * to the buffer
469 * @um_info: Usable memory buffer and ranges info.
470 * @base: Base address of memory range to look for.
471 * @end: End address of memory range to look for.
472 *
473 * Returns 0 on success, negative errno on error.
474 */
475static int add_usable_mem(struct umem_info *um_info, u64 base, u64 end)
476{
477 u64 loc_base, loc_end;
478 bool add;
479 int i;
480
481 for (i = 0; i < um_info->nr_ranges; i++) {
482 add = false;
483 loc_base = um_info->ranges[i].start;
484 loc_end = um_info->ranges[i].end;
485 if (loc_base >= base && loc_end <= end)
486 add = true;
487 else if (base < loc_end && end > loc_base) {
488 if (loc_base < base)
489 loc_base = base;
490 if (loc_end > end)
491 loc_end = end;
492 add = true;
493 }
494
495 if (add) {
			if (!check_realloc_usable_mem(um_info, 2))
497 return -ENOMEM;
498
499 um_info->buf[um_info->idx++] = cpu_to_be64(loc_base);
500 um_info->buf[um_info->idx++] =
501 cpu_to_be64(loc_end - loc_base + 1);
502 }
503 }
504
505 return 0;
506}
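/*
 * Example with illustrative addresses: for a usable range of
 * [0x10000000, 0x1fffffff] and a lookup window of [0x18000000,
 * 0x2fffffff], the overlap [0x18000000, 0x1fffffff] is emitted as the
 * pair (base 0x18000000, size 0x8000000).
 */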
507
508/**
509 * kdump_setup_usable_lmb - This is a callback function that gets called by
510 * walk_drmem_lmbs for every LMB to set its
511 * usable memory ranges.
512 * @lmb: LMB info.
513 * @usm: linux,drconf-usable-memory property value.
514 * @data: Pointer to usable memory buffer and ranges info.
515 *
516 * Returns 0 on success, negative errno on error.
517 */
518static int kdump_setup_usable_lmb(struct drmem_lmb *lmb, const __be32 **usm,
519 void *data)
520{
521 struct umem_info *um_info;
522 int tmp_idx, ret;
523 u64 base, end;
524
525 /*
526 * kdump load isn't supported on kernels already booted with
527 * linux,drconf-usable-memory property.
528 */
529 if (*usm) {
530 pr_err("linux,drconf-usable-memory property already exists!");
531 return -EINVAL;
532 }
533
534 um_info = data;
535 tmp_idx = um_info->idx;
	if (!check_realloc_usable_mem(um_info, 1))
537 return -ENOMEM;
538
539 um_info->idx++;
540 base = lmb->base_addr;
541 end = base + drmem_lmb_size() - 1;
542 ret = add_usable_mem(um_info, base, end);
543 if (!ret) {
544 /*
545 * Update the no. of ranges added. Two entries (base & size)
546 * for every range added.
547 */
548 um_info->buf[tmp_idx] =
549 cpu_to_be64((um_info->idx - tmp_idx - 1) / 2);
550 }
551
552 return ret;
553}
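/*
 * The resulting "linux,drconf-usable-memory" value is thus a per-LMB
 * counted list: for every LMB, a pair-count followed by that many
 * (base, size) pairs, all 64-bit big-endian. An LMB with no usable
 * memory gets a count of 0.
 */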
554
555#define NODE_PATH_LEN 256
556/**
557 * add_usable_mem_property - Add usable memory property for the given
558 * memory node.
559 * @fdt: Flattened device tree for the kdump kernel.
560 * @dn: Memory node.
561 * @um_info: Usable memory buffer and ranges info.
562 *
563 * Returns 0 on success, negative errno on error.
564 */
565static int add_usable_mem_property(void *fdt, struct device_node *dn,
566 struct umem_info *um_info)
567{
568 int n_mem_addr_cells, n_mem_size_cells, node;
569 char path[NODE_PATH_LEN];
570 int i, len, ranges, ret;
571 const __be32 *prop;
572 u64 base, end;
573
	of_node_get(dn);

	if (snprintf(path, NODE_PATH_LEN, "%pOF", dn) > (NODE_PATH_LEN - 1)) {
577 pr_err("Buffer (%d) too small for memory node: %pOF\n",
578 NODE_PATH_LEN, dn);
579 return -EOVERFLOW;
580 }
581 kexec_dprintk("Memory node path: %s\n", path);
582
583 /* Now that we know the path, find its offset in kdump kernel's fdt */
584 node = fdt_path_offset(fdt, path);
585 if (node < 0) {
586 pr_err("Malformed device tree: error reading %s\n", path);
587 ret = -EINVAL;
588 goto out;
589 }
590
591 /* Get the address & size cells */
	n_mem_addr_cells = of_n_addr_cells(dn);
	n_mem_size_cells = of_n_size_cells(dn);
594 kexec_dprintk("address cells: %d, size cells: %d\n", n_mem_addr_cells,
595 n_mem_size_cells);
596
597 um_info->idx = 0;
	if (!check_realloc_usable_mem(um_info, 2)) {
599 ret = -ENOMEM;
600 goto out;
601 }
602
	prop = of_get_property(dn, "reg", &len);
604 if (!prop || len <= 0) {
605 ret = 0;
606 goto out;
607 }
608
609 /*
610 * "reg" property represents sequence of (addr,size) tuples
611 * each representing a memory range.
612 */
613 ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
614
615 for (i = 0; i < ranges; i++) {
		base = of_read_number(prop, n_mem_addr_cells);
		prop += n_mem_addr_cells;
		end = base + of_read_number(prop, n_mem_size_cells) - 1;
		prop += n_mem_size_cells;
620
621 ret = add_usable_mem(um_info, base, end);
622 if (ret)
623 goto out;
624 }
625
626 /*
627 * No kdump kernel usable memory found in this memory node.
628 * Write (0,0) tuple in linux,usable-memory property for
629 * this region to be ignored.
630 */
631 if (um_info->idx == 0) {
632 um_info->buf[0] = 0;
633 um_info->buf[1] = 0;
634 um_info->idx = 2;
635 }
636
	ret = fdt_setprop(fdt, node, "linux,usable-memory", um_info->buf,
			  (um_info->idx * sizeof(u64)));
639
640out:
	of_node_put(dn);
642 return ret;
643}
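/*
 * Example of the resulting property (illustrative values): a memory node
 * overlapping the crashkernel region might end up with
 *
 *   linux,usable-memory = <base 0x20000000, size 0x10000000>;
 *
 * while a node with nothing usable gets the (0, 0) sentinel so the kdump
 * kernel ignores it entirely.
 */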
644
645
646/**
647 * update_usable_mem_fdt - Updates kdump kernel's fdt with linux,usable-memory
648 * and linux,drconf-usable-memory DT properties as
649 * appropriate to restrict its memory usage.
650 * @fdt: Flattened device tree for the kdump kernel.
651 * @usable_mem: Usable memory ranges for kdump kernel.
652 *
653 * Returns 0 on success, negative errno on error.
654 */
655static int update_usable_mem_fdt(void *fdt, struct crash_mem *usable_mem)
656{
657 struct umem_info um_info;
658 struct device_node *dn;
659 int node, ret = 0;
660
661 if (!usable_mem) {
662 pr_err("Usable memory ranges for kdump kernel not found\n");
663 return -ENOENT;
664 }
665
	node = fdt_path_offset(fdt, "/ibm,dynamic-reconfiguration-memory");
667 if (node == -FDT_ERR_NOTFOUND)
668 kexec_dprintk("No dynamic reconfiguration memory found\n");
669 else if (node < 0) {
670 pr_err("Malformed device tree: error reading /ibm,dynamic-reconfiguration-memory.\n");
671 return -EINVAL;
672 }
673
674 um_info.buf = NULL;
675 um_info.size = 0;
676 um_info.max_entries = 0;
677 um_info.idx = 0;
678 /* Memory ranges to look up */
679 um_info.ranges = &(usable_mem->ranges[0]);
680 um_info.nr_ranges = usable_mem->nr_ranges;
681
	dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
	if (dn) {
		ret = walk_drmem_lmbs(dn, &um_info, kdump_setup_usable_lmb);
		of_node_put(dn);
686
687 if (ret) {
688 pr_err("Could not setup linux,drconf-usable-memory property for kdump\n");
689 goto out;
690 }
691
		ret = fdt_setprop(fdt, node, "linux,drconf-usable-memory",
				  um_info.buf, (um_info.idx * sizeof(u64)));
694 if (ret) {
695 pr_err("Failed to update fdt with linux,drconf-usable-memory property: %s",
696 fdt_strerror(ret));
697 goto out;
698 }
699 }
700
701 /*
702 * Walk through each memory node and set linux,usable-memory property
703 * for the corresponding node in kdump kernel's fdt.
704 */
705 for_each_node_by_type(dn, "memory") {
		ret = add_usable_mem_property(fdt, dn, &um_info);
707 if (ret) {
708 pr_err("Failed to set linux,usable-memory property for %s node",
709 dn->full_name);
			of_node_put(dn);
711 goto out;
712 }
713 }
714
715out:
	kfree(um_info.buf);
717 return ret;
718}
719
720/**
721 * load_backup_segment - Locate a memory hole to place the backup region.
722 * @image: Kexec image.
723 * @kbuf: Buffer contents and memory parameters.
724 *
725 * Returns 0 on success, negative errno on error.
726 */
727static int load_backup_segment(struct kimage *image, struct kexec_buf *kbuf)
728{
729 void *buf;
730 int ret;
731
732 /*
733 * Setup a source buffer for backup segment.
734 *
735 * A source buffer has no meaning for backup region as data will
736 * be copied from backup source, after crash, in the purgatory.
737 * But as load segment code doesn't recognize such segments,
738 * setup a dummy source buffer to keep it happy for now.
739 */
	buf = vzalloc(BACKUP_SRC_SIZE);
741 if (!buf)
742 return -ENOMEM;
743
744 kbuf->buffer = buf;
745 kbuf->mem = KEXEC_BUF_MEM_UNKNOWN;
746 kbuf->bufsz = kbuf->memsz = BACKUP_SRC_SIZE;
747 kbuf->top_down = false;
748
749 ret = kexec_add_buffer(kbuf);
750 if (ret) {
		vfree(buf);
752 return ret;
753 }
754
755 image->arch.backup_buf = buf;
756 image->arch.backup_start = kbuf->mem;
757 return 0;
758}
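/*
 * Note: the zeroed source buffer only reserves room in the capture
 * kernel's memory. At crash time, purgatory copies the first 64KB of the
 * crashed kernel's RAM into this region (see the "backup_start" symbol
 * set in setup_purgatory_ppc64()), and update_backup_region_phdr() below
 * points the matching ELF program header at that copy.
 */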
759
760/**
761 * update_backup_region_phdr - Update backup region's offset for the core to
762 * export the region appropriately.
763 * @image: Kexec image.
764 * @ehdr: ELF core header.
765 *
 * Assumes an exclusive program header is set up for the backup region
 * in the ELF headers.
768 *
769 * Returns nothing.
770 */
771static void update_backup_region_phdr(struct kimage *image, Elf64_Ehdr *ehdr)
772{
773 Elf64_Phdr *phdr;
774 unsigned int i;
775
776 phdr = (Elf64_Phdr *)(ehdr + 1);
777 for (i = 0; i < ehdr->e_phnum; i++) {
778 if (phdr->p_paddr == BACKUP_SRC_START) {
779 phdr->p_offset = image->arch.backup_start;
780 kexec_dprintk("Backup region offset updated to 0x%lx\n",
781 image->arch.backup_start);
782 return;
783 }
784 }
785}
786
787/**
788 * load_elfcorehdr_segment - Setup crash memory ranges and initialize elfcorehdr
789 * segment needed to load kdump kernel.
790 * @image: Kexec image.
791 * @kbuf: Buffer contents and memory parameters.
792 *
793 * Returns 0 on success, negative errno on error.
794 */
795static int load_elfcorehdr_segment(struct kimage *image, struct kexec_buf *kbuf)
796{
797 struct crash_mem *cmem = NULL;
798 unsigned long headers_sz;
799 void *headers = NULL;
800 int ret;
801
	ret = get_crash_memory_ranges(&cmem);
803 if (ret)
804 goto out;
805
806 /* Setup elfcorehdr segment */
	ret = crash_prepare_elf64_headers(cmem, false, &headers, &headers_sz);
808 if (ret) {
809 pr_err("Failed to prepare elf headers for the core\n");
810 goto out;
811 }
812
813 /* Fix the offset for backup region in the ELF header */
	update_backup_region_phdr(image, headers);
815
816 kbuf->buffer = headers;
817 kbuf->mem = KEXEC_BUF_MEM_UNKNOWN;
818 kbuf->bufsz = kbuf->memsz = headers_sz;
819 kbuf->top_down = false;
820
821 ret = kexec_add_buffer(kbuf);
822 if (ret) {
		vfree(headers);
824 goto out;
825 }
826
827 image->elf_load_addr = kbuf->mem;
828 image->elf_headers_sz = headers_sz;
829 image->elf_headers = headers;
830out:
	kfree(cmem);
832 return ret;
833}
834
835/**
 * load_crashdump_segments_ppc64 - Initialize the additional segments needed
837 * to load kdump kernel.
838 * @image: Kexec image.
839 * @kbuf: Buffer contents and memory parameters.
840 *
841 * Returns 0 on success, negative errno on error.
842 */
843int load_crashdump_segments_ppc64(struct kimage *image,
844 struct kexec_buf *kbuf)
845{
846 int ret;
847
848 /* Load backup segment - first 64K bytes of the crashing kernel */
849 ret = load_backup_segment(image, kbuf);
850 if (ret) {
851 pr_err("Failed to load backup segment\n");
852 return ret;
853 }
854 kexec_dprintk("Loaded the backup region at 0x%lx\n", kbuf->mem);
855
856 /* Load elfcorehdr segment - to export crashing kernel's vmcore */
857 ret = load_elfcorehdr_segment(image, kbuf);
858 if (ret) {
859 pr_err("Failed to load elfcorehdr segment\n");
860 return ret;
861 }
862 kexec_dprintk("Loaded elf core header at 0x%lx, bufsz=0x%lx memsz=0x%lx\n",
863 image->elf_load_addr, kbuf->bufsz, kbuf->memsz);
864
865 return 0;
866}
867#endif
868
869/**
870 * setup_purgatory_ppc64 - initialize PPC64 specific purgatory's global
871 * variables and call setup_purgatory() to initialize
872 * common global variable.
873 * @image: kexec image.
874 * @slave_code: Slave code for the purgatory.
875 * @fdt: Flattened device tree for the next kernel.
876 * @kernel_load_addr: Address where the kernel is loaded.
877 * @fdt_load_addr: Address where the flattened device tree is loaded.
878 *
879 * Returns 0 on success, negative errno on error.
880 */
881int setup_purgatory_ppc64(struct kimage *image, const void *slave_code,
882 const void *fdt, unsigned long kernel_load_addr,
883 unsigned long fdt_load_addr)
884{
885 struct device_node *dn = NULL;
886 int ret;
887
888 ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr,
889 fdt_load_addr);
890 if (ret)
891 goto out;
892
893 if (image->type == KEXEC_TYPE_CRASH) {
894 u32 my_run_at_load = 1;
895
896 /*
897 * Tell relocatable kernel to run at load address
898 * via the word meant for that at 0x5c.
899 */
		ret = kexec_purgatory_get_set_symbol(image, "run_at_load",
						     &my_run_at_load,
						     sizeof(my_run_at_load),
						     false);
904 if (ret)
905 goto out;
906 }
907
908 /* Tell purgatory where to look for backup region */
	ret = kexec_purgatory_get_set_symbol(image, "backup_start",
					     &image->arch.backup_start,
					     sizeof(image->arch.backup_start),
					     false);
913 if (ret)
914 goto out;
915
916 /* Setup OPAL base & entry values */
	dn = of_find_node_by_path("/ibm,opal");
918 if (dn) {
919 u64 val;
920
		of_property_read_u64(dn, "opal-base-address", &val);
		ret = kexec_purgatory_get_set_symbol(image, "opal_base", &val,
						     sizeof(val), false);
924 if (ret)
925 goto out;
926
		of_property_read_u64(dn, "opal-entry-address", &val);
		ret = kexec_purgatory_get_set_symbol(image, "opal_entry", &val,
						     sizeof(val), false);
930 }
931out:
932 if (ret)
933 pr_err("Failed to setup purgatory symbols");
	of_node_put(dn);
935 return ret;
936}
937
938/**
939 * cpu_node_size - Compute the size of a CPU node in the FDT.
940 * This should be done only once and the value is stored in
941 * a static variable.
942 * Returns the max size of a CPU node in the FDT.
943 */
944static unsigned int cpu_node_size(void)
945{
946 static unsigned int size;
947 struct device_node *dn;
948 struct property *pp;
949
950 /*
951 * Don't compute it twice, we are assuming that the per CPU node size
952 * doesn't change during the system's life.
953 */
954 if (size)
955 return size;
956
	dn = of_find_node_by_type(NULL, "cpu");
958 if (WARN_ON_ONCE(!dn)) {
959 // Unlikely to happen
960 return 0;
961 }
962
963 /*
964 * We compute the sub node size for a CPU node, assuming it
965 * will be the same for all.
966 */
967 size += strlen(dn->name) + 5;
968 for_each_property_of_node(dn, pp) {
969 size += strlen(pp->name);
970 size += pp->length;
971 }
972
	of_node_put(dn);
974 return size;
975}
976
977static unsigned int kdump_extra_fdt_size_ppc64(struct kimage *image)
978{
979 unsigned int cpu_nodes, extra_size = 0;
980 struct device_node *dn;
981 u64 usm_entries;
982
983 if (!IS_ENABLED(CONFIG_CRASH_DUMP) || image->type != KEXEC_TYPE_CRASH)
984 return 0;
985
986 /*
987 * For kdump kernel, account for linux,usable-memory and
988 * linux,drconf-usable-memory properties. Get an approximate on the
989 * number of usable memory entries and use for FDT size estimation.
990 */
991 if (drmem_lmb_size()) {
		usm_entries = ((memory_hotplug_max() / drmem_lmb_size()) +
			       (2 * (resource_size(&crashk_res) / drmem_lmb_size())));
994 extra_size += (unsigned int)(usm_entries * sizeof(u64));
995 }
996
997 /*
998 * Get the number of CPU nodes in the current DT. This allows to
999 * reserve places for CPU nodes added since the boot time.
1000 */
1001 cpu_nodes = 0;
1002 for_each_node_by_type(dn, "cpu") {
1003 cpu_nodes++;
1004 }
1005
1006 if (cpu_nodes > boot_cpu_node_count)
1007 extra_size += (cpu_nodes - boot_cpu_node_count) * cpu_node_size();
1008
1009 return extra_size;
1010}
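/*
 * Rough sizing example, all numbers illustrative: with a 256MB LMB size,
 * memory_hotplug_max() of 64GB and a 1GB crashkernel region,
 * usm_entries = 64GB/256MB + 2 * (1GB/256MB) = 256 + 8 = 264, i.e. about
 * 2112 extra bytes, plus whatever newly added CPU nodes require.
 */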
1011
1012/**
1013 * kexec_extra_fdt_size_ppc64 - Return the estimated additional size needed to
1014 * setup FDT for kexec/kdump kernel.
1015 * @image: kexec image being loaded.
1016 *
1017 * Returns the estimated extra size needed for kexec/kdump kernel FDT.
1018 */
1019unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image)
1020{
1021 unsigned int extra_size = 0;
1022
1023 // Budget some space for the password blob. There's already extra space
1024 // for the key name
1025 if (plpks_is_available())
1026 extra_size += (unsigned int)plpks_get_passwordlen();
1027
1028 return extra_size + kdump_extra_fdt_size_ppc64(image);
1029}
1030
1031/**
1032 * add_node_props - Reads node properties from device node structure and add
1033 * them to fdt.
1034 * @fdt: Flattened device tree of the kernel
1035 * @node_offset: offset of the node to add a property at
1036 * @dn: device node pointer
1037 *
1038 * Returns 0 on success, negative errno on error.
1039 */
1040static int add_node_props(void *fdt, int node_offset, const struct device_node *dn)
1041{
1042 int ret = 0;
1043 struct property *pp;
1044
1045 if (!dn)
1046 return -EINVAL;
1047
1048 for_each_property_of_node(dn, pp) {
		ret = fdt_setprop(fdt, node_offset, pp->name, pp->value, pp->length);
1050 if (ret < 0) {
1051 pr_err("Unable to add %s property: %s\n", pp->name, fdt_strerror(ret));
1052 return ret;
1053 }
1054 }
1055 return ret;
1056}
1057
1058/**
1059 * update_cpus_node - Update cpus node of flattened device tree using of_root
1060 * device node.
1061 * @fdt: Flattened device tree of the kernel.
1062 *
1063 * Returns 0 on success, negative errno on error.
1064 */
1065static int update_cpus_node(void *fdt)
1066{
1067 struct device_node *cpus_node, *dn;
1068 int cpus_offset, cpus_subnode_offset, ret = 0;
1069
	cpus_offset = fdt_path_offset(fdt, "/cpus");
1071 if (cpus_offset < 0 && cpus_offset != -FDT_ERR_NOTFOUND) {
1072 pr_err("Malformed device tree: error reading /cpus node: %s\n",
1073 fdt_strerror(cpus_offset));
1074 return cpus_offset;
1075 }
1076
1077 if (cpus_offset > 0) {
		ret = fdt_del_node(fdt, cpus_offset);
1079 if (ret < 0) {
1080 pr_err("Error deleting /cpus node: %s\n", fdt_strerror(ret));
1081 return -EINVAL;
1082 }
1083 }
1084
1085 /* Add cpus node to fdt */
	cpus_offset = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"), "cpus");
1087 if (cpus_offset < 0) {
1088 pr_err("Error creating /cpus node: %s\n", fdt_strerror(cpus_offset));
1089 return -EINVAL;
1090 }
1091
1092 /* Add cpus node properties */
	cpus_node = of_find_node_by_path("/cpus");
	ret = add_node_props(fdt, cpus_offset, cpus_node);
	of_node_put(cpus_node);
1096 if (ret < 0)
1097 return ret;
1098
1099 /* Loop through all subnodes of cpus and add them to fdt */
1100 for_each_node_by_type(dn, "cpu") {
		cpus_subnode_offset = fdt_add_subnode(fdt, cpus_offset, dn->full_name);
1102 if (cpus_subnode_offset < 0) {
1103 pr_err("Unable to add %s subnode: %s\n", dn->full_name,
1104 fdt_strerror(cpus_subnode_offset));
1105 ret = cpus_subnode_offset;
1106 goto out;
1107 }
1108
		ret = add_node_props(fdt, cpus_subnode_offset, dn);
1110 if (ret < 0)
1111 goto out;
1112 }
1113out:
	of_node_put(dn);
1115 return ret;
1116}
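/*
 * The /cpus node is rebuilt from the live device tree so that CPUs
 * hot-added since boot show up in the FDT passed to the next kernel
 * (see the caller in setup_new_fdt_ppc64() below).
 */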
1117
1118static int copy_property(void *fdt, int node_offset, const struct device_node *dn,
1119 const char *propname)
1120{
1121 const void *prop, *fdtprop;
1122 int len = 0, fdtlen = 0;
1123
	prop = of_get_property(dn, propname, &len);
	fdtprop = fdt_getprop(fdt, node_offset, propname, &fdtlen);
1126
1127 if (fdtprop && !prop)
		return fdt_delprop(fdt, node_offset, propname);
	else if (prop)
		return fdt_setprop(fdt, node_offset, propname, prop, len);
1131 else
1132 return -FDT_ERR_NOTFOUND;
1133}
1134
1135static int update_pci_dma_nodes(void *fdt, const char *dmapropname)
1136{
1137 struct device_node *dn;
1138 int pci_offset, root_offset, ret = 0;
1139
1140 if (!firmware_has_feature(FW_FEATURE_LPAR))
1141 return 0;
1142
	root_offset = fdt_path_offset(fdt, "/");
	for_each_node_with_property(dn, dmapropname) {
		pci_offset = fdt_subnode_offset(fdt, root_offset, of_node_full_name(dn));
1146 if (pci_offset < 0)
1147 continue;
1148
		ret = copy_property(fdt, pci_offset, dn, "ibm,dma-window");
		if (ret < 0) {
			of_node_put(dn);
			break;
		}
		ret = copy_property(fdt, pci_offset, dn, dmapropname);
		if (ret < 0) {
			of_node_put(dn);
1157 break;
1158 }
1159 }
1160
1161 return ret;
1162}
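/*
 * Called twice from setup_new_fdt_ppc64() below, once for the direct
 * mapping property and once for the dynamic DMA window property.
 * Carrying these properties over presumably lets the next kernel find
 * the DMA windows already configured for devices on pseries LPARs.
 */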
1163
1164/**
 * setup_new_fdt_ppc64 - Update the flattened device tree of the kernel
1166 * being loaded.
1167 * @image: kexec image being loaded.
1168 * @fdt: Flattened device tree for the next kernel.
1169 * @initrd_load_addr: Address where the next initrd will be loaded.
1170 * @initrd_len: Size of the next initrd, or 0 if there will be none.
1171 * @cmdline: Command line for the next kernel, or NULL if there will
1172 * be none.
1173 *
1174 * Returns 0 on success, negative errno on error.
1175 */
1176int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
1177 unsigned long initrd_load_addr,
1178 unsigned long initrd_len, const char *cmdline)
1179{
1180 struct crash_mem *umem = NULL, *rmem = NULL;
1181 int i, nr_ranges, ret;
1182
1183#ifdef CONFIG_CRASH_DUMP
1184 /*
1185 * Restrict memory usage for kdump kernel by setting up
1186 * usable memory ranges and memory reserve map.
1187 */
1188 if (image->type == KEXEC_TYPE_CRASH) {
		ret = get_usable_memory_ranges(&umem);
1190 if (ret)
1191 goto out;
1192
		ret = update_usable_mem_fdt(fdt, umem);
1194 if (ret) {
1195 pr_err("Error setting up usable-memory property for kdump kernel\n");
1196 goto out;
1197 }
1198
1199 /*
1200 * Ensure we don't touch crashed kernel's memory except the
1201 * first 64K of RAM, which will be backed up.
1202 */
1203 ret = fdt_add_mem_rsv(fdt, BACKUP_SRC_END + 1,
1204 crashk_res.start - BACKUP_SRC_SIZE);
1205 if (ret) {
1206 pr_err("Error reserving crash memory: %s\n",
1207 fdt_strerror(ret));
1208 goto out;
1209 }
1210
1211 /* Ensure backup region is not used by kdump/capture kernel */
1212 ret = fdt_add_mem_rsv(fdt, image->arch.backup_start,
1213 BACKUP_SRC_SIZE);
1214 if (ret) {
1215 pr_err("Error reserving memory for backup: %s\n",
1216 fdt_strerror(ret));
1217 goto out;
1218 }
1219 }
1220#endif
1221
1222 /* Update cpus nodes information to account hotplug CPUs. */
1223 ret = update_cpus_node(fdt);
1224 if (ret < 0)
1225 goto out;
1226
1227 ret = update_pci_dma_nodes(fdt, DIRECT64_PROPNAME);
1228 if (ret < 0)
1229 goto out;
1230
1231 ret = update_pci_dma_nodes(fdt, DMA64_PROPNAME);
1232 if (ret < 0)
1233 goto out;
1234
1235 /* Update memory reserve map */
	ret = get_reserved_memory_ranges(&rmem);
1237 if (ret)
1238 goto out;
1239
1240 nr_ranges = rmem ? rmem->nr_ranges : 0;
1241 for (i = 0; i < nr_ranges; i++) {
1242 u64 base, size;
1243
1244 base = rmem->ranges[i].start;
1245 size = rmem->ranges[i].end - base + 1;
		ret = fdt_add_mem_rsv(fdt, base, size);
1247 if (ret) {
1248 pr_err("Error updating memory reserve map: %s\n",
1249 fdt_strerror(ret));
1250 goto out;
1251 }
1252 }
1253
1254 // If we have PLPKS active, we need to provide the password to the new kernel
1255 if (plpks_is_available())
1256 ret = plpks_populate_fdt(fdt);
1257
1258out:
	kfree(rmem);
	kfree(umem);
1261 return ret;
1262}
1263
1264/**
1265 * arch_kexec_locate_mem_hole - Skip special memory regions like rtas, opal,
1266 * tce-table, reserved-ranges & such (exclude
1267 * memory ranges) as they can't be used for kexec
1268 * segment buffer. Sets kbuf->mem when a suitable
1269 * memory hole is found.
1270 * @kbuf: Buffer contents and memory parameters.
1271 *
1272 * Assumes minimum of PAGE_SIZE alignment for kbuf->memsz & kbuf->buf_align.
1273 *
1274 * Returns 0 on success, negative errno on error.
1275 */
1276int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf)
1277{
1278 struct crash_mem **emem;
1279 u64 buf_min, buf_max;
1280 int ret;
1281
1282 /* Look up the exclude ranges list while locating the memory hole */
1283 emem = &(kbuf->image->arch.exclude_ranges);
1284 if (!(*emem) || ((*emem)->nr_ranges == 0)) {
1285 pr_warn("No exclude range list. Using the default locate mem hole method\n");
1286 return kexec_locate_mem_hole(kbuf);
1287 }
1288
1289 buf_min = kbuf->buf_min;
1290 buf_max = kbuf->buf_max;
1291 /* Segments for kdump kernel should be within crashkernel region */
1292 if (IS_ENABLED(CONFIG_CRASH_DUMP) && kbuf->image->type == KEXEC_TYPE_CRASH) {
1293 buf_min = (buf_min < crashk_res.start ?
1294 crashk_res.start : buf_min);
1295 buf_max = (buf_max > crashk_res.end ?
1296 crashk_res.end : buf_max);
1297 }
1298
1299 if (buf_min > buf_max) {
1300 pr_err("Invalid buffer min and/or max values\n");
1301 return -EINVAL;
1302 }
1303
1304 if (kbuf->top_down)
		ret = locate_mem_hole_top_down_ppc64(kbuf, buf_min, buf_max,
						     *emem);
	else
		ret = locate_mem_hole_bottom_up_ppc64(kbuf, buf_min, buf_max,
						      *emem);
1310
1311 /* Add the buffer allocated to the exclude list for the next lookup */
1312 if (!ret) {
1313 add_mem_range(emem, kbuf->mem, kbuf->memsz);
1314 sort_memory_ranges(*emem, true);
1315 } else {
1316 pr_err("Failed to locate memory buffer of size %lu\n",
1317 kbuf->memsz);
1318 }
1319 return ret;
1320}
1321
1322/**
1323 * arch_kexec_kernel_image_probe - Does additional handling needed to setup
1324 * kexec segments.
1325 * @image: kexec image being loaded.
1326 * @buf: Buffer pointing to elf data.
1327 * @buf_len: Length of the buffer.
1328 *
1329 * Returns 0 on success, negative errno on error.
1330 */
1331int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
1332 unsigned long buf_len)
1333{
1334 int ret;
1335
1336 /* Get exclude memory ranges needed for setting up kexec segments */
	ret = get_exclude_memory_ranges(&(image->arch.exclude_ranges));
1338 if (ret) {
1339 pr_err("Failed to setup exclude memory ranges for buffer lookup\n");
1340 return ret;
1341 }
1342
1343 return kexec_image_probe_default(image, buf, buf_len);
1344}
1345
1346/**
1347 * arch_kimage_file_post_load_cleanup - Frees up all the allocations done
1348 * while loading the image.
1349 * @image: kexec image being loaded.
1350 *
1351 * Returns 0 on success, negative errno on error.
1352 */
1353int arch_kimage_file_post_load_cleanup(struct kimage *image)
1354{
	kfree(image->arch.exclude_ranges);
	image->arch.exclude_ranges = NULL;

	vfree(image->arch.backup_buf);
	image->arch.backup_buf = NULL;

	vfree(image->elf_headers);
	image->elf_headers = NULL;
	image->elf_headers_sz = 0;

	kvfree(image->arch.fdt);
1366 image->arch.fdt = NULL;
1367
1368 return kexec_image_post_load_cleanup_default(image);
1369}
1370

/* Source: arch/powerpc/kexec/file_load_64.c */