1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Low level x86 E820 memory map handling functions. |
4 | * |
5 | * The firmware and bootloader pass us the "E820 table", which is the primary |
6 | * physical memory layout description available about x86 systems. |
7 | * |
8 | * The kernel takes the E820 memory layout and optionally modifies it with |
9 | * quirks and other tweaks, and feeds that into the generic Linux memory |
10 | * allocation code routines via a platform independent interface (memblock, etc.). |
11 | */ |
12 | #include <linux/crash_dump.h> |
13 | #include <linux/memblock.h> |
14 | #include <linux/suspend.h> |
15 | #include <linux/acpi.h> |
16 | #include <linux/firmware-map.h> |
17 | #include <linux/sort.h> |
18 | #include <linux/memory_hotplug.h> |
19 | |
20 | #include <asm/e820/api.h> |
21 | #include <asm/setup.h> |
22 | |
23 | /* |
24 | * We organize the E820 table into three main data structures: |
25 | * |
26 | * - 'e820_table_firmware': the original firmware version passed to us by the |
27 | * bootloader - not modified by the kernel. It is composed of two parts: |
28 | * the first 128 E820 memory entries in boot_params.e820_table and the remaining |
29 | * (if any) entries of the SETUP_E820_EXT nodes. We use this to: |
30 | * |
31 | * - inform the user about the firmware's notion of memory layout |
32 | * via /sys/firmware/memmap |
33 | * |
34 | * - the hibernation code uses it to generate a kernel-independent CRC32 |
35 | * checksum of the physical memory layout of a system. |
36 | * |
37 | * - 'e820_table_kexec': a slightly modified (by the kernel) firmware version |
38 | * passed to us by the bootloader - the major difference between |
39 | * e820_table_firmware[] and this one is that, the latter marks the setup_data |
40 | * list created by the EFI boot stub as reserved, so that kexec can reuse the |
41 | * setup_data information in the second kernel. Besides, e820_table_kexec[] |
42 | * might also be modified by the kexec itself to fake a mptable. |
43 | * We use this to: |
44 | * |
45 | * - kexec, which is a bootloader in disguise, uses the original E820 |
46 | * layout to pass to the kexec-ed kernel. This way the original kernel |
47 | * can have a restricted E820 map while the kexec()-ed kexec-kernel |
48 | * can have access to full memory - etc. |
49 | * |
50 | * - 'e820_table': this is the main E820 table that is massaged by the |
51 | * low level x86 platform code, or modified by boot parameters, before |
52 | * passed on to higher level MM layers. |
53 | * |
54 | * Once the E820 map has been converted to the standard Linux memory layout |
55 | * information its role stops - modifying it has no effect and does not get |
56 | * re-propagated. So its main role is a temporary bootstrap storage of firmware |
57 | * specific memory layout data during early bootup. |
58 | */ |
59 | static struct e820_table e820_table_init __initdata; |
60 | static struct e820_table e820_table_kexec_init __initdata; |
61 | static struct e820_table e820_table_firmware_init __initdata; |
62 | |
63 | struct e820_table *e820_table __refdata = &e820_table_init; |
64 | struct e820_table *e820_table_kexec __refdata = &e820_table_kexec_init; |
65 | struct e820_table *e820_table_firmware __refdata = &e820_table_firmware_init; |
66 | |
/*
 * Start address for PCI or other memory-mapped resources. The initializer
 * is only a placeholder: e820__setup_pci_gap() overwrites it with the start
 * of the gap it selects.
 */
unsigned long pci_mem_start = 0xaeedbabeUL;
#ifdef CONFIG_PCI
EXPORT_SYMBOL(pci_mem_start);
#endif
72 | |
73 | /* |
74 | * This function checks if any part of the range <start,end> is mapped |
75 | * with type. |
76 | */ |
77 | static bool _e820__mapped_any(struct e820_table *table, |
78 | u64 start, u64 end, enum e820_type type) |
79 | { |
80 | int i; |
81 | |
82 | for (i = 0; i < table->nr_entries; i++) { |
83 | struct e820_entry *entry = &table->entries[i]; |
84 | |
85 | if (type && entry->type != type) |
86 | continue; |
87 | if (entry->addr >= end || entry->addr + entry->size <= start) |
88 | continue; |
89 | return true; |
90 | } |
91 | return false; |
92 | } |
93 | |
94 | bool e820__mapped_raw_any(u64 start, u64 end, enum e820_type type) |
95 | { |
96 | return _e820__mapped_any(table: e820_table_firmware, start, end, type); |
97 | } |
98 | EXPORT_SYMBOL_GPL(e820__mapped_raw_any); |
99 | |
100 | bool e820__mapped_any(u64 start, u64 end, enum e820_type type) |
101 | { |
102 | return _e820__mapped_any(table: e820_table, start, end, type); |
103 | } |
104 | EXPORT_SYMBOL_GPL(e820__mapped_any); |
105 | |
106 | /* |
107 | * This function checks if the entire <start,end> range is mapped with 'type'. |
108 | * |
109 | * Note: this function only works correctly once the E820 table is sorted and |
110 | * not-overlapping (at least for the range specified), which is the case normally. |
111 | */ |
112 | static struct e820_entry *__e820__mapped_all(u64 start, u64 end, |
113 | enum e820_type type) |
114 | { |
115 | int i; |
116 | |
117 | for (i = 0; i < e820_table->nr_entries; i++) { |
118 | struct e820_entry *entry = &e820_table->entries[i]; |
119 | |
120 | if (type && entry->type != type) |
121 | continue; |
122 | |
123 | /* Is the region (part) in overlap with the current region? */ |
124 | if (entry->addr >= end || entry->addr + entry->size <= start) |
125 | continue; |
126 | |
127 | /* |
128 | * If the region is at the beginning of <start,end> we move |
129 | * 'start' to the end of the region since it's ok until there |
130 | */ |
131 | if (entry->addr <= start) |
132 | start = entry->addr + entry->size; |
133 | |
134 | /* |
135 | * If 'start' is now at or beyond 'end', we're done, full |
136 | * coverage of the desired range exists: |
137 | */ |
138 | if (start >= end) |
139 | return entry; |
140 | } |
141 | |
142 | return NULL; |
143 | } |
144 | |
145 | /* |
146 | * This function checks if the entire range <start,end> is mapped with type. |
147 | */ |
148 | bool __init e820__mapped_all(u64 start, u64 end, enum e820_type type) |
149 | { |
150 | return __e820__mapped_all(start, end, type); |
151 | } |
152 | |
153 | /* |
154 | * This function returns the type associated with the range <start,end>. |
155 | */ |
156 | int e820__get_entry_type(u64 start, u64 end) |
157 | { |
158 | struct e820_entry *entry = __e820__mapped_all(start, end, type: 0); |
159 | |
160 | return entry ? entry->type : -EINVAL; |
161 | } |
162 | |
163 | /* |
164 | * Add a memory region to the kernel E820 map. |
165 | */ |
166 | static void __init __e820__range_add(struct e820_table *table, u64 start, u64 size, enum e820_type type) |
167 | { |
168 | int x = table->nr_entries; |
169 | |
170 | if (x >= ARRAY_SIZE(table->entries)) { |
171 | pr_err("too many entries; ignoring [mem %#010llx-%#010llx]\n" , |
172 | start, start + size - 1); |
173 | return; |
174 | } |
175 | |
176 | table->entries[x].addr = start; |
177 | table->entries[x].size = size; |
178 | table->entries[x].type = type; |
179 | table->nr_entries++; |
180 | } |
181 | |
182 | void __init e820__range_add(u64 start, u64 size, enum e820_type type) |
183 | { |
184 | __e820__range_add(table: e820_table, start, size, type); |
185 | } |
186 | |
187 | static void __init e820_print_type(enum e820_type type) |
188 | { |
189 | switch (type) { |
190 | case E820_TYPE_RAM: /* Fall through: */ |
191 | case E820_TYPE_RESERVED_KERN: pr_cont("usable" ); break; |
192 | case E820_TYPE_RESERVED: pr_cont("reserved" ); break; |
193 | case E820_TYPE_SOFT_RESERVED: pr_cont("soft reserved" ); break; |
194 | case E820_TYPE_ACPI: pr_cont("ACPI data" ); break; |
195 | case E820_TYPE_NVS: pr_cont("ACPI NVS" ); break; |
196 | case E820_TYPE_UNUSABLE: pr_cont("unusable" ); break; |
197 | case E820_TYPE_PMEM: /* Fall through: */ |
198 | case E820_TYPE_PRAM: pr_cont("persistent (type %u)" , type); break; |
199 | default: pr_cont("type %u" , type); break; |
200 | } |
201 | } |
202 | |
203 | void __init e820__print_table(char *who) |
204 | { |
205 | int i; |
206 | |
207 | for (i = 0; i < e820_table->nr_entries; i++) { |
208 | pr_info("%s: [mem %#018Lx-%#018Lx] " , |
209 | who, |
210 | e820_table->entries[i].addr, |
211 | e820_table->entries[i].addr + e820_table->entries[i].size - 1); |
212 | |
213 | e820_print_type(type: e820_table->entries[i].type); |
214 | pr_cont("\n" ); |
215 | } |
216 | } |
217 | |
218 | /* |
219 | * Sanitize an E820 map. |
220 | * |
221 | * Some E820 layouts include overlapping entries. The following |
222 | * replaces the original E820 map with a new one, removing overlaps, |
223 | * and resolving conflicting memory types in favor of highest |
224 | * numbered type. |
225 | * |
226 | * The input parameter 'entries' points to an array of 'struct |
227 | * e820_entry' which on entry has elements in the range [0, *nr_entries) |
228 | * valid, and which has space for up to max_nr_entries entries. |
229 | * On return, the resulting sanitized E820 map entries will be |
230 | * overwritten in the same location, starting at 'entries'. |
231 | * |
232 | * The integer pointed to by nr_entries must be valid on entry (the |
233 | * current number of valid entries located at 'entries'). If the |
234 | * sanitizing succeeds the *nr_entries will be updated with the new |
235 | * number of valid entries (something no more than max_nr_entries). |
236 | * |
237 | * The return value from e820__update_table() is zero if it |
238 | * successfully 'sanitized' the map entries passed in, and is -1 |
239 | * if it did nothing, which can happen if either of (1) it was |
240 | * only passed one map entry, or (2) any of the input map entries |
241 | * were invalid (start + size < start, meaning that the size was |
242 | * so big the described memory range wrapped around through zero.) |
243 | * |
244 | * Visually we're performing the following |
245 | * (1,2,3,4 = memory types)... |
246 | * |
247 | * Sample memory map (w/overlaps): |
248 | * ____22__________________ |
249 | * ______________________4_ |
250 | * ____1111________________ |
251 | * _44_____________________ |
252 | * 11111111________________ |
253 | * ____________________33__ |
254 | * ___________44___________ |
255 | * __________33333_________ |
256 | * ______________22________ |
257 | * ___________________2222_ |
258 | * _________111111111______ |
259 | * _____________________11_ |
260 | * _________________4______ |
261 | * |
262 | * Sanitized equivalent (no overlap): |
263 | * 1_______________________ |
264 | * _44_____________________ |
265 | * ___1____________________ |
266 | * ____22__________________ |
267 | * ______11________________ |
268 | * _________1______________ |
269 | * __________3_____________ |
270 | * ___________44___________ |
271 | * _____________33_________ |
272 | * _______________2________ |
273 | * ________________1_______ |
274 | * _________________4______ |
275 | * ___________________2____ |
276 | * ____________________33__ |
277 | * ______________________4_ |
278 | */ |
/*
 * One boundary ("change point") of an E820 entry, as used by
 * e820__update_table(): every non-empty entry contributes one change point
 * for its start address and one for its end address.
 */
struct change_member {
	struct e820_entry	*entry;	/* The original entry this boundary belongs to */
	unsigned long long	addr;	/* Address of this change point */
};
285 | |
286 | static struct change_member change_point_list[2*E820_MAX_ENTRIES] __initdata; |
287 | static struct change_member *change_point[2*E820_MAX_ENTRIES] __initdata; |
288 | static struct e820_entry *overlap_list[E820_MAX_ENTRIES] __initdata; |
289 | static struct e820_entry new_entries[E820_MAX_ENTRIES] __initdata; |
290 | |
291 | static int __init cpcompare(const void *a, const void *b) |
292 | { |
293 | struct change_member * const *app = a, * const *bpp = b; |
294 | const struct change_member *ap = *app, *bp = *bpp; |
295 | |
296 | /* |
297 | * Inputs are pointers to two elements of change_point[]. If their |
298 | * addresses are not equal, their difference dominates. If the addresses |
299 | * are equal, then consider one that represents the end of its region |
300 | * to be greater than one that does not. |
301 | */ |
302 | if (ap->addr != bp->addr) |
303 | return ap->addr > bp->addr ? 1 : -1; |
304 | |
305 | return (ap->addr != ap->entry->addr) - (bp->addr != bp->entry->addr); |
306 | } |
307 | |
308 | static bool e820_nomerge(enum e820_type type) |
309 | { |
310 | /* |
311 | * These types may indicate distinct platform ranges aligned to |
312 | * numa node, protection domain, performance domain, or other |
313 | * boundaries. Do not merge them. |
314 | */ |
315 | if (type == E820_TYPE_PRAM) |
316 | return true; |
317 | if (type == E820_TYPE_SOFT_RESERVED) |
318 | return true; |
319 | return false; |
320 | } |
321 | |
322 | int __init e820__update_table(struct e820_table *table) |
323 | { |
324 | struct e820_entry *entries = table->entries; |
325 | u32 max_nr_entries = ARRAY_SIZE(table->entries); |
326 | enum e820_type current_type, last_type; |
327 | unsigned long long last_addr; |
328 | u32 new_nr_entries, overlap_entries; |
329 | u32 i, chg_idx, chg_nr; |
330 | |
331 | /* If there's only one memory region, don't bother: */ |
332 | if (table->nr_entries < 2) |
333 | return -1; |
334 | |
335 | BUG_ON(table->nr_entries > max_nr_entries); |
336 | |
337 | /* Bail out if we find any unreasonable addresses in the map: */ |
338 | for (i = 0; i < table->nr_entries; i++) { |
339 | if (entries[i].addr + entries[i].size < entries[i].addr) |
340 | return -1; |
341 | } |
342 | |
343 | /* Create pointers for initial change-point information (for sorting): */ |
344 | for (i = 0; i < 2 * table->nr_entries; i++) |
345 | change_point[i] = &change_point_list[i]; |
346 | |
347 | /* |
348 | * Record all known change-points (starting and ending addresses), |
349 | * omitting empty memory regions: |
350 | */ |
351 | chg_idx = 0; |
352 | for (i = 0; i < table->nr_entries; i++) { |
353 | if (entries[i].size != 0) { |
354 | change_point[chg_idx]->addr = entries[i].addr; |
355 | change_point[chg_idx++]->entry = &entries[i]; |
356 | change_point[chg_idx]->addr = entries[i].addr + entries[i].size; |
357 | change_point[chg_idx++]->entry = &entries[i]; |
358 | } |
359 | } |
360 | chg_nr = chg_idx; |
361 | |
362 | /* Sort change-point list by memory addresses (low -> high): */ |
363 | sort(base: change_point, num: chg_nr, size: sizeof(*change_point), cmp_func: cpcompare, NULL); |
364 | |
365 | /* Create a new memory map, removing overlaps: */ |
366 | overlap_entries = 0; /* Number of entries in the overlap table */ |
367 | new_nr_entries = 0; /* Index for creating new map entries */ |
368 | last_type = 0; /* Start with undefined memory type */ |
369 | last_addr = 0; /* Start with 0 as last starting address */ |
370 | |
371 | /* Loop through change-points, determining effect on the new map: */ |
372 | for (chg_idx = 0; chg_idx < chg_nr; chg_idx++) { |
373 | /* Keep track of all overlapping entries */ |
374 | if (change_point[chg_idx]->addr == change_point[chg_idx]->entry->addr) { |
375 | /* Add map entry to overlap list (> 1 entry implies an overlap) */ |
376 | overlap_list[overlap_entries++] = change_point[chg_idx]->entry; |
377 | } else { |
378 | /* Remove entry from list (order independent, so swap with last): */ |
379 | for (i = 0; i < overlap_entries; i++) { |
380 | if (overlap_list[i] == change_point[chg_idx]->entry) |
381 | overlap_list[i] = overlap_list[overlap_entries-1]; |
382 | } |
383 | overlap_entries--; |
384 | } |
385 | /* |
386 | * If there are overlapping entries, decide which |
387 | * "type" to use (larger value takes precedence -- |
388 | * 1=usable, 2,3,4,4+=unusable) |
389 | */ |
390 | current_type = 0; |
391 | for (i = 0; i < overlap_entries; i++) { |
392 | if (overlap_list[i]->type > current_type) |
393 | current_type = overlap_list[i]->type; |
394 | } |
395 | |
396 | /* Continue building up new map based on this information: */ |
397 | if (current_type != last_type || e820_nomerge(type: current_type)) { |
398 | if (last_type) { |
399 | new_entries[new_nr_entries].size = change_point[chg_idx]->addr - last_addr; |
400 | /* Move forward only if the new size was non-zero: */ |
401 | if (new_entries[new_nr_entries].size != 0) |
402 | /* No more space left for new entries? */ |
403 | if (++new_nr_entries >= max_nr_entries) |
404 | break; |
405 | } |
406 | if (current_type) { |
407 | new_entries[new_nr_entries].addr = change_point[chg_idx]->addr; |
408 | new_entries[new_nr_entries].type = current_type; |
409 | last_addr = change_point[chg_idx]->addr; |
410 | } |
411 | last_type = current_type; |
412 | } |
413 | } |
414 | |
415 | /* Copy the new entries into the original location: */ |
416 | memcpy(entries, new_entries, new_nr_entries*sizeof(*entries)); |
417 | table->nr_entries = new_nr_entries; |
418 | |
419 | return 0; |
420 | } |
421 | |
422 | static int __init __append_e820_table(struct boot_e820_entry *entries, u32 nr_entries) |
423 | { |
424 | struct boot_e820_entry *entry = entries; |
425 | |
426 | while (nr_entries) { |
427 | u64 start = entry->addr; |
428 | u64 size = entry->size; |
429 | u64 end = start + size - 1; |
430 | u32 type = entry->type; |
431 | |
432 | /* Ignore the entry on 64-bit overflow: */ |
433 | if (start > end && likely(size)) |
434 | return -1; |
435 | |
436 | e820__range_add(start, size, type); |
437 | |
438 | entry++; |
439 | nr_entries--; |
440 | } |
441 | return 0; |
442 | } |
443 | |
444 | /* |
445 | * Copy the BIOS E820 map into a safe place. |
446 | * |
447 | * Sanity-check it while we're at it.. |
448 | * |
449 | * If we're lucky and live on a modern system, the setup code |
450 | * will have given us a memory map that we can use to properly |
451 | * set up memory. If we aren't, we'll fake a memory map. |
452 | */ |
453 | static int __init append_e820_table(struct boot_e820_entry *entries, u32 nr_entries) |
454 | { |
455 | /* Only one memory region (or negative)? Ignore it */ |
456 | if (nr_entries < 2) |
457 | return -1; |
458 | |
459 | return __append_e820_table(entries, nr_entries); |
460 | } |
461 | |
462 | static u64 __init |
463 | __e820__range_update(struct e820_table *table, u64 start, u64 size, enum e820_type old_type, enum e820_type new_type) |
464 | { |
465 | u64 end; |
466 | unsigned int i; |
467 | u64 real_updated_size = 0; |
468 | |
469 | BUG_ON(old_type == new_type); |
470 | |
471 | if (size > (ULLONG_MAX - start)) |
472 | size = ULLONG_MAX - start; |
473 | |
474 | end = start + size; |
475 | printk(KERN_DEBUG "e820: update [mem %#010Lx-%#010Lx] " , start, end - 1); |
476 | e820_print_type(type: old_type); |
477 | pr_cont(" ==> " ); |
478 | e820_print_type(type: new_type); |
479 | pr_cont("\n" ); |
480 | |
481 | for (i = 0; i < table->nr_entries; i++) { |
482 | struct e820_entry *entry = &table->entries[i]; |
483 | u64 final_start, final_end; |
484 | u64 entry_end; |
485 | |
486 | if (entry->type != old_type) |
487 | continue; |
488 | |
489 | entry_end = entry->addr + entry->size; |
490 | |
491 | /* Completely covered by new range? */ |
492 | if (entry->addr >= start && entry_end <= end) { |
493 | entry->type = new_type; |
494 | real_updated_size += entry->size; |
495 | continue; |
496 | } |
497 | |
498 | /* New range is completely covered? */ |
499 | if (entry->addr < start && entry_end > end) { |
500 | __e820__range_add(table, start, size, type: new_type); |
501 | __e820__range_add(table, start: end, size: entry_end - end, type: entry->type); |
502 | entry->size = start - entry->addr; |
503 | real_updated_size += size; |
504 | continue; |
505 | } |
506 | |
507 | /* Partially covered: */ |
508 | final_start = max(start, entry->addr); |
509 | final_end = min(end, entry_end); |
510 | if (final_start >= final_end) |
511 | continue; |
512 | |
513 | __e820__range_add(table, start: final_start, size: final_end - final_start, type: new_type); |
514 | |
515 | real_updated_size += final_end - final_start; |
516 | |
517 | /* |
518 | * Left range could be head or tail, so need to update |
519 | * its size first: |
520 | */ |
521 | entry->size -= final_end - final_start; |
522 | if (entry->addr < final_start) |
523 | continue; |
524 | |
525 | entry->addr = final_end; |
526 | } |
527 | return real_updated_size; |
528 | } |
529 | |
530 | u64 __init e820__range_update(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type) |
531 | { |
532 | return __e820__range_update(table: e820_table, start, size, old_type, new_type); |
533 | } |
534 | |
535 | static u64 __init e820__range_update_kexec(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type) |
536 | { |
537 | return __e820__range_update(table: e820_table_kexec, start, size, old_type, new_type); |
538 | } |
539 | |
540 | /* Remove a range of memory from the E820 table: */ |
541 | u64 __init e820__range_remove(u64 start, u64 size, enum e820_type old_type, bool check_type) |
542 | { |
543 | int i; |
544 | u64 end; |
545 | u64 real_removed_size = 0; |
546 | |
547 | if (size > (ULLONG_MAX - start)) |
548 | size = ULLONG_MAX - start; |
549 | |
550 | end = start + size; |
551 | printk(KERN_DEBUG "e820: remove [mem %#010Lx-%#010Lx] " , start, end - 1); |
552 | if (check_type) |
553 | e820_print_type(type: old_type); |
554 | pr_cont("\n" ); |
555 | |
556 | for (i = 0; i < e820_table->nr_entries; i++) { |
557 | struct e820_entry *entry = &e820_table->entries[i]; |
558 | u64 final_start, final_end; |
559 | u64 entry_end; |
560 | |
561 | if (check_type && entry->type != old_type) |
562 | continue; |
563 | |
564 | entry_end = entry->addr + entry->size; |
565 | |
566 | /* Completely covered? */ |
567 | if (entry->addr >= start && entry_end <= end) { |
568 | real_removed_size += entry->size; |
569 | memset(entry, 0, sizeof(*entry)); |
570 | continue; |
571 | } |
572 | |
573 | /* Is the new range completely covered? */ |
574 | if (entry->addr < start && entry_end > end) { |
575 | e820__range_add(start: end, size: entry_end - end, type: entry->type); |
576 | entry->size = start - entry->addr; |
577 | real_removed_size += size; |
578 | continue; |
579 | } |
580 | |
581 | /* Partially covered: */ |
582 | final_start = max(start, entry->addr); |
583 | final_end = min(end, entry_end); |
584 | if (final_start >= final_end) |
585 | continue; |
586 | |
587 | real_removed_size += final_end - final_start; |
588 | |
589 | /* |
590 | * Left range could be head or tail, so need to update |
591 | * the size first: |
592 | */ |
593 | entry->size -= final_end - final_start; |
594 | if (entry->addr < final_start) |
595 | continue; |
596 | |
597 | entry->addr = final_end; |
598 | } |
599 | return real_removed_size; |
600 | } |
601 | |
602 | void __init e820__update_table_print(void) |
603 | { |
604 | if (e820__update_table(table: e820_table)) |
605 | return; |
606 | |
607 | pr_info("modified physical RAM map:\n" ); |
608 | e820__print_table(who: "modified" ); |
609 | } |
610 | |
611 | static void __init e820__update_table_kexec(void) |
612 | { |
613 | e820__update_table(table: e820_table_kexec); |
614 | } |
615 | |
616 | #define MAX_GAP_END 0x100000000ull |
617 | |
618 | /* |
619 | * Search for a gap in the E820 memory space from 0 to MAX_GAP_END (4GB). |
620 | */ |
621 | static int __init e820_search_gap(unsigned long *gapstart, unsigned long *gapsize) |
622 | { |
623 | unsigned long long last = MAX_GAP_END; |
624 | int i = e820_table->nr_entries; |
625 | int found = 0; |
626 | |
627 | while (--i >= 0) { |
628 | unsigned long long start = e820_table->entries[i].addr; |
629 | unsigned long long end = start + e820_table->entries[i].size; |
630 | |
631 | /* |
632 | * Since "last" is at most 4GB, we know we'll |
633 | * fit in 32 bits if this condition is true: |
634 | */ |
635 | if (last > end) { |
636 | unsigned long gap = last - end; |
637 | |
638 | if (gap >= *gapsize) { |
639 | *gapsize = gap; |
640 | *gapstart = end; |
641 | found = 1; |
642 | } |
643 | } |
644 | if (start < last) |
645 | last = start; |
646 | } |
647 | return found; |
648 | } |
649 | |
650 | /* |
651 | * Search for the biggest gap in the low 32 bits of the E820 |
652 | * memory space. We pass this space to the PCI subsystem, so |
653 | * that it can assign MMIO resources for hotplug or |
654 | * unconfigured devices in. |
655 | * |
656 | * Hopefully the BIOS let enough space left. |
657 | */ |
658 | __init void e820__setup_pci_gap(void) |
659 | { |
660 | unsigned long gapstart, gapsize; |
661 | int found; |
662 | |
663 | gapsize = 0x400000; |
664 | found = e820_search_gap(gapstart: &gapstart, gapsize: &gapsize); |
665 | |
666 | if (!found) { |
667 | #ifdef CONFIG_X86_64 |
668 | gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024; |
669 | pr_err("Cannot find an available gap in the 32-bit address range\n" ); |
670 | pr_err("PCI devices with unassigned 32-bit BARs may not work!\n" ); |
671 | #else |
672 | gapstart = 0x10000000; |
673 | #endif |
674 | } |
675 | |
676 | /* |
677 | * e820__reserve_resources_late() protects stolen RAM already: |
678 | */ |
679 | pci_mem_start = gapstart; |
680 | |
681 | pr_info("[mem %#010lx-%#010lx] available for PCI devices\n" , |
682 | gapstart, gapstart + gapsize - 1); |
683 | } |
684 | |
685 | /* |
686 | * Called late during init, in free_initmem(). |
687 | * |
688 | * Initial e820_table and e820_table_kexec are largish __initdata arrays. |
689 | * |
690 | * Copy them to a (usually much smaller) dynamically allocated area that is |
691 | * sized precisely after the number of e820 entries. |
692 | * |
693 | * This is done after we've performed all the fixes and tweaks to the tables. |
694 | * All functions which modify them are __init functions, which won't exist |
695 | * after free_initmem(). |
696 | */ |
697 | __init void e820__reallocate_tables(void) |
698 | { |
699 | struct e820_table *n; |
700 | int size; |
701 | |
702 | size = offsetof(struct e820_table, entries) + sizeof(struct e820_entry)*e820_table->nr_entries; |
703 | n = kmemdup(p: e820_table, size, GFP_KERNEL); |
704 | BUG_ON(!n); |
705 | e820_table = n; |
706 | |
707 | size = offsetof(struct e820_table, entries) + sizeof(struct e820_entry)*e820_table_kexec->nr_entries; |
708 | n = kmemdup(p: e820_table_kexec, size, GFP_KERNEL); |
709 | BUG_ON(!n); |
710 | e820_table_kexec = n; |
711 | |
712 | size = offsetof(struct e820_table, entries) + sizeof(struct e820_entry)*e820_table_firmware->nr_entries; |
713 | n = kmemdup(p: e820_table_firmware, size, GFP_KERNEL); |
714 | BUG_ON(!n); |
715 | e820_table_firmware = n; |
716 | } |
717 | |
718 | /* |
719 | * Because of the small fixed size of struct boot_params, only the first |
720 | * 128 E820 memory entries are passed to the kernel via boot_params.e820_table, |
721 | * the remaining (if any) entries are passed via the SETUP_E820_EXT node of |
722 | * struct setup_data, which is parsed here. |
723 | */ |
724 | void __init e820__memory_setup_extended(u64 phys_addr, u32 data_len) |
725 | { |
726 | int entries; |
727 | struct boot_e820_entry *extmap; |
728 | struct setup_data *sdata; |
729 | |
730 | sdata = early_memremap(phys_addr, size: data_len); |
731 | entries = sdata->len / sizeof(*extmap); |
732 | extmap = (struct boot_e820_entry *)(sdata->data); |
733 | |
734 | __append_e820_table(entries: extmap, nr_entries: entries); |
735 | e820__update_table(table: e820_table); |
736 | |
737 | memcpy(e820_table_kexec, e820_table, sizeof(*e820_table_kexec)); |
738 | memcpy(e820_table_firmware, e820_table, sizeof(*e820_table_firmware)); |
739 | |
740 | early_memunmap(addr: sdata, size: data_len); |
741 | pr_info("extended physical RAM map:\n" ); |
742 | e820__print_table(who: "extended" ); |
743 | } |
744 | |
745 | /* |
746 | * Find the ranges of physical addresses that do not correspond to |
747 | * E820 RAM areas and register the corresponding pages as 'nosave' for |
748 | * hibernation (32-bit) or software suspend and suspend to RAM (64-bit). |
749 | * |
750 | * This function requires the E820 map to be sorted and without any |
751 | * overlapping entries. |
752 | */ |
753 | void __init e820__register_nosave_regions(unsigned long limit_pfn) |
754 | { |
755 | int i; |
756 | unsigned long pfn = 0; |
757 | |
758 | for (i = 0; i < e820_table->nr_entries; i++) { |
759 | struct e820_entry *entry = &e820_table->entries[i]; |
760 | |
761 | if (pfn < PFN_UP(entry->addr)) |
762 | register_nosave_region(b: pfn, PFN_UP(entry->addr)); |
763 | |
764 | pfn = PFN_DOWN(entry->addr + entry->size); |
765 | |
766 | if (entry->type != E820_TYPE_RAM && entry->type != E820_TYPE_RESERVED_KERN) |
767 | register_nosave_region(PFN_UP(entry->addr), e: pfn); |
768 | |
769 | if (pfn >= limit_pfn) |
770 | break; |
771 | } |
772 | } |
773 | |
#ifdef CONFIG_ACPI
/*
 * Register ACPI NVS memory regions, so that we can save/restore them during
 * hibernation and the subsequent resume.
 *
 * Every E820_TYPE_NVS entry of the main E820 table is handed to
 * acpi_nvs_register(), whose return value is not checked. Runs as a
 * core_initcall and always returns 0.
 */
static int __init e820__register_nvs_regions(void)
{
	int idx;

	for (idx = 0; idx < e820_table->nr_entries; idx++) {
		struct e820_entry *cur = &e820_table->entries[idx];

		if (cur->type != E820_TYPE_NVS)
			continue;

		acpi_nvs_register(cur->addr, cur->size);
	}

	return 0;
}
core_initcall(e820__register_nvs_regions);
#endif
794 | |
795 | /* |
796 | * Allocate the requested number of bytes with the requested alignment |
797 | * and return (the physical address) to the caller. Also register this |
798 | * range in the 'kexec' E820 table as a reserved range. |
799 | * |
800 | * This allows kexec to fake a new mptable, as if it came from the real |
801 | * system. |
802 | */ |
803 | u64 __init e820__memblock_alloc_reserved(u64 size, u64 align) |
804 | { |
805 | u64 addr; |
806 | |
807 | addr = memblock_phys_alloc(size, align); |
808 | if (addr) { |
809 | e820__range_update_kexec(start: addr, size, old_type: E820_TYPE_RAM, new_type: E820_TYPE_RESERVED); |
810 | pr_info("update e820_table_kexec for e820__memblock_alloc_reserved()\n" ); |
811 | e820__update_table_kexec(); |
812 | } |
813 | |
814 | return addr; |
815 | } |
816 | |
/*
 * Upper bound for page frame numbers on this configuration: 36-bit
 * physical addresses with PAE, 32-bit without it on 32-bit kernels, and
 * MAXMEM on 64-bit kernels.
 *
 * Every expansion is fully parenthesized so that the macro can be used
 * inside larger expressions without operator-precedence surprises.
 */
#ifdef CONFIG_X86_32
# ifdef CONFIG_X86_PAE
#  define MAX_ARCH_PFN		(1ULL<<(36-PAGE_SHIFT))
# else
#  define MAX_ARCH_PFN		(1ULL<<(32-PAGE_SHIFT))
# endif
#else /* CONFIG_X86_32 */
# define MAX_ARCH_PFN		(MAXMEM >> PAGE_SHIFT)
#endif
826 | |
827 | /* |
828 | * Find the highest page frame number we have available |
829 | */ |
830 | static unsigned long __init e820_end_pfn(unsigned long limit_pfn, enum e820_type type) |
831 | { |
832 | int i; |
833 | unsigned long last_pfn = 0; |
834 | unsigned long max_arch_pfn = MAX_ARCH_PFN; |
835 | |
836 | for (i = 0; i < e820_table->nr_entries; i++) { |
837 | struct e820_entry *entry = &e820_table->entries[i]; |
838 | unsigned long start_pfn; |
839 | unsigned long end_pfn; |
840 | |
841 | if (entry->type != type) |
842 | continue; |
843 | |
844 | start_pfn = entry->addr >> PAGE_SHIFT; |
845 | end_pfn = (entry->addr + entry->size) >> PAGE_SHIFT; |
846 | |
847 | if (start_pfn >= limit_pfn) |
848 | continue; |
849 | if (end_pfn > limit_pfn) { |
850 | last_pfn = limit_pfn; |
851 | break; |
852 | } |
853 | if (end_pfn > last_pfn) |
854 | last_pfn = end_pfn; |
855 | } |
856 | |
857 | if (last_pfn > max_arch_pfn) |
858 | last_pfn = max_arch_pfn; |
859 | |
860 | pr_info("last_pfn = %#lx max_arch_pfn = %#lx\n" , |
861 | last_pfn, max_arch_pfn); |
862 | return last_pfn; |
863 | } |
864 | |
865 | unsigned long __init e820__end_of_ram_pfn(void) |
866 | { |
867 | return e820_end_pfn(MAX_ARCH_PFN, type: E820_TYPE_RAM); |
868 | } |
869 | |
870 | unsigned long __init e820__end_of_low_ram_pfn(void) |
871 | { |
872 | return e820_end_pfn(limit_pfn: 1UL << (32 - PAGE_SHIFT), type: E820_TYPE_RAM); |
873 | } |
874 | |
875 | static void __init early_panic(char *msg) |
876 | { |
877 | early_printk(fmt: msg); |
878 | panic(fmt: msg); |
879 | } |
880 | |
881 | static int userdef __initdata; |
882 | |
883 | /* The "mem=nopentium" boot option disables 4MB page tables on 32-bit kernels: */ |
884 | static int __init parse_memopt(char *p) |
885 | { |
886 | u64 mem_size; |
887 | |
888 | if (!p) |
889 | return -EINVAL; |
890 | |
891 | if (!strcmp(p, "nopentium" )) { |
892 | #ifdef CONFIG_X86_32 |
893 | setup_clear_cpu_cap(X86_FEATURE_PSE); |
894 | return 0; |
895 | #else |
896 | pr_warn("mem=nopentium ignored! (only supported on x86_32)\n" ); |
897 | return -EINVAL; |
898 | #endif |
899 | } |
900 | |
901 | userdef = 1; |
902 | mem_size = memparse(ptr: p, retptr: &p); |
903 | |
904 | /* Don't remove all memory when getting "mem={invalid}" parameter: */ |
905 | if (mem_size == 0) |
906 | return -EINVAL; |
907 | |
908 | e820__range_remove(start: mem_size, ULLONG_MAX - mem_size, old_type: E820_TYPE_RAM, check_type: 1); |
909 | |
910 | #ifdef CONFIG_MEMORY_HOTPLUG |
911 | max_mem_size = mem_size; |
912 | #endif |
913 | |
914 | return 0; |
915 | } |
916 | early_param("mem" , parse_memopt); |
917 | |
918 | static int __init parse_memmap_one(char *p) |
919 | { |
920 | char *oldp; |
921 | u64 start_at, mem_size; |
922 | |
923 | if (!p) |
924 | return -EINVAL; |
925 | |
926 | if (!strncmp(p, "exactmap" , 8)) { |
927 | e820_table->nr_entries = 0; |
928 | userdef = 1; |
929 | return 0; |
930 | } |
931 | |
932 | oldp = p; |
933 | mem_size = memparse(ptr: p, retptr: &p); |
934 | if (p == oldp) |
935 | return -EINVAL; |
936 | |
937 | userdef = 1; |
938 | if (*p == '@') { |
939 | start_at = memparse(ptr: p+1, retptr: &p); |
940 | e820__range_add(start: start_at, size: mem_size, type: E820_TYPE_RAM); |
941 | } else if (*p == '#') { |
942 | start_at = memparse(ptr: p+1, retptr: &p); |
943 | e820__range_add(start: start_at, size: mem_size, type: E820_TYPE_ACPI); |
944 | } else if (*p == '$') { |
945 | start_at = memparse(ptr: p+1, retptr: &p); |
946 | e820__range_add(start: start_at, size: mem_size, type: E820_TYPE_RESERVED); |
947 | } else if (*p == '!') { |
948 | start_at = memparse(ptr: p+1, retptr: &p); |
949 | e820__range_add(start: start_at, size: mem_size, type: E820_TYPE_PRAM); |
950 | } else if (*p == '%') { |
951 | enum e820_type from = 0, to = 0; |
952 | |
953 | start_at = memparse(ptr: p + 1, retptr: &p); |
954 | if (*p == '-') |
955 | from = simple_strtoull(p + 1, &p, 0); |
956 | if (*p == '+') |
957 | to = simple_strtoull(p + 1, &p, 0); |
958 | if (*p != '\0') |
959 | return -EINVAL; |
960 | if (from && to) |
961 | e820__range_update(start: start_at, size: mem_size, old_type: from, new_type: to); |
962 | else if (to) |
963 | e820__range_add(start: start_at, size: mem_size, type: to); |
964 | else if (from) |
965 | e820__range_remove(start: start_at, size: mem_size, old_type: from, check_type: 1); |
966 | else |
967 | e820__range_remove(start: start_at, size: mem_size, old_type: 0, check_type: 0); |
968 | } else { |
969 | e820__range_remove(start: mem_size, ULLONG_MAX - mem_size, old_type: E820_TYPE_RAM, check_type: 1); |
970 | } |
971 | |
972 | return *p == '\0' ? 0 : -EINVAL; |
973 | } |
974 | |
975 | static int __init parse_memmap_opt(char *str) |
976 | { |
977 | while (str) { |
978 | char *k = strchr(str, ','); |
979 | |
980 | if (k) |
981 | *k++ = 0; |
982 | |
983 | parse_memmap_one(p: str); |
984 | str = k; |
985 | } |
986 | |
987 | return 0; |
988 | } |
989 | early_param("memmap" , parse_memmap_opt); |
990 | |
991 | /* |
992 | * Reserve all entries from the bootloader's extensible data nodes list, |
993 | * because if present we are going to use it later on to fetch e820 |
994 | * entries from it: |
995 | */ |
996 | void __init e820__reserve_setup_data(void) |
997 | { |
998 | struct setup_indirect *indirect; |
999 | struct setup_data *data; |
1000 | u64 pa_data, pa_next; |
1001 | u32 len; |
1002 | |
1003 | pa_data = boot_params.hdr.setup_data; |
1004 | if (!pa_data) |
1005 | return; |
1006 | |
1007 | while (pa_data) { |
1008 | data = early_memremap(phys_addr: pa_data, size: sizeof(*data)); |
1009 | if (!data) { |
1010 | pr_warn("e820: failed to memremap setup_data entry\n" ); |
1011 | return; |
1012 | } |
1013 | |
1014 | len = sizeof(*data); |
1015 | pa_next = data->next; |
1016 | |
1017 | e820__range_update(start: pa_data, size: sizeof(*data)+data->len, old_type: E820_TYPE_RAM, new_type: E820_TYPE_RESERVED_KERN); |
1018 | |
1019 | if (data->type == SETUP_INDIRECT) { |
1020 | len += data->len; |
1021 | early_memunmap(addr: data, size: sizeof(*data)); |
1022 | data = early_memremap(phys_addr: pa_data, size: len); |
1023 | if (!data) { |
1024 | pr_warn("e820: failed to memremap indirect setup_data\n" ); |
1025 | return; |
1026 | } |
1027 | |
1028 | indirect = (struct setup_indirect *)data->data; |
1029 | |
1030 | if (indirect->type != SETUP_INDIRECT) |
1031 | e820__range_update(start: indirect->addr, size: indirect->len, |
1032 | old_type: E820_TYPE_RAM, new_type: E820_TYPE_RESERVED_KERN); |
1033 | } |
1034 | |
1035 | pa_data = pa_next; |
1036 | early_memunmap(addr: data, size: len); |
1037 | } |
1038 | |
1039 | e820__update_table(table: e820_table); |
1040 | |
1041 | pr_info("extended physical RAM map:\n" ); |
1042 | e820__print_table(who: "reserve setup_data" ); |
1043 | } |
1044 | |
1045 | /* |
1046 | * Called after parse_early_param(), after early parameters (such as mem=) |
1047 | * have been processed, in which case we already have an E820 table filled in |
1048 | * via the parameter callback function(s), but it's not sorted and printed yet: |
1049 | */ |
1050 | void __init e820__finish_early_params(void) |
1051 | { |
1052 | if (userdef) { |
1053 | if (e820__update_table(table: e820_table) < 0) |
1054 | early_panic(msg: "Invalid user supplied memory map" ); |
1055 | |
1056 | pr_info("user-defined physical RAM map:\n" ); |
1057 | e820__print_table(who: "user" ); |
1058 | } |
1059 | } |
1060 | |
1061 | static const char *__init e820_type_to_string(struct e820_entry *entry) |
1062 | { |
1063 | switch (entry->type) { |
1064 | case E820_TYPE_RESERVED_KERN: /* Fall-through: */ |
1065 | case E820_TYPE_RAM: return "System RAM" ; |
1066 | case E820_TYPE_ACPI: return "ACPI Tables" ; |
1067 | case E820_TYPE_NVS: return "ACPI Non-volatile Storage" ; |
1068 | case E820_TYPE_UNUSABLE: return "Unusable memory" ; |
1069 | case E820_TYPE_PRAM: return "Persistent Memory (legacy)" ; |
1070 | case E820_TYPE_PMEM: return "Persistent Memory" ; |
1071 | case E820_TYPE_RESERVED: return "Reserved" ; |
1072 | case E820_TYPE_SOFT_RESERVED: return "Soft Reserved" ; |
1073 | default: return "Unknown E820 type" ; |
1074 | } |
1075 | } |
1076 | |
1077 | static unsigned long __init e820_type_to_iomem_type(struct e820_entry *entry) |
1078 | { |
1079 | switch (entry->type) { |
1080 | case E820_TYPE_RESERVED_KERN: /* Fall-through: */ |
1081 | case E820_TYPE_RAM: return IORESOURCE_SYSTEM_RAM; |
1082 | case E820_TYPE_ACPI: /* Fall-through: */ |
1083 | case E820_TYPE_NVS: /* Fall-through: */ |
1084 | case E820_TYPE_UNUSABLE: /* Fall-through: */ |
1085 | case E820_TYPE_PRAM: /* Fall-through: */ |
1086 | case E820_TYPE_PMEM: /* Fall-through: */ |
1087 | case E820_TYPE_RESERVED: /* Fall-through: */ |
1088 | case E820_TYPE_SOFT_RESERVED: /* Fall-through: */ |
1089 | default: return IORESOURCE_MEM; |
1090 | } |
1091 | } |
1092 | |
1093 | static unsigned long __init e820_type_to_iores_desc(struct e820_entry *entry) |
1094 | { |
1095 | switch (entry->type) { |
1096 | case E820_TYPE_ACPI: return IORES_DESC_ACPI_TABLES; |
1097 | case E820_TYPE_NVS: return IORES_DESC_ACPI_NV_STORAGE; |
1098 | case E820_TYPE_PMEM: return IORES_DESC_PERSISTENT_MEMORY; |
1099 | case E820_TYPE_PRAM: return IORES_DESC_PERSISTENT_MEMORY_LEGACY; |
1100 | case E820_TYPE_RESERVED: return IORES_DESC_RESERVED; |
1101 | case E820_TYPE_SOFT_RESERVED: return IORES_DESC_SOFT_RESERVED; |
1102 | case E820_TYPE_RESERVED_KERN: /* Fall-through: */ |
1103 | case E820_TYPE_RAM: /* Fall-through: */ |
1104 | case E820_TYPE_UNUSABLE: /* Fall-through: */ |
1105 | default: return IORES_DESC_NONE; |
1106 | } |
1107 | } |
1108 | |
1109 | static bool __init do_mark_busy(enum e820_type type, struct resource *res) |
1110 | { |
1111 | /* this is the legacy bios/dos rom-shadow + mmio region */ |
1112 | if (res->start < (1ULL<<20)) |
1113 | return true; |
1114 | |
1115 | /* |
1116 | * Treat persistent memory and other special memory ranges like |
1117 | * device memory, i.e. reserve it for exclusive use of a driver |
1118 | */ |
1119 | switch (type) { |
1120 | case E820_TYPE_RESERVED: |
1121 | case E820_TYPE_SOFT_RESERVED: |
1122 | case E820_TYPE_PRAM: |
1123 | case E820_TYPE_PMEM: |
1124 | return false; |
1125 | case E820_TYPE_RESERVED_KERN: |
1126 | case E820_TYPE_RAM: |
1127 | case E820_TYPE_ACPI: |
1128 | case E820_TYPE_NVS: |
1129 | case E820_TYPE_UNUSABLE: |
1130 | default: |
1131 | return true; |
1132 | } |
1133 | } |
1134 | |
1135 | /* |
1136 | * Mark E820 reserved areas as busy for the resource manager: |
1137 | */ |
1138 | |
1139 | static struct resource __initdata *e820_res; |
1140 | |
1141 | void __init e820__reserve_resources(void) |
1142 | { |
1143 | int i; |
1144 | struct resource *res; |
1145 | u64 end; |
1146 | |
1147 | res = memblock_alloc(size: sizeof(*res) * e820_table->nr_entries, |
1148 | SMP_CACHE_BYTES); |
1149 | if (!res) |
1150 | panic(fmt: "%s: Failed to allocate %zu bytes\n" , __func__, |
1151 | sizeof(*res) * e820_table->nr_entries); |
1152 | e820_res = res; |
1153 | |
1154 | for (i = 0; i < e820_table->nr_entries; i++) { |
1155 | struct e820_entry *entry = e820_table->entries + i; |
1156 | |
1157 | end = entry->addr + entry->size - 1; |
1158 | if (end != (resource_size_t)end) { |
1159 | res++; |
1160 | continue; |
1161 | } |
1162 | res->start = entry->addr; |
1163 | res->end = end; |
1164 | res->name = e820_type_to_string(entry); |
1165 | res->flags = e820_type_to_iomem_type(entry); |
1166 | res->desc = e820_type_to_iores_desc(entry); |
1167 | |
1168 | /* |
1169 | * Don't register the region that could be conflicted with |
1170 | * PCI device BAR resources and insert them later in |
1171 | * pcibios_resource_survey(): |
1172 | */ |
1173 | if (do_mark_busy(type: entry->type, res)) { |
1174 | res->flags |= IORESOURCE_BUSY; |
1175 | insert_resource(parent: &iomem_resource, new: res); |
1176 | } |
1177 | res++; |
1178 | } |
1179 | |
1180 | /* Expose the bootloader-provided memory layout to the sysfs. */ |
1181 | for (i = 0; i < e820_table_firmware->nr_entries; i++) { |
1182 | struct e820_entry *entry = e820_table_firmware->entries + i; |
1183 | |
1184 | firmware_map_add_early(start: entry->addr, end: entry->addr + entry->size, type: e820_type_to_string(entry)); |
1185 | } |
1186 | } |
1187 | |
1188 | /* |
1189 | * How much should we pad the end of RAM, depending on where it is? |
1190 | */ |
1191 | static unsigned long __init ram_alignment(resource_size_t pos) |
1192 | { |
1193 | unsigned long mb = pos >> 20; |
1194 | |
1195 | /* To 64kB in the first megabyte */ |
1196 | if (!mb) |
1197 | return 64*1024; |
1198 | |
1199 | /* To 1MB in the first 16MB */ |
1200 | if (mb < 16) |
1201 | return 1024*1024; |
1202 | |
1203 | /* To 64MB for anything above that */ |
1204 | return 64*1024*1024; |
1205 | } |
1206 | |
1207 | #define MAX_RESOURCE_SIZE ((resource_size_t)-1) |
1208 | |
1209 | void __init e820__reserve_resources_late(void) |
1210 | { |
1211 | int i; |
1212 | struct resource *res; |
1213 | |
1214 | res = e820_res; |
1215 | for (i = 0; i < e820_table->nr_entries; i++) { |
1216 | if (!res->parent && res->end) |
1217 | insert_resource_expand_to_fit(root: &iomem_resource, new: res); |
1218 | res++; |
1219 | } |
1220 | |
1221 | /* |
1222 | * Try to bump up RAM regions to reasonable boundaries, to |
1223 | * avoid stolen RAM: |
1224 | */ |
1225 | for (i = 0; i < e820_table->nr_entries; i++) { |
1226 | struct e820_entry *entry = &e820_table->entries[i]; |
1227 | u64 start, end; |
1228 | |
1229 | if (entry->type != E820_TYPE_RAM) |
1230 | continue; |
1231 | |
1232 | start = entry->addr + entry->size; |
1233 | end = round_up(start, ram_alignment(start)) - 1; |
1234 | if (end > MAX_RESOURCE_SIZE) |
1235 | end = MAX_RESOURCE_SIZE; |
1236 | if (start >= end) |
1237 | continue; |
1238 | |
1239 | printk(KERN_DEBUG "e820: reserve RAM buffer [mem %#010llx-%#010llx]\n" , start, end); |
1240 | reserve_region_with_split(root: &iomem_resource, start, end, name: "RAM buffer" ); |
1241 | } |
1242 | } |
1243 | |
1244 | /* |
1245 | * Pass the firmware (bootloader) E820 map to the kernel and process it: |
1246 | */ |
1247 | char *__init e820__memory_setup_default(void) |
1248 | { |
1249 | char *who = "BIOS-e820" ; |
1250 | |
1251 | /* |
1252 | * Try to copy the BIOS-supplied E820-map. |
1253 | * |
1254 | * Otherwise fake a memory map; one section from 0k->640k, |
1255 | * the next section from 1mb->appropriate_mem_k |
1256 | */ |
1257 | if (append_e820_table(entries: boot_params.e820_table, nr_entries: boot_params.e820_entries) < 0) { |
1258 | u64 mem_size; |
1259 | |
1260 | /* Compare results from other methods and take the one that gives more RAM: */ |
1261 | if (boot_params.alt_mem_k < boot_params.screen_info.ext_mem_k) { |
1262 | mem_size = boot_params.screen_info.ext_mem_k; |
1263 | who = "BIOS-88" ; |
1264 | } else { |
1265 | mem_size = boot_params.alt_mem_k; |
1266 | who = "BIOS-e801" ; |
1267 | } |
1268 | |
1269 | e820_table->nr_entries = 0; |
1270 | e820__range_add(start: 0, LOWMEMSIZE(), type: E820_TYPE_RAM); |
1271 | e820__range_add(HIGH_MEMORY, size: mem_size << 10, type: E820_TYPE_RAM); |
1272 | } |
1273 | |
1274 | /* We just appended a lot of ranges, sanitize the table: */ |
1275 | e820__update_table(table: e820_table); |
1276 | |
1277 | return who; |
1278 | } |
1279 | |
1280 | /* |
1281 | * Calls e820__memory_setup_default() in essence to pick up the firmware/bootloader |
1282 | * E820 map - with an optional platform quirk available for virtual platforms |
1283 | * to override this method of boot environment processing: |
1284 | */ |
1285 | void __init e820__memory_setup(void) |
1286 | { |
1287 | char *who; |
1288 | |
1289 | /* This is a firmware interface ABI - make sure we don't break it: */ |
1290 | BUILD_BUG_ON(sizeof(struct boot_e820_entry) != 20); |
1291 | |
1292 | who = x86_init.resources.memory_setup(); |
1293 | |
1294 | memcpy(e820_table_kexec, e820_table, sizeof(*e820_table_kexec)); |
1295 | memcpy(e820_table_firmware, e820_table, sizeof(*e820_table_firmware)); |
1296 | |
1297 | pr_info("BIOS-provided physical RAM map:\n" ); |
1298 | e820__print_table(who); |
1299 | } |
1300 | |
1301 | void __init e820__memblock_setup(void) |
1302 | { |
1303 | int i; |
1304 | u64 end; |
1305 | |
1306 | /* |
1307 | * The bootstrap memblock region count maximum is 128 entries |
1308 | * (INIT_MEMBLOCK_REGIONS), but EFI might pass us more E820 entries |
1309 | * than that - so allow memblock resizing. |
1310 | * |
1311 | * This is safe, because this call happens pretty late during x86 setup, |
1312 | * so we know about reserved memory regions already. (This is important |
1313 | * so that memblock resizing does no stomp over reserved areas.) |
1314 | */ |
1315 | memblock_allow_resize(); |
1316 | |
1317 | for (i = 0; i < e820_table->nr_entries; i++) { |
1318 | struct e820_entry *entry = &e820_table->entries[i]; |
1319 | |
1320 | end = entry->addr + entry->size; |
1321 | if (end != (resource_size_t)end) |
1322 | continue; |
1323 | |
1324 | if (entry->type == E820_TYPE_SOFT_RESERVED) |
1325 | memblock_reserve(base: entry->addr, size: entry->size); |
1326 | |
1327 | if (entry->type != E820_TYPE_RAM && entry->type != E820_TYPE_RESERVED_KERN) |
1328 | continue; |
1329 | |
1330 | memblock_add(base: entry->addr, size: entry->size); |
1331 | } |
1332 | |
1333 | /* Throw away partial pages: */ |
1334 | memblock_trim_memory(PAGE_SIZE); |
1335 | |
1336 | memblock_dump_all(); |
1337 | } |
1338 | |