// SPDX-License-Identifier: GPL-2.0
/*
 * Machine specific setup for xen
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */

#include <linux/init.h>
#include <linux/iscsi_ibft.h>
#include <linux/sched.h>
#include <linux/kstrtox.h>
#include <linux/mm.h>
#include <linux/pm.h>
#include <linux/memblock.h>
#include <linux/cpuidle.h>
#include <linux/cpufreq.h>
#include <linux/memory_hotplug.h>

#include <asm/elf.h>
#include <asm/vdso.h>
#include <asm/e820/api.h>
#include <asm/setup.h>
#include <asm/acpi.h>
#include <asm/numa.h>
#include <asm/idtentry.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/xen.h>
#include <xen/page.h>
#include <xen/interface/callback.h>
#include <xen/interface/memory.h>
#include <xen/interface/physdev.h>
#include <xen/features.h>
#include <xen/hvc-console.h>
#include "xen-ops.h"
#include "mmu.h"

#define GB(x) ((uint64_t)(x) * 1024 * 1024 * 1024)

/* Number of pages released from the initial allocation. */
unsigned long xen_released_pages;

/* Memory map would allow PCI passthrough. */
bool xen_pv_pci_possible;

/* E820 map used during setting up memory. */
static struct e820_table xen_e820_table __initdata;

/*
 * Buffer used to remap identity mapped pages. We only need the virtual space.
 * The physical page behind this address is remapped as needed to different
 * buffer pages.
 */
#define REMAP_SIZE (P2M_PER_PAGE - 3)
static struct {
        unsigned long next_area_mfn;
        unsigned long target_pfn;
        unsigned long size;
        unsigned long mfns[REMAP_SIZE];
} xen_remap_buf __initdata __aligned(PAGE_SIZE);
static unsigned long xen_remap_mfn __initdata = INVALID_P2M_ENTRY;
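
/*
 * Note on sizing (explanatory comment only): REMAP_SIZE is chosen so that
 * xen_remap_buf fills exactly one page - three unsigned longs of header
 * (next_area_mfn, target_pfn, size) plus P2M_PER_PAGE - 3 MFN slots.
 * xen_remap_mfn anchors the list of such remap-info pages built during boot.
 */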

static bool xen_512gb_limit __initdata = IS_ENABLED(CONFIG_XEN_512GB);

static void __init xen_parse_512gb(void)
{
        bool val = false;
        char *arg;

        arg = strstr(xen_start_info->cmd_line, "xen_512gb_limit");
        if (!arg)
                return;

        arg = strstr(xen_start_info->cmd_line, "xen_512gb_limit=");
        if (!arg)
                val = true;
        else if (kstrtobool(arg + strlen("xen_512gb_limit="), &val))
                return;

        xen_512gb_limit = val;
}
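
/*
 * For illustration: a bare "xen_512gb_limit" on the guest command line
 * enables the limit, "xen_512gb_limit=0" / "xen_512gb_limit=1" set it
 * explicitly via kstrtobool(), and a value kstrtobool() rejects leaves the
 * compile-time default (CONFIG_XEN_512GB) untouched.
 */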

static void __init xen_del_extra_mem(unsigned long start_pfn,
                                     unsigned long n_pfns)
{
        int i;
        unsigned long start_r, size_r;

        for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
                start_r = xen_extra_mem[i].start_pfn;
                size_r = xen_extra_mem[i].n_pfns;

                /* Start of region. */
                if (start_r == start_pfn) {
                        BUG_ON(n_pfns > size_r);
                        xen_extra_mem[i].start_pfn += n_pfns;
                        xen_extra_mem[i].n_pfns -= n_pfns;
                        break;
                }
                /* End of region. */
                if (start_r + size_r == start_pfn + n_pfns) {
                        BUG_ON(n_pfns > size_r);
                        xen_extra_mem[i].n_pfns -= n_pfns;
                        break;
                }
                /* Mid of region. */
                if (start_pfn > start_r && start_pfn < start_r + size_r) {
                        BUG_ON(start_pfn + n_pfns > start_r + size_r);
                        xen_extra_mem[i].n_pfns = start_pfn - start_r;
                        /* Calling memblock_reserve() again is okay. */
                        xen_add_extra_mem(start_pfn + n_pfns, start_r + size_r -
                                          (start_pfn + n_pfns));
                        break;
                }
        }
        memblock_phys_free(PFN_PHYS(start_pfn), PFN_PHYS(n_pfns));
}
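
/*
 * Example for xen_del_extra_mem() (pfn numbers made up): with an extra-mem
 * region covering pfns [100, 200), deleting [100, 120) trims the start,
 * deleting [180, 200) trims the end, and deleting [140, 160) splits it into
 * [100, 140) plus a new region [160, 200) added via xen_add_extra_mem().
 */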

/*
 * Called during boot before the p2m list can take entries beyond the
 * hypervisor supplied p2m list. Entries in extra mem are to be regarded as
 * invalid.
 */
unsigned long __ref xen_chk_extra_mem(unsigned long pfn)
{
        int i;

        for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
                if (pfn >= xen_extra_mem[i].start_pfn &&
                    pfn < xen_extra_mem[i].start_pfn + xen_extra_mem[i].n_pfns)
                        return INVALID_P2M_ENTRY;
        }

        return IDENTITY_FRAME(pfn);
}

/*
 * Mark all pfns of extra mem as invalid in p2m list.
 */
void __init xen_inv_extra_mem(void)
{
        unsigned long pfn, pfn_s, pfn_e;
        int i;

        for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
                if (!xen_extra_mem[i].n_pfns)
                        continue;
                pfn_s = xen_extra_mem[i].start_pfn;
                pfn_e = pfn_s + xen_extra_mem[i].n_pfns;
                for (pfn = pfn_s; pfn < pfn_e; pfn++)
                        set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
        }
}

/*
 * Finds the next RAM pfn available in the E820 map after min_pfn.
 * This function updates min_pfn with the pfn found and returns
 * the size of that range or zero if not found.
 */
static unsigned long __init xen_find_pfn_range(unsigned long *min_pfn)
{
        const struct e820_entry *entry = xen_e820_table.entries;
        unsigned int i;
        unsigned long done = 0;

        for (i = 0; i < xen_e820_table.nr_entries; i++, entry++) {
                unsigned long s_pfn;
                unsigned long e_pfn;

                if (entry->type != E820_TYPE_RAM)
                        continue;

                e_pfn = PFN_DOWN(entry->addr + entry->size);

                /* We only care about E820 after this */
                if (e_pfn <= *min_pfn)
                        continue;

                s_pfn = PFN_UP(entry->addr);

                /* If min_pfn falls within the E820 entry, we want to start
                 * at the min_pfn PFN.
                 */
                if (s_pfn <= *min_pfn) {
                        done = e_pfn - *min_pfn;
                } else {
                        done = e_pfn - s_pfn;
                        *min_pfn = s_pfn;
                }
                break;
        }

        return done;
}
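
/*
 * Example walk for xen_find_pfn_range() (hypothetical map): with RAM entries
 * covering pfns [0x100, 0x800) and [0x1000, 0x2000), a call with *min_pfn ==
 * 0x800 skips the first entry, sets *min_pfn to 0x1000 and returns 0x1000
 * pages; a subsequent call with *min_pfn == 0x2000 returns 0.
 */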

static int __init xen_free_mfn(unsigned long mfn)
{
        struct xen_memory_reservation reservation = {
                .address_bits = 0,
                .extent_order = 0,
                .domid = DOMID_SELF
        };

        set_xen_guest_handle(reservation.extent_start, &mfn);
        reservation.nr_extents = 1;

        return HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
}

/*
 * This releases a chunk of memory and then does the identity map. It's used
 * as a fallback if the remapping fails.
 */
static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn,
                unsigned long end_pfn, unsigned long nr_pages)
{
        unsigned long pfn, end;
        int ret;

        WARN_ON(start_pfn > end_pfn);

        /* Release pages first. */
        end = min(end_pfn, nr_pages);
        for (pfn = start_pfn; pfn < end; pfn++) {
                unsigned long mfn = pfn_to_mfn(pfn);

                /* Make sure pfn exists to start with */
                if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
                        continue;

                ret = xen_free_mfn(mfn);
                WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret);

                if (ret == 1) {
                        xen_released_pages++;
                        if (!__set_phys_to_machine(pfn, INVALID_P2M_ENTRY))
                                break;
                } else
                        break;
        }

        set_phys_range_identity(start_pfn, end_pfn);
}

/*
 * Helper function to update the p2m and m2p tables and kernel mapping.
 */
static void __init xen_update_mem_tables(unsigned long pfn, unsigned long mfn)
{
        struct mmu_update update = {
                .ptr = ((uint64_t)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
                .val = pfn
        };

        /* Update p2m */
        if (!set_phys_to_machine(pfn, mfn)) {
                WARN(1, "Failed to set p2m mapping for pfn=%ld mfn=%ld\n",
                     pfn, mfn);
                BUG();
        }

        /* Update m2p */
        if (HYPERVISOR_mmu_update(&update, 1, NULL, DOMID_SELF) < 0) {
                WARN(1, "Failed to set m2p mapping for mfn=%ld pfn=%ld\n",
                     mfn, pfn);
                BUG();
        }

        if (HYPERVISOR_update_va_mapping((unsigned long)__va(pfn << PAGE_SHIFT),
                                         mfn_pte(mfn, PAGE_KERNEL), 0)) {
                WARN(1, "Failed to update kernel mapping for mfn=%ld pfn=%ld\n",
                     mfn, pfn);
                BUG();
        }
}

/*
 * This function updates the p2m and m2p tables with an identity map from
 * start_pfn to start_pfn+size and prepares remapping the underlying RAM of the
 * original allocation at remap_pfn. The information needed for remapping is
 * saved in the memory itself to avoid the need for allocating buffers. The
 * complete remap information is contained in a list of MFNs each containing
 * up to REMAP_SIZE MFNs and the start target PFN for doing the remap.
 * This enables us to preserve the original mfn sequence while doing the
 * remapping at a time when the memory management is capable of allocating
 * virtual and physical memory in arbitrary amounts, see 'xen_remap_memory' and
 * its callers.
 */
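/*
 * Rough sketch of the resulting chain (illustrative only):
 *
 *   xen_remap_mfn -> [ next_area_mfn | target_pfn | size | mfn[0..size-1] ]
 *                          |
 *                          v
 *                    [ next_area_mfn | ... ]  ->  ...  -> INVALID_P2M_ENTRY
 *
 * Each node lives in the machine page that currently backs the first pfn of
 * the identity-mapped chunk it describes, so no extra memory needs to be
 * allocated for the list.
 */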
static void __init xen_do_set_identity_and_remap_chunk(
        unsigned long start_pfn, unsigned long size, unsigned long remap_pfn)
{
        unsigned long buf = (unsigned long)&xen_remap_buf;
        unsigned long mfn_save, mfn;
        unsigned long ident_pfn_iter, remap_pfn_iter;
        unsigned long ident_end_pfn = start_pfn + size;
        unsigned long left = size;
        unsigned int i, chunk;

        WARN_ON(size == 0);

        mfn_save = virt_to_mfn((void *)buf);

        for (ident_pfn_iter = start_pfn, remap_pfn_iter = remap_pfn;
             ident_pfn_iter < ident_end_pfn;
             ident_pfn_iter += REMAP_SIZE, remap_pfn_iter += REMAP_SIZE) {
                chunk = (left < REMAP_SIZE) ? left : REMAP_SIZE;

                /* Map first pfn to xen_remap_buf */
                mfn = pfn_to_mfn(ident_pfn_iter);
                set_pte_mfn(buf, mfn, PAGE_KERNEL);

                /* Save mapping information in page */
                xen_remap_buf.next_area_mfn = xen_remap_mfn;
                xen_remap_buf.target_pfn = remap_pfn_iter;
                xen_remap_buf.size = chunk;
                for (i = 0; i < chunk; i++)
                        xen_remap_buf.mfns[i] = pfn_to_mfn(ident_pfn_iter + i);

                /* Put remap buf into list. */
                xen_remap_mfn = mfn;

                /* Set identity map */
                set_phys_range_identity(ident_pfn_iter, ident_pfn_iter + chunk);

                left -= chunk;
        }

        /* Restore old xen_remap_buf mapping */
        set_pte_mfn(buf, mfn_save, PAGE_KERNEL);
}

/*
 * This function takes a contiguous pfn range that needs to be identity mapped
 * and:
 *
 * 1) Finds a new range of pfns to use to remap based on E820 and remap_pfn.
 * 2) Calls the do_ function to actually do the mapping/remapping work.
 *
 * The goal is to not allocate additional memory but to remap the existing
 * pages. In the case of an error the underlying memory is simply released back
 * to Xen and not remapped.
 */
static unsigned long __init xen_set_identity_and_remap_chunk(
        unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages,
        unsigned long remap_pfn)
{
        unsigned long pfn;
        unsigned long i = 0;
        unsigned long n = end_pfn - start_pfn;

        if (remap_pfn == 0)
                remap_pfn = nr_pages;

        while (i < n) {
                unsigned long cur_pfn = start_pfn + i;
                unsigned long left = n - i;
                unsigned long size = left;
                unsigned long remap_range_size;

                /* Do not remap pages beyond the current allocation */
                if (cur_pfn >= nr_pages) {
                        /* Identity map remaining pages */
                        set_phys_range_identity(cur_pfn, cur_pfn + size);
                        break;
                }
                if (cur_pfn + size > nr_pages)
                        size = nr_pages - cur_pfn;

                remap_range_size = xen_find_pfn_range(&remap_pfn);
                if (!remap_range_size) {
                        pr_warn("Unable to find available pfn range, not remapping identity pages\n");
                        xen_set_identity_and_release_chunk(cur_pfn,
                                        cur_pfn + left, nr_pages);
                        break;
                }
                /* Adjust size to fit in current e820 RAM region */
                if (size > remap_range_size)
                        size = remap_range_size;

                xen_do_set_identity_and_remap_chunk(cur_pfn, size, remap_pfn);

                /* Update variables to reflect new mappings. */
                i += size;
                remap_pfn += size;
        }

        /*
         * If the PFNs are currently mapped, their VA mappings need to be
         * zapped.
         */
        for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++)
                (void)HYPERVISOR_update_va_mapping(
                        (unsigned long)__va(pfn << PAGE_SHIFT),
                        native_make_pte(0), 0);

        return remap_pfn;
}

static unsigned long __init xen_count_remap_pages(
        unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages,
        unsigned long remap_pages)
{
        if (start_pfn >= nr_pages)
                return remap_pages;

        return remap_pages + min(end_pfn, nr_pages) - start_pfn;
}
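
/*
 * Example for xen_count_remap_pages() (made-up numbers): a non-RAM hole
 * covering pfns [0x9f, 0x100) with nr_pages == 0x1000 adds
 * 0x100 - 0x9f = 0x61 pages to the running total; a hole starting at or
 * above nr_pages adds nothing, since those pages are not backed by the
 * initial allocation.
 */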

static unsigned long __init xen_foreach_remap_area(unsigned long nr_pages,
        unsigned long (*func)(unsigned long start_pfn, unsigned long end_pfn,
                              unsigned long nr_pages, unsigned long last_val))
{
        phys_addr_t start = 0;
        unsigned long ret_val = 0;
        const struct e820_entry *entry = xen_e820_table.entries;
        int i;

        /*
         * Combine non-RAM regions and gaps until a RAM region (or the
         * end of the map) is reached, then call the provided function
         * to perform its duty on the non-RAM region.
         *
         * The combined non-RAM regions are rounded to a whole number
         * of pages so any partial pages are accessible via the 1:1
         * mapping. This is needed for some BIOSes that put (for
         * example) the DMI tables in a reserved region that begins on
         * a non-page boundary.
         */
        for (i = 0; i < xen_e820_table.nr_entries; i++, entry++) {
                phys_addr_t end = entry->addr + entry->size;
                if (entry->type == E820_TYPE_RAM || i == xen_e820_table.nr_entries - 1) {
                        unsigned long start_pfn = PFN_DOWN(start);
                        unsigned long end_pfn = PFN_UP(end);

                        if (entry->type == E820_TYPE_RAM)
                                end_pfn = PFN_UP(entry->addr);

                        if (start_pfn < end_pfn)
                                ret_val = func(start_pfn, end_pfn, nr_pages,
                                               ret_val);
                        start = end;
                }
        }

        return ret_val;
}
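
/*
 * Example walk (hypothetical map): RAM [0, 0x9fc00), reserved [0x9fc00,
 * 0x100000), RAM [0x100000, ...). The reserved entry is accumulated until the
 * following RAM entry is reached, at which point func() is called once for
 * pfns [0x9f, 0x100), i.e. the non-RAM span rounded out to whole pages so the
 * partial page at 0x9f stays reachable via the 1:1 mapping.
 */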

/*
 * Remap the memory prepared in xen_do_set_identity_and_remap_chunk().
 * The remap information (which mfn is remapped to which pfn) is contained in
 * the memory to be remapped itself, in a linked list anchored at
 * xen_remap_mfn. This scheme allows the chunks to be remapped in arbitrary
 * order while the resulting mapping is independent of that order.
 */
void __init xen_remap_memory(void)
{
        unsigned long buf = (unsigned long)&xen_remap_buf;
        unsigned long mfn_save, pfn;
        unsigned long remapped = 0;
        unsigned int i;
        unsigned long pfn_s = ~0UL;
        unsigned long len = 0;

        mfn_save = virt_to_mfn((void *)buf);

        while (xen_remap_mfn != INVALID_P2M_ENTRY) {
                /* Map the remap information */
                set_pte_mfn(buf, xen_remap_mfn, PAGE_KERNEL);

                BUG_ON(xen_remap_mfn != xen_remap_buf.mfns[0]);

                pfn = xen_remap_buf.target_pfn;
                for (i = 0; i < xen_remap_buf.size; i++) {
                        xen_update_mem_tables(pfn, xen_remap_buf.mfns[i]);
                        remapped++;
                        pfn++;
                }
                if (pfn_s == ~0UL || pfn == pfn_s) {
                        pfn_s = xen_remap_buf.target_pfn;
                        len += xen_remap_buf.size;
                } else if (pfn_s + len == xen_remap_buf.target_pfn) {
                        len += xen_remap_buf.size;
                } else {
                        xen_del_extra_mem(pfn_s, len);
                        pfn_s = xen_remap_buf.target_pfn;
                        len = xen_remap_buf.size;
                }
                xen_remap_mfn = xen_remap_buf.next_area_mfn;
        }

        if (pfn_s != ~0UL && len)
                xen_del_extra_mem(pfn_s, len);

        set_pte_mfn(buf, mfn_save, PAGE_KERNEL);

        pr_info("Remapped %ld page(s)\n", remapped);
}

static unsigned long __init xen_get_pages_limit(void)
{
        unsigned long limit;

        limit = MAXMEM / PAGE_SIZE;
        if (!xen_initial_domain() && xen_512gb_limit)
                limit = GB(512) / PAGE_SIZE;

        return limit;
}

static unsigned long __init xen_get_max_pages(void)
{
        unsigned long max_pages, limit;
        domid_t domid = DOMID_SELF;
        long ret;

        limit = xen_get_pages_limit();
        max_pages = limit;

        /*
         * For the initial domain we use the maximum reservation as
         * the maximum page.
         *
         * For guest domains the current maximum reservation reflects
         * the current maximum rather than the static maximum. In this
         * case the e820 map provided to us will cover the static
         * maximum region.
         */
        if (xen_initial_domain()) {
                ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid);
                if (ret > 0)
                        max_pages = ret;
        }

        return min(max_pages, limit);
}

static void __init xen_align_and_add_e820_region(phys_addr_t start,
                                                 phys_addr_t size, int type)
{
        phys_addr_t end = start + size;

        /* Align RAM regions to page boundaries. */
        if (type == E820_TYPE_RAM) {
                start = PAGE_ALIGN(start);
                end &= ~((phys_addr_t)PAGE_SIZE - 1);
#ifdef CONFIG_MEMORY_HOTPLUG
                /*
                 * Don't allow adding memory not in E820 map while booting the
                 * system. Once the balloon driver is up it will remove that
                 * restriction again.
                 */
                max_mem_size = end;
#endif
        }

        e820__range_add(start, end - start, type);
}

static void __init xen_ignore_unusable(void)
{
        struct e820_entry *entry = xen_e820_table.entries;
        unsigned int i;

        for (i = 0; i < xen_e820_table.nr_entries; i++, entry++) {
                if (entry->type == E820_TYPE_UNUSABLE)
                        entry->type = E820_TYPE_RAM;
        }
}

bool __init xen_is_e820_reserved(phys_addr_t start, phys_addr_t size)
{
        struct e820_entry *entry;
        unsigned mapcnt;
        phys_addr_t end;

        if (!size)
                return false;

        end = start + size;
        entry = xen_e820_table.entries;

        for (mapcnt = 0; mapcnt < xen_e820_table.nr_entries; mapcnt++) {
                if (entry->type == E820_TYPE_RAM && entry->addr <= start &&
                    (entry->addr + entry->size) >= end)
                        return false;

                entry++;
        }

        return true;
}
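
/*
 * Note: a range only counts as "not reserved" if a single E820 RAM entry
 * covers it completely. For example (made-up map), with RAM [0x100000,
 * 0x200000) a query for 0x30000 bytes at 0x180000 returns false, while one
 * straddling the end of the entry, e.g. 0x20000 bytes at 0x1f0000, returns
 * true.
 */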

/*
 * Find a free area in physical memory that is not yet reserved and is
 * compliant with the E820 map.
 * Used to relocate pre-allocated areas like the initrd or the p2m list which
 * conflict with the E820 map that is going to be used.
 * In case no area is found, return 0. Otherwise return the physical address
 * of the area, which is already reserved for convenience.
 */
phys_addr_t __init xen_find_free_area(phys_addr_t size)
{
        unsigned mapcnt;
        phys_addr_t addr, start;
        struct e820_entry *entry = xen_e820_table.entries;

        for (mapcnt = 0; mapcnt < xen_e820_table.nr_entries; mapcnt++, entry++) {
                if (entry->type != E820_TYPE_RAM || entry->size < size)
                        continue;
                start = entry->addr;
                for (addr = start; addr < start + size; addr += PAGE_SIZE) {
                        if (!memblock_is_reserved(addr))
                                continue;
                        start = addr + PAGE_SIZE;
                        if (start + size > entry->addr + entry->size)
                                break;
                }
                if (addr >= start + size) {
                        memblock_reserve(start, size);
                        return start;
                }
        }

        return 0;
}

/*
 * Like memcpy, but with physical addresses for dest and src.
 */
static void __init xen_phys_memcpy(phys_addr_t dest, phys_addr_t src,
                                   phys_addr_t n)
{
        phys_addr_t dest_off, src_off, dest_len, src_len, len;
        void *from, *to;

        while (n) {
                dest_off = dest & ~PAGE_MASK;
                src_off = src & ~PAGE_MASK;
                dest_len = n;
                if (dest_len > (NR_FIX_BTMAPS << PAGE_SHIFT) - dest_off)
                        dest_len = (NR_FIX_BTMAPS << PAGE_SHIFT) - dest_off;
                src_len = n;
                if (src_len > (NR_FIX_BTMAPS << PAGE_SHIFT) - src_off)
                        src_len = (NR_FIX_BTMAPS << PAGE_SHIFT) - src_off;
                len = min(dest_len, src_len);
                to = early_memremap(dest - dest_off, dest_len + dest_off);
                from = early_memremap(src - src_off, src_len + src_off);
                memcpy(to, from, len);
                early_memunmap(to, dest_len + dest_off);
                early_memunmap(from, src_len + src_off);
                n -= len;
                dest += len;
                src += len;
        }
}
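
/*
 * Illustrative note: each iteration of the loop above copies at most the
 * early fixmap window (NR_FIX_BTMAPS pages) minus the larger of the two
 * sub-page offsets, so even a multi-megabyte initrd relocation works with
 * only the small early_memremap() area that is available this early in boot.
 */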

/*
 * Reserve Xen mfn_list.
 */
static void __init xen_reserve_xen_mfnlist(void)
{
        phys_addr_t start, size;

        if (xen_start_info->mfn_list >= __START_KERNEL_map) {
                start = __pa(xen_start_info->mfn_list);
                size = PFN_ALIGN(xen_start_info->nr_pages *
                                 sizeof(unsigned long));
        } else {
                start = PFN_PHYS(xen_start_info->first_p2m_pfn);
                size = PFN_PHYS(xen_start_info->nr_p2m_frames);
        }

        memblock_reserve(start, size);
        if (!xen_is_e820_reserved(start, size))
                return;

        xen_relocate_p2m();
        memblock_phys_free(start, size);
}

/**
 * xen_memory_setup - Hook for machine specific memory setup.
 **/
char * __init xen_memory_setup(void)
{
        unsigned long max_pfn, pfn_s, n_pfns;
        phys_addr_t mem_end, addr, size, chunk_size;
        u32 type;
        int rc;
        struct xen_memory_map memmap;
        unsigned long max_pages;
        unsigned long extra_pages = 0;
        int i;
        int op;

        xen_parse_512gb();
        max_pfn = xen_get_pages_limit();
        max_pfn = min(max_pfn, xen_start_info->nr_pages);
        mem_end = PFN_PHYS(max_pfn);

        memmap.nr_entries = ARRAY_SIZE(xen_e820_table.entries);
        set_xen_guest_handle(memmap.buffer, xen_e820_table.entries);

#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_XEN_BALLOON)
        xen_saved_max_mem_size = max_mem_size;
#endif

        op = xen_initial_domain() ?
                XENMEM_machine_memory_map :
                XENMEM_memory_map;
        rc = HYPERVISOR_memory_op(op, &memmap);
        if (rc == -ENOSYS) {
                BUG_ON(xen_initial_domain());
                memmap.nr_entries = 1;
                xen_e820_table.entries[0].addr = 0ULL;
                xen_e820_table.entries[0].size = mem_end;
                /* 8MB slack (to balance backend allocations). */
                xen_e820_table.entries[0].size += 8ULL << 20;
                xen_e820_table.entries[0].type = E820_TYPE_RAM;
                rc = 0;
        }
        BUG_ON(rc);
        BUG_ON(memmap.nr_entries == 0);
        xen_e820_table.nr_entries = memmap.nr_entries;

        if (xen_initial_domain()) {
                /*
                 * Xen won't allow a 1:1 mapping to be created to UNUSABLE
                 * regions, so if we're using the machine memory map leave the
                 * region as RAM as it is in the pseudo-physical map.
                 *
                 * UNUSABLE regions in domUs are not handled and will need
                 * a patch in the future.
                 */
                xen_ignore_unusable();

#ifdef CONFIG_ISCSI_IBFT_FIND
                /* Reserve 0.5 MiB to 1 MiB region so iBFT can be found */
                xen_e820_table.entries[xen_e820_table.nr_entries].addr = IBFT_START;
                xen_e820_table.entries[xen_e820_table.nr_entries].size = IBFT_END - IBFT_START;
                xen_e820_table.entries[xen_e820_table.nr_entries].type = E820_TYPE_RESERVED;
                xen_e820_table.nr_entries++;
#endif
        }

        /* Make sure the Xen-supplied memory map is well-ordered. */
        e820__update_table(&xen_e820_table);

        max_pages = xen_get_max_pages();

        /* How many extra pages do we need due to remapping? */
        max_pages += xen_foreach_remap_area(max_pfn, xen_count_remap_pages);

        if (max_pages > max_pfn)
                extra_pages += max_pages - max_pfn;

        /*
         * Clamp the amount of extra memory to an EXTRA_MEM_RATIO factor of
         * the base size.
         *
         * Make sure we have no memory above max_pages, as this area
         * isn't handled by the p2m management.
         */
        extra_pages = min3(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
                           extra_pages, max_pages - max_pfn);
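
        /*
         * Example (made-up numbers, and assuming EXTRA_MEM_RATIO is 10 as in
         * current kernels): with max_pfn worth 1 GiB of pages and
         * max_pages - max_pfn worth 64 GiB, the clamp limits extra_pages to
         * 10 * 1 GiB worth of pages rather than the full hotplug ceiling.
         */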
        i = 0;
        addr = xen_e820_table.entries[0].addr;
        size = xen_e820_table.entries[0].size;
        while (i < xen_e820_table.nr_entries) {
                bool discard = false;

                chunk_size = size;
                type = xen_e820_table.entries[i].type;

                if (type == E820_TYPE_RESERVED)
                        xen_pv_pci_possible = true;

                if (type == E820_TYPE_RAM) {
                        if (addr < mem_end) {
                                chunk_size = min(size, mem_end - addr);
                        } else if (extra_pages) {
                                chunk_size = min(size, PFN_PHYS(extra_pages));
                                pfn_s = PFN_UP(addr);
                                n_pfns = PFN_DOWN(addr + chunk_size) - pfn_s;
                                extra_pages -= n_pfns;
                                xen_add_extra_mem(pfn_s, n_pfns);
                                xen_max_p2m_pfn = pfn_s + n_pfns;
                        } else
                                discard = true;
                }

                if (!discard)
                        xen_align_and_add_e820_region(addr, chunk_size, type);

                addr += chunk_size;
                size -= chunk_size;
                if (size == 0) {
                        i++;
                        if (i < xen_e820_table.nr_entries) {
                                addr = xen_e820_table.entries[i].addr;
                                size = xen_e820_table.entries[i].size;
                        }
                }
        }

        /*
         * Set the rest as identity mapped, in case PCI BARs are
         * located here.
         */
        set_phys_range_identity(addr / PAGE_SIZE, ~0ul);

        /*
         * In domU, the ISA region is normal, usable memory, but we
         * reserve ISA memory anyway because too many things poke
         * about in there.
         */
        e820__range_add(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
                        E820_TYPE_RESERVED);

        e820__update_table(e820_table);

        /*
         * Check whether the kernel itself conflicts with the target E820 map.
         * Failing now is better than running into weird problems later due
         * to relocating (and even reusing) pages with kernel text or data.
         */
        if (xen_is_e820_reserved(__pa_symbol(_text),
                                 __pa_symbol(__bss_stop) - __pa_symbol(_text))) {
                xen_raw_console_write("Xen hypervisor allocated kernel memory conflicts with E820 map\n");
                BUG();
        }

        /*
         * Check for a conflict of the hypervisor supplied page tables with
         * the target E820 map.
         */
        xen_pt_check_e820();

        xen_reserve_xen_mfnlist();

        /* Check for a conflict of the initrd with the target E820 map. */
        if (xen_is_e820_reserved(boot_params.hdr.ramdisk_image,
                                 boot_params.hdr.ramdisk_size)) {
                phys_addr_t new_area, start, size;

                new_area = xen_find_free_area(boot_params.hdr.ramdisk_size);
                if (!new_area) {
                        xen_raw_console_write("Can't find new memory area for initrd needed due to E820 map conflict\n");
                        BUG();
                }

                start = boot_params.hdr.ramdisk_image;
                size = boot_params.hdr.ramdisk_size;
                xen_phys_memcpy(new_area, start, size);
                pr_info("initrd moved from [mem %#010llx-%#010llx] to [mem %#010llx-%#010llx]\n",
                        start, start + size, new_area, new_area + size);
                memblock_phys_free(start, size);
                boot_params.hdr.ramdisk_image = new_area;
                boot_params.ext_ramdisk_image = new_area >> 32;
        }

        /*
         * Set identity map on non-RAM pages and prepare remapping the
         * underlying RAM.
         */
        xen_foreach_remap_area(max_pfn, xen_set_identity_and_remap_chunk);

        pr_info("Released %ld page(s)\n", xen_released_pages);

        return "Xen";
}

static int register_callback(unsigned type, const void *func)
{
        struct callback_register callback = {
                .type = type,
                .address = XEN_CALLBACK(__KERNEL_CS, func),
                .flags = CALLBACKF_mask_events,
        };

        return HYPERVISOR_callback_op(CALLBACKOP_register, &callback);
}

void xen_enable_sysenter(void)
{
        if (cpu_feature_enabled(X86_FEATURE_SYSENTER32) &&
            register_callback(CALLBACKTYPE_sysenter, xen_entry_SYSENTER_compat))
                setup_clear_cpu_cap(X86_FEATURE_SYSENTER32);
}

void xen_enable_syscall(void)
{
        int ret;

        ret = register_callback(CALLBACKTYPE_syscall, xen_entry_SYSCALL_64);
        if (ret != 0) {
                printk(KERN_ERR "Failed to set syscall callback: %d\n", ret);
                /* Pretty fatal; 64-bit userspace has no other
                   mechanism for syscalls. */
        }

        if (cpu_feature_enabled(X86_FEATURE_SYSCALL32) &&
            register_callback(CALLBACKTYPE_syscall32, xen_entry_SYSCALL_compat))
                setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
}

static void __init xen_pvmmu_arch_setup(void)
{
        HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);

        if (register_callback(CALLBACKTYPE_event,
                              xen_asm_exc_xen_hypervisor_callback) ||
            register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
                BUG();

        xen_enable_sysenter();
        xen_enable_syscall();
}

/* This function is not called for HVM domains */
void __init xen_arch_setup(void)
{
        xen_panic_handler_init();
        xen_pvmmu_arch_setup();

#ifdef CONFIG_ACPI
        if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
                printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
                disable_acpi();
        }
#endif

        memcpy(boot_command_line, xen_start_info->cmd_line,
               MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ?
               COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE);

        /* Set up idle, making sure it calls safe_halt() pvop */
        disable_cpuidle();
        disable_cpufreq();
        WARN_ON(xen_set_default_idle());
#ifdef CONFIG_NUMA
        numa_off = 1;
#endif
}