// SPDX-License-Identifier: GPL-2.0
/*
 * Machine specific setup for xen
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */

#include <linux/init.h>
#include <linux/iscsi_ibft.h>
#include <linux/sched.h>
#include <linux/kstrtox.h>
#include <linux/mm.h>
#include <linux/pm.h>
#include <linux/memblock.h>
#include <linux/cpuidle.h>
#include <linux/cpufreq.h>
#include <linux/memory_hotplug.h>

#include <asm/elf.h>
#include <asm/vdso.h>
#include <asm/e820/api.h>
#include <asm/setup.h>
#include <asm/acpi.h>
#include <asm/numa.h>
#include <asm/idtentry.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/xen.h>
#include <xen/page.h>
#include <xen/interface/callback.h>
#include <xen/interface/memory.h>
#include <xen/interface/physdev.h>
#include <xen/features.h>
#include <xen/hvc-console.h>
#include "xen-ops.h"
#include "mmu.h"

#define GB(x) ((uint64_t)(x) * 1024 * 1024 * 1024)

/* Amount of extra memory space we add to the e820 ranges */
struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;

/* Number of pages released from the initial allocation. */
unsigned long xen_released_pages;

/* Memory map would allow PCI passthrough. */
bool xen_pv_pci_possible;

/* E820 map used during setting up memory. */
static struct e820_table xen_e820_table __initdata;

/*
 * Buffer used to remap identity mapped pages. We only need the virtual space.
 * The physical page behind this address is remapped as needed to different
 * buffer pages.
 */
#define REMAP_SIZE	(P2M_PER_PAGE - 3)
static struct {
        unsigned long next_area_mfn;
        unsigned long target_pfn;
        unsigned long size;
        unsigned long mfns[REMAP_SIZE];
} xen_remap_buf __initdata __aligned(PAGE_SIZE);
static unsigned long xen_remap_mfn __initdata = INVALID_P2M_ENTRY;
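
/*
 * The remap data built below forms a singly linked list threaded through
 * the to-be-remapped pages themselves: xen_remap_mfn is the head, and each
 * saved buffer page records its predecessor in next_area_mfn, e.g.:
 *
 *   xen_remap_mfn -> [chunk N] -> [chunk N-1] -> ... -> INVALID_P2M_ENTRY
 *
 * See xen_do_set_identity_and_remap_chunk() and xen_remap_memory().
 */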

/*
 * The maximum amount of extra memory compared to the base size. The
 * main scaling factor is the size of struct page. At extreme ratios
 * of base:extra, all the base memory can be filled with page
 * structures for the extra memory, leaving no space for anything
 * else.
 *
 * 10x seems like a reasonable balance between scaling flexibility and
 * leaving a practically usable system.
 */
#define EXTRA_MEM_RATIO		(10)

static bool xen_512gb_limit __initdata = IS_ENABLED(CONFIG_XEN_512GB);

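/*
 * Parse a "xen_512gb_limit" option on the Xen-provided command line: the
 * bare form enables the limit, while "xen_512gb_limit=<bool>" sets it
 * explicitly (parsed with kstrtobool(), so e.g. "=0"/"=1"/"=on"/"=off").
 */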
static void __init xen_parse_512gb(void)
{
        bool val = false;
        char *arg;

        arg = strstr(xen_start_info->cmd_line, "xen_512gb_limit");
        if (!arg)
                return;

        arg = strstr(xen_start_info->cmd_line, "xen_512gb_limit=");
        if (!arg)
                val = true;
        else if (kstrtobool(arg + strlen("xen_512gb_limit="), &val))
                return;

        xen_512gb_limit = val;
}

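/*
 * Add a pfn range to the pool of extra memory: append it to an adjacent
 * existing region if possible, otherwise store it in a free slot. The range
 * is also reserved in memblock so it is not used before the balloon driver
 * hands it out.
 */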
static void __init xen_add_extra_mem(unsigned long start_pfn,
                                     unsigned long n_pfns)
{
        int i;

        /*
         * No need to check for zero size: that should happen rarely and
         * would only write a new entry that is regarded as unused due to
         * its zero size.
         */
        for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
                /* Add new region. */
                if (xen_extra_mem[i].n_pfns == 0) {
                        xen_extra_mem[i].start_pfn = start_pfn;
                        xen_extra_mem[i].n_pfns = n_pfns;
                        break;
                }
                /* Append to existing region. */
                if (xen_extra_mem[i].start_pfn + xen_extra_mem[i].n_pfns ==
                    start_pfn) {
                        xen_extra_mem[i].n_pfns += n_pfns;
                        break;
                }
        }
        if (i == XEN_EXTRA_MEM_MAX_REGIONS)
                printk(KERN_WARNING "Warning: not enough extra memory regions\n");

        memblock_reserve(PFN_PHYS(start_pfn), PFN_PHYS(n_pfns));
}

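/*
 * Remove a pfn range from the pool of extra memory. Removal at the start or
 * end of a region shrinks it; removal in the middle splits the region by
 * re-adding the remaining tail via xen_add_extra_mem().
 */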
static void __init xen_del_extra_mem(unsigned long start_pfn,
                                     unsigned long n_pfns)
{
        int i;
        unsigned long start_r, size_r;

        for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
                start_r = xen_extra_mem[i].start_pfn;
                size_r = xen_extra_mem[i].n_pfns;

                /* Start of region. */
                if (start_r == start_pfn) {
                        BUG_ON(n_pfns > size_r);
                        xen_extra_mem[i].start_pfn += n_pfns;
                        xen_extra_mem[i].n_pfns -= n_pfns;
                        break;
                }
                /* End of region. */
                if (start_r + size_r == start_pfn + n_pfns) {
                        BUG_ON(n_pfns > size_r);
                        xen_extra_mem[i].n_pfns -= n_pfns;
                        break;
                }
                /* Mid of region. */
                if (start_pfn > start_r && start_pfn < start_r + size_r) {
                        BUG_ON(start_pfn + n_pfns > start_r + size_r);
                        xen_extra_mem[i].n_pfns = start_pfn - start_r;
                        /* Calling memblock_reserve() again is okay. */
                        xen_add_extra_mem(start_pfn + n_pfns, start_r + size_r -
                                          (start_pfn + n_pfns));
                        break;
                }
        }
        memblock_phys_free(PFN_PHYS(start_pfn), PFN_PHYS(n_pfns));
}

/*
 * Called during boot before the p2m list can take entries beyond the
 * hypervisor supplied p2m list. Entries in extra mem are to be regarded as
 * invalid.
 */
unsigned long __ref xen_chk_extra_mem(unsigned long pfn)
{
        int i;

        for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
                if (pfn >= xen_extra_mem[i].start_pfn &&
                    pfn < xen_extra_mem[i].start_pfn + xen_extra_mem[i].n_pfns)
                        return INVALID_P2M_ENTRY;
        }

        return IDENTITY_FRAME(pfn);
}

/*
 * Mark all pfns of extra mem as invalid in p2m list.
 */
void __init xen_inv_extra_mem(void)
{
        unsigned long pfn, pfn_s, pfn_e;
        int i;

        for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
                if (!xen_extra_mem[i].n_pfns)
                        continue;
                pfn_s = xen_extra_mem[i].start_pfn;
                pfn_e = pfn_s + xen_extra_mem[i].n_pfns;
                for (pfn = pfn_s; pfn < pfn_e; pfn++)
                        set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
        }
}

/*
 * Finds the next RAM pfn available in the E820 map after min_pfn.
 * This function updates min_pfn with the pfn found and returns
 * the size of that range or zero if not found.
 */
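/*
 * Example: with RAM entries covering [4 MiB, 100 MiB) and [4 GiB, 6 GiB)
 * and *min_pfn at the 2 GiB mark, the first entry ends below *min_pfn and
 * is skipped; *min_pfn is then advanced to PFN_UP(4 GiB) and the number of
 * pages up to PFN_DOWN(6 GiB) is returned.
 */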
static unsigned long __init xen_find_pfn_range(unsigned long *min_pfn)
{
        const struct e820_entry *entry = xen_e820_table.entries;
        unsigned int i;
        unsigned long done = 0;

        for (i = 0; i < xen_e820_table.nr_entries; i++, entry++) {
                unsigned long s_pfn;
                unsigned long e_pfn;

                if (entry->type != E820_TYPE_RAM)
                        continue;

                e_pfn = PFN_DOWN(entry->addr + entry->size);

                /* We only care about E820 after this */
                if (e_pfn <= *min_pfn)
                        continue;

                s_pfn = PFN_UP(entry->addr);

                /* If min_pfn falls within the E820 entry, we want to start
                 * at the min_pfn PFN.
                 */
                if (s_pfn <= *min_pfn) {
                        done = e_pfn - *min_pfn;
                } else {
                        done = e_pfn - s_pfn;
                        *min_pfn = s_pfn;
                }
                break;
        }

        return done;
}

static int __init xen_free_mfn(unsigned long mfn)
{
        struct xen_memory_reservation reservation = {
                .address_bits = 0,
                .extent_order = 0,
                .domid = DOMID_SELF
        };

        set_xen_guest_handle(reservation.extent_start, &mfn);
        reservation.nr_extents = 1;

        return HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
}

/*
 * This releases a chunk of memory and then does the identity map. It's used
 * as a fallback if the remapping fails.
 */
static void __init xen_set_identity_and_release_chunk(unsigned long start_pfn,
                unsigned long end_pfn, unsigned long nr_pages)
{
        unsigned long pfn, end;
        int ret;

        WARN_ON(start_pfn > end_pfn);

        /* Release pages first. */
        end = min(end_pfn, nr_pages);
        for (pfn = start_pfn; pfn < end; pfn++) {
                unsigned long mfn = pfn_to_mfn(pfn);

                /* Make sure pfn exists to start with */
                if (mfn == INVALID_P2M_ENTRY || mfn_to_pfn(mfn) != pfn)
                        continue;

                ret = xen_free_mfn(mfn);
                WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret);

                if (ret == 1) {
                        xen_released_pages++;
                        if (!__set_phys_to_machine(pfn, INVALID_P2M_ENTRY))
                                break;
                } else
                        break;
        }

        set_phys_range_identity(start_pfn, end_pfn);
}

/*
 * Helper function to update the p2m and m2p tables and kernel mapping.
 */
static void __init xen_update_mem_tables(unsigned long pfn, unsigned long mfn)
{
        struct mmu_update update = {
                .ptr = ((uint64_t)mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE,
                .val = pfn
        };

        /* Update p2m */
        if (!set_phys_to_machine(pfn, mfn)) {
                WARN(1, "Failed to set p2m mapping for pfn=%ld mfn=%ld\n",
                     pfn, mfn);
                BUG();
        }

        /* Update m2p */
        if (HYPERVISOR_mmu_update(&update, 1, NULL, DOMID_SELF) < 0) {
                WARN(1, "Failed to set m2p mapping for mfn=%ld pfn=%ld\n",
                     mfn, pfn);
                BUG();
        }

        /* Update kernel mapping */
        if (HYPERVISOR_update_va_mapping((unsigned long)__va(pfn << PAGE_SHIFT),
                                         mfn_pte(mfn, PAGE_KERNEL), 0)) {
                WARN(1, "Failed to update kernel mapping for mfn=%ld pfn=%ld\n",
                     mfn, pfn);
                BUG();
        }
}

/*
 * This function updates the p2m and m2p tables with an identity map from
 * start_pfn to start_pfn+size and prepares remapping the underlying RAM of the
 * original allocation at remap_pfn. The information needed for remapping is
 * saved in the memory itself to avoid the need for allocating buffers. The
 * complete remap information is contained in a list of MFNs each containing
 * up to REMAP_SIZE MFNs and the start target PFN for doing the remap.
 * This enables us to preserve the original mfn sequence while doing the
 * remapping at a time when the memory management is capable of allocating
 * virtual and physical memory in arbitrary amounts, see 'xen_remap_memory' and
 * its callers.
 */
static void __init xen_do_set_identity_and_remap_chunk(
        unsigned long start_pfn, unsigned long size, unsigned long remap_pfn)
{
        unsigned long buf = (unsigned long)&xen_remap_buf;
        unsigned long mfn_save, mfn;
        unsigned long ident_pfn_iter, remap_pfn_iter;
        unsigned long ident_end_pfn = start_pfn + size;
        unsigned long left = size;
        unsigned int i, chunk;

        WARN_ON(size == 0);

        mfn_save = virt_to_mfn((void *)buf);

        for (ident_pfn_iter = start_pfn, remap_pfn_iter = remap_pfn;
             ident_pfn_iter < ident_end_pfn;
             ident_pfn_iter += REMAP_SIZE, remap_pfn_iter += REMAP_SIZE) {
                chunk = (left < REMAP_SIZE) ? left : REMAP_SIZE;

                /* Map first pfn to xen_remap_buf */
                mfn = pfn_to_mfn(ident_pfn_iter);
                set_pte_mfn(buf, mfn, PAGE_KERNEL);

                /* Save mapping information in page */
                xen_remap_buf.next_area_mfn = xen_remap_mfn;
                xen_remap_buf.target_pfn = remap_pfn_iter;
                xen_remap_buf.size = chunk;
                for (i = 0; i < chunk; i++)
                        xen_remap_buf.mfns[i] = pfn_to_mfn(ident_pfn_iter + i);

                /* Put remap buf into list. */
                xen_remap_mfn = mfn;

                /* Set identity map */
                set_phys_range_identity(ident_pfn_iter, ident_pfn_iter + chunk);

                left -= chunk;
        }

        /* Restore old xen_remap_buf mapping */
        set_pte_mfn(buf, mfn_save, PAGE_KERNEL);
}

/*
 * This function takes a contiguous pfn range that needs to be identity mapped
 * and:
 *
 * 1) Finds a new range of pfns to use to remap based on E820 and remap_pfn.
 * 2) Calls the do_ function to actually do the mapping/remapping work.
 *
 * The goal is to not allocate additional memory but to remap the existing
 * pages. In the case of an error the underlying memory is simply released back
 * to Xen and not remapped.
 */
static unsigned long __init xen_set_identity_and_remap_chunk(
        unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages,
        unsigned long remap_pfn)
{
        unsigned long pfn;
        unsigned long i = 0;
        unsigned long n = end_pfn - start_pfn;

        if (remap_pfn == 0)
                remap_pfn = nr_pages;

        while (i < n) {
                unsigned long cur_pfn = start_pfn + i;
                unsigned long left = n - i;
                unsigned long size = left;
                unsigned long remap_range_size;

                /* Do not remap pages beyond the current allocation */
                if (cur_pfn >= nr_pages) {
                        /* Identity map remaining pages */
                        set_phys_range_identity(cur_pfn, cur_pfn + size);
                        break;
                }
                if (cur_pfn + size > nr_pages)
                        size = nr_pages - cur_pfn;

                remap_range_size = xen_find_pfn_range(&remap_pfn);
                if (!remap_range_size) {
                        pr_warn("Unable to find available pfn range, not remapping identity pages\n");
                        xen_set_identity_and_release_chunk(cur_pfn,
                                        cur_pfn + left, nr_pages);
                        break;
                }
                /* Adjust size to fit in current e820 RAM region */
                if (size > remap_range_size)
                        size = remap_range_size;

                xen_do_set_identity_and_remap_chunk(cur_pfn, size, remap_pfn);

                /* Update variables to reflect new mappings. */
                i += size;
                remap_pfn += size;
        }

        /*
         * If the PFNs are currently mapped, their VA mappings need to be
         * zapped.
         */
        for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++)
                (void)HYPERVISOR_update_va_mapping(
                        (unsigned long)__va(pfn << PAGE_SHIFT),
                        native_make_pte(0), 0);

        return remap_pfn;
}

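/*
 * Callback for xen_foreach_remap_area() below: count how many pages of the
 * non-RAM range [start_pfn, end_pfn) lie below nr_pages and thus will have
 * to be remapped, accumulating the total in remap_pages.
 */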
static unsigned long __init xen_count_remap_pages(
        unsigned long start_pfn, unsigned long end_pfn, unsigned long nr_pages,
        unsigned long remap_pages)
{
        if (start_pfn >= nr_pages)
                return remap_pages;

        return remap_pages + min(end_pfn, nr_pages) - start_pfn;
}

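/*
 * Walk the E820 map and invoke func once for each maximal run of non-RAM
 * space (reserved entries plus gaps) preceding a RAM region or the end of
 * the map. Example: for a map [RAM, reserved, gap, RAM], func is called
 * once, for the combined reserved+gap run, widened to whole pages as
 * described in the comment inside the function.
 */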
static unsigned long __init xen_foreach_remap_area(unsigned long nr_pages,
        unsigned long (*func)(unsigned long start_pfn, unsigned long end_pfn,
                              unsigned long nr_pages, unsigned long last_val))
{
        phys_addr_t start = 0;
        unsigned long ret_val = 0;
        const struct e820_entry *entry = xen_e820_table.entries;
        int i;

        /*
         * Combine non-RAM regions and gaps until a RAM region (or the
         * end of the map) is reached, then call the provided function
         * to perform its duty on the non-RAM region.
         *
         * The combined non-RAM regions are rounded to a whole number
         * of pages so any partial pages are accessible via the 1:1
         * mapping. This is needed for some BIOSes that put (for
         * example) the DMI tables in a reserved region that begins on
         * a non-page boundary.
         */
        for (i = 0; i < xen_e820_table.nr_entries; i++, entry++) {
                phys_addr_t end = entry->addr + entry->size;

                if (entry->type == E820_TYPE_RAM || i == xen_e820_table.nr_entries - 1) {
                        unsigned long start_pfn = PFN_DOWN(start);
                        unsigned long end_pfn = PFN_UP(end);

                        if (entry->type == E820_TYPE_RAM)
                                end_pfn = PFN_UP(entry->addr);

                        if (start_pfn < end_pfn)
                                ret_val = func(start_pfn, end_pfn, nr_pages,
                                               ret_val);
                        start = end;
                }
        }

        return ret_val;
}

/*
 * Remap the memory prepared in xen_do_set_identity_and_remap_chunk().
 * The remap information (which mfn remap to which pfn) is contained in the
 * to be remapped memory itself in a linked list anchored at xen_remap_mfn.
 * This scheme allows the chunks to be remapped in arbitrary order, while
 * the resulting mapping is independent of that order.
 */
void __init xen_remap_memory(void)
{
        unsigned long buf = (unsigned long)&xen_remap_buf;
        unsigned long mfn_save, pfn;
        unsigned long remapped = 0;
        unsigned int i;
        unsigned long pfn_s = ~0UL;
        unsigned long len = 0;

        mfn_save = virt_to_mfn((void *)buf);

        while (xen_remap_mfn != INVALID_P2M_ENTRY) {
                /* Map the remap information */
                set_pte_mfn(buf, xen_remap_mfn, PAGE_KERNEL);

                BUG_ON(xen_remap_mfn != xen_remap_buf.mfns[0]);

                pfn = xen_remap_buf.target_pfn;
                for (i = 0; i < xen_remap_buf.size; i++) {
                        xen_update_mem_tables(pfn, xen_remap_buf.mfns[i]);
                        remapped++;
                        pfn++;
                }
                if (pfn_s == ~0UL || pfn == pfn_s) {
                        pfn_s = xen_remap_buf.target_pfn;
                        len += xen_remap_buf.size;
                } else if (pfn_s + len == xen_remap_buf.target_pfn) {
                        len += xen_remap_buf.size;
                } else {
                        xen_del_extra_mem(pfn_s, len);
                        pfn_s = xen_remap_buf.target_pfn;
                        len = xen_remap_buf.size;
                }
                xen_remap_mfn = xen_remap_buf.next_area_mfn;
        }

        if (pfn_s != ~0UL && len)
                xen_del_extra_mem(pfn_s, len);

        set_pte_mfn(buf, mfn_save, PAGE_KERNEL);

        pr_info("Remapped %ld page(s)\n", remapped);
}

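/*
 * Upper limit for the number of domain pages: MAXMEM for the initial
 * domain, optionally capped at 512 GiB for unprivileged domains (the
 * default with CONFIG_XEN_512GB, overridable via "xen_512gb_limit").
 */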
static unsigned long __init xen_get_pages_limit(void)
{
        unsigned long limit;

        limit = MAXMEM / PAGE_SIZE;
        if (!xen_initial_domain() && xen_512gb_limit)
                limit = GB(512) / PAGE_SIZE;

        return limit;
}

static unsigned long __init xen_get_max_pages(void)
{
        unsigned long max_pages, limit;
        domid_t domid = DOMID_SELF;
        long ret;

        limit = xen_get_pages_limit();
        max_pages = limit;

        /*
         * For the initial domain we use the maximum reservation as
         * the maximum page.
         *
         * For guest domains the current maximum reservation reflects
         * the current maximum rather than the static maximum. In this
         * case the e820 map provided to us will cover the static
         * maximum region.
         */
        if (xen_initial_domain()) {
                ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid);
                if (ret > 0)
                        max_pages = ret;
        }

        return min(max_pages, limit);
}

static void __init xen_align_and_add_e820_region(phys_addr_t start,
                                                 phys_addr_t size, int type)
{
        phys_addr_t end = start + size;

        /* Align RAM regions to page boundaries. */
        if (type == E820_TYPE_RAM) {
                start = PAGE_ALIGN(start);
                end &= ~((phys_addr_t)PAGE_SIZE - 1);
#ifdef CONFIG_MEMORY_HOTPLUG
                /*
                 * Don't allow adding memory not in E820 map while booting the
                 * system. Once the balloon driver is up it will remove that
                 * restriction again.
                 */
                max_mem_size = end;
#endif
        }

        e820__range_add(start, end - start, type);
}

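/*
 * Turn UNUSABLE regions of the machine memory map into RAM; see the
 * explanation in xen_memory_setup() for why this is done for dom0.
 */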
static void __init xen_ignore_unusable(void)
{
        struct e820_entry *entry = xen_e820_table.entries;
        unsigned int i;

        for (i = 0; i < xen_e820_table.nr_entries; i++, entry++) {
                if (entry->type == E820_TYPE_UNUSABLE)
                        entry->type = E820_TYPE_RAM;
        }
}

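/*
 * Return true unless [start, start + size) is fully contained within a
 * single RAM region of the Xen-supplied E820 map, i.e. the range has to be
 * treated as reserved if any part of it falls outside plain RAM.
 */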
bool __init xen_is_e820_reserved(phys_addr_t start, phys_addr_t size)
{
        struct e820_entry *entry;
        unsigned mapcnt;
        phys_addr_t end;

        if (!size)
                return false;

        end = start + size;
        entry = xen_e820_table.entries;

        for (mapcnt = 0; mapcnt < xen_e820_table.nr_entries; mapcnt++) {
                if (entry->type == E820_TYPE_RAM && entry->addr <= start &&
                    (entry->addr + entry->size) >= end)
                        return false;

                entry++;
        }

        return true;
}

/*
 * Find a free area in physical memory not yet reserved and compliant with
 * the E820 map.
 * Used to relocate pre-allocated areas like the initrd or the p2m list
 * which conflict with the E820 map that is going to be used.
 * In case no area is found, return 0. Otherwise return the physical address
 * of the area, which is already reserved for convenience.
 */
phys_addr_t __init xen_find_free_area(phys_addr_t size)
{
        unsigned mapcnt;
        phys_addr_t addr, start;
        struct e820_entry *entry = xen_e820_table.entries;

        for (mapcnt = 0; mapcnt < xen_e820_table.nr_entries; mapcnt++, entry++) {
                if (entry->type != E820_TYPE_RAM || entry->size < size)
                        continue;
                start = entry->addr;
                for (addr = start; addr < start + size; addr += PAGE_SIZE) {
                        if (!memblock_is_reserved(addr))
                                continue;
                        start = addr + PAGE_SIZE;
                        if (start + size > entry->addr + entry->size)
                                break;
                }
                if (addr >= start + size) {
                        memblock_reserve(start, size);
                        return start;
                }
        }

        return 0;
}

/*
 * Like memcpy, but with physical addresses for dest and src.
 */
static void __init xen_phys_memcpy(phys_addr_t dest, phys_addr_t src,
                                   phys_addr_t n)
{
        phys_addr_t dest_off, src_off, dest_len, src_len, len;
        void *from, *to;

        while (n) {
                dest_off = dest & ~PAGE_MASK;
                src_off = src & ~PAGE_MASK;
                dest_len = n;
                if (dest_len > (NR_FIX_BTMAPS << PAGE_SHIFT) - dest_off)
                        dest_len = (NR_FIX_BTMAPS << PAGE_SHIFT) - dest_off;
                src_len = n;
                if (src_len > (NR_FIX_BTMAPS << PAGE_SHIFT) - src_off)
                        src_len = (NR_FIX_BTMAPS << PAGE_SHIFT) - src_off;
                len = min(dest_len, src_len);
                to = early_memremap(dest - dest_off, dest_len + dest_off);
                from = early_memremap(src - src_off, src_len + src_off);
                memcpy(to, from, len);
                early_memunmap(to, dest_len + dest_off);
                early_memunmap(from, src_len + src_off);
                n -= len;
                dest += len;
                src += len;
        }
}

/*
 * Reserve Xen mfn_list.
 */
static void __init xen_reserve_xen_mfnlist(void)
{
        phys_addr_t start, size;

        if (xen_start_info->mfn_list >= __START_KERNEL_map) {
                start = __pa(xen_start_info->mfn_list);
                size = PFN_ALIGN(xen_start_info->nr_pages *
                                 sizeof(unsigned long));
        } else {
                start = PFN_PHYS(xen_start_info->first_p2m_pfn);
                size = PFN_PHYS(xen_start_info->nr_p2m_frames);
        }

        memblock_reserve(start, size);
        if (!xen_is_e820_reserved(start, size))
                return;

        xen_relocate_p2m();
        memblock_phys_free(start, size);
}

/**
 * xen_memory_setup - Hook for machine specific memory setup.
 **/
char * __init xen_memory_setup(void)
{
        unsigned long max_pfn, pfn_s, n_pfns;
        phys_addr_t mem_end, addr, size, chunk_size;
        u32 type;
        int rc;
        struct xen_memory_map memmap;
        unsigned long max_pages;
        unsigned long extra_pages = 0;
        int i;
        int op;

        xen_parse_512gb();
        max_pfn = xen_get_pages_limit();
        max_pfn = min(max_pfn, xen_start_info->nr_pages);
        mem_end = PFN_PHYS(max_pfn);

        memmap.nr_entries = ARRAY_SIZE(xen_e820_table.entries);
        set_xen_guest_handle(memmap.buffer, xen_e820_table.entries);

#if defined(CONFIG_MEMORY_HOTPLUG) && defined(CONFIG_XEN_BALLOON)
        xen_saved_max_mem_size = max_mem_size;
#endif

        op = xen_initial_domain() ?
                XENMEM_machine_memory_map :
                XENMEM_memory_map;
        rc = HYPERVISOR_memory_op(op, &memmap);
        if (rc == -ENOSYS) {
                BUG_ON(xen_initial_domain());
                memmap.nr_entries = 1;
                xen_e820_table.entries[0].addr = 0ULL;
                xen_e820_table.entries[0].size = mem_end;
                /* 8MB slack (to balance backend allocations). */
                xen_e820_table.entries[0].size += 8ULL << 20;
                xen_e820_table.entries[0].type = E820_TYPE_RAM;
                rc = 0;
        }
        BUG_ON(rc);
        BUG_ON(memmap.nr_entries == 0);
        xen_e820_table.nr_entries = memmap.nr_entries;

        if (xen_initial_domain()) {
                /*
                 * Xen won't allow a 1:1 mapping to be created to UNUSABLE
                 * regions, so if we're using the machine memory map leave the
                 * region as RAM as it is in the pseudo-physical map.
                 *
                 * UNUSABLE regions in domUs are not handled and will need
                 * a patch in the future.
                 */
                xen_ignore_unusable();

#ifdef CONFIG_ISCSI_IBFT_FIND
                /* Reserve 0.5 MiB to 1 MiB region so iBFT can be found */
                xen_e820_table.entries[xen_e820_table.nr_entries].addr = IBFT_START;
                xen_e820_table.entries[xen_e820_table.nr_entries].size = IBFT_END - IBFT_START;
                xen_e820_table.entries[xen_e820_table.nr_entries].type = E820_TYPE_RESERVED;
                xen_e820_table.nr_entries++;
#endif
        }

        /* Make sure the Xen-supplied memory map is well-ordered. */
        e820__update_table(&xen_e820_table);

        max_pages = xen_get_max_pages();

        /* How many extra pages do we need due to remapping? */
        max_pages += xen_foreach_remap_area(max_pfn, xen_count_remap_pages);

        if (max_pages > max_pfn)
                extra_pages += max_pages - max_pfn;

        /*
         * Clamp the amount of extra memory to an EXTRA_MEM_RATIO
         * factor of the base size.
         *
         * Make sure we have no memory above max_pages, as this area
         * isn't handled by the p2m management.
         */
        extra_pages = min3(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
                           extra_pages, max_pages - max_pfn);
        i = 0;
        addr = xen_e820_table.entries[0].addr;
        size = xen_e820_table.entries[0].size;
        while (i < xen_e820_table.nr_entries) {
                bool discard = false;

                chunk_size = size;
                type = xen_e820_table.entries[i].type;

                if (type == E820_TYPE_RESERVED)
                        xen_pv_pci_possible = true;

                if (type == E820_TYPE_RAM) {
                        if (addr < mem_end) {
                                chunk_size = min(size, mem_end - addr);
                        } else if (extra_pages) {
                                chunk_size = min(size, PFN_PHYS(extra_pages));
                                pfn_s = PFN_UP(addr);
                                n_pfns = PFN_DOWN(addr + chunk_size) - pfn_s;
                                extra_pages -= n_pfns;
                                xen_add_extra_mem(pfn_s, n_pfns);
                                xen_max_p2m_pfn = pfn_s + n_pfns;
                        } else
                                discard = true;
                }

                if (!discard)
                        xen_align_and_add_e820_region(addr, chunk_size, type);

                addr += chunk_size;
                size -= chunk_size;
                if (size == 0) {
                        i++;
                        if (i < xen_e820_table.nr_entries) {
                                addr = xen_e820_table.entries[i].addr;
                                size = xen_e820_table.entries[i].size;
                        }
                }
        }

        /*
         * Set the rest as identity mapped, in case PCI BARs are
         * located here.
         */
        set_phys_range_identity(addr / PAGE_SIZE, ~0ul);

        /*
         * In domU, the ISA region is normal, usable memory, but we
         * reserve ISA memory anyway because too many things poke
         * about in there.
         */
        e820__range_add(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
                        E820_TYPE_RESERVED);

        e820__update_table(e820_table);

        /*
         * Check whether the kernel itself conflicts with the target E820 map.
         * Failing now is better than running into weird problems later due
         * to relocating (and even reusing) pages with kernel text or data.
         */
        if (xen_is_e820_reserved(__pa_symbol(_text),
                                 __pa_symbol(__bss_stop) - __pa_symbol(_text))) {
                xen_raw_console_write("Xen hypervisor allocated kernel memory conflicts with E820 map\n");
                BUG();
        }

        /*
         * Check for a conflict of the hypervisor supplied page tables with
         * the target E820 map.
         */
        xen_pt_check_e820();

        xen_reserve_xen_mfnlist();

        /* Check for a conflict of the initrd with the target E820 map. */
        if (xen_is_e820_reserved(boot_params.hdr.ramdisk_image,
                                 boot_params.hdr.ramdisk_size)) {
                phys_addr_t new_area, start, size;

                new_area = xen_find_free_area(boot_params.hdr.ramdisk_size);
                if (!new_area) {
                        xen_raw_console_write("Can't find new memory area for initrd needed due to E820 map conflict\n");
                        BUG();
                }

                start = boot_params.hdr.ramdisk_image;
                size = boot_params.hdr.ramdisk_size;
                xen_phys_memcpy(new_area, start, size);
                pr_info("initrd moved from [mem %#010llx-%#010llx] to [mem %#010llx-%#010llx]\n",
                        start, start + size, new_area, new_area + size);
                memblock_phys_free(start, size);
                boot_params.hdr.ramdisk_image = new_area;
                boot_params.ext_ramdisk_image = new_area >> 32;
        }

        /*
         * Set identity map on non-RAM pages and prepare remapping the
         * underlying RAM.
         */
        xen_foreach_remap_area(max_pfn, xen_set_identity_and_remap_chunk);

        pr_info("Released %ld page(s)\n", xen_released_pages);

        return "Xen";
}

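/*
 * Register a callback entry point of the given type with the hypervisor,
 * with event delivery masked while the callback is running
 * (CALLBACKF_mask_events).
 */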
static int register_callback(unsigned type, const void *func)
{
        struct callback_register callback = {
                .type = type,
                .address = XEN_CALLBACK(__KERNEL_CS, func),
                .flags = CALLBACKF_mask_events,
        };

        return HYPERVISOR_callback_op(CALLBACKOP_register, &callback);
}

void xen_enable_sysenter(void)
{
        if (cpu_feature_enabled(X86_FEATURE_SYSENTER32) &&
            register_callback(CALLBACKTYPE_sysenter, xen_entry_SYSENTER_compat))
                setup_clear_cpu_cap(X86_FEATURE_SYSENTER32);
}

void xen_enable_syscall(void)
{
        int ret;

        ret = register_callback(CALLBACKTYPE_syscall, xen_entry_SYSCALL_64);
        if (ret != 0) {
                printk(KERN_ERR "Failed to set syscall callback: %d\n", ret);
                /*
                 * Pretty fatal; 64-bit userspace has no other
                 * mechanism for syscalls.
                 */
        }

        if (cpu_feature_enabled(X86_FEATURE_SYSCALL32) &&
            register_callback(CALLBACKTYPE_syscall32, xen_entry_SYSCALL_compat))
                setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
}

static void __init xen_pvmmu_arch_setup(void)
{
        HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);

        if (register_callback(CALLBACKTYPE_event,
                              xen_asm_exc_xen_hypervisor_callback) ||
            register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
                BUG();

        xen_enable_sysenter();
        xen_enable_syscall();
}

/* This function is not called for HVM domains */
void __init xen_arch_setup(void)
{
        xen_panic_handler_init();
        xen_pvmmu_arch_setup();

#ifdef CONFIG_ACPI
        if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
                printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
                disable_acpi();
        }
#endif

        memcpy(boot_command_line, xen_start_info->cmd_line,
               MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ?
               COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE);

        /* Set up idle, making sure it calls safe_halt() pvop */
        disable_cpuidle();
        disable_cpufreq();
        WARN_ON(xen_set_default_idle());
#ifdef CONFIG_NUMA
        numa_off = 1;
#endif
}
989 | |