1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Copyright (C) 2020 - Google LLC |
4 | * Author: Quentin Perret <qperret@google.com> |
5 | */ |
6 | |
7 | #include <linux/init.h> |
8 | #include <linux/kmemleak.h> |
9 | #include <linux/kvm_host.h> |
10 | #include <linux/memblock.h> |
11 | #include <linux/mutex.h> |
12 | #include <linux/sort.h> |
13 | |
14 | #include <asm/kvm_pkvm.h> |
15 | |
16 | #include "hyp_constants.h" |
17 | |
18 | DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized); |
19 | |
20 | static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory); |
21 | static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr); |
22 | |
23 | phys_addr_t hyp_mem_base; |
24 | phys_addr_t hyp_mem_size; |
25 | |
26 | static int cmp_hyp_memblock(const void *p1, const void *p2) |
27 | { |
28 | const struct memblock_region *r1 = p1; |
29 | const struct memblock_region *r2 = p2; |
30 | |
31 | return r1->base < r2->base ? -1 : (r1->base > r2->base); |
32 | } |
33 | |
34 | static void __init sort_memblock_regions(void) |
35 | { |
36 | sort(base: hyp_memory, |
37 | num: *hyp_memblock_nr_ptr, |
38 | size: sizeof(struct memblock_region), |
39 | cmp_func: cmp_hyp_memblock, |
40 | NULL); |
41 | } |
42 | |
43 | static int __init register_memblock_regions(void) |
44 | { |
45 | struct memblock_region *reg; |
46 | |
47 | for_each_mem_region(reg) { |
48 | if (*hyp_memblock_nr_ptr >= HYP_MEMBLOCK_REGIONS) |
49 | return -ENOMEM; |
50 | |
51 | hyp_memory[*hyp_memblock_nr_ptr] = *reg; |
52 | (*hyp_memblock_nr_ptr)++; |
53 | } |
54 | sort_memblock_regions(); |
55 | |
56 | return 0; |
57 | } |
58 | |
59 | void __init kvm_hyp_reserve(void) |
60 | { |
61 | u64 hyp_mem_pages = 0; |
62 | int ret; |
63 | |
64 | if (!is_hyp_mode_available() || is_kernel_in_hyp_mode()) |
65 | return; |
66 | |
67 | if (kvm_get_mode() != KVM_MODE_PROTECTED) |
68 | return; |
69 | |
70 | ret = register_memblock_regions(); |
71 | if (ret) { |
72 | *hyp_memblock_nr_ptr = 0; |
73 | kvm_err("Failed to register hyp memblocks: %d\n" , ret); |
74 | return; |
75 | } |
76 | |
77 | hyp_mem_pages += hyp_s1_pgtable_pages(); |
78 | hyp_mem_pages += host_s2_pgtable_pages(); |
79 | hyp_mem_pages += hyp_vm_table_pages(); |
80 | hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE); |
81 | hyp_mem_pages += hyp_ffa_proxy_pages(); |
82 | |
83 | /* |
84 | * Try to allocate a PMD-aligned region to reduce TLB pressure once |
85 | * this is unmapped from the host stage-2, and fallback to PAGE_SIZE. |
86 | */ |
87 | hyp_mem_size = hyp_mem_pages << PAGE_SHIFT; |
88 | hyp_mem_base = memblock_phys_alloc(ALIGN(hyp_mem_size, PMD_SIZE), |
89 | PMD_SIZE); |
90 | if (!hyp_mem_base) |
91 | hyp_mem_base = memblock_phys_alloc(size: hyp_mem_size, PAGE_SIZE); |
92 | else |
93 | hyp_mem_size = ALIGN(hyp_mem_size, PMD_SIZE); |
94 | |
95 | if (!hyp_mem_base) { |
96 | kvm_err("Failed to reserve hyp memory\n" ); |
97 | return; |
98 | } |
99 | |
100 | kvm_info("Reserved %lld MiB at 0x%llx\n" , hyp_mem_size >> 20, |
101 | hyp_mem_base); |
102 | } |
103 | |
104 | static void __pkvm_destroy_hyp_vm(struct kvm *host_kvm) |
105 | { |
106 | if (host_kvm->arch.pkvm.handle) { |
107 | WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm, |
108 | host_kvm->arch.pkvm.handle)); |
109 | } |
110 | |
111 | host_kvm->arch.pkvm.handle = 0; |
112 | free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc); |
113 | } |
114 | |
115 | /* |
116 | * Allocates and donates memory for hypervisor VM structs at EL2. |
117 | * |
118 | * Allocates space for the VM state, which includes the hyp vm as well as |
119 | * the hyp vcpus. |
120 | * |
121 | * Stores an opaque handler in the kvm struct for future reference. |
122 | * |
123 | * Return 0 on success, negative error code on failure. |
124 | */ |
125 | static int __pkvm_create_hyp_vm(struct kvm *host_kvm) |
126 | { |
127 | size_t pgd_sz, hyp_vm_sz, hyp_vcpu_sz; |
128 | struct kvm_vcpu *host_vcpu; |
129 | pkvm_handle_t handle; |
130 | void *pgd, *hyp_vm; |
131 | unsigned long idx; |
132 | int ret; |
133 | |
134 | if (host_kvm->created_vcpus < 1) |
135 | return -EINVAL; |
136 | |
137 | pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.mmu.vtcr); |
138 | |
139 | /* |
140 | * The PGD pages will be reclaimed using a hyp_memcache which implies |
141 | * page granularity. So, use alloc_pages_exact() to get individual |
142 | * refcounts. |
143 | */ |
144 | pgd = alloc_pages_exact(size: pgd_sz, GFP_KERNEL_ACCOUNT); |
145 | if (!pgd) |
146 | return -ENOMEM; |
147 | |
148 | /* Allocate memory to donate to hyp for vm and vcpu pointers. */ |
149 | hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE, |
150 | size_mul(sizeof(void *), |
151 | host_kvm->created_vcpus))); |
152 | hyp_vm = alloc_pages_exact(size: hyp_vm_sz, GFP_KERNEL_ACCOUNT); |
153 | if (!hyp_vm) { |
154 | ret = -ENOMEM; |
155 | goto free_pgd; |
156 | } |
157 | |
158 | /* Donate the VM memory to hyp and let hyp initialize it. */ |
159 | ret = kvm_call_hyp_nvhe(__pkvm_init_vm, host_kvm, hyp_vm, pgd); |
160 | if (ret < 0) |
161 | goto free_vm; |
162 | |
163 | handle = ret; |
164 | |
165 | host_kvm->arch.pkvm.handle = handle; |
166 | |
167 | /* Donate memory for the vcpus at hyp and initialize it. */ |
168 | hyp_vcpu_sz = PAGE_ALIGN(PKVM_HYP_VCPU_SIZE); |
169 | kvm_for_each_vcpu(idx, host_vcpu, host_kvm) { |
170 | void *hyp_vcpu; |
171 | |
172 | /* Indexing of the vcpus to be sequential starting at 0. */ |
173 | if (WARN_ON(host_vcpu->vcpu_idx != idx)) { |
174 | ret = -EINVAL; |
175 | goto destroy_vm; |
176 | } |
177 | |
178 | hyp_vcpu = alloc_pages_exact(size: hyp_vcpu_sz, GFP_KERNEL_ACCOUNT); |
179 | if (!hyp_vcpu) { |
180 | ret = -ENOMEM; |
181 | goto destroy_vm; |
182 | } |
183 | |
184 | ret = kvm_call_hyp_nvhe(__pkvm_init_vcpu, handle, host_vcpu, |
185 | hyp_vcpu); |
186 | if (ret) { |
187 | free_pages_exact(virt: hyp_vcpu, size: hyp_vcpu_sz); |
188 | goto destroy_vm; |
189 | } |
190 | } |
191 | |
192 | return 0; |
193 | |
194 | destroy_vm: |
195 | __pkvm_destroy_hyp_vm(host_kvm); |
196 | return ret; |
197 | free_vm: |
198 | free_pages_exact(virt: hyp_vm, size: hyp_vm_sz); |
199 | free_pgd: |
200 | free_pages_exact(virt: pgd, size: pgd_sz); |
201 | return ret; |
202 | } |
203 | |
204 | int pkvm_create_hyp_vm(struct kvm *host_kvm) |
205 | { |
206 | int ret = 0; |
207 | |
208 | mutex_lock(&host_kvm->arch.config_lock); |
209 | if (!host_kvm->arch.pkvm.handle) |
210 | ret = __pkvm_create_hyp_vm(host_kvm); |
211 | mutex_unlock(lock: &host_kvm->arch.config_lock); |
212 | |
213 | return ret; |
214 | } |
215 | |
216 | void pkvm_destroy_hyp_vm(struct kvm *host_kvm) |
217 | { |
218 | mutex_lock(&host_kvm->arch.config_lock); |
219 | __pkvm_destroy_hyp_vm(host_kvm); |
220 | mutex_unlock(lock: &host_kvm->arch.config_lock); |
221 | } |
222 | |
223 | int pkvm_init_host_vm(struct kvm *host_kvm) |
224 | { |
225 | mutex_init(&host_kvm->lock); |
226 | return 0; |
227 | } |
228 | |
229 | static void __init _kvm_host_prot_finalize(void *arg) |
230 | { |
231 | int *err = arg; |
232 | |
233 | if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize))) |
234 | WRITE_ONCE(*err, -EINVAL); |
235 | } |
236 | |
237 | static int __init pkvm_drop_host_privileges(void) |
238 | { |
239 | int ret = 0; |
240 | |
241 | /* |
242 | * Flip the static key upfront as that may no longer be possible |
243 | * once the host stage 2 is installed. |
244 | */ |
245 | static_branch_enable(&kvm_protected_mode_initialized); |
246 | on_each_cpu(func: _kvm_host_prot_finalize, info: &ret, wait: 1); |
247 | return ret; |
248 | } |
249 | |
250 | static int __init finalize_pkvm(void) |
251 | { |
252 | int ret; |
253 | |
254 | if (!is_protected_kvm_enabled() || !is_kvm_arm_initialised()) |
255 | return 0; |
256 | |
257 | /* |
258 | * Exclude HYP sections from kmemleak so that they don't get peeked |
259 | * at, which would end badly once inaccessible. |
260 | */ |
261 | kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start); |
262 | kmemleak_free_part_phys(phys: hyp_mem_base, size: hyp_mem_size); |
263 | |
264 | ret = pkvm_drop_host_privileges(); |
265 | if (ret) |
266 | pr_err("Failed to finalize Hyp protection: %d\n" , ret); |
267 | |
268 | return ret; |
269 | } |
270 | device_initcall_sync(finalize_pkvm); |
271 | |