// SPDX-License-Identifier: GPL-2.0-only
/*
 * AMD Memory Encryption Support
 *
 * Copyright (C) 2016 Advanced Micro Devices, Inc.
 *
 * Author: Tom Lendacky <thomas.lendacky@amd.com>
 */

#define DISABLE_BRANCH_PROFILING

#include <linux/linkage.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/dma-direct.h>
#include <linux/swiotlb.h>
#include <linux/mem_encrypt.h>
#include <linux/device.h>
#include <linux/kernel.h>
#include <linux/bitops.h>
#include <linux/dma-mapping.h>
#include <linux/cc_platform.h>

#include <asm/tlbflush.h>
#include <asm/fixmap.h>
#include <asm/setup.h>
#include <asm/mem_encrypt.h>
#include <asm/bootparam.h>
#include <asm/set_memory.h>
#include <asm/cacheflush.h>
#include <asm/processor-flags.h>
#include <asm/msr.h>
#include <asm/cmdline.h>
#include <asm/sev.h>
#include <asm/ia32.h>

#include "mm_internal.h"

/*
 * Since SME-related variables are set early in the boot process, they must
 * reside in the .data section so as not to be zeroed out when the .bss
 * section is later cleared.
 */
u64 sme_me_mask __section(".data") = 0;
u64 sev_status __section(".data") = 0;
u64 sev_check_data __section(".data") = 0;
EXPORT_SYMBOL(sme_me_mask);

/* Buffer used for early in-place encryption by BSP, no locking needed */
static char sme_early_buffer[PAGE_SIZE] __initdata __aligned(PAGE_SIZE);

/*
 * SNP-specific routine which additionally changes the page state from
 * private to shared before copying the data from the source to the
 * destination, and restores it after the copy.
 */
static inline void __init snp_memcpy(void *dst, void *src, size_t sz,
				     unsigned long paddr, bool decrypt)
{
	unsigned long npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;

	if (decrypt) {
		/*
		 * @paddr needs to be accessed decrypted, mark the page shared in
		 * the RMP table before copying it.
		 */
		early_snp_set_memory_shared((unsigned long)__va(paddr), paddr, npages);

		memcpy(dst, src, sz);

		/* Restore the page state after the memcpy. */
		early_snp_set_memory_private((unsigned long)__va(paddr), paddr, npages);
	} else {
		/*
		 * @paddr needs to be accessed encrypted, no need for the page
		 * state change.
		 */
		memcpy(dst, src, sz);
	}
}

/*
 * This routine does not change the underlying encryption setting of the
 * page(s) that map this memory. It assumes that eventually the memory is
 * meant to be accessed as either encrypted or decrypted but the contents
 * are currently not in the desired state.
 *
 * This routine follows the steps outlined in the AMD64 Architecture
 * Programmer's Manual Volume 2, Section 7.10.8 Encrypt-in-Place.
 */
static void __init __sme_early_enc_dec(resource_size_t paddr,
				       unsigned long size, bool enc)
{
	void *src, *dst;
	size_t len;

	if (!sme_me_mask)
		return;

	wbinvd();

	/*
	 * There are a limited number of early mapping slots, so map (at most)
	 * one page at a time.
	 */
	while (size) {
		len = min_t(size_t, sizeof(sme_early_buffer), size);

		/*
		 * Create mappings for the current and desired format of
		 * the memory. Use a write-protected mapping for the source.
		 */
		src = enc ? early_memremap_decrypted_wp(paddr, len) :
			    early_memremap_encrypted_wp(paddr, len);

		dst = enc ? early_memremap_encrypted(paddr, len) :
			    early_memremap_decrypted(paddr, len);

		/*
		 * If a mapping can't be obtained to perform the operation,
		 * then eventual access of that area in the desired mode
		 * will cause a crash.
		 */
		BUG_ON(!src || !dst);

		/*
		 * Use a temporary buffer, of cache-line multiple size, to
		 * avoid data corruption as documented in the APM.
		 */
		if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) {
			snp_memcpy(sme_early_buffer, src, len, paddr, enc);
			snp_memcpy(dst, sme_early_buffer, len, paddr, !enc);
		} else {
			memcpy(sme_early_buffer, src, len);
			memcpy(dst, sme_early_buffer, len);
		}

		early_memunmap(dst, len);
		early_memunmap(src, len);

		paddr += len;
		size -= len;
	}
}

void __init sme_early_encrypt(resource_size_t paddr, unsigned long size)
{
	__sme_early_enc_dec(paddr, size, true);
}

void __init sme_early_decrypt(resource_size_t paddr, unsigned long size)
{
	__sme_early_enc_dec(paddr, size, false);
}

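/*
 * Map or unmap @size bytes at @vaddr through the early page tables. The
 * mappings are created with the encryption mask cleared, i.e. the memory
 * is accessed decrypted.
 */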
static void __init __sme_early_map_unmap_mem(void *vaddr, unsigned long size,
					     bool map)
{
	unsigned long paddr = (unsigned long)vaddr - __PAGE_OFFSET;
	pmdval_t pmd_flags, pmd;

	/* Use early_pmd_flags but remove the encryption mask */
	pmd_flags = __sme_clr(early_pmd_flags);

	do {
		pmd = map ? (paddr & PMD_MASK) + pmd_flags : 0;
		__early_make_pgtable((unsigned long)vaddr, pmd);

		vaddr += PMD_SIZE;
		paddr += PMD_SIZE;
		size = (size <= PMD_SIZE) ? 0 : size - PMD_SIZE;
	} while (size);

	flush_tlb_local();
}

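/*
 * Tear down the decrypted mappings of the boot_params structure and the
 * kernel command line that were created by sme_map_bootdata().
 */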
void __init sme_unmap_bootdata(char *real_mode_data)
{
	struct boot_params *boot_data;
	unsigned long cmdline_paddr;

	if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
		return;

	/* Get the command line address before unmapping the real_mode_data */
	boot_data = (struct boot_params *)real_mode_data;
	cmdline_paddr = boot_data->hdr.cmd_line_ptr | ((u64)boot_data->ext_cmd_line_ptr << 32);

	__sme_early_map_unmap_mem(real_mode_data, sizeof(boot_params), false);

	if (!cmdline_paddr)
		return;

	__sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, false);
}

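/*
 * Map the boot_params structure and the kernel command line decrypted:
 * with SME active they were placed in memory unencrypted by the bootloader,
 * so they must be accessed with the encryption mask cleared.
 */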
void __init sme_map_bootdata(char *real_mode_data)
{
	struct boot_params *boot_data;
	unsigned long cmdline_paddr;

	if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
		return;

	__sme_early_map_unmap_mem(real_mode_data, sizeof(boot_params), true);

	/* Get the command line address after mapping the real_mode_data */
	boot_data = (struct boot_params *)real_mode_data;
	cmdline_paddr = boot_data->hdr.cmd_line_ptr | ((u64)boot_data->ext_cmd_line_ptr << 32);

	if (!cmdline_paddr)
		return;

	__sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, true);
}

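/*
 * Return the PFN mapped by @kpte at the given page table @level and, if
 * @ret_prot is non-NULL, the protection bits of the mapping. Returns 0
 * for an invalid level.
 */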
static unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot)
{
	unsigned long pfn = 0;
	pgprot_t prot;

	switch (level) {
	case PG_LEVEL_4K:
		pfn = pte_pfn(*kpte);
		prot = pte_pgprot(*kpte);
		break;
	case PG_LEVEL_2M:
		pfn = pmd_pfn(*(pmd_t *)kpte);
		prot = pmd_pgprot(*(pmd_t *)kpte);
		break;
	case PG_LEVEL_1G:
		pfn = pud_pfn(*(pud_t *)kpte);
		prot = pud_pgprot(*(pud_t *)kpte);
		break;
	default:
		WARN_ONCE(1, "Invalid level for kpte\n");
		return 0;
	}

	if (ret_prot)
		*ret_prot = prot;

	return pfn;
}

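/* Changing the encryption attribute of a page always requires a TLB flush. */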
static bool amd_enc_tlb_flush_required(bool enc)
{
	return true;
}

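/*
 * Cache flushes are only needed when the hardware does not keep caches
 * coherent across C-bit changes (no X86_FEATURE_SME_COHERENT).
 */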
static bool amd_enc_cache_flush_required(void)
{
	return !cpu_feature_enabled(X86_FEATURE_SME_COHERENT);
}

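/*
 * Notify the hypervisor of the new encryption status of each mapping in
 * the range via the paravirt notify_page_enc_status_changed() hook,
 * walking the range one page table mapping at a time.
 */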
static void enc_dec_hypercall(unsigned long vaddr, unsigned long size, bool enc)
{
#ifdef CONFIG_PARAVIRT
	unsigned long vaddr_end = vaddr + size;

	while (vaddr < vaddr_end) {
		int psize, pmask, level;
		unsigned long pfn;
		pte_t *kpte;

		kpte = lookup_address(vaddr, &level);
		if (!kpte || pte_none(*kpte)) {
			WARN_ONCE(1, "kpte lookup for vaddr\n");
			return;
		}

		pfn = pg_level_to_pfn(level, kpte, NULL);
		if (!pfn)
			continue;

		psize = page_level_size(level);
		pmask = page_level_mask(level);

		notify_page_enc_status_changed(pfn, psize >> PAGE_SHIFT, enc);

		vaddr = (vaddr & pmask) + psize;
	}
#endif
}

static bool amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc)
{
	/*
	 * To maintain the security guarantees of SEV-SNP guests, make sure
	 * to invalidate the memory before the encryption attribute is cleared.
	 */
	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && !enc)
		snp_set_memory_shared(vaddr, npages);

	return true;
}

/* Return true unconditionally: the return value doesn't matter for the SEV side */
static bool amd_enc_status_change_finish(unsigned long vaddr, int npages, bool enc)
{
	/*
	 * After memory is mapped encrypted in the page table, validate it
	 * so that it is consistent with the page table updates.
	 */
	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP) && enc)
		snp_set_memory_private(vaddr, npages);

	if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
		enc_dec_hypercall(vaddr, npages << PAGE_SHIFT, enc);

	return true;
}

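/*
 * Switch the mapping described by @kpte to the encryption state given by
 * @enc: flush the caches, en-/decrypt the contents in place, update the
 * C-bit in the page table entry and keep the SNP RMP state consistent.
 */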
static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
{
	pgprot_t old_prot, new_prot;
	unsigned long pfn, pa, size;
	pte_t new_pte;

	pfn = pg_level_to_pfn(level, kpte, &old_prot);
	if (!pfn)
		return;

	new_prot = old_prot;
	if (enc)
		pgprot_val(new_prot) |= _PAGE_ENC;
	else
		pgprot_val(new_prot) &= ~_PAGE_ENC;

	/* If prot is the same, do nothing. */
	if (pgprot_val(old_prot) == pgprot_val(new_prot))
		return;

	pa = pfn << PAGE_SHIFT;
	size = page_level_size(level);

	/*
	 * We are going to perform in-place en-/decryption and change the
	 * physical page attribute from C=1 to C=0 or vice versa. Flush the
	 * caches to ensure that data gets accessed with the correct C-bit.
	 */
	clflush_cache_range(__va(pa), size);

	/* Encrypt/decrypt the contents in-place */
	if (enc) {
		sme_early_encrypt(pa, size);
	} else {
		sme_early_decrypt(pa, size);

		/*
		 * On SNP, the page state change in the RMP table must happen
		 * before the page table updates.
		 */
		early_snp_set_memory_shared((unsigned long)__va(pa), pa, 1);
	}

	/* Change the page encryption mask. */
	new_pte = pfn_pte(pfn, new_prot);
	set_pte_atomic(kpte, new_pte);

	/*
	 * If the page is set encrypted in the page table, then update the RMP
	 * table to add this page as private.
	 */
	if (enc)
		early_snp_set_memory_private((unsigned long)__va(pa), pa, 1);
}

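/*
 * Walk the range and toggle the encryption attribute of each mapping.
 * A large page is changed in one go when the range fully covers it;
 * otherwise it is split to the next smaller page size first.
 */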
static int __init early_set_memory_enc_dec(unsigned long vaddr,
					   unsigned long size, bool enc)
{
	unsigned long vaddr_end, vaddr_next, start;
	unsigned long psize, pmask;
	int split_page_size_mask;
	int level, ret;
	pte_t *kpte;

	start = vaddr;
	vaddr_next = vaddr;
	vaddr_end = vaddr + size;

	for (; vaddr < vaddr_end; vaddr = vaddr_next) {
		kpte = lookup_address(vaddr, &level);
		if (!kpte || pte_none(*kpte)) {
			ret = 1;
			goto out;
		}

		if (level == PG_LEVEL_4K) {
			__set_clr_pte_enc(kpte, level, enc);
			vaddr_next = (vaddr & PAGE_MASK) + PAGE_SIZE;
			continue;
		}

		psize = page_level_size(level);
		pmask = page_level_mask(level);

		/*
		 * Check whether we can change the large page in one go.
		 * We request a split when the address is not aligned and
		 * the number of pages to set/clear the encryption bit is
		 * smaller than the number of pages in the large page.
		 */
		if (vaddr == (vaddr & pmask) &&
		    ((vaddr_end - vaddr) >= psize)) {
			__set_clr_pte_enc(kpte, level, enc);
			vaddr_next = (vaddr & pmask) + psize;
			continue;
		}

		/*
		 * The virtual address is part of a larger page, create the next
		 * level page table mapping (4K or 2M). If it is part of a 2M
		 * page then we request a split of the large page into 4K
		 * chunks; a 1GB large page is split into 2M pages.
		 */
		if (level == PG_LEVEL_2M)
			split_page_size_mask = 0;
		else
			split_page_size_mask = 1 << PG_LEVEL_2M;

		/*
		 * kernel_physical_mapping_change() does not flush the TLBs, so
		 * a TLB flush is required after we exit from the for loop.
		 */
		kernel_physical_mapping_change(__pa(vaddr & pmask),
					       __pa((vaddr_end & pmask) + psize),
					       split_page_size_mask);
	}

	ret = 0;

	early_set_mem_enc_dec_hypercall(start, size, enc);
out:
	__flush_tlb_all();
	return ret;
}

int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size)
{
	return early_set_memory_enc_dec(vaddr, size, false);
}

int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size)
{
	return early_set_memory_enc_dec(vaddr, size, true);
}

void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, unsigned long size, bool enc)
{
	enc_dec_hypercall(vaddr, size, enc);
}

void __init sme_early_init(void)
{
	if (!sme_me_mask)
		return;

	early_pmd_flags = __sme_set(early_pmd_flags);

	__supported_pte_mask = __sme_set(__supported_pte_mask);

	/* Update the protection map with memory encryption mask */
	add_encrypt_protection_map();

	x86_platform.guest.enc_status_change_prepare = amd_enc_status_change_prepare;
	x86_platform.guest.enc_status_change_finish = amd_enc_status_change_finish;
	x86_platform.guest.enc_tlb_flush_required = amd_enc_tlb_flush_required;
	x86_platform.guest.enc_cache_flush_required = amd_enc_cache_flush_required;

	/*
	 * AMD-SEV-ES intercepts the RDMSR to read the X2APIC ID in the
	 * parallel bringup low level code. That raises #VC which cannot be
	 * handled there.
	 * It does not provide a RDMSR GHCB protocol so the early startup
	 * code cannot directly communicate with the secure firmware. The
	 * alternative solution to retrieve the APIC ID via CPUID(0xb),
	 * which is covered by the GHCB protocol, is not viable either
	 * because there is no enforcement of the CPUID(0xb) provided
	 * "initial" APIC ID to be the same as the real APIC ID.
	 * Disable parallel bootup.
	 */
	if (sev_status & MSR_AMD64_SEV_ES_ENABLED)
		x86_cpuinit.parallel_bringup = false;

	/*
	 * The VMM is capable of injecting interrupt 0x80 and triggering the
	 * compatibility syscall path.
	 *
	 * By default, the 32-bit emulation is disabled in order to ensure
	 * the safety of the VM.
	 */
	if (sev_status & MSR_AMD64_SEV_ENABLED)
		ia32_disable();

	/*
	 * Override init functions that scan the ROM region in SEV-SNP guests,
	 * as this memory is not pre-validated and would thus cause a crash.
	 */
	if (sev_status & MSR_AMD64_SEV_SNP_ENABLED) {
		x86_init.mpparse.find_mptable = x86_init_noop;
		x86_init.pci.init_irq = x86_init_noop;
		x86_init.resources.probe_roms = x86_init_noop;

		/*
		 * DMI setup behavior for SEV-SNP guests depends on
		 * efi_enabled(EFI_CONFIG_TABLES), which hasn't been
		 * parsed yet. snp_dmi_setup() will run after that
		 * parsing has happened.
		 */
		x86_init.resources.dmi_setup = snp_dmi_setup;
	}
}

514 | |
515 | void __init mem_encrypt_free_decrypted_mem(void) |
516 | { |
517 | unsigned long vaddr, vaddr_end, npages; |
518 | int r; |
519 | |
520 | vaddr = (unsigned long)__start_bss_decrypted_unused; |
521 | vaddr_end = (unsigned long)__end_bss_decrypted; |
522 | npages = (vaddr_end - vaddr) >> PAGE_SHIFT; |
523 | |
524 | /* |
525 | * If the unused memory range was mapped decrypted, change the encryption |
526 | * attribute from decrypted to encrypted before freeing it. Base the |
527 | * re-encryption on the same condition used for the decryption in |
528 | * sme_postprocess_startup(). Higher level abstractions, such as |
529 | * CC_ATTR_MEM_ENCRYPT, aren't necessarily equivalent in a Hyper-V VM |
530 | * using vTOM, where sme_me_mask is always zero. |
531 | */ |
532 | if (sme_me_mask) { |
533 | r = set_memory_encrypted(addr: vaddr, numpages: npages); |
534 | if (r) { |
535 | pr_warn("failed to free unused decrypted pages\n" ); |
536 | return; |
537 | } |
538 | } |
539 | |
540 | free_init_pages(what: "unused decrypted" , begin: vaddr, end: vaddr_end); |
541 | } |
542 | |