// SPDX-License-Identifier: GPL-2.0
#define DISABLE_BRANCH_PROFILING
#define pr_fmt(fmt) "kasan: " fmt

/* cpu_feature_enabled() cannot be used this early */
#define USE_EARLY_PGTABLE_L5

#include <linux/memblock.h>
#include <linux/kasan.h>
#include <linux/kdebug.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/vmalloc.h>

#include <asm/e820/types.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/cpu_entry_area.h>

extern struct range pfn_mapped[E820_MAX_ENTRIES];

static p4d_t tmp_p4d_table[MAX_PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);

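/*
 * Boot-time allocator for shadow pages and shadow page tables. Memory is
 * taken from memblock, preferably on the requested node and above
 * MAX_DMA_ADDRESS. Callers that can fall back to a smaller allocation pass
 * should_panic=false and handle a NULL return themselves.
 */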
static __init void *early_alloc(size_t size, int nid, bool should_panic)
{
	void *ptr = memblock_alloc_try_nid(size, size,
			__pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, nid);

	if (!ptr && should_panic)
		panic("%pS: Failed to allocate page, nid=%d from=%lx\n",
		      (void *)_RET_IP_, nid, __pa(MAX_DMA_ADDRESS));

	return ptr;
}

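/*
 * Populate shadow at the PMD level. If the range covers exactly one aligned
 * PMD and the CPU supports 2MiB pages (PSE), try to map it with a single huge
 * page; otherwise allocate a PTE page and fill it one 4KiB page at a time.
 */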
static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr,
				      unsigned long end, int nid)
{
	pte_t *pte;

	if (pmd_none(*pmd)) {
		void *p;

		if (boot_cpu_has(X86_FEATURE_PSE) &&
		    ((end - addr) == PMD_SIZE) &&
		    IS_ALIGNED(addr, PMD_SIZE)) {
			p = early_alloc(PMD_SIZE, nid, false);
			if (p && pmd_set_huge(pmd, __pa(p), PAGE_KERNEL))
				return;
			memblock_free(p, PMD_SIZE);
		}

		p = early_alloc(PAGE_SIZE, nid, true);
		pmd_populate_kernel(&init_mm, pmd, p);
	}

	pte = pte_offset_kernel(pmd, addr);
	do {
		pte_t entry;
		void *p;

		if (!pte_none(*pte))
			continue;

		p = early_alloc(PAGE_SIZE, nid, true);
		entry = pfn_pte(PFN_DOWN(__pa(p)), PAGE_KERNEL);
		set_pte_at(&init_mm, addr, pte, entry);
	} while (pte++, addr += PAGE_SIZE, addr != end);
}

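/*
 * Same idea one level up: try a 1GiB page (X86_FEATURE_GBPAGES) when the
 * range covers exactly one aligned PUD, otherwise descend into
 * kasan_populate_pmd() for each PMD that is not already a leaf.
 */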
static void __init kasan_populate_pud(pud_t *pud, unsigned long addr,
				      unsigned long end, int nid)
{
	pmd_t *pmd;
	unsigned long next;

	if (pud_none(*pud)) {
		void *p;

		if (boot_cpu_has(X86_FEATURE_GBPAGES) &&
		    ((end - addr) == PUD_SIZE) &&
		    IS_ALIGNED(addr, PUD_SIZE)) {
			p = early_alloc(PUD_SIZE, nid, false);
			if (p && pud_set_huge(pud, __pa(p), PAGE_KERNEL))
				return;
			memblock_free(p, PUD_SIZE);
		}

		p = early_alloc(PAGE_SIZE, nid, true);
		pud_populate(&init_mm, pud, p);
	}

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (!pmd_leaf(*pmd))
			kasan_populate_pmd(pmd, addr, next, nid);
	} while (pmd++, addr = next, addr != end);
}

static void __init kasan_populate_p4d(p4d_t *p4d, unsigned long addr,
				      unsigned long end, int nid)
{
	pud_t *pud;
	unsigned long next;

	if (p4d_none(*p4d)) {
		void *p = early_alloc(PAGE_SIZE, nid, true);

		p4d_populate(&init_mm, p4d, p);
	}

	pud = pud_offset(p4d, addr);
	do {
		next = pud_addr_end(addr, end);
		if (!pud_leaf(*pud))
			kasan_populate_pud(pud, addr, next, nid);
	} while (pud++, addr = next, addr != end);
}

static void __init kasan_populate_pgd(pgd_t *pgd, unsigned long addr,
				      unsigned long end, int nid)
{
	void *p;
	p4d_t *p4d;
	unsigned long next;

	if (pgd_none(*pgd)) {
		p = early_alloc(PAGE_SIZE, nid, true);
		pgd_populate(&init_mm, pgd, p);
	}

	p4d = p4d_offset(pgd, addr);
	do {
		next = p4d_addr_end(addr, end);
		kasan_populate_p4d(p4d, addr, next, nid);
	} while (p4d++, addr = next, addr != end);
}

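/*
 * Populate shadow memory for [addr, end) with real, writable pages. This is
 * the entry point of the populate hierarchy above: it walks PGD entries and
 * recurses down to the PTE level, preferring huge mappings where possible.
 */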
static void __init kasan_populate_shadow(unsigned long addr, unsigned long end,
					 int nid)
{
	pgd_t *pgd;
	unsigned long next;

	addr = addr & PAGE_MASK;
	end = round_up(end, PAGE_SIZE);
	pgd = pgd_offset_k(addr);
	do {
		next = pgd_addr_end(addr, end);
		kasan_populate_pgd(pgd, addr, next, nid);
	} while (pgd++, addr = next, addr != end);
}

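/*
 * Populate the shadow for one range of directly-mapped physical memory
 * (one entry of pfn_mapped[]), allocating the shadow on the node that backs
 * the start of the range.
 */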
static void __init map_range(struct range *range)
{
	unsigned long start;
	unsigned long end;

	start = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->start));
	end = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->end));

	kasan_populate_shadow(start, end, early_pfn_to_nid(range->start));
}

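/*
 * Unmap the early shadow for [start, end) so it can be rebuilt with real
 * shadow memory. The tail beyond the last fully covered PGD entry is cleared
 * one level down (p4d) because that PGD is shared with other kernel mappings.
 */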
static void __init clear_pgds(unsigned long start,
			unsigned long end)
{
	pgd_t *pgd;
	/* See comment in kasan_init() */
	unsigned long pgd_end = end & PGDIR_MASK;

	for (; start < pgd_end; start += PGDIR_SIZE) {
		pgd = pgd_offset_k(start);
		/*
		 * With folded p4d, pgd_clear() is a nop, use p4d_clear()
		 * instead.
		 */
		if (pgtable_l5_enabled())
			pgd_clear(pgd);
		else
			p4d_clear(p4d_offset(pgd, start));
	}

	pgd = pgd_offset_k(start);
	for (; start < end; start += P4D_SIZE)
		p4d_clear(p4d_offset(pgd, start));
}

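/*
 * Early variant of p4d_offset(): with 4-level paging the p4d is folded into
 * the pgd; with 5-level paging the p4d page is reached by translating the
 * pgd's physical address through the kernel text mapping
 * (__START_KERNEL_map - phys_base), since this can run before the direct
 * mapping is usable.
 */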
static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr)
{
	unsigned long p4d;

	if (!pgtable_l5_enabled())
		return (p4d_t *)pgd;

	p4d = pgd_val(*pgd) & PTE_PFN_MASK;
	p4d += __START_KERNEL_map - phys_base;
	return (p4d_t *)p4d + p4d_index(addr);
}

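/*
 * Point every p4d entry covering [addr, end) at the static early shadow pud
 * table. Nothing is allocated here: the entire early shadow is backed by the
 * shared kasan_early_shadow_page.
 */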
static void __init kasan_early_p4d_populate(pgd_t *pgd,
		unsigned long addr,
		unsigned long end)
{
	pgd_t pgd_entry;
	p4d_t *p4d, p4d_entry;
	unsigned long next;

	if (pgd_none(*pgd)) {
		pgd_entry = __pgd(_KERNPG_TABLE |
					__pa_nodebug(kasan_early_shadow_p4d));
		set_pgd(pgd, pgd_entry);
	}

	p4d = early_p4d_offset(pgd, addr);
	do {
		next = p4d_addr_end(addr, end);

		if (!p4d_none(*p4d))
			continue;

		p4d_entry = __p4d(_KERNPG_TABLE |
					__pa_nodebug(kasan_early_shadow_pud));
		set_p4d(p4d, p4d_entry);
	} while (p4d++, addr = next, addr != end && p4d_none(*p4d));
}

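/*
 * Cover the whole KASAN shadow region with early shadow page tables in the
 * given top-level page table. Called from kasan_early_init() for both
 * early_top_pgt and init_top_pgt.
 */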
static void __init kasan_map_early_shadow(pgd_t *pgd)
{
	/* See comment in kasan_init() */
	unsigned long addr = KASAN_SHADOW_START & PGDIR_MASK;
	unsigned long end = KASAN_SHADOW_END;
	unsigned long next;

	pgd += pgd_index(addr);
	do {
		next = pgd_addr_end(addr, end);
		kasan_early_p4d_populate(pgd, addr, next);
	} while (pgd++, addr = next, addr != end);
}

static void __init kasan_shallow_populate_p4ds(pgd_t *pgd,
					       unsigned long addr,
					       unsigned long end)
{
	p4d_t *p4d;
	unsigned long next;
	void *p;

	p4d = p4d_offset(pgd, addr);
	do {
		next = p4d_addr_end(addr, end);

		if (p4d_none(*p4d)) {
			p = early_alloc(PAGE_SIZE, NUMA_NO_NODE, true);
			p4d_populate(&init_mm, p4d, p);
		}
	} while (p4d++, addr = next, addr != end);
}

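/*
 * For CONFIG_KASAN_VMALLOC: populate only the top levels (pgds/p4ds) of the
 * shadow for the vmalloc area. The lower levels are filled in on demand as
 * vmalloc mappings are created.
 */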
static void __init kasan_shallow_populate_pgds(void *start, void *end)
{
	unsigned long addr, next;
	pgd_t *pgd;
	void *p;

	addr = (unsigned long)start;
	pgd = pgd_offset_k(addr);
	do {
		next = pgd_addr_end(addr, (unsigned long)end);

		if (pgd_none(*pgd)) {
			p = early_alloc(PAGE_SIZE, NUMA_NO_NODE, true);
			pgd_populate(&init_mm, pgd, p);
		}

		/*
		 * We need to populate the p4ds so they are synced when
		 * running in four-level mode - see sync_global_pgds_l4().
		 */
		kasan_shallow_populate_p4ds(pgd, addr, next);
	} while (pgd++, addr = next, addr != (unsigned long)end);
}

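/*
 * Called early in boot, long before kasan_init(): fill the static early
 * shadow page tables so that every PTE points at kasan_early_shadow_page and
 * hook them into both early_top_pgt and init_top_pgt. Until kasan_init()
 * installs the real shadow, all shadow accesses hit that single shared page.
 */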
void __init kasan_early_init(void)
{
	int i;
	pteval_t pte_val = __pa_nodebug(kasan_early_shadow_page) |
				__PAGE_KERNEL | _PAGE_ENC;
	pmdval_t pmd_val = __pa_nodebug(kasan_early_shadow_pte) | _KERNPG_TABLE;
	pudval_t pud_val = __pa_nodebug(kasan_early_shadow_pmd) | _KERNPG_TABLE;
	p4dval_t p4d_val = __pa_nodebug(kasan_early_shadow_pud) | _KERNPG_TABLE;

	/* Mask out unsupported __PAGE_KERNEL bits: */
	pte_val &= __default_kernel_pte_mask;
	pmd_val &= __default_kernel_pte_mask;
	pud_val &= __default_kernel_pte_mask;
	p4d_val &= __default_kernel_pte_mask;

	for (i = 0; i < PTRS_PER_PTE; i++)
		kasan_early_shadow_pte[i] = __pte(pte_val);

	for (i = 0; i < PTRS_PER_PMD; i++)
		kasan_early_shadow_pmd[i] = __pmd(pmd_val);

	for (i = 0; i < PTRS_PER_PUD; i++)
		kasan_early_shadow_pud[i] = __pud(pud_val);

	for (i = 0; pgtable_l5_enabled() && i < PTRS_PER_P4D; i++)
		kasan_early_shadow_p4d[i] = __p4d(p4d_val);

	kasan_map_early_shadow(early_top_pgt);
	kasan_map_early_shadow(init_top_pgt);
}

static unsigned long kasan_mem_to_shadow_align_down(unsigned long va)
{
	unsigned long shadow = (unsigned long)kasan_mem_to_shadow((void *)va);

	return round_down(shadow, PAGE_SIZE);
}

static unsigned long kasan_mem_to_shadow_align_up(unsigned long va)
{
	unsigned long shadow = (unsigned long)kasan_mem_to_shadow((void *)va);

	return round_up(shadow, PAGE_SIZE);
}

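/*
 * Populate real shadow for an arbitrary kernel virtual address range,
 * rounding the shadow out to page boundaries. Used by later boot code (e.g.
 * the CPU entry area setup) for ranges that are mapped after kasan_init().
 */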
void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid)
{
	unsigned long shadow_start, shadow_end;

	shadow_start = kasan_mem_to_shadow_align_down((unsigned long)va);
	shadow_end = kasan_mem_to_shadow_align_up((unsigned long)va + size);
	kasan_populate_shadow(shadow_start, shadow_end, nid);
}

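/*
 * Main shadow setup, run once the direct mapping of physical memory exists:
 * tear down the early shadow, then map real shadow for the ranges KASAN must
 * track (the direct map, the kernel image, the shared part of the CPU entry
 * area) and point the remaining gaps at the zero shadow page via
 * kasan_populate_early_shadow().
 */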
void __init kasan_init(void)
{
	unsigned long shadow_cea_begin, shadow_cea_per_cpu_begin, shadow_cea_end;
	int i;

	memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt));

	/*
	 * We use the same shadow offset for 4- and 5-level paging to
	 * facilitate boot-time switching between paging modes.
	 * As a result, in 5-level paging mode KASAN_SHADOW_START and
	 * KASAN_SHADOW_END are not aligned to the PGD boundary.
	 *
	 * KASAN_SHADOW_START doesn't share a PGD with anything else.
	 * We claim the whole PGD entry to make things easier.
	 *
	 * KASAN_SHADOW_END lands in the last PGD entry and collides with a
	 * bunch of things like kernel code, modules, EFI mappings, etc.
	 * We need to take extra steps to not overwrite them.
	 */
	if (pgtable_l5_enabled()) {
		void *ptr;

		ptr = (void *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END));
		memcpy(tmp_p4d_table, (void *)ptr, sizeof(tmp_p4d_table));
		set_pgd(&early_top_pgt[pgd_index(KASAN_SHADOW_END)],
				__pgd(__pa(tmp_p4d_table) | _KERNPG_TABLE));
	}

	load_cr3(early_top_pgt);
	__flush_tlb_all();

	clear_pgds(KASAN_SHADOW_START & PGDIR_MASK, KASAN_SHADOW_END);

	kasan_populate_early_shadow((void *)(KASAN_SHADOW_START & PGDIR_MASK),
			kasan_mem_to_shadow((void *)PAGE_OFFSET));

	for (i = 0; i < E820_MAX_ENTRIES; i++) {
		if (pfn_mapped[i].end == 0)
			break;

		map_range(&pfn_mapped[i]);
	}

	shadow_cea_begin = kasan_mem_to_shadow_align_down(CPU_ENTRY_AREA_BASE);
	shadow_cea_per_cpu_begin = kasan_mem_to_shadow_align_up(CPU_ENTRY_AREA_PER_CPU);
	shadow_cea_end = kasan_mem_to_shadow_align_up(CPU_ENTRY_AREA_BASE +
						      CPU_ENTRY_AREA_MAP_SIZE);

	kasan_populate_early_shadow(
		kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
		kasan_mem_to_shadow((void *)VMALLOC_START));

	/*
	 * If we're in full vmalloc mode, don't back vmalloc space with early
	 * shadow pages. Instead, prepopulate pgds/p4ds so they are synced to
	 * the global table and we can populate the lower levels on demand.
	 */
	if (IS_ENABLED(CONFIG_KASAN_VMALLOC))
		kasan_shallow_populate_pgds(
			kasan_mem_to_shadow((void *)VMALLOC_START),
			kasan_mem_to_shadow((void *)VMALLOC_END));
	else
		kasan_populate_early_shadow(
			kasan_mem_to_shadow((void *)VMALLOC_START),
			kasan_mem_to_shadow((void *)VMALLOC_END));

	kasan_populate_early_shadow(
		kasan_mem_to_shadow((void *)VMALLOC_END + 1),
		(void *)shadow_cea_begin);

	/*
	 * Populate the shadow for the shared portion of the CPU entry area.
	 * Shadows for the per-CPU areas are mapped on-demand, as each CPU's
	 * area is randomly placed somewhere in the 512GiB range and mapping
	 * the entire 512GiB range is prohibitively expensive.
	 */
	kasan_populate_shadow(shadow_cea_begin,
			      shadow_cea_per_cpu_begin, 0);

	kasan_populate_early_shadow((void *)shadow_cea_end,
			kasan_mem_to_shadow((void *)__START_KERNEL_map));

	kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
			      (unsigned long)kasan_mem_to_shadow(_end),
			      early_pfn_to_nid(__pa(_stext)));

	kasan_populate_early_shadow(kasan_mem_to_shadow((void *)MODULES_END),
					(void *)KASAN_SHADOW_END);

	load_cr3(init_top_pgt);
	__flush_tlb_all();

	/*
	 * kasan_early_shadow_page has been used as early shadow memory, thus
	 * it may contain some garbage. Now we can clear and write protect it,
	 * since after the TLB flush no one should write to it.
	 */
	memset(kasan_early_shadow_page, 0, PAGE_SIZE);
	for (i = 0; i < PTRS_PER_PTE; i++) {
		pte_t pte;
		pgprot_t prot;

		prot = __pgprot(__PAGE_KERNEL_RO | _PAGE_ENC);
		pgprot_val(prot) &= __default_kernel_pte_mask;

		pte = __pte(__pa(kasan_early_shadow_page) | pgprot_val(prot));
		set_pte(&kasan_early_shadow_pte[i], pte);
	}
	/* Flush TLBs again to be sure that write protection is applied. */
	__flush_tlb_all();

	init_task.kasan_depth = 0;
	pr_info("KernelAddressSanitizer initialized\n");
}