1 | /* SPDX-License-Identifier: GPL-2.0-only */ |
2 | /* |
3 | * Low-level CPU initialisation |
4 | * Based on arch/arm/kernel/head.S |
5 | * |
6 | * Copyright (C) 1994-2002 Russell King |
7 | * Copyright (C) 2003-2012 ARM Ltd. |
8 | * Authors: Catalin Marinas <catalin.marinas@arm.com> |
9 | * Will Deacon <will.deacon@arm.com> |
10 | */ |
11 | |
12 | #include <linux/linkage.h> |
13 | #include <linux/init.h> |
14 | #include <linux/pgtable.h> |
15 | |
16 | #include <asm/asm_pointer_auth.h> |
17 | #include <asm/assembler.h> |
18 | #include <asm/boot.h> |
19 | #include <asm/bug.h> |
20 | #include <asm/ptrace.h> |
21 | #include <asm/asm-offsets.h> |
22 | #include <asm/cache.h> |
23 | #include <asm/cputype.h> |
24 | #include <asm/el2_setup.h> |
25 | #include <asm/elf.h> |
26 | #include <asm/image.h> |
27 | #include <asm/kernel-pgtable.h> |
28 | #include <asm/kvm_arm.h> |
29 | #include <asm/memory.h> |
30 | #include <asm/pgtable-hwdef.h> |
31 | #include <asm/page.h> |
32 | #include <asm/scs.h> |
33 | #include <asm/smp.h> |
34 | #include <asm/sysreg.h> |
35 | #include <asm/thread_info.h> |
36 | #include <asm/virt.h> |
37 | |
38 | #include "efi-header.S" |
39 | |
40 | #if (PAGE_OFFSET & 0x1fffff) != 0 |
41 | #error PAGE_OFFSET must be at least 2MB aligned |
42 | #endif |
43 | |
44 | /* |
45 | * Kernel startup entry point. |
46 | * --------------------------- |
47 | * |
48 | * The requirements are: |
49 | * MMU = off, D-cache = off, I-cache = on or off, |
50 | * x0 = physical address to the FDT blob. |
51 | * |
52 | * Note that the callee-saved registers are used for storing variables |
53 | * that are useful before the MMU is enabled. The allocations are described |
54 | * in the entry routines. |
55 | */ |
56 | __HEAD |
57 | /* |
58 | * DO NOT MODIFY. Image header expected by Linux boot-loaders. |
59 | */ |
	efi_signature_nop			// special NOP to identify as PE/COFF executable
61 | b primary_entry // branch to kernel start, magic |
62 | .quad 0 // Image load offset from start of RAM, little-endian |
63 | le64sym _kernel_size_le // Effective size of kernel image, little-endian |
64 | le64sym _kernel_flags_le // Informative flags, little-endian |
65 | .quad 0 // reserved |
66 | .quad 0 // reserved |
67 | .quad 0 // reserved |
68 | .ascii ARM64_IMAGE_MAGIC // Magic number |
69 | .long .Lpe_header_offset // Offset to the PE header. |
70 | |
71 | __EFI_PE_HEADER |
72 | |
73 | .section ".idmap.text" ,"a" |
74 | |
75 | /* |
76 | * The following callee saved general purpose registers are used on the |
77 | * primary lowlevel boot path: |
78 | * |
79 | * Register Scope Purpose |
80 | * x19 primary_entry() .. start_kernel() whether we entered with the MMU on |
81 | * x20 primary_entry() .. __primary_switch() CPU boot mode |
82 | * x21 primary_entry() .. start_kernel() FDT pointer passed at boot in x0 |
83 | * x22 create_idmap() .. start_kernel() ID map VA of the DT blob |
84 | * x23 primary_entry() .. start_kernel() physical misalignment/KASLR offset |
85 | * x24 __primary_switch() linear map KASLR seed |
86 | * x25 primary_entry() .. start_kernel() supported VA size |
87 | * x28 create_idmap() callee preserved temp register |
88 | */ |
89 | SYM_CODE_START(primary_entry) |
90 | bl record_mmu_state |
91 | bl preserve_boot_args |
92 | bl create_idmap |
93 | |
94 | /* |
95 | * If we entered with the MMU and caches on, clean the ID mapped part |
96 | * of the primary boot code to the PoC so we can safely execute it with |
97 | * the MMU off. |
98 | */ |
99 | cbz x19, 0f |
100 | adrp x0, __idmap_text_start |
101 | adr_l x1, __idmap_text_end |
102 | adr_l x2, dcache_clean_poc |
103 | blr x2 |
104 | 0: mov x0, x19 |
105 | bl init_kernel_el // w0=cpu_boot_mode |
106 | mov x20, x0 |
107 | |
108 | /* |
109 | * The following calls CPU setup code, see arch/arm64/mm/proc.S for |
110 | * details. |
111 | * On return, the CPU will be ready for the MMU to be turned on and |
112 | * the TCR will have been set. |
113 | */ |
114 | #if VA_BITS > 48 |
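	/*
	 * Probe ID_AA64MMFR2_EL1.VARange and record in x25 the VA size we
	 * will actually use: the full VA_BITS if the CPU implements the
	 * larger VA range, VA_BITS_MIN otherwise.
	 */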
115 | mrs_s x0, SYS_ID_AA64MMFR2_EL1 |
116 | tst x0, ID_AA64MMFR2_EL1_VARange_MASK |
117 | mov x0, #VA_BITS |
118 | mov x25, #VA_BITS_MIN |
119 | csel x25, x25, x0, eq |
120 | mov x0, x25 |
121 | #endif |
122 | bl __cpu_setup // initialise processor |
123 | b __primary_switch |
124 | SYM_CODE_END(primary_entry) |
125 | |
126 | __INIT |
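/*
 * Record in x19 whether we entered with the MMU and D-cache enabled: x19 is
 * left non-zero only if both SCTLR_ELx.M and SCTLR_ELx.C were set at the
 * exception level we were entered at. If the SCTLR_ELx.EE setting does not
 * match the kernel's endianness, flip EE, disable the MMU and return with
 * x19 == 0.
 */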
127 | SYM_CODE_START_LOCAL(record_mmu_state) |
128 | mrs x19, CurrentEL |
129 | cmp x19, #CurrentEL_EL2 |
130 | mrs x19, sctlr_el1 |
131 | b.ne 0f |
132 | mrs x19, sctlr_el2 |
133 | 0: |
134 | CPU_LE( tbnz x19, #SCTLR_ELx_EE_SHIFT, 1f ) |
135 | CPU_BE( tbz x19, #SCTLR_ELx_EE_SHIFT, 1f ) |
136 | tst x19, #SCTLR_ELx_C // Z := (C == 0) |
137 | and x19, x19, #SCTLR_ELx_M // isolate M bit |
138 | csel x19, xzr, x19, eq // clear x19 if Z |
139 | ret |
140 | |
141 | /* |
142 | * Set the correct endianness early so all memory accesses issued |
143 | * before init_kernel_el() occur in the correct byte order. Note that |
144 | * this means the MMU must be disabled, or the active ID map will end |
145 | * up getting interpreted with the wrong byte order. |
146 | */ |
147 | 1: eor x19, x19, #SCTLR_ELx_EE |
148 | bic x19, x19, #SCTLR_ELx_M |
149 | b.ne 2f |
150 | pre_disable_mmu_workaround |
151 | msr sctlr_el2, x19 |
152 | b 3f |
153 | 2: pre_disable_mmu_workaround |
154 | msr sctlr_el1, x19 |
155 | 3: isb |
156 | mov x19, xzr |
157 | ret |
158 | SYM_CODE_END(record_mmu_state) |
159 | |
160 | /* |
161 | * Preserve the arguments passed by the bootloader in x0 .. x3 |
162 | */ |
163 | SYM_CODE_START_LOCAL(preserve_boot_args) |
164 | mov x21, x0 // x21=FDT |
165 | |
166 | adr_l x0, boot_args // record the contents of |
167 | stp x21, x1, [x0] // x0 .. x3 at kernel entry |
168 | stp x2, x3, [x0, #16] |
169 | |
170 | cbnz x19, 0f // skip cache invalidation if MMU is on |
171 | dmb sy // needed before dc ivac with |
172 | // MMU off |
173 | |
174 | add x1, x0, #0x20 // 4 x 8 bytes |
175 | b dcache_inval_poc // tail call |
176 | 0: str_l x19, mmu_enabled_at_boot, x0 |
177 | ret |
178 | SYM_CODE_END(preserve_boot_args) |
179 | |
180 | SYM_FUNC_START_LOCAL(clear_page_tables) |
181 | /* |
182 | * Clear the init page tables. |
183 | */ |
184 | adrp x0, init_pg_dir |
185 | adrp x1, init_pg_end |
186 | sub x2, x1, x0 |
187 | mov x1, xzr |
188 | b __pi_memset // tail call |
189 | SYM_FUNC_END(clear_page_tables) |
190 | |
191 | /* |
 * Macro to populate page table entries. These entries can be pointers to the
 * next level, or last level entries pointing to physical memory.
194 | * |
195 | * tbl: page table address |
196 | * rtbl: pointer to page table or physical memory |
197 | * index: start index to write |
198 | * eindex: end index to write - [index, eindex] written to |
199 | * flags: flags for pagetable entry to or in |
200 | * inc: increment to rtbl between each entry |
201 | * tmp1: temporary variable |
202 | * |
203 | * Preserves: tbl, eindex, flags, inc |
204 | * Corrupts: index, tmp1 |
205 | * Returns: rtbl |
206 | */ |
207 | .macro populate_entries, tbl, rtbl, index, eindex, flags, inc, tmp1 |
208 | .Lpe\@: phys_to_pte \tmp1, \rtbl |
209 | orr \tmp1, \tmp1, \flags // tmp1 = table entry |
210 | str \tmp1, [\tbl, \index, lsl #3] |
211 | add \rtbl, \rtbl, \inc // rtbl = pa next level |
212 | add \index, \index, #1 |
213 | cmp \index, \eindex |
214 | b.ls .Lpe\@ |
215 | .endm |
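
/*
 * For example (illustrative), populating indices 1 .. 2 of a table at 'tbl'
 * with next-level tables allocated from 'rtbl' (inc == PAGE_SIZE) stores
 *
 *   tbl[1] = phys_to_pte(rtbl)             | flags
 *   tbl[2] = phys_to_pte(rtbl + PAGE_SIZE) | flags
 *
 * and returns rtbl advanced by 2 * PAGE_SIZE, i.e. just past the last
 * next-level table that was handed out.
 */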
216 | |
217 | /* |
218 | * Compute indices of table entries from virtual address range. If multiple entries |
219 | * were needed in the previous page table level then the next page table level is assumed |
220 | * to be composed of multiple pages. (This effectively scales the end index). |
221 | * |
222 | * vstart: virtual address of start of range |
223 | * vend: virtual address of end of range - we map [vstart, vend] |
224 | * shift: shift used to transform virtual address into index |
225 | * order: #imm 2log(number of entries in page table) |
226 | * istart: index in table corresponding to vstart |
227 | * iend: index in table corresponding to vend |
228 | * count: On entry: how many extra entries were required in previous level, scales |
229 | * our end index. |
230 | * On exit: returns how many extra entries required for next page table level |
231 | * |
232 | * Preserves: vstart, vend |
233 | * Returns: istart, iend, count |
234 | */ |
235 | .macro compute_indices, vstart, vend, shift, order, istart, iend, count |
236 | ubfx \istart, \vstart, \shift, \order |
237 | ubfx \iend, \vend, \shift, \order |
238 | add \iend, \iend, \count, lsl \order |
239 | sub \count, \iend, \istart |
240 | .endm |
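
/*
 * Worked example (illustrative, 4K pages): with shift == 21 and order == 9,
 * computing the indices for [0x40200000, 0x40500000] with count == 0 on
 * entry gives istart == 1 and iend == 2, i.e. two entries at this level,
 * and count returns 1 extra entry for the next level down.
 */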
241 | |
242 | /* |
243 | * Map memory for specified virtual address range. Each level of page table needed supports |
244 | * multiple entries. If a level requires n entries the next page table level is assumed to be |
245 | * formed from n pages. |
246 | * |
247 | * tbl: location of page table |
248 | * rtbl: address to be used for first level page table entry (typically tbl + PAGE_SIZE) |
249 | * vstart: virtual address of start of range |
250 | * vend: virtual address of end of range - we map [vstart, vend - 1] |
251 | * flags: flags to use to map last level entries |
252 | * phys: physical address corresponding to vstart - physical memory is contiguous |
253 | * order: #imm 2log(number of entries in PGD table) |
254 | * |
255 | * If extra_shift is set, an extra level will be populated if the end address does |
256 | * not fit in 'extra_shift' bits. This assumes vend is in the TTBR0 range. |
257 | * |
258 | * Temporaries: istart, iend, tmp, count, sv - these need to be different registers |
259 | * Preserves: vstart, flags |
260 | * Corrupts: tbl, rtbl, vend, istart, iend, tmp, count, sv |
261 | */ |
262 | .macro map_memory, tbl, rtbl, vstart, vend, flags, phys, order, istart, iend, tmp, count, sv, extra_shift |
263 | sub \vend, \vend, #1 |
264 | add \rtbl, \tbl, #PAGE_SIZE |
265 | mov \count, #0 |
266 | |
267 | .ifnb \extra_shift |
268 | tst \vend, #~((1 << (\extra_shift)) - 1) |
269 | b.eq .L_\@ |
270 | compute_indices \vstart, \vend, #\extra_shift, #(PAGE_SHIFT - 3), \istart, \iend, \count |
271 | mov \sv, \rtbl |
272 | populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp |
273 | mov \tbl, \sv |
274 | .endif |
275 | .L_\@: |
276 | compute_indices \vstart, \vend, #PGDIR_SHIFT, #\order, \istart, \iend, \count |
277 | mov \sv, \rtbl |
278 | populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp |
279 | mov \tbl, \sv |
280 | |
281 | #if SWAPPER_PGTABLE_LEVELS > 3 |
282 | compute_indices \vstart, \vend, #PUD_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count |
283 | mov \sv, \rtbl |
284 | populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp |
285 | mov \tbl, \sv |
286 | #endif |
287 | |
288 | #if SWAPPER_PGTABLE_LEVELS > 2 |
289 | compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count |
290 | mov \sv, \rtbl |
291 | populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp |
292 | mov \tbl, \sv |
293 | #endif |
294 | |
295 | compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count |
296 | bic \rtbl, \phys, #SWAPPER_BLOCK_SIZE - 1 |
297 | populate_entries \tbl, \rtbl, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp |
298 | .endm |
299 | |
300 | /* |
301 | * Remap a subregion created with the map_memory macro with modified attributes |
302 | * or output address. The entire remapped region must have been covered in the |
303 | * invocation of map_memory. |
304 | * |
305 | * x0: last level table address (returned in first argument to map_memory) |
306 | * x1: start VA of the existing mapping |
307 | * x2: start VA of the region to update |
308 | * x3: end VA of the region to update (exclusive) |
309 | * x4: start PA associated with the region to update |
310 | * x5: attributes to set on the updated region |
311 | * x6: order of the last level mappings |
312 | */ |
313 | SYM_FUNC_START_LOCAL(remap_region) |
314 | sub x3, x3, #1 // make end inclusive |
315 | |
316 | // Get the index offset for the start of the last level table |
317 | lsr x1, x1, x6 |
318 | bfi x1, xzr, #0, #PAGE_SHIFT - 3 |
319 | |
320 | // Derive the start and end indexes into the last level table |
321 | // associated with the provided region |
322 | lsr x2, x2, x6 |
323 | lsr x3, x3, x6 |
324 | sub x2, x2, x1 |
325 | sub x3, x3, x1 |
326 | |
327 | mov x1, #1 |
328 | lsl x6, x1, x6 // block size at this level |
329 | |
330 | populate_entries x0, x4, x2, x3, x5, x6, x7 |
331 | ret |
332 | SYM_FUNC_END(remap_region) |
333 | |
334 | SYM_FUNC_START_LOCAL(create_idmap) |
335 | mov x28, lr |
336 | /* |
337 | * The ID map carries a 1:1 mapping of the physical address range |
338 | * covered by the loaded image, which could be anywhere in DRAM. This |
339 | * means that the required size of the VA (== PA) space is decided at |
340 | * boot time, and could be more than the configured size of the VA |
341 | * space for ordinary kernel and user space mappings. |
342 | * |
343 | * There are three cases to consider here: |
344 | * - 39 <= VA_BITS < 48, and the ID map needs up to 48 VA bits to cover |
345 | * the placement of the image. In this case, we configure one extra |
346 | * level of translation on the fly for the ID map only. (This case |
347 | * also covers 42-bit VA/52-bit PA on 64k pages). |
348 | * |
349 | * - VA_BITS == 48, and the ID map needs more than 48 VA bits. This can |
350 | * only happen when using 64k pages, in which case we need to extend |
351 | * the root level table rather than add a level. Note that we can |
352 | * treat this case as 'always extended' as long as we take care not |
353 | * to program an unsupported T0SZ value into the TCR register. |
354 | * |
355 | * - Combinations that would require two additional levels of |
356 | * translation are not supported, e.g., VA_BITS==36 on 16k pages, or |
357 | * VA_BITS==39/4k pages with 5-level paging, where the input address |
358 | * requires more than 47 or 48 bits, respectively. |
359 | */ |
360 | #if (VA_BITS < 48) |
361 | #define IDMAP_PGD_ORDER (VA_BITS - PGDIR_SHIFT) |
#define EXTRA_SHIFT	(PGDIR_SHIFT + PAGE_SHIFT - 3)
363 | |
364 | /* |
365 | * If VA_BITS < 48, we have to configure an additional table level. |
366 | * First, we have to verify our assumption that the current value of |
367 | * VA_BITS was chosen such that all translation levels are fully |
368 | * utilised, and that lowering T0SZ will always result in an additional |
369 | * translation level to be configured. |
370 | */ |
371 | #if VA_BITS != EXTRA_SHIFT |
372 | #error "Mismatch between VA_BITS and page size/number of translation levels" |
373 | #endif |
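/*
 * For example, with 4K pages and VA_BITS == 39, PGDIR_SHIFT == 30 and hence
 * EXTRA_SHIFT == 30 + 12 - 3 == 39: covering more than VA_BITS of ID map VA
 * space then takes exactly one additional level above the existing PGD.
 */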
374 | #else |
375 | #define IDMAP_PGD_ORDER (PHYS_MASK_SHIFT - PGDIR_SHIFT) |
376 | #define EXTRA_SHIFT |
377 | /* |
378 | * If VA_BITS == 48, we don't have to configure an additional |
379 | * translation level, but the top-level table has more entries. |
380 | */ |
381 | #endif |
382 | adrp x0, init_idmap_pg_dir |
383 | adrp x3, _text |
384 | adrp x6, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE |
385 | mov_q x7, SWAPPER_RX_MMUFLAGS |
386 | |
387 | map_memory x0, x1, x3, x6, x7, x3, IDMAP_PGD_ORDER, x10, x11, x12, x13, x14, EXTRA_SHIFT |
388 | |
389 | /* Remap the kernel page tables r/w in the ID map */ |
390 | adrp x1, _text |
391 | adrp x2, init_pg_dir |
392 | adrp x3, init_pg_end |
393 | bic x4, x2, #SWAPPER_BLOCK_SIZE - 1 |
394 | mov_q x5, SWAPPER_RW_MMUFLAGS |
395 | mov x6, #SWAPPER_BLOCK_SHIFT |
396 | bl remap_region |
397 | |
398 | /* Remap the FDT after the kernel image */ |
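	/*
	 * x22 becomes the ID map VA of the FDT: the first SWAPPER_BLOCK_SIZE
	 * aligned window after the kernel image, plus the offset of the FDT
	 * within its own block, so the blob keeps its alignment inside the
	 * remapped window.
	 */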
399 | adrp x1, _text |
400 | adrp x22, _end + SWAPPER_BLOCK_SIZE |
401 | bic x2, x22, #SWAPPER_BLOCK_SIZE - 1 |
402 | bfi x22, x21, #0, #SWAPPER_BLOCK_SHIFT // remapped FDT address |
403 | add x3, x2, #MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE |
404 | bic x4, x21, #SWAPPER_BLOCK_SIZE - 1 |
405 | mov_q x5, SWAPPER_RW_MMUFLAGS |
406 | mov x6, #SWAPPER_BLOCK_SHIFT |
407 | bl remap_region |
408 | |
409 | /* |
410 | * Since the page tables have been populated with non-cacheable |
411 | * accesses (MMU disabled), invalidate those tables again to |
412 | * remove any speculatively loaded cache lines. |
413 | */ |
414 | cbnz x19, 0f // skip cache invalidation if MMU is on |
415 | dmb sy |
416 | |
417 | adrp x0, init_idmap_pg_dir |
418 | adrp x1, init_idmap_pg_end |
419 | bl dcache_inval_poc |
420 | 0: ret x28 |
421 | SYM_FUNC_END(create_idmap) |
422 | |
423 | SYM_FUNC_START_LOCAL(create_kernel_mapping) |
424 | adrp x0, init_pg_dir |
425 | mov_q x5, KIMAGE_VADDR // compile time __va(_text) |
426 | #ifdef CONFIG_RELOCATABLE |
427 | add x5, x5, x23 // add KASLR displacement |
428 | #endif |
429 | adrp x6, _end // runtime __pa(_end) |
430 | adrp x3, _text // runtime __pa(_text) |
431 | sub x6, x6, x3 // _end - _text |
432 | add x6, x6, x5 // runtime __va(_end) |
433 | mov_q x7, SWAPPER_RW_MMUFLAGS |
434 | |
435 | map_memory x0, x1, x5, x6, x7, x3, (VA_BITS - PGDIR_SHIFT), x10, x11, x12, x13, x14 |
436 | |
437 | dsb ishst // sync with page table walker |
438 | ret |
439 | SYM_FUNC_END(create_kernel_mapping) |
440 | |
441 | /* |
442 | * Initialize CPU registers with task-specific and cpu-specific context. |
443 | * |
444 | * Create a final frame record at task_pt_regs(current)->stackframe, so |
445 | * that the unwinder can identify the final frame record of any task by |
446 | * its location in the task stack. We reserve the entire pt_regs space |
447 | * for consistency with user tasks and kthreads. |
448 | */ |
449 | .macro init_cpu_task tsk, tmp1, tmp2 |
450 | msr sp_el0, \tsk |
451 | |
452 | ldr \tmp1, [\tsk, #TSK_STACK] |
453 | add sp, \tmp1, #THREAD_SIZE |
454 | sub sp, sp, #PT_REGS_SIZE |
455 | |
456 | stp xzr, xzr, [sp, #S_STACKFRAME] |
457 | add x29, sp, #S_STACKFRAME |
458 | |
459 | scs_load_current |
460 | |
461 | adr_l \tmp1, __per_cpu_offset |
462 | ldr w\tmp2, [\tsk, #TSK_TI_CPU] |
463 | ldr \tmp1, [\tmp1, \tmp2, lsl #3] |
464 | set_this_cpu_offset \tmp1 |
465 | .endm |
466 | |
467 | /* |
468 | * The following fragment of code is executed with the MMU enabled. |
469 | * |
470 | * x0 = __pa(KERNEL_START) |
471 | */ |
472 | SYM_FUNC_START_LOCAL(__primary_switched) |
473 | adr_l x4, init_task |
474 | init_cpu_task x4, x5, x6 |
475 | |
476 | adr_l x8, vectors // load VBAR_EL1 with virtual |
477 | msr vbar_el1, x8 // vector table address |
478 | isb |
479 | |
480 | stp x29, x30, [sp, #-16]! |
481 | mov x29, sp |
482 | |
483 | str_l x21, __fdt_pointer, x5 // Save FDT pointer |
484 | |
485 | ldr_l x4, kimage_vaddr // Save the offset between |
486 | sub x4, x4, x0 // the kernel virtual and |
487 | str_l x4, kimage_voffset, x5 // physical mappings |
488 | |
489 | mov x0, x20 |
490 | bl set_cpu_boot_mode_flag |
491 | |
492 | // Clear BSS |
493 | adr_l x0, __bss_start |
494 | mov x1, xzr |
495 | adr_l x2, __bss_stop |
496 | sub x2, x2, x0 |
497 | bl __pi_memset |
498 | dsb ishst // Make zero page visible to PTW |
499 | |
500 | #if VA_BITS > 48 |
501 | adr_l x8, vabits_actual // Set this early so KASAN early init |
502 | str x25, [x8] // ... observes the correct value |
503 | dc civac, x8 // Make visible to booting secondaries |
504 | #endif |
505 | |
506 | #ifdef CONFIG_RANDOMIZE_BASE |
507 | adrp x5, memstart_offset_seed // Save KASLR linear map seed |
508 | strh w24, [x5, :lo12:memstart_offset_seed] |
509 | #endif |
510 | #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) |
511 | bl kasan_early_init |
512 | #endif |
513 | mov x0, x21 // pass FDT address in x0 |
514 | bl early_fdt_map // Try mapping the FDT early |
515 | mov x0, x20 // pass the full boot status |
516 | bl init_feature_override // Parse cpu feature overrides |
517 | #ifdef CONFIG_UNWIND_PATCH_PAC_INTO_SCS |
518 | bl scs_patch_vmlinux |
519 | #endif |
520 | mov x0, x20 |
521 | bl finalise_el2 // Prefer VHE if possible |
522 | ldp x29, x30, [sp], #16 |
523 | bl start_kernel |
524 | ASM_BUG() |
525 | SYM_FUNC_END(__primary_switched) |
526 | |
527 | /* |
528 | * end early head section, begin head code that is also used for |
529 | * hotplug and needs to have the same protections as the text region |
530 | */ |
531 | .section ".idmap.text" ,"a" |
532 | |
533 | /* |
534 | * Starting from EL2 or EL1, configure the CPU to execute at the highest |
535 | * reachable EL supported by the kernel in a chosen default state. If dropping |
536 | * from EL2 to EL1, configure EL2 before configuring EL1. |
537 | * |
538 | * Since we cannot always rely on ERET synchronizing writes to sysregs (e.g. if |
539 | * SCTLR_ELx.EOS is clear), we place an ISB prior to ERET. |
540 | * |
541 | * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in x0 if |
542 | * booted in EL1 or EL2 respectively, with the top 32 bits containing |
543 | * potential context flags. These flags are *not* stored in __boot_cpu_mode. |
544 | * |
545 | * x0: whether we are being called from the primary boot path with the MMU on |
546 | */ |
547 | SYM_FUNC_START(init_kernel_el) |
548 | mrs x1, CurrentEL |
549 | cmp x1, #CurrentEL_EL2 |
550 | b.eq init_el2 |
551 | |
552 | SYM_INNER_LABEL(init_el1, SYM_L_LOCAL) |
553 | mov_q x0, INIT_SCTLR_EL1_MMU_OFF |
554 | pre_disable_mmu_workaround |
555 | msr sctlr_el1, x0 |
556 | isb |
557 | mov_q x0, INIT_PSTATE_EL1 |
558 | msr spsr_el1, x0 |
559 | msr elr_el1, lr |
560 | mov w0, #BOOT_CPU_MODE_EL1 |
561 | eret |
562 | |
563 | SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) |
564 | msr elr_el2, lr |
565 | |
566 | // clean all HYP code to the PoC if we booted at EL2 with the MMU on |
567 | cbz x0, 0f |
568 | adrp x0, __hyp_idmap_text_start |
569 | adr_l x1, __hyp_text_end |
570 | adr_l x2, dcache_clean_poc |
571 | blr x2 |
572 | 0: |
573 | mov_q x0, HCR_HOST_NVHE_FLAGS |
574 | msr hcr_el2, x0 |
575 | isb |
576 | |
577 | init_el2_state |
578 | |
579 | /* Hypervisor stub */ |
580 | adr_l x0, __hyp_stub_vectors |
581 | msr vbar_el2, x0 |
582 | isb |
583 | |
584 | mov_q x1, INIT_SCTLR_EL1_MMU_OFF |
585 | |
586 | /* |
587 | * Fruity CPUs seem to have HCR_EL2.E2H set to RES1, |
588 | * making it impossible to start in nVHE mode. Is that |
589 | * compliant with the architecture? Absolutely not! |
590 | */ |
591 | mrs x0, hcr_el2 |
592 | and x0, x0, #HCR_E2H |
593 | cbz x0, 1f |
594 | |
595 | /* Set a sane SCTLR_EL1, the VHE way */ |
596 | pre_disable_mmu_workaround |
597 | msr_s SYS_SCTLR_EL12, x1 |
598 | mov x2, #BOOT_CPU_FLAG_E2H |
599 | b 2f |
600 | |
601 | 1: |
602 | pre_disable_mmu_workaround |
603 | msr sctlr_el1, x1 |
604 | mov x2, xzr |
605 | 2: |
606 | __init_el2_nvhe_prepare_eret |
607 | |
608 | mov w0, #BOOT_CPU_MODE_EL2 |
609 | orr x0, x0, x2 |
610 | eret |
611 | SYM_FUNC_END(init_kernel_el) |
612 | |
613 | /* |
 * This provides a "holding pen" in which platforms hold all secondary
 * cores until we're ready for them to initialise.
616 | */ |
617 | SYM_FUNC_START(secondary_holding_pen) |
618 | mov x0, xzr |
619 | bl init_kernel_el // w0=cpu_boot_mode |
620 | mrs x2, mpidr_el1 |
621 | mov_q x1, MPIDR_HWID_BITMASK |
622 | and x2, x2, x1 |
623 | adr_l x3, secondary_holding_pen_release |
624 | pen: ldr x4, [x3] |
625 | cmp x4, x2 |
626 | b.eq secondary_startup |
627 | wfe |
628 | b pen |
629 | SYM_FUNC_END(secondary_holding_pen) |
630 | |
631 | /* |
632 | * Secondary entry point that jumps straight into the kernel. Only to |
633 | * be used where CPUs are brought online dynamically by the kernel. |
634 | */ |
635 | SYM_FUNC_START(secondary_entry) |
636 | mov x0, xzr |
637 | bl init_kernel_el // w0=cpu_boot_mode |
638 | b secondary_startup |
639 | SYM_FUNC_END(secondary_entry) |
640 | |
641 | SYM_FUNC_START_LOCAL(secondary_startup) |
642 | /* |
643 | * Common entry point for secondary CPUs. |
644 | */ |
645 | mov x20, x0 // preserve boot mode |
646 | bl __cpu_secondary_check52bitva |
647 | #if VA_BITS > 48 |
648 | ldr_l x0, vabits_actual |
649 | #endif |
650 | bl __cpu_setup // initialise processor |
651 | adrp x1, swapper_pg_dir |
652 | adrp x2, idmap_pg_dir |
653 | bl __enable_mmu |
654 | ldr x8, =__secondary_switched |
655 | br x8 |
656 | SYM_FUNC_END(secondary_startup) |
657 | |
658 | .text |
659 | SYM_FUNC_START_LOCAL(__secondary_switched) |
660 | mov x0, x20 |
661 | bl set_cpu_boot_mode_flag |
662 | |
663 | mov x0, x20 |
664 | bl finalise_el2 |
665 | |
666 | str_l xzr, __early_cpu_boot_status, x3 |
667 | adr_l x5, vectors |
668 | msr vbar_el1, x5 |
669 | isb |
670 | |
671 | adr_l x0, secondary_data |
672 | ldr x2, [x0, #CPU_BOOT_TASK] |
673 | cbz x2, __secondary_too_slow |
674 | |
675 | init_cpu_task x2, x1, x3 |
676 | |
677 | #ifdef CONFIG_ARM64_PTR_AUTH |
678 | ptrauth_keys_init_cpu x2, x3, x4, x5 |
679 | #endif |
680 | |
681 | bl secondary_start_kernel |
682 | ASM_BUG() |
683 | SYM_FUNC_END(__secondary_switched) |
684 | |
685 | SYM_FUNC_START_LOCAL(__secondary_too_slow) |
686 | wfe |
687 | wfi |
688 | b __secondary_too_slow |
689 | SYM_FUNC_END(__secondary_too_slow) |
690 | |
691 | /* |
692 | * Sets the __boot_cpu_mode flag depending on the CPU boot mode passed |
693 | * in w0. See arch/arm64/include/asm/virt.h for more info. |
694 | */ |
695 | SYM_FUNC_START_LOCAL(set_cpu_boot_mode_flag) |
696 | adr_l x1, __boot_cpu_mode |
697 | cmp w0, #BOOT_CPU_MODE_EL2 |
698 | b.ne 1f |
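	// booted at EL2: record the mode in the second word, __boot_cpu_mode[1]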
699 | add x1, x1, #4 |
700 | 1: str w0, [x1] // Save CPU boot mode |
701 | ret |
702 | SYM_FUNC_END(set_cpu_boot_mode_flag) |
703 | |
704 | /* |
 * The booting CPU updates the failed status in @__early_cpu_boot_status
 * with the MMU turned off.
707 | * |
 * update_early_cpu_boot_status status, tmp1, tmp2
709 | * - Corrupts tmp1, tmp2 |
710 | * - Writes 'status' to __early_cpu_boot_status and makes sure |
711 | * it is committed to memory. |
712 | */ |
713 | |
714 | .macro update_early_cpu_boot_status status, tmp1, tmp2 |
715 | mov \tmp2, #\status |
716 | adr_l \tmp1, __early_cpu_boot_status |
717 | str \tmp2, [\tmp1] |
718 | dmb sy |
719 | dc ivac, \tmp1 // Invalidate potentially stale cache line |
720 | .endm |
721 | |
722 | /* |
723 | * Enable the MMU. |
724 | * |
725 | * x0 = SCTLR_EL1 value for turning on the MMU. |
726 | * x1 = TTBR1_EL1 value |
727 | * x2 = ID map root table address |
728 | * |
729 | * Returns to the caller via x30/lr. This requires the caller to be covered |
730 | * by the .idmap.text section. |
731 | * |
732 | * Checks if the selected granule size is supported by the CPU. |
 * If it isn't, park the CPU.
734 | */ |
735 | .section ".idmap.text" ,"a" |
736 | SYM_FUNC_START(__enable_mmu) |
737 | mrs x3, ID_AA64MMFR0_EL1 |
738 | ubfx x3, x3, #ID_AA64MMFR0_EL1_TGRAN_SHIFT, 4 |
739 | cmp x3, #ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MIN |
740 | b.lt __no_granule_support |
741 | cmp x3, #ID_AA64MMFR0_EL1_TGRAN_SUPPORTED_MAX |
742 | b.gt __no_granule_support |
743 | phys_to_ttbr x2, x2 |
744 | msr ttbr0_el1, x2 // load TTBR0 |
745 | load_ttbr1 x1, x1, x3 |
746 | |
747 | set_sctlr_el1 x0 |
748 | |
749 | ret |
750 | SYM_FUNC_END(__enable_mmu) |
751 | |
752 | SYM_FUNC_START(__cpu_secondary_check52bitva) |
753 | #if VA_BITS > 48 |
754 | ldr_l x0, vabits_actual |
755 | cmp x0, #52 |
756 | b.ne 2f |
757 | |
758 | mrs_s x0, SYS_ID_AA64MMFR2_EL1 |
759 | and x0, x0, ID_AA64MMFR2_EL1_VARange_MASK |
760 | cbnz x0, 2f |
761 | |
762 | update_early_cpu_boot_status \ |
763 | CPU_STUCK_IN_KERNEL | CPU_STUCK_REASON_52_BIT_VA, x0, x1 |
764 | 1: wfe |
765 | wfi |
766 | b 1b |
767 | |
768 | #endif |
769 | 2: ret |
770 | SYM_FUNC_END(__cpu_secondary_check52bitva) |
771 | |
772 | SYM_FUNC_START_LOCAL(__no_granule_support) |
773 | /* Indicate that this CPU can't boot and is stuck in the kernel */ |
774 | update_early_cpu_boot_status \ |
775 | CPU_STUCK_IN_KERNEL | CPU_STUCK_REASON_NO_GRAN, x1, x2 |
776 | 1: |
777 | wfe |
778 | wfi |
779 | b 1b |
780 | SYM_FUNC_END(__no_granule_support) |
781 | |
782 | #ifdef CONFIG_RELOCATABLE |
783 | SYM_FUNC_START_LOCAL(__relocate_kernel) |
784 | /* |
785 | * Iterate over each entry in the relocation table, and apply the |
786 | * relocations in place. |
787 | */ |
788 | adr_l x9, __rela_start |
789 | adr_l x10, __rela_end |
790 | mov_q x11, KIMAGE_VADDR // default virtual offset |
791 | add x11, x11, x23 // actual virtual offset |
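
	/*
	 * Each Elf64_Rela entry is 24 bytes: r_offset (loaded into x12),
	 * r_info (x13) and r_addend (x14). For R_AARCH64_RELATIVE entries
	 * the 64-bit word at r_offset + x23 is set to r_addend + x23, i.e.
	 * both the location and the value are displaced by the runtime
	 * offset of the image from its default virtual base.
	 */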
792 | |
793 | 0: cmp x9, x10 |
794 | b.hs 1f |
795 | ldp x12, x13, [x9], #24 |
796 | ldr x14, [x9, #-8] |
797 | cmp w13, #R_AARCH64_RELATIVE |
798 | b.ne 0b |
799 | add x14, x14, x23 // relocate |
800 | str x14, [x12, x23] |
801 | b 0b |
802 | |
803 | 1: |
804 | #ifdef CONFIG_RELR |
805 | /* |
806 | * Apply RELR relocations. |
807 | * |
808 | * RELR is a compressed format for storing relative relocations. The |
809 | * encoded sequence of entries looks like: |
	 * [ AAAAAAAA BBBBBBB1 BBBBBBB1 ... AAAAAAAA BBBBBBB1 ... ]
811 | * |
812 | * i.e. start with an address, followed by any number of bitmaps. The |
813 | * address entry encodes 1 relocation. The subsequent bitmap entries |
814 | * encode up to 63 relocations each, at subsequent offsets following |
815 | * the last address entry. |
816 | * |
817 | * The bitmap entries must have 1 in the least significant bit. The |
818 | * assumption here is that an address cannot have 1 in lsb. Odd |
819 | * addresses are not supported. Any odd addresses are stored in the RELA |
820 | * section, which is handled above. |
821 | * |
822 | * Excluding the least significant bit in the bitmap, each non-zero |
823 | * bit in the bitmap represents a relocation to be applied to |
824 | * a corresponding machine word that follows the base address |
825 | * word. The second least significant bit represents the machine |
826 | * word immediately following the initial address, and each bit |
827 | * that follows represents the next word, in linear order. As such, |
828 | * a single bitmap can encode up to 63 relocations in a 64-bit object. |
829 | * |
830 | * In this implementation we store the address of the next RELR table |
831 | * entry in x9, the address being relocated by the current address or |
832 | * bitmap entry in x13 and the address being relocated by the current |
833 | * bit in x14. |
834 | */ |
835 | adr_l x9, __relr_start |
836 | adr_l x10, __relr_end |
837 | |
838 | 2: cmp x9, x10 |
839 | b.hs 7f |
840 | ldr x11, [x9], #8 |
841 | tbnz x11, #0, 3f // branch to handle bitmaps |
842 | add x13, x11, x23 |
843 | ldr x12, [x13] // relocate address entry |
844 | add x12, x12, x23 |
845 | str x12, [x13], #8 // adjust to start of bitmap |
846 | b 2b |
847 | |
848 | 3: mov x14, x13 |
849 | 4: lsr x11, x11, #1 |
850 | cbz x11, 6f |
851 | tbz x11, #0, 5f // skip bit if not set |
852 | ldr x12, [x14] // relocate bit |
853 | add x12, x12, x23 |
854 | str x12, [x14] |
855 | |
856 | 5: add x14, x14, #8 // move to next bit's address |
857 | b 4b |
858 | |
859 | 6: /* |
860 | * Move to the next bitmap's address. 8 is the word size, and 63 is the |
861 | * number of significant bits in a bitmap entry. |
862 | */ |
863 | add x13, x13, #(8 * 63) |
864 | b 2b |
865 | |
866 | 7: |
867 | #endif |
868 | ret |
869 | |
870 | SYM_FUNC_END(__relocate_kernel) |
871 | #endif |
872 | |
873 | SYM_FUNC_START_LOCAL(__primary_switch) |
874 | adrp x1, reserved_pg_dir |
875 | adrp x2, init_idmap_pg_dir |
876 | bl __enable_mmu |
877 | #ifdef CONFIG_RELOCATABLE |
878 | adrp x23, KERNEL_START |
879 | and x23, x23, MIN_KIMG_ALIGN - 1 |
880 | #ifdef CONFIG_RANDOMIZE_BASE |
881 | mov x0, x22 |
882 | adrp x1, init_pg_end |
883 | mov sp, x1 |
884 | mov x29, xzr |
885 | bl __pi_kaslr_early_init |
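	// x0 packs the 2M-aligned KASLR offset together with the linear map
	// (memstart) seed in bits [20:0]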
886 | and x24, x0, #SZ_2M - 1 // capture memstart offset seed |
887 | bic x0, x0, #SZ_2M - 1 |
888 | orr x23, x23, x0 // record kernel offset |
889 | #endif |
890 | #endif |
891 | bl clear_page_tables |
892 | bl create_kernel_mapping |
893 | |
894 | adrp x1, init_pg_dir |
895 | load_ttbr1 x1, x1, x2 |
896 | #ifdef CONFIG_RELOCATABLE |
897 | bl __relocate_kernel |
898 | #endif |
899 | ldr x8, =__primary_switched |
900 | adrp x0, KERNEL_START // __pa(KERNEL_START) |
901 | br x8 |
902 | SYM_FUNC_END(__primary_switch) |
903 | |