1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Stand-alone page-table allocator for hyp stage-1 and guest stage-2. |
4 | * No bombay mix was harmed in the writing of this file. |
5 | * |
6 | * Copyright (C) 2020 Google LLC |
7 | * Author: Will Deacon <will@kernel.org> |
8 | */ |
9 | |
10 | #include <linux/bitfield.h> |
11 | #include <asm/kvm_pgtable.h> |
12 | #include <asm/stage2_pgtable.h> |
13 | |
15 | #define KVM_PTE_TYPE BIT(1) |
16 | #define KVM_PTE_TYPE_BLOCK 0 |
17 | #define KVM_PTE_TYPE_PAGE 1 |
18 | #define KVM_PTE_TYPE_TABLE 1 |
19 | |
20 | #define KVM_PTE_LEAF_ATTR_LO GENMASK(11, 2) |
21 | |
22 | #define KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX GENMASK(4, 2) |
23 | #define KVM_PTE_LEAF_ATTR_LO_S1_AP GENMASK(7, 6) |
24 | #define KVM_PTE_LEAF_ATTR_LO_S1_AP_RO \ |
25 | ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 2 : 3; }) |
26 | #define KVM_PTE_LEAF_ATTR_LO_S1_AP_RW \ |
27 | ({ cpus_have_final_cap(ARM64_KVM_HVHE) ? 0 : 1; }) |
28 | #define KVM_PTE_LEAF_ATTR_LO_S1_SH GENMASK(9, 8) |
29 | #define KVM_PTE_LEAF_ATTR_LO_S1_SH_IS 3 |
30 | #define KVM_PTE_LEAF_ATTR_LO_S1_AF BIT(10) |
31 | |
32 | #define KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR GENMASK(5, 2) |
33 | #define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R BIT(6) |
34 | #define KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W BIT(7) |
35 | #define KVM_PTE_LEAF_ATTR_LO_S2_SH GENMASK(9, 8) |
36 | #define KVM_PTE_LEAF_ATTR_LO_S2_SH_IS 3 |
37 | #define KVM_PTE_LEAF_ATTR_LO_S2_AF BIT(10) |
38 | |
39 | #define KVM_PTE_LEAF_ATTR_HI GENMASK(63, 50) |
40 | |
41 | #define KVM_PTE_LEAF_ATTR_HI_SW GENMASK(58, 55) |
42 | |
43 | #define KVM_PTE_LEAF_ATTR_HI_S1_XN BIT(54) |
44 | |
45 | #define KVM_PTE_LEAF_ATTR_HI_S2_XN BIT(54) |
46 | |
47 | #define KVM_PTE_LEAF_ATTR_HI_S1_GP BIT(50) |
48 | |
49 | #define KVM_PTE_LEAF_ATTR_S2_PERMS (KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R | \ |
50 | KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \ |
51 | KVM_PTE_LEAF_ATTR_HI_S2_XN) |
52 | |
53 | #define KVM_INVALID_PTE_OWNER_MASK GENMASK(9, 2) |
54 | #define KVM_MAX_OWNER_ID 1 |
55 | |
56 | /* |
57 | * Used to indicate a pte for which a 'break-before-make' sequence is in |
58 | * progress. |
59 | */ |
60 | #define KVM_INVALID_PTE_LOCKED BIT(10) |
61 | |
62 | struct kvm_pgtable_walk_data { |
63 | struct kvm_pgtable_walker *walker; |
64 | |
65 | const u64 start; |
66 | u64 addr; |
67 | const u64 end; |
68 | }; |
69 | |
70 | static bool kvm_pgtable_walk_skip_bbm_tlbi(const struct kvm_pgtable_visit_ctx *ctx) |
71 | { |
72 | return unlikely(ctx->flags & KVM_PGTABLE_WALK_SKIP_BBM_TLBI); |
73 | } |
74 | |
75 | static bool kvm_pgtable_walk_skip_cmo(const struct kvm_pgtable_visit_ctx *ctx) |
76 | { |
77 | return unlikely(ctx->flags & KVM_PGTABLE_WALK_SKIP_CMO); |
78 | } |
79 | |
80 | static bool kvm_phys_is_valid(u64 phys) |
81 | { |
82 | u64 parange_max = kvm_get_parange_max(); |
83 | u8 shift = id_aa64mmfr0_parange_to_phys_shift(parange_max); |
84 | |
85 | return phys < BIT(shift); |
86 | } |
87 | |
88 | static bool kvm_block_mapping_supported(const struct kvm_pgtable_visit_ctx *ctx, u64 phys) |
89 | { |
90 | u64 granule = kvm_granule_size(ctx->level); |
91 | |
92 | if (!kvm_level_supports_block_mapping(ctx->level)) |
93 | return false; |
94 | |
95 | if (granule > (ctx->end - ctx->addr)) |
96 | return false; |
97 | |
98 | if (kvm_phys_is_valid(phys) && !IS_ALIGNED(phys, granule)) |
99 | return false; |
100 | |
101 | return IS_ALIGNED(ctx->addr, granule); |
102 | } |
103 | |
104 | static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, s8 level) |
105 | { |
106 | u64 shift = kvm_granule_shift(level); |
107 | u64 mask = BIT(PAGE_SHIFT - 3) - 1; |
108 | |
109 | return (data->addr >> shift) & mask; |
110 | } |
111 | |
112 | static u32 kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr) |
113 | { |
114 | u64 shift = kvm_granule_shift(pgt->start_level - 1); /* May underflow */ |
115 | u64 mask = BIT(pgt->ia_bits) - 1; |
116 | |
117 | return (addr & mask) >> shift; |
118 | } |
119 | |
120 | static u32 kvm_pgd_pages(u32 ia_bits, s8 start_level) |
121 | { |
122 | struct kvm_pgtable pgt = { |
123 | .ia_bits = ia_bits, |
124 | .start_level = start_level, |
125 | }; |
126 | |
	return kvm_pgd_page_idx(&pgt, -1ULL) + 1;
128 | } |
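
/*
 * Worked example (a sketch assuming a 4K granule): with ia_bits = 40 and
 * start_level = 1, kvm_pgd_page_idx() shifts by kvm_granule_shift(0) = 39,
 * so the highest input address lands at index 1 and kvm_pgd_pages()
 * returns 2, i.e. two concatenated pages at the start level.
 */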
129 | |
130 | static bool kvm_pte_table(kvm_pte_t pte, s8 level) |
131 | { |
132 | if (level == KVM_PGTABLE_LAST_LEVEL) |
133 | return false; |
134 | |
135 | if (!kvm_pte_valid(pte)) |
136 | return false; |
137 | |
138 | return FIELD_GET(KVM_PTE_TYPE, pte) == KVM_PTE_TYPE_TABLE; |
139 | } |
140 | |
141 | static kvm_pte_t *kvm_pte_follow(kvm_pte_t pte, struct kvm_pgtable_mm_ops *mm_ops) |
142 | { |
143 | return mm_ops->phys_to_virt(kvm_pte_to_phys(pte)); |
144 | } |
145 | |
146 | static void kvm_clear_pte(kvm_pte_t *ptep) |
147 | { |
148 | WRITE_ONCE(*ptep, 0); |
149 | } |
150 | |
151 | static kvm_pte_t kvm_init_table_pte(kvm_pte_t *childp, struct kvm_pgtable_mm_ops *mm_ops) |
152 | { |
153 | kvm_pte_t pte = kvm_phys_to_pte(mm_ops->virt_to_phys(childp)); |
154 | |
155 | pte |= FIELD_PREP(KVM_PTE_TYPE, KVM_PTE_TYPE_TABLE); |
156 | pte |= KVM_PTE_VALID; |
157 | return pte; |
158 | } |
159 | |
160 | static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, s8 level) |
161 | { |
162 | kvm_pte_t pte = kvm_phys_to_pte(pa); |
163 | u64 type = (level == KVM_PGTABLE_LAST_LEVEL) ? KVM_PTE_TYPE_PAGE : |
164 | KVM_PTE_TYPE_BLOCK; |
165 | |
166 | pte |= attr & (KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI); |
167 | pte |= FIELD_PREP(KVM_PTE_TYPE, type); |
168 | pte |= KVM_PTE_VALID; |
169 | |
170 | return pte; |
171 | } |
172 | |
173 | static kvm_pte_t kvm_init_invalid_leaf_owner(u8 owner_id) |
174 | { |
175 | return FIELD_PREP(KVM_INVALID_PTE_OWNER_MASK, owner_id); |
176 | } |
177 | |
178 | static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, |
179 | const struct kvm_pgtable_visit_ctx *ctx, |
180 | enum kvm_pgtable_walk_flags visit) |
181 | { |
182 | struct kvm_pgtable_walker *walker = data->walker; |
183 | |
184 | /* Ensure the appropriate lock is held (e.g. RCU lock for stage-2 MMU) */ |
185 | WARN_ON_ONCE(kvm_pgtable_walk_shared(ctx) && !kvm_pgtable_walk_lock_held()); |
186 | return walker->cb(ctx, visit); |
187 | } |
188 | |
189 | static bool kvm_pgtable_walk_continue(const struct kvm_pgtable_walker *walker, |
190 | int r) |
191 | { |
192 | /* |
193 | * Visitor callbacks return EAGAIN when the conditions that led to a |
194 | * fault are no longer reflected in the page tables due to a race to |
195 | * update a PTE. In the context of a fault handler this is interpreted |
196 | * as a signal to retry guest execution. |
197 | * |
198 | * Ignore the return code altogether for walkers outside a fault handler |
199 | * (e.g. write protecting a range of memory) and chug along with the |
200 | * page table walk. |
201 | */ |
202 | if (r == -EAGAIN) |
203 | return !(walker->flags & KVM_PGTABLE_WALK_HANDLE_FAULT); |
204 | |
205 | return !r; |
206 | } |
207 | |
208 | static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data, |
209 | struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, s8 level); |
210 | |
211 | static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, |
212 | struct kvm_pgtable_mm_ops *mm_ops, |
213 | kvm_pteref_t pteref, s8 level) |
214 | { |
215 | enum kvm_pgtable_walk_flags flags = data->walker->flags; |
216 | kvm_pte_t *ptep = kvm_dereference_pteref(data->walker, pteref); |
217 | struct kvm_pgtable_visit_ctx ctx = { |
218 | .ptep = ptep, |
219 | .old = READ_ONCE(*ptep), |
220 | .arg = data->walker->arg, |
221 | .mm_ops = mm_ops, |
222 | .start = data->start, |
223 | .addr = data->addr, |
224 | .end = data->end, |
225 | .level = level, |
226 | .flags = flags, |
227 | }; |
228 | int ret = 0; |
229 | bool reload = false; |
230 | kvm_pteref_t childp; |
231 | bool table = kvm_pte_table(ctx.old, level); |
232 | |
233 | if (table && (ctx.flags & KVM_PGTABLE_WALK_TABLE_PRE)) { |
234 | ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_TABLE_PRE); |
235 | reload = true; |
236 | } |
237 | |
238 | if (!table && (ctx.flags & KVM_PGTABLE_WALK_LEAF)) { |
239 | ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_LEAF); |
240 | reload = true; |
241 | } |
242 | |
243 | /* |
244 | * Reload the page table after invoking the walker callback for leaf |
245 | * entries or after pre-order traversal, to allow the walker to descend |
246 | * into a newly installed or replaced table. |
247 | */ |
248 | if (reload) { |
249 | ctx.old = READ_ONCE(*ptep); |
250 | table = kvm_pte_table(ctx.old, level); |
251 | } |
252 | |
	if (!kvm_pgtable_walk_continue(data->walker, ret))
254 | goto out; |
255 | |
256 | if (!table) { |
257 | data->addr = ALIGN_DOWN(data->addr, kvm_granule_size(level)); |
258 | data->addr += kvm_granule_size(level); |
259 | goto out; |
260 | } |
261 | |
262 | childp = (kvm_pteref_t)kvm_pte_follow(ctx.old, mm_ops); |
263 | ret = __kvm_pgtable_walk(data, mm_ops, childp, level + 1); |
	if (!kvm_pgtable_walk_continue(data->walker, ret))
265 | goto out; |
266 | |
267 | if (ctx.flags & KVM_PGTABLE_WALK_TABLE_POST) |
268 | ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_TABLE_POST); |
269 | |
270 | out: |
	if (kvm_pgtable_walk_continue(data->walker, ret))
272 | return 0; |
273 | |
274 | return ret; |
275 | } |
276 | |
277 | static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data, |
278 | struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, s8 level) |
279 | { |
280 | u32 idx; |
281 | int ret = 0; |
282 | |
283 | if (WARN_ON_ONCE(level < KVM_PGTABLE_FIRST_LEVEL || |
284 | level > KVM_PGTABLE_LAST_LEVEL)) |
285 | return -EINVAL; |
286 | |
287 | for (idx = kvm_pgtable_idx(data, level); idx < PTRS_PER_PTE; ++idx) { |
288 | kvm_pteref_t pteref = &pgtable[idx]; |
289 | |
290 | if (data->addr >= data->end) |
291 | break; |
292 | |
293 | ret = __kvm_pgtable_visit(data, mm_ops, pteref, level); |
294 | if (ret) |
295 | break; |
296 | } |
297 | |
298 | return ret; |
299 | } |
300 | |
301 | static int _kvm_pgtable_walk(struct kvm_pgtable *pgt, struct kvm_pgtable_walk_data *data) |
302 | { |
303 | u32 idx; |
304 | int ret = 0; |
305 | u64 limit = BIT(pgt->ia_bits); |
306 | |
307 | if (data->addr > limit || data->end > limit) |
308 | return -ERANGE; |
309 | |
310 | if (!pgt->pgd) |
311 | return -EINVAL; |
312 | |
	for (idx = kvm_pgd_page_idx(pgt, data->addr); data->addr < data->end; ++idx) {
314 | kvm_pteref_t pteref = &pgt->pgd[idx * PTRS_PER_PTE]; |
315 | |
316 | ret = __kvm_pgtable_walk(data, pgt->mm_ops, pteref, pgt->start_level); |
317 | if (ret) |
318 | break; |
319 | } |
320 | |
321 | return ret; |
322 | } |
323 | |
324 | int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, |
325 | struct kvm_pgtable_walker *walker) |
326 | { |
327 | struct kvm_pgtable_walk_data walk_data = { |
328 | .start = ALIGN_DOWN(addr, PAGE_SIZE), |
329 | .addr = ALIGN_DOWN(addr, PAGE_SIZE), |
330 | .end = PAGE_ALIGN(walk_data.addr + size), |
331 | .walker = walker, |
332 | }; |
333 | int r; |
334 | |
335 | r = kvm_pgtable_walk_begin(walker); |
336 | if (r) |
337 | return r; |
338 | |
	r = _kvm_pgtable_walk(pgt, &walk_data);
340 | kvm_pgtable_walk_end(walker); |
341 | |
342 | return r; |
343 | } |
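
/*
 * Illustrative use of the walker API (a sketch; count_valid_leaves() is
 * hypothetical and not part of this file): callers package a callback,
 * flags and an argument into a walker and hand it to kvm_pgtable_walk().
 *
 *	static int count_valid_leaves(const struct kvm_pgtable_visit_ctx *ctx,
 *				      enum kvm_pgtable_walk_flags visit)
 *	{
 *		u64 *nr = ctx->arg;
 *
 *		if (kvm_pte_valid(ctx->old))
 *			(*nr)++;
 *		return 0;
 *	}
 *
 *	u64 nr = 0;
 *	struct kvm_pgtable_walker walker = {
 *		.cb	= count_valid_leaves,
 *		.flags	= KVM_PGTABLE_WALK_LEAF,
 *		.arg	= &nr,
 *	};
 *
 *	kvm_pgtable_walk(pgt, addr, size, &walker);
 */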
344 | |
345 | struct leaf_walk_data { |
346 | kvm_pte_t pte; |
347 | s8 level; |
348 | }; |
349 | |
350 | static int leaf_walker(const struct kvm_pgtable_visit_ctx *ctx, |
351 | enum kvm_pgtable_walk_flags visit) |
352 | { |
353 | struct leaf_walk_data *data = ctx->arg; |
354 | |
355 | data->pte = ctx->old; |
356 | data->level = ctx->level; |
357 | |
358 | return 0; |
359 | } |
360 | |
361 | int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr, |
362 | kvm_pte_t *ptep, s8 *level) |
363 | { |
364 | struct leaf_walk_data data; |
365 | struct kvm_pgtable_walker walker = { |
366 | .cb = leaf_walker, |
367 | .flags = KVM_PGTABLE_WALK_LEAF, |
368 | .arg = &data, |
369 | }; |
370 | int ret; |
371 | |
372 | ret = kvm_pgtable_walk(pgt, ALIGN_DOWN(addr, PAGE_SIZE), |
373 | PAGE_SIZE, &walker); |
374 | if (!ret) { |
375 | if (ptep) |
376 | *ptep = data.pte; |
377 | if (level) |
378 | *level = data.level; |
379 | } |
380 | |
381 | return ret; |
382 | } |
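
/*
 * Example use (a sketch; 'pgt' and 'addr' assumed in scope): query the
 * leaf entry mapping a given address. A valid result covers
 * kvm_granule_size(level) bytes.
 *
 *	kvm_pte_t pte;
 *	s8 level;
 *
 *	if (!kvm_pgtable_get_leaf(pgt, addr, &pte, &level) &&
 *	    kvm_pte_valid(pte))
 *		... the mapping spans kvm_granule_size(level) bytes ...
 */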
383 | |
384 | struct hyp_map_data { |
385 | const u64 phys; |
386 | kvm_pte_t attr; |
387 | }; |
388 | |
389 | static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep) |
390 | { |
391 | bool device = prot & KVM_PGTABLE_PROT_DEVICE; |
392 | u32 mtype = device ? MT_DEVICE_nGnRE : MT_NORMAL; |
393 | kvm_pte_t attr = FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_ATTRIDX, mtype); |
394 | u32 sh = KVM_PTE_LEAF_ATTR_LO_S1_SH_IS; |
395 | u32 ap = (prot & KVM_PGTABLE_PROT_W) ? KVM_PTE_LEAF_ATTR_LO_S1_AP_RW : |
396 | KVM_PTE_LEAF_ATTR_LO_S1_AP_RO; |
397 | |
398 | if (!(prot & KVM_PGTABLE_PROT_R)) |
399 | return -EINVAL; |
400 | |
401 | if (prot & KVM_PGTABLE_PROT_X) { |
402 | if (prot & KVM_PGTABLE_PROT_W) |
403 | return -EINVAL; |
404 | |
405 | if (device) |
406 | return -EINVAL; |
407 | |
408 | if (system_supports_bti_kernel()) |
409 | attr |= KVM_PTE_LEAF_ATTR_HI_S1_GP; |
410 | } else { |
411 | attr |= KVM_PTE_LEAF_ATTR_HI_S1_XN; |
412 | } |
413 | |
414 | attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_AP, ap); |
415 | if (!kvm_lpa2_is_enabled()) |
416 | attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S1_SH, sh); |
417 | attr |= KVM_PTE_LEAF_ATTR_LO_S1_AF; |
418 | attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW; |
419 | *ptep = attr; |
420 | |
421 | return 0; |
422 | } |
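
/*
 * Worked example (a sketch; non-hVHE, so AP_RW encodes as 1): for
 * prot = KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W, the resulting attr has
 * AttrIndx = MT_NORMAL, AP = RW, inner-shareable SH (unless LPA2 is
 * enabled), AF set, and XN set because execute permission was not
 * requested.
 */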
423 | |
424 | enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte) |
425 | { |
426 | enum kvm_pgtable_prot prot = pte & KVM_PTE_LEAF_ATTR_HI_SW; |
427 | u32 ap; |
428 | |
429 | if (!kvm_pte_valid(pte)) |
430 | return prot; |
431 | |
432 | if (!(pte & KVM_PTE_LEAF_ATTR_HI_S1_XN)) |
433 | prot |= KVM_PGTABLE_PROT_X; |
434 | |
435 | ap = FIELD_GET(KVM_PTE_LEAF_ATTR_LO_S1_AP, pte); |
436 | if (ap == KVM_PTE_LEAF_ATTR_LO_S1_AP_RO) |
437 | prot |= KVM_PGTABLE_PROT_R; |
438 | else if (ap == KVM_PTE_LEAF_ATTR_LO_S1_AP_RW) |
439 | prot |= KVM_PGTABLE_PROT_RW; |
440 | |
441 | return prot; |
442 | } |
443 | |
444 | static bool hyp_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, |
445 | struct hyp_map_data *data) |
446 | { |
447 | u64 phys = data->phys + (ctx->addr - ctx->start); |
448 | kvm_pte_t new; |
449 | |
450 | if (!kvm_block_mapping_supported(ctx, phys)) |
451 | return false; |
452 | |
453 | new = kvm_init_valid_leaf_pte(phys, data->attr, ctx->level); |
454 | if (ctx->old == new) |
455 | return true; |
456 | if (!kvm_pte_valid(ctx->old)) |
457 | ctx->mm_ops->get_page(ctx->ptep); |
458 | else if (WARN_ON((ctx->old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW)) |
459 | return false; |
460 | |
461 | smp_store_release(ctx->ptep, new); |
462 | return true; |
463 | } |
464 | |
465 | static int hyp_map_walker(const struct kvm_pgtable_visit_ctx *ctx, |
466 | enum kvm_pgtable_walk_flags visit) |
467 | { |
468 | kvm_pte_t *childp, new; |
469 | struct hyp_map_data *data = ctx->arg; |
470 | struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; |
471 | |
472 | if (hyp_map_walker_try_leaf(ctx, data)) |
473 | return 0; |
474 | |
475 | if (WARN_ON(ctx->level == KVM_PGTABLE_LAST_LEVEL)) |
476 | return -EINVAL; |
477 | |
478 | childp = (kvm_pte_t *)mm_ops->zalloc_page(NULL); |
479 | if (!childp) |
480 | return -ENOMEM; |
481 | |
482 | new = kvm_init_table_pte(childp, mm_ops); |
483 | mm_ops->get_page(ctx->ptep); |
484 | smp_store_release(ctx->ptep, new); |
485 | |
486 | return 0; |
487 | } |
488 | |
489 | int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, |
490 | enum kvm_pgtable_prot prot) |
491 | { |
492 | int ret; |
493 | struct hyp_map_data map_data = { |
494 | .phys = ALIGN_DOWN(phys, PAGE_SIZE), |
495 | }; |
496 | struct kvm_pgtable_walker walker = { |
497 | .cb = hyp_map_walker, |
498 | .flags = KVM_PGTABLE_WALK_LEAF, |
499 | .arg = &map_data, |
500 | }; |
501 | |
502 | ret = hyp_set_prot_attr(prot, &map_data.attr); |
503 | if (ret) |
504 | return ret; |
505 | |
	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
507 | dsb(ishst); |
508 | isb(); |
509 | return ret; |
510 | } |
511 | |
512 | static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, |
513 | enum kvm_pgtable_walk_flags visit) |
514 | { |
515 | kvm_pte_t *childp = NULL; |
516 | u64 granule = kvm_granule_size(ctx->level); |
517 | u64 *unmapped = ctx->arg; |
518 | struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; |
519 | |
520 | if (!kvm_pte_valid(ctx->old)) |
521 | return -EINVAL; |
522 | |
523 | if (kvm_pte_table(ctx->old, ctx->level)) { |
524 | childp = kvm_pte_follow(ctx->old, mm_ops); |
525 | |
526 | if (mm_ops->page_count(childp) != 1) |
527 | return 0; |
528 | |
529 | kvm_clear_pte(ctx->ptep); |
530 | dsb(ishst); |
531 | __tlbi_level(vae2is, __TLBI_VADDR(ctx->addr, 0), TLBI_TTL_UNKNOWN); |
532 | } else { |
533 | if (ctx->end - ctx->addr < granule) |
534 | return -EINVAL; |
535 | |
536 | kvm_clear_pte(ctx->ptep); |
537 | dsb(ishst); |
538 | __tlbi_level(vale2is, __TLBI_VADDR(ctx->addr, 0), ctx->level); |
539 | *unmapped += granule; |
540 | } |
541 | |
542 | dsb(ish); |
543 | isb(); |
544 | mm_ops->put_page(ctx->ptep); |
545 | |
546 | if (childp) |
547 | mm_ops->put_page(childp); |
548 | |
549 | return 0; |
550 | } |
551 | |
552 | u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size) |
553 | { |
554 | u64 unmapped = 0; |
555 | struct kvm_pgtable_walker walker = { |
556 | .cb = hyp_unmap_walker, |
557 | .arg = &unmapped, |
558 | .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, |
559 | }; |
560 | |
561 | if (!pgt->mm_ops->page_count) |
562 | return 0; |
563 | |
	kvm_pgtable_walk(pgt, addr, size, &walker);
565 | return unmapped; |
566 | } |
567 | |
568 | int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits, |
569 | struct kvm_pgtable_mm_ops *mm_ops) |
570 | { |
571 | s8 start_level = KVM_PGTABLE_LAST_LEVEL + 1 - |
572 | ARM64_HW_PGTABLE_LEVELS(va_bits); |
573 | |
574 | if (start_level < KVM_PGTABLE_FIRST_LEVEL || |
575 | start_level > KVM_PGTABLE_LAST_LEVEL) |
576 | return -EINVAL; |
577 | |
578 | pgt->pgd = (kvm_pteref_t)mm_ops->zalloc_page(NULL); |
579 | if (!pgt->pgd) |
580 | return -ENOMEM; |
581 | |
582 | pgt->ia_bits = va_bits; |
583 | pgt->start_level = start_level; |
584 | pgt->mm_ops = mm_ops; |
585 | pgt->mmu = NULL; |
586 | pgt->force_pte_cb = NULL; |
587 | |
588 | return 0; |
589 | } |
590 | |
591 | static int hyp_free_walker(const struct kvm_pgtable_visit_ctx *ctx, |
592 | enum kvm_pgtable_walk_flags visit) |
593 | { |
594 | struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; |
595 | |
596 | if (!kvm_pte_valid(ctx->old)) |
597 | return 0; |
598 | |
599 | mm_ops->put_page(ctx->ptep); |
600 | |
601 | if (kvm_pte_table(ctx->old, ctx->level)) |
602 | mm_ops->put_page(kvm_pte_follow(ctx->old, mm_ops)); |
603 | |
604 | return 0; |
605 | } |
606 | |
607 | void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt) |
608 | { |
609 | struct kvm_pgtable_walker walker = { |
610 | .cb = hyp_free_walker, |
611 | .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, |
612 | }; |
613 | |
	WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
615 | pgt->mm_ops->put_page(kvm_dereference_pteref(&walker, pgt->pgd)); |
616 | pgt->pgd = NULL; |
617 | } |
618 | |
619 | struct stage2_map_data { |
620 | const u64 phys; |
621 | kvm_pte_t attr; |
622 | u8 owner_id; |
623 | |
624 | kvm_pte_t *anchor; |
625 | kvm_pte_t *childp; |
626 | |
627 | struct kvm_s2_mmu *mmu; |
628 | void *memcache; |
629 | |
630 | /* Force mappings to page granularity */ |
631 | bool force_pte; |
632 | }; |
633 | |
634 | u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift) |
635 | { |
636 | u64 vtcr = VTCR_EL2_FLAGS; |
637 | s8 lvls; |
638 | |
639 | vtcr |= kvm_get_parange(mmfr0) << VTCR_EL2_PS_SHIFT; |
640 | vtcr |= VTCR_EL2_T0SZ(phys_shift); |
641 | /* |
642 | * Use a minimum 2 level page table to prevent splitting |
643 | * host PMD huge pages at stage2. |
644 | */ |
645 | lvls = stage2_pgtable_levels(phys_shift); |
646 | if (lvls < 2) |
647 | lvls = 2; |
648 | |
649 | /* |
650 | * When LPA2 is enabled, the HW supports an extra level of translation |
651 | * (for 5 in total) when using 4K pages. It also introduces VTCR_EL2.SL2 |
	 * as an addition to SL0 to enable encoding this extra start level.
653 | * However, since we always use concatenated pages for the first level |
654 | * lookup, we will never need this extra level and therefore do not need |
655 | * to touch SL2. |
656 | */ |
657 | vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls); |
658 | |
659 | #ifdef CONFIG_ARM64_HW_AFDBM |
660 | /* |
661 | * Enable the Hardware Access Flag management, unconditionally |
662 | * on all CPUs. In systems that have asymmetric support for the feature |
663 | * this allows KVM to leverage hardware support on the subset of cores |
664 | * that implement the feature. |
665 | * |
666 | * The architecture requires VTCR_EL2.HA to be RES0 (thus ignored by |
667 | * hardware) on implementations that do not advertise support for the |
668 | * feature. As such, setting HA unconditionally is safe, unless you |
669 | * happen to be running on a design that has unadvertised support for |
670 | * HAFDBS. Here be dragons. |
671 | */ |
672 | if (!cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38)) |
673 | vtcr |= VTCR_EL2_HA; |
674 | #endif /* CONFIG_ARM64_HW_AFDBM */ |
675 | |
676 | if (kvm_lpa2_is_enabled()) |
677 | vtcr |= VTCR_EL2_DS; |
678 | |
679 | /* Set the vmid bits */ |
680 | vtcr |= (get_vmid_bits(mmfr1) == 16) ? |
681 | VTCR_EL2_VS_16BIT : |
682 | VTCR_EL2_VS_8BIT; |
683 | |
684 | return vtcr; |
685 | } |
686 | |
687 | static bool stage2_has_fwb(struct kvm_pgtable *pgt) |
688 | { |
689 | if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) |
690 | return false; |
691 | |
692 | return !(pgt->flags & KVM_PGTABLE_S2_NOFWB); |
693 | } |
694 | |
695 | void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, |
696 | phys_addr_t addr, size_t size) |
697 | { |
698 | unsigned long pages, inval_pages; |
699 | |
700 | if (!system_supports_tlb_range()) { |
701 | kvm_call_hyp(__kvm_tlb_flush_vmid, mmu); |
702 | return; |
703 | } |
704 | |
705 | pages = size >> PAGE_SHIFT; |
706 | while (pages > 0) { |
707 | inval_pages = min(pages, MAX_TLBI_RANGE_PAGES); |
708 | kvm_call_hyp(__kvm_tlb_flush_vmid_range, mmu, addr, inval_pages); |
709 | |
710 | addr += inval_pages << PAGE_SHIFT; |
711 | pages -= inval_pages; |
712 | } |
713 | } |
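
/*
 * Illustrative call (a sketch; 'mmu' and 'addr' assumed in scope):
 * flushing a 2MiB block-sized range. With 4K pages that is 512 pages,
 * issued as a single ranged invalidation provided 512 does not exceed
 * MAX_TLBI_RANGE_PAGES.
 *
 *	kvm_tlb_flush_vmid_range(mmu, addr, SZ_2M);
 */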
714 | |
715 | #define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt)) |
716 | |
717 | static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot, |
718 | kvm_pte_t *ptep) |
719 | { |
720 | kvm_pte_t attr; |
721 | u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS; |
722 | |
723 | switch (prot & (KVM_PGTABLE_PROT_DEVICE | |
724 | KVM_PGTABLE_PROT_NORMAL_NC)) { |
725 | case KVM_PGTABLE_PROT_DEVICE | KVM_PGTABLE_PROT_NORMAL_NC: |
726 | return -EINVAL; |
727 | case KVM_PGTABLE_PROT_DEVICE: |
728 | if (prot & KVM_PGTABLE_PROT_X) |
729 | return -EINVAL; |
730 | attr = KVM_S2_MEMATTR(pgt, DEVICE_nGnRE); |
731 | break; |
732 | case KVM_PGTABLE_PROT_NORMAL_NC: |
733 | if (prot & KVM_PGTABLE_PROT_X) |
734 | return -EINVAL; |
735 | attr = KVM_S2_MEMATTR(pgt, NORMAL_NC); |
736 | break; |
737 | default: |
738 | attr = KVM_S2_MEMATTR(pgt, NORMAL); |
739 | } |
740 | |
741 | if (!(prot & KVM_PGTABLE_PROT_X)) |
742 | attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN; |
743 | |
744 | if (prot & KVM_PGTABLE_PROT_R) |
745 | attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R; |
746 | |
747 | if (prot & KVM_PGTABLE_PROT_W) |
748 | attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W; |
749 | |
750 | if (!kvm_lpa2_is_enabled()) |
751 | attr |= FIELD_PREP(KVM_PTE_LEAF_ATTR_LO_S2_SH, sh); |
752 | |
753 | attr |= KVM_PTE_LEAF_ATTR_LO_S2_AF; |
754 | attr |= prot & KVM_PTE_LEAF_ATTR_HI_SW; |
755 | *ptep = attr; |
756 | |
757 | return 0; |
758 | } |
759 | |
760 | enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte) |
761 | { |
762 | enum kvm_pgtable_prot prot = pte & KVM_PTE_LEAF_ATTR_HI_SW; |
763 | |
764 | if (!kvm_pte_valid(pte)) |
765 | return prot; |
766 | |
767 | if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R) |
768 | prot |= KVM_PGTABLE_PROT_R; |
769 | if (pte & KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W) |
770 | prot |= KVM_PGTABLE_PROT_W; |
771 | if (!(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN)) |
772 | prot |= KVM_PGTABLE_PROT_X; |
773 | |
774 | return prot; |
775 | } |
776 | |
777 | static bool stage2_pte_needs_update(kvm_pte_t old, kvm_pte_t new) |
778 | { |
779 | if (!kvm_pte_valid(old) || !kvm_pte_valid(new)) |
780 | return true; |
781 | |
782 | return ((old ^ new) & (~KVM_PTE_LEAF_ATTR_S2_PERMS)); |
783 | } |
784 | |
785 | static bool stage2_pte_is_counted(kvm_pte_t pte) |
786 | { |
787 | /* |
	 * The refcount tracks valid entries as well as invalid entries that
	 * encode ownership of a page by an entity other than the page-table
	 * owner, whose id is 0.
791 | */ |
792 | return !!pte; |
793 | } |
794 | |
795 | static bool stage2_pte_is_locked(kvm_pte_t pte) |
796 | { |
797 | return !kvm_pte_valid(pte) && (pte & KVM_INVALID_PTE_LOCKED); |
798 | } |
799 | |
800 | static bool stage2_try_set_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t new) |
801 | { |
802 | if (!kvm_pgtable_walk_shared(ctx)) { |
803 | WRITE_ONCE(*ctx->ptep, new); |
804 | return true; |
805 | } |
806 | |
807 | return cmpxchg(ctx->ptep, ctx->old, new) == ctx->old; |
808 | } |
809 | |
810 | /** |
811 | * stage2_try_break_pte() - Invalidates a pte according to the |
812 | * 'break-before-make' requirements of the |
813 | * architecture. |
814 | * |
815 | * @ctx: context of the visited pte. |
816 | * @mmu: stage-2 mmu |
817 | * |
818 | * Returns: true if the pte was successfully broken. |
819 | * |
820 | * If the removed pte was valid, performs the necessary serialization and TLB |
821 | * invalidation for the old value. For counted ptes, drops the reference count |
822 | * on the containing table page. |
823 | */ |
824 | static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx, |
825 | struct kvm_s2_mmu *mmu) |
826 | { |
827 | struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; |
828 | |
829 | if (stage2_pte_is_locked(ctx->old)) { |
830 | /* |
831 | * Should never occur if this walker has exclusive access to the |
832 | * page tables. |
833 | */ |
834 | WARN_ON(!kvm_pgtable_walk_shared(ctx)); |
835 | return false; |
836 | } |
837 | |
838 | if (!stage2_try_set_pte(ctx, KVM_INVALID_PTE_LOCKED)) |
839 | return false; |
840 | |
841 | if (!kvm_pgtable_walk_skip_bbm_tlbi(ctx)) { |
842 | /* |
843 | * Perform the appropriate TLB invalidation based on the |
844 | * evicted pte value (if any). |
845 | */ |
846 | if (kvm_pte_table(ctx->old, ctx->level)) { |
847 | u64 size = kvm_granule_size(ctx->level); |
848 | u64 addr = ALIGN_DOWN(ctx->addr, size); |
849 | |
850 | kvm_tlb_flush_vmid_range(mmu, addr, size); |
851 | } else if (kvm_pte_valid(ctx->old)) { |
852 | kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, |
853 | ctx->addr, ctx->level); |
854 | } |
855 | } |
856 | |
857 | if (stage2_pte_is_counted(ctx->old)) |
858 | mm_ops->put_page(ctx->ptep); |
859 | |
860 | return true; |
861 | } |
862 | |
863 | static void stage2_make_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t new) |
864 | { |
865 | struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; |
866 | |
867 | WARN_ON(!stage2_pte_is_locked(*ctx->ptep)); |
868 | |
869 | if (stage2_pte_is_counted(new)) |
870 | mm_ops->get_page(ctx->ptep); |
871 | |
872 | smp_store_release(ctx->ptep, new); |
873 | } |
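
/*
 * The two helpers above are used as a pair (see stage2_map_walker_try_leaf()
 * for a real caller): break the old entry, perform any maintenance on the
 * old mapping, then make the new entry visible.
 *
 *	if (!stage2_try_break_pte(ctx, mmu))
 *		return -EAGAIN;
 *	... CMOs / TLB maintenance ...
 *	stage2_make_pte(ctx, new);
 */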
874 | |
875 | static bool stage2_unmap_defer_tlb_flush(struct kvm_pgtable *pgt) |
876 | { |
877 | /* |
878 | * If FEAT_TLBIRANGE is implemented, defer the individual |
879 | * TLB invalidations until the entire walk is finished, and |
880 | * then use the range-based TLBI instructions to do the |
881 | * invalidations. Condition deferred TLB invalidation on the |
882 | * system supporting FWB as the optimization is entirely |
883 | * pointless when the unmap walker needs to perform CMOs. |
884 | */ |
885 | return system_supports_tlb_range() && stage2_has_fwb(pgt); |
886 | } |
887 | |
888 | static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx, |
889 | struct kvm_s2_mmu *mmu, |
890 | struct kvm_pgtable_mm_ops *mm_ops) |
891 | { |
892 | struct kvm_pgtable *pgt = ctx->arg; |
893 | |
894 | /* |
895 | * Clear the existing PTE, and perform break-before-make if it was |
896 | * valid. Depending on the system support, defer the TLB maintenance |
897 | * for the same until the entire unmap walk is completed. |
898 | */ |
899 | if (kvm_pte_valid(ctx->old)) { |
900 | kvm_clear_pte(ctx->ptep); |
901 | |
902 | if (kvm_pte_table(ctx->old, ctx->level)) { |
903 | kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, |
904 | TLBI_TTL_UNKNOWN); |
905 | } else if (!stage2_unmap_defer_tlb_flush(pgt)) { |
906 | kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, |
907 | ctx->level); |
908 | } |
909 | } |
910 | |
911 | mm_ops->put_page(ctx->ptep); |
912 | } |
913 | |
914 | static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte) |
915 | { |
	u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;

	return memattr == KVM_S2_MEMATTR(pgt, NORMAL);
918 | } |
919 | |
920 | static bool stage2_pte_executable(kvm_pte_t pte) |
921 | { |
922 | return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN); |
923 | } |
924 | |
925 | static u64 stage2_map_walker_phys_addr(const struct kvm_pgtable_visit_ctx *ctx, |
926 | const struct stage2_map_data *data) |
927 | { |
928 | u64 phys = data->phys; |
929 | |
930 | /* |
931 | * Stage-2 walks to update ownership data are communicated to the map |
932 | * walker using an invalid PA. Avoid offsetting an already invalid PA, |
933 | * which could overflow and make the address valid again. |
934 | */ |
935 | if (!kvm_phys_is_valid(phys)) |
936 | return phys; |
937 | |
938 | /* |
939 | * Otherwise, work out the correct PA based on how far the walk has |
940 | * gotten. |
941 | */ |
942 | return phys + (ctx->addr - ctx->start); |
943 | } |
944 | |
945 | static bool stage2_leaf_mapping_allowed(const struct kvm_pgtable_visit_ctx *ctx, |
946 | struct stage2_map_data *data) |
947 | { |
948 | u64 phys = stage2_map_walker_phys_addr(ctx, data); |
949 | |
950 | if (data->force_pte && ctx->level < KVM_PGTABLE_LAST_LEVEL) |
951 | return false; |
952 | |
953 | return kvm_block_mapping_supported(ctx, phys); |
954 | } |
955 | |
956 | static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, |
957 | struct stage2_map_data *data) |
958 | { |
959 | kvm_pte_t new; |
960 | u64 phys = stage2_map_walker_phys_addr(ctx, data); |
961 | u64 granule = kvm_granule_size(ctx->level); |
962 | struct kvm_pgtable *pgt = data->mmu->pgt; |
963 | struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; |
964 | |
965 | if (!stage2_leaf_mapping_allowed(ctx, data)) |
966 | return -E2BIG; |
967 | |
968 | if (kvm_phys_is_valid(phys)) |
969 | new = kvm_init_valid_leaf_pte(phys, data->attr, ctx->level); |
970 | else |
971 | new = kvm_init_invalid_leaf_owner(data->owner_id); |
972 | |
973 | /* |
974 | * Skip updating the PTE if we are trying to recreate the exact |
975 | * same mapping or only change the access permissions. Instead, |
	 * the vCPU will exit one more time from the guest if still needed
977 | * and then go through the path of relaxing permissions. |
978 | */ |
979 | if (!stage2_pte_needs_update(ctx->old, new)) |
980 | return -EAGAIN; |
981 | |
982 | if (!stage2_try_break_pte(ctx, data->mmu)) |
983 | return -EAGAIN; |
984 | |
985 | /* Perform CMOs before installation of the guest stage-2 PTE */ |
986 | if (!kvm_pgtable_walk_skip_cmo(ctx) && mm_ops->dcache_clean_inval_poc && |
987 | stage2_pte_cacheable(pgt, new)) |
988 | mm_ops->dcache_clean_inval_poc(kvm_pte_follow(new, mm_ops), |
989 | granule); |
990 | |
991 | if (!kvm_pgtable_walk_skip_cmo(ctx) && mm_ops->icache_inval_pou && |
992 | stage2_pte_executable(new)) |
993 | mm_ops->icache_inval_pou(kvm_pte_follow(new, mm_ops), granule); |
994 | |
995 | stage2_make_pte(ctx, new); |
996 | |
997 | return 0; |
998 | } |
999 | |
1000 | static int stage2_map_walk_table_pre(const struct kvm_pgtable_visit_ctx *ctx, |
1001 | struct stage2_map_data *data) |
1002 | { |
1003 | struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; |
1004 | kvm_pte_t *childp = kvm_pte_follow(ctx->old, mm_ops); |
1005 | int ret; |
1006 | |
1007 | if (!stage2_leaf_mapping_allowed(ctx, data)) |
1008 | return 0; |
1009 | |
1010 | ret = stage2_map_walker_try_leaf(ctx, data); |
1011 | if (ret) |
1012 | return ret; |
1013 | |
1014 | mm_ops->free_unlinked_table(childp, ctx->level); |
1015 | return 0; |
1016 | } |
1017 | |
1018 | static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx, |
1019 | struct stage2_map_data *data) |
1020 | { |
1021 | struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; |
1022 | kvm_pte_t *childp, new; |
1023 | int ret; |
1024 | |
1025 | ret = stage2_map_walker_try_leaf(ctx, data); |
1026 | if (ret != -E2BIG) |
1027 | return ret; |
1028 | |
1029 | if (WARN_ON(ctx->level == KVM_PGTABLE_LAST_LEVEL)) |
1030 | return -EINVAL; |
1031 | |
1032 | if (!data->memcache) |
1033 | return -ENOMEM; |
1034 | |
1035 | childp = mm_ops->zalloc_page(data->memcache); |
1036 | if (!childp) |
1037 | return -ENOMEM; |
1038 | |
	if (!stage2_try_break_pte(ctx, data->mmu)) {
1040 | mm_ops->put_page(childp); |
1041 | return -EAGAIN; |
1042 | } |
1043 | |
1044 | /* |
1045 | * If we've run into an existing block mapping then replace it with |
1046 | * a table. Accesses beyond 'end' that fall within the new table |
1047 | * will be mapped lazily. |
1048 | */ |
1049 | new = kvm_init_table_pte(childp, mm_ops); |
1050 | stage2_make_pte(ctx, new); |
1051 | |
1052 | return 0; |
1053 | } |
1054 | |
1055 | /* |
1056 | * The TABLE_PRE callback runs for table entries on the way down, looking |
1057 | * for table entries which we could conceivably replace with a block entry |
1058 | * for this mapping. If it finds one it replaces the entry and calls |
1059 | * kvm_pgtable_mm_ops::free_unlinked_table() to tear down the detached table. |
1060 | * |
1061 | * Otherwise, the LEAF callback performs the mapping at the existing leaves |
1062 | * instead. |
1063 | */ |
1064 | static int stage2_map_walker(const struct kvm_pgtable_visit_ctx *ctx, |
1065 | enum kvm_pgtable_walk_flags visit) |
1066 | { |
1067 | struct stage2_map_data *data = ctx->arg; |
1068 | |
1069 | switch (visit) { |
1070 | case KVM_PGTABLE_WALK_TABLE_PRE: |
1071 | return stage2_map_walk_table_pre(ctx, data); |
1072 | case KVM_PGTABLE_WALK_LEAF: |
1073 | return stage2_map_walk_leaf(ctx, data); |
1074 | default: |
1075 | return -EINVAL; |
1076 | } |
1077 | } |
1078 | |
1079 | int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, |
1080 | u64 phys, enum kvm_pgtable_prot prot, |
1081 | void *mc, enum kvm_pgtable_walk_flags flags) |
1082 | { |
1083 | int ret; |
1084 | struct stage2_map_data map_data = { |
1085 | .phys = ALIGN_DOWN(phys, PAGE_SIZE), |
1086 | .mmu = pgt->mmu, |
1087 | .memcache = mc, |
1088 | .force_pte = pgt->force_pte_cb && pgt->force_pte_cb(addr, addr + size, prot), |
1089 | }; |
1090 | struct kvm_pgtable_walker walker = { |
1091 | .cb = stage2_map_walker, |
1092 | .flags = flags | |
1093 | KVM_PGTABLE_WALK_TABLE_PRE | |
1094 | KVM_PGTABLE_WALK_LEAF, |
1095 | .arg = &map_data, |
1096 | }; |
1097 | |
1098 | if (WARN_ON((pgt->flags & KVM_PGTABLE_S2_IDMAP) && (addr != phys))) |
1099 | return -EINVAL; |
1100 | |
1101 | ret = stage2_set_prot_attr(pgt, prot, &map_data.attr); |
1102 | if (ret) |
1103 | return ret; |
1104 | |
	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
1106 | dsb(ishst); |
1107 | return ret; |
1108 | } |
1109 | |
1110 | int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size, |
1111 | void *mc, u8 owner_id) |
1112 | { |
1113 | int ret; |
1114 | struct stage2_map_data map_data = { |
1115 | .phys = KVM_PHYS_INVALID, |
1116 | .mmu = pgt->mmu, |
1117 | .memcache = mc, |
1118 | .owner_id = owner_id, |
1119 | .force_pte = true, |
1120 | }; |
1121 | struct kvm_pgtable_walker walker = { |
1122 | .cb = stage2_map_walker, |
1123 | .flags = KVM_PGTABLE_WALK_TABLE_PRE | |
1124 | KVM_PGTABLE_WALK_LEAF, |
1125 | .arg = &map_data, |
1126 | }; |
1127 | |
1128 | if (owner_id > KVM_MAX_OWNER_ID) |
1129 | return -EINVAL; |
1130 | |
	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
1132 | return ret; |
1133 | } |
1134 | |
1135 | static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, |
1136 | enum kvm_pgtable_walk_flags visit) |
1137 | { |
1138 | struct kvm_pgtable *pgt = ctx->arg; |
1139 | struct kvm_s2_mmu *mmu = pgt->mmu; |
1140 | struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; |
1141 | kvm_pte_t *childp = NULL; |
1142 | bool need_flush = false; |
1143 | |
1144 | if (!kvm_pte_valid(ctx->old)) { |
1145 | if (stage2_pte_is_counted(ctx->old)) { |
1146 | kvm_clear_pte(ctx->ptep); |
1147 | mm_ops->put_page(ctx->ptep); |
1148 | } |
1149 | return 0; |
1150 | } |
1151 | |
1152 | if (kvm_pte_table(ctx->old, ctx->level)) { |
1153 | childp = kvm_pte_follow(ctx->old, mm_ops); |
1154 | |
1155 | if (mm_ops->page_count(childp) != 1) |
1156 | return 0; |
1157 | } else if (stage2_pte_cacheable(pgt, ctx->old)) { |
1158 | need_flush = !stage2_has_fwb(pgt); |
1159 | } |
1160 | |
1161 | /* |
1162 | * This is similar to the map() path in that we unmap the entire |
1163 | * block entry and rely on the remaining portions being faulted |
1164 | * back lazily. |
1165 | */ |
1166 | stage2_unmap_put_pte(ctx, mmu, mm_ops); |
1167 | |
1168 | if (need_flush && mm_ops->dcache_clean_inval_poc) |
1169 | mm_ops->dcache_clean_inval_poc(kvm_pte_follow(ctx->old, mm_ops), |
1170 | kvm_granule_size(ctx->level)); |
1171 | |
1172 | if (childp) |
1173 | mm_ops->put_page(childp); |
1174 | |
1175 | return 0; |
1176 | } |
1177 | |
1178 | int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size) |
1179 | { |
1180 | int ret; |
1181 | struct kvm_pgtable_walker walker = { |
1182 | .cb = stage2_unmap_walker, |
1183 | .arg = pgt, |
1184 | .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, |
1185 | }; |
1186 | |
	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
1188 | if (stage2_unmap_defer_tlb_flush(pgt)) |
1189 | /* Perform the deferred TLB invalidations */ |
		kvm_tlb_flush_vmid_range(pgt->mmu, addr, size);
1191 | |
1192 | return ret; |
1193 | } |
1194 | |
1195 | struct stage2_attr_data { |
1196 | kvm_pte_t attr_set; |
1197 | kvm_pte_t attr_clr; |
1198 | kvm_pte_t pte; |
1199 | s8 level; |
1200 | }; |
1201 | |
1202 | static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx, |
1203 | enum kvm_pgtable_walk_flags visit) |
1204 | { |
1205 | kvm_pte_t pte = ctx->old; |
1206 | struct stage2_attr_data *data = ctx->arg; |
1207 | struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; |
1208 | |
1209 | if (!kvm_pte_valid(ctx->old)) |
1210 | return -EAGAIN; |
1211 | |
1212 | data->level = ctx->level; |
1213 | data->pte = pte; |
1214 | pte &= ~data->attr_clr; |
1215 | pte |= data->attr_set; |
1216 | |
1217 | /* |
1218 | * We may race with the CPU trying to set the access flag here, |
1219 | * but worst-case the access flag update gets lost and will be |
1220 | * set on the next access instead. |
1221 | */ |
1222 | if (data->pte != pte) { |
1223 | /* |
1224 | * Invalidate instruction cache before updating the guest |
1225 | * stage-2 PTE if we are going to add executable permission. |
1226 | */ |
1227 | if (mm_ops->icache_inval_pou && |
1228 | stage2_pte_executable(pte) && !stage2_pte_executable(ctx->old)) |
1229 | mm_ops->icache_inval_pou(kvm_pte_follow(pte, mm_ops), |
1230 | kvm_granule_size(ctx->level)); |
1231 | |
1232 | if (!stage2_try_set_pte(ctx, pte)) |
1233 | return -EAGAIN; |
1234 | } |
1235 | |
1236 | return 0; |
1237 | } |
1238 | |
1239 | static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr, |
1240 | u64 size, kvm_pte_t attr_set, |
1241 | kvm_pte_t attr_clr, kvm_pte_t *orig_pte, |
1242 | s8 *level, enum kvm_pgtable_walk_flags flags) |
1243 | { |
1244 | int ret; |
1245 | kvm_pte_t attr_mask = KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI; |
1246 | struct stage2_attr_data data = { |
1247 | .attr_set = attr_set & attr_mask, |
1248 | .attr_clr = attr_clr & attr_mask, |
1249 | }; |
1250 | struct kvm_pgtable_walker walker = { |
1251 | .cb = stage2_attr_walker, |
1252 | .arg = &data, |
1253 | .flags = flags | KVM_PGTABLE_WALK_LEAF, |
1254 | }; |
1255 | |
	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
1257 | if (ret) |
1258 | return ret; |
1259 | |
1260 | if (orig_pte) |
1261 | *orig_pte = data.pte; |
1262 | |
1263 | if (level) |
1264 | *level = data.level; |
1265 | return 0; |
1266 | } |
1267 | |
1268 | int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size) |
1269 | { |
1270 | return stage2_update_leaf_attrs(pgt, addr, size, 0, |
1271 | KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W, |
1272 | NULL, NULL, 0); |
1273 | } |
1274 | |
1275 | kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr) |
1276 | { |
1277 | kvm_pte_t pte = 0; |
1278 | int ret; |
1279 | |
1280 | ret = stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0, |
1281 | &pte, NULL, |
1282 | KVM_PGTABLE_WALK_HANDLE_FAULT | |
1283 | KVM_PGTABLE_WALK_SHARED); |
1284 | if (!ret) |
1285 | dsb(ishst); |
1286 | |
1287 | return pte; |
1288 | } |
1289 | |
1290 | struct stage2_age_data { |
1291 | bool mkold; |
1292 | bool young; |
1293 | }; |
1294 | |
1295 | static int stage2_age_walker(const struct kvm_pgtable_visit_ctx *ctx, |
1296 | enum kvm_pgtable_walk_flags visit) |
1297 | { |
1298 | kvm_pte_t new = ctx->old & ~KVM_PTE_LEAF_ATTR_LO_S2_AF; |
1299 | struct stage2_age_data *data = ctx->arg; |
1300 | |
1301 | if (!kvm_pte_valid(ctx->old) || new == ctx->old) |
1302 | return 0; |
1303 | |
1304 | data->young = true; |
1305 | |
1306 | /* |
1307 | * stage2_age_walker() is always called while holding the MMU lock for |
1308 | * write, so this will always succeed. Nonetheless, this deliberately |
1309 | * follows the race detection pattern of the other stage-2 walkers in |
1310 | * case the locking mechanics of the MMU notifiers is ever changed. |
1311 | */ |
1312 | if (data->mkold && !stage2_try_set_pte(ctx, new)) |
1313 | return -EAGAIN; |
1314 | |
1315 | /* |
1316 | * "But where's the TLBI?!", you scream. |
1317 | * "Over in the core code", I sigh. |
1318 | * |
1319 | * See the '->clear_flush_young()' callback on the KVM mmu notifier. |
1320 | */ |
1321 | return 0; |
1322 | } |
1323 | |
1324 | bool kvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, |
1325 | u64 size, bool mkold) |
1326 | { |
1327 | struct stage2_age_data data = { |
1328 | .mkold = mkold, |
1329 | }; |
1330 | struct kvm_pgtable_walker walker = { |
1331 | .cb = stage2_age_walker, |
1332 | .arg = &data, |
1333 | .flags = KVM_PGTABLE_WALK_LEAF, |
1334 | }; |
1335 | |
	WARN_ON(kvm_pgtable_walk(pgt, addr, size, &walker));
1337 | return data.young; |
1338 | } |
1339 | |
1340 | int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, |
1341 | enum kvm_pgtable_prot prot) |
1342 | { |
1343 | int ret; |
1344 | s8 level; |
1345 | kvm_pte_t set = 0, clr = 0; |
1346 | |
1347 | if (prot & KVM_PTE_LEAF_ATTR_HI_SW) |
1348 | return -EINVAL; |
1349 | |
1350 | if (prot & KVM_PGTABLE_PROT_R) |
1351 | set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R; |
1352 | |
1353 | if (prot & KVM_PGTABLE_PROT_W) |
1354 | set |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W; |
1355 | |
1356 | if (prot & KVM_PGTABLE_PROT_X) |
1357 | clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN; |
1358 | |
1359 | ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level, |
1360 | KVM_PGTABLE_WALK_HANDLE_FAULT | |
1361 | KVM_PGTABLE_WALK_SHARED); |
1362 | if (!ret || ret == -EAGAIN) |
1363 | kvm_call_hyp(__kvm_tlb_flush_vmid_ipa_nsh, pgt->mmu, addr, level); |
1364 | return ret; |
1365 | } |
1366 | |
1367 | static int stage2_flush_walker(const struct kvm_pgtable_visit_ctx *ctx, |
1368 | enum kvm_pgtable_walk_flags visit) |
1369 | { |
1370 | struct kvm_pgtable *pgt = ctx->arg; |
1371 | struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops; |
1372 | |
1373 | if (!kvm_pte_valid(ctx->old) || !stage2_pte_cacheable(pgt, ctx->old)) |
1374 | return 0; |
1375 | |
1376 | if (mm_ops->dcache_clean_inval_poc) |
1377 | mm_ops->dcache_clean_inval_poc(kvm_pte_follow(ctx->old, mm_ops), |
1378 | kvm_granule_size(ctx->level)); |
1379 | return 0; |
1380 | } |
1381 | |
1382 | int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size) |
1383 | { |
1384 | struct kvm_pgtable_walker walker = { |
1385 | .cb = stage2_flush_walker, |
1386 | .flags = KVM_PGTABLE_WALK_LEAF, |
1387 | .arg = pgt, |
1388 | }; |
1389 | |
1390 | if (stage2_has_fwb(pgt)) |
1391 | return 0; |
1392 | |
	return kvm_pgtable_walk(pgt, addr, size, &walker);
1394 | } |
1395 | |
1396 | kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, |
1397 | u64 phys, s8 level, |
1398 | enum kvm_pgtable_prot prot, |
1399 | void *mc, bool force_pte) |
1400 | { |
1401 | struct stage2_map_data map_data = { |
1402 | .phys = phys, |
1403 | .mmu = pgt->mmu, |
1404 | .memcache = mc, |
1405 | .force_pte = force_pte, |
1406 | }; |
1407 | struct kvm_pgtable_walker walker = { |
1408 | .cb = stage2_map_walker, |
1409 | .flags = KVM_PGTABLE_WALK_LEAF | |
1410 | KVM_PGTABLE_WALK_SKIP_BBM_TLBI | |
1411 | KVM_PGTABLE_WALK_SKIP_CMO, |
1412 | .arg = &map_data, |
1413 | }; |
1414 | /* |
1415 | * The input address (.addr) is irrelevant for walking an |
1416 | * unlinked table. Construct an ambiguous IA range to map |
1417 | * kvm_granule_size(level) worth of memory. |
1418 | */ |
1419 | struct kvm_pgtable_walk_data data = { |
1420 | .walker = &walker, |
1421 | .addr = 0, |
1422 | .end = kvm_granule_size(level), |
1423 | }; |
1424 | struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops; |
1425 | kvm_pte_t *pgtable; |
1426 | int ret; |
1427 | |
1428 | if (!IS_ALIGNED(phys, kvm_granule_size(level))) |
1429 | return ERR_PTR(-EINVAL); |
1430 | |
1431 | ret = stage2_set_prot_attr(pgt, prot, &map_data.attr); |
1432 | if (ret) |
1433 | return ERR_PTR(ret); |
1434 | |
1435 | pgtable = mm_ops->zalloc_page(mc); |
1436 | if (!pgtable) |
1437 | return ERR_PTR(-ENOMEM); |
1438 | |
1439 | ret = __kvm_pgtable_walk(&data, mm_ops, (kvm_pteref_t)pgtable, |
1440 | level + 1); |
1441 | if (ret) { |
1442 | kvm_pgtable_stage2_free_unlinked(mm_ops, pgtable, level); |
1443 | return ERR_PTR(ret); |
1444 | } |
1445 | |
1446 | return pgtable; |
1447 | } |
1448 | |
1449 | /* |
1450 | * Get the number of page-tables needed to replace a block with a |
1451 | * fully populated tree up to the PTE entries. Note that @level is |
1452 | * interpreted as in "level @level entry". |
1453 | */ |
1454 | static int stage2_block_get_nr_page_tables(s8 level) |
1455 | { |
1456 | switch (level) { |
1457 | case 1: |
1458 | return PTRS_PER_PTE + 1; |
1459 | case 2: |
1460 | return 1; |
1461 | case 3: |
1462 | return 0; |
1463 | default: |
1464 | WARN_ON_ONCE(level < KVM_PGTABLE_MIN_BLOCK_LEVEL || |
1465 | level > KVM_PGTABLE_LAST_LEVEL); |
1466 | return -EINVAL; |
	}
1468 | } |
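
/*
 * Worked count (assuming a 4K granule): splitting a level-1 block (1GiB)
 * down to PTEs needs one level-2 table plus PTRS_PER_PTE (512) level-3
 * tables, hence PTRS_PER_PTE + 1; a level-2 block (2MiB) needs a single
 * level-3 table; a level-3 entry is already a page, so no tables are
 * needed.
 */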
1469 | |
1470 | static int stage2_split_walker(const struct kvm_pgtable_visit_ctx *ctx, |
1471 | enum kvm_pgtable_walk_flags visit) |
1472 | { |
1473 | struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; |
1474 | struct kvm_mmu_memory_cache *mc = ctx->arg; |
1475 | struct kvm_s2_mmu *mmu; |
1476 | kvm_pte_t pte = ctx->old, new, *childp; |
1477 | enum kvm_pgtable_prot prot; |
1478 | s8 level = ctx->level; |
1479 | bool force_pte; |
1480 | int nr_pages; |
1481 | u64 phys; |
1482 | |
1483 | /* No huge-pages exist at the last level */ |
1484 | if (level == KVM_PGTABLE_LAST_LEVEL) |
1485 | return 0; |
1486 | |
1487 | /* We only split valid block mappings */ |
1488 | if (!kvm_pte_valid(pte)) |
1489 | return 0; |
1490 | |
1491 | nr_pages = stage2_block_get_nr_page_tables(level); |
1492 | if (nr_pages < 0) |
1493 | return nr_pages; |
1494 | |
1495 | if (mc->nobjs >= nr_pages) { |
1496 | /* Build a tree mapped down to the PTE granularity. */ |
1497 | force_pte = true; |
1498 | } else { |
1499 | /* |
1500 | * Don't force PTEs, so create_unlinked() below does |
1501 | * not populate the tree up to the PTE level. The |
1502 | * consequence is that the call will require a single |
1503 | * page of level 2 entries at level 1, or a single |
1504 | * page of PTEs at level 2. If we are at level 1, the |
1505 | * PTEs will be created recursively. |
1506 | */ |
1507 | force_pte = false; |
1508 | nr_pages = 1; |
1509 | } |
1510 | |
1511 | if (mc->nobjs < nr_pages) |
1512 | return -ENOMEM; |
1513 | |
1514 | mmu = container_of(mc, struct kvm_s2_mmu, split_page_cache); |
1515 | phys = kvm_pte_to_phys(pte); |
1516 | prot = kvm_pgtable_stage2_pte_prot(pte); |
1517 | |
1518 | childp = kvm_pgtable_stage2_create_unlinked(mmu->pgt, phys, |
1519 | level, prot, mc, force_pte); |
1520 | if (IS_ERR(childp)) |
1521 | return PTR_ERR(childp); |
1522 | |
1523 | if (!stage2_try_break_pte(ctx, mmu)) { |
1524 | kvm_pgtable_stage2_free_unlinked(mm_ops, childp, level); |
1525 | return -EAGAIN; |
1526 | } |
1527 | |
1528 | /* |
1529 | * Note, the contents of the page table are guaranteed to be made |
1530 | * visible before the new PTE is assigned because stage2_make_pte() |
1531 | * writes the PTE using smp_store_release(). |
1532 | */ |
1533 | new = kvm_init_table_pte(childp, mm_ops); |
1534 | stage2_make_pte(ctx, new); |
1535 | dsb(ishst); |
1536 | return 0; |
1537 | } |
1538 | |
1539 | int kvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size, |
1540 | struct kvm_mmu_memory_cache *mc) |
1541 | { |
1542 | struct kvm_pgtable_walker walker = { |
1543 | .cb = stage2_split_walker, |
1544 | .flags = KVM_PGTABLE_WALK_LEAF, |
1545 | .arg = mc, |
1546 | }; |
1547 | |
	return kvm_pgtable_walk(pgt, addr, size, &walker);
1549 | } |
1550 | |
1551 | int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, |
1552 | struct kvm_pgtable_mm_ops *mm_ops, |
1553 | enum kvm_pgtable_stage2_flags flags, |
1554 | kvm_pgtable_force_pte_cb_t force_pte_cb) |
1555 | { |
1556 | size_t pgd_sz; |
1557 | u64 vtcr = mmu->vtcr; |
1558 | u32 ia_bits = VTCR_EL2_IPA(vtcr); |
1559 | u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr); |
1560 | s8 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0; |
1561 | |
1562 | pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE; |
1563 | pgt->pgd = (kvm_pteref_t)mm_ops->zalloc_pages_exact(pgd_sz); |
1564 | if (!pgt->pgd) |
1565 | return -ENOMEM; |
1566 | |
1567 | pgt->ia_bits = ia_bits; |
1568 | pgt->start_level = start_level; |
1569 | pgt->mm_ops = mm_ops; |
1570 | pgt->mmu = mmu; |
1571 | pgt->flags = flags; |
1572 | pgt->force_pte_cb = force_pte_cb; |
1573 | |
1574 | /* Ensure zeroed PGD pages are visible to the hardware walker */ |
1575 | dsb(ishst); |
1576 | return 0; |
1577 | } |
1578 | |
1579 | size_t kvm_pgtable_stage2_pgd_size(u64 vtcr) |
1580 | { |
1581 | u32 ia_bits = VTCR_EL2_IPA(vtcr); |
1582 | u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr); |
1583 | s8 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0; |
1584 | |
1585 | return kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE; |
1586 | } |
1587 | |
1588 | static int stage2_free_walker(const struct kvm_pgtable_visit_ctx *ctx, |
1589 | enum kvm_pgtable_walk_flags visit) |
1590 | { |
1591 | struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; |
1592 | |
1593 | if (!stage2_pte_is_counted(ctx->old)) |
1594 | return 0; |
1595 | |
1596 | mm_ops->put_page(ctx->ptep); |
1597 | |
1598 | if (kvm_pte_table(ctx->old, ctx->level)) |
1599 | mm_ops->put_page(kvm_pte_follow(ctx->old, mm_ops)); |
1600 | |
1601 | return 0; |
1602 | } |
1603 | |
1604 | void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) |
1605 | { |
1606 | size_t pgd_sz; |
1607 | struct kvm_pgtable_walker walker = { |
1608 | .cb = stage2_free_walker, |
1609 | .flags = KVM_PGTABLE_WALK_LEAF | |
1610 | KVM_PGTABLE_WALK_TABLE_POST, |
1611 | }; |
1612 | |
	WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
1614 | pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE; |
1615 | pgt->mm_ops->free_pages_exact(kvm_dereference_pteref(&walker, pgt->pgd), pgd_sz); |
1616 | pgt->pgd = NULL; |
1617 | } |
1618 | |
1619 | void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level) |
1620 | { |
1621 | kvm_pteref_t ptep = (kvm_pteref_t)pgtable; |
1622 | struct kvm_pgtable_walker walker = { |
1623 | .cb = stage2_free_walker, |
1624 | .flags = KVM_PGTABLE_WALK_LEAF | |
1625 | KVM_PGTABLE_WALK_TABLE_POST, |
1626 | }; |
1627 | struct kvm_pgtable_walk_data data = { |
1628 | .walker = &walker, |
1629 | |
1630 | /* |
1631 | * At this point the IPA really doesn't matter, as the page |
1632 | * table being traversed has already been removed from the stage |
1633 | * 2. Set an appropriate range to cover the entire page table. |
1634 | */ |
1635 | .addr = 0, |
1636 | .end = kvm_granule_size(level), |
1637 | }; |
1638 | |
1639 | WARN_ON(__kvm_pgtable_walk(&data, mm_ops, ptep, level + 1)); |
1640 | |
1641 | WARN_ON(mm_ops->page_count(pgtable) != 1); |
1642 | mm_ops->put_page(pgtable); |
1643 | } |
1644 | |