// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * native hashtable management.
 *
 * SMP scalability work:
 *    Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
 */

#undef DEBUG_LOW

#include <linux/spinlock.h>
#include <linux/bitops.h>
#include <linux/of.h>
#include <linux/processor.h>
#include <linux/threads.h>
#include <linux/smp.h>
#include <linux/pgtable.h>

#include <asm/machdep.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/trace.h>
#include <asm/tlb.h>
#include <asm/cputable.h>
#include <asm/udbg.h>
#include <asm/kexec.h>
#include <asm/ppc-opcode.h>
#include <asm/feature-fixups.h>

#include <misc/cxl-base.h>

#ifdef DEBUG_LOW
#define DBG_LOW(fmt...) udbg_printf(fmt)
#else
#define DBG_LOW(fmt...)
#endif

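/*
 * The HPTE lock is a software bit in the first doubleword of the HPTE,
 * which is stored big-endian. test_and_set_bit_lock() operates on a
 * native-endian long, so on little-endian hosts the same bit in memory
 * is found 56 bits higher: value bit 3 lives in the byte at the highest
 * address of the big-endian doubleword, which a little-endian load maps
 * to bits 56..63.
 */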
#ifdef __BIG_ENDIAN__
#define HPTE_LOCK_BIT 3
#else
#define HPTE_LOCK_BIT (56+3)
#endif

static DEFINE_RAW_SPINLOCK(native_tlbie_lock);

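/*
 * HPTE_LOCK_BIT above is a bare bit with no lock structure that lockdep
 * could observe, so under CONFIG_LOCKDEP a single static map stands in
 * for every HPTE lock. That is enough for lockdep to check hpte locking
 * against the rest of the lock ordering without growing the hash table
 * itself.
 */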
#ifdef CONFIG_LOCKDEP
static struct lockdep_map hpte_lock_map =
	STATIC_LOCKDEP_MAP_INIT("hpte_lock", &hpte_lock_map);

static void acquire_hpte_lock(void)
{
	lock_map_acquire(&hpte_lock_map);
}

static void release_hpte_lock(void)
{
	lock_map_release(&hpte_lock_map);
}
#else
static void acquire_hpte_lock(void)
{
}

static void release_hpte_lock(void)
{
}
#endif

static inline unsigned long ___tlbie(unsigned long vpn, int psize,
						int apsize, int ssize)
{
	unsigned long va;
	unsigned int penc;
	unsigned long sllp;

	/*
	 * We need 14 to 65 bits of va for a tlbie of a 4K page.
	 * With vpn we ignore the lower VPN_SHIFT bits already.
	 * And top two bits are already ignored, because we can
	 * only accommodate 76 bits in a 64 bit vpn with a VPN_SHIFT
	 * of 12.
	 */
	va = vpn << VPN_SHIFT;
	/*
	 * clear top 16 bits of 64bit va, non SLS segment
	 * Older versions of the architecture (2.02 and earlier) require the
	 * masking of the top 16 bits.
	 */
	if (mmu_has_feature(MMU_FTR_TLBIE_CROP_VA))
		va &= ~(0xffffULL << 48);

	switch (psize) {
	case MMU_PAGE_4K:
		/* clear out bits after (52) [0....52.....63] */
		va &= ~((1ul << (64 - 52)) - 1);
		va |= ssize << 8;
		sllp = get_sllp_encoding(apsize);
		va |= sllp << 5;
		asm volatile(ASM_FTR_IFCLR("tlbie %0,0", PPC_TLBIE(%1,%0), %2)
			     : : "r" (va), "r" (0), "i" (CPU_FTR_ARCH_206)
			     : "memory");
		break;
	default:
		/* We need 14 to 14 + i bits of va */
		penc = mmu_psize_defs[psize].penc[apsize];
		va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
		va |= penc << 12;
		va |= ssize << 8;
		/*
		 * AVAL bits:
		 * We don't need all the bits, but the rest of the bits
		 * must be ignored by the processor.
		 * vpn covers up to 65 bits of va. (0...65) and we need
		 * 58..64 bits of va.
		 */
		va |= (vpn & 0xfe); /* AVAL */
		va |= 1; /* L */
		asm volatile(ASM_FTR_IFCLR("tlbie %0,1", PPC_TLBIE(%1,%0), %2)
			     : : "r" (va), "r" (0), "i" (CPU_FTR_ARCH_206)
			     : "memory");
		break;
	}
	return va;
}

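/*
 * POWER9 tlbie errata workarounds. In both cases the fix is a second,
 * benign flush issued after the real one (a radix partition-scoped flush
 * for the ERAT bug, a repeat of the same tlbie for the store-queue bug),
 * with a ptesync in between so the two cannot be reordered.
 */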
static inline void fixup_tlbie_vpn(unsigned long vpn, int psize,
				   int apsize, int ssize)
{
	if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) {
		/* Radix flush for a hash guest */

		unsigned long rb, rs, prs, r, ric;

		rb = PPC_BIT(52); /* IS = 2 */
		rs = 0;		  /* lpid = 0 */
		prs = 0;	  /* partition scoped */
		r = 1;		  /* radix format */
		ric = 0;	  /* RIC_FLUSH_TLB */

		/*
		 * Need the extra ptesync to make sure we don't
		 * re-order the tlbie
		 */
		asm volatile("ptesync" : : : "memory");
		asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
			     : : "r" (rb), "i" (r), "i" (prs),
			       "i" (ric), "r" (rs) : "memory");
	}

	if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) {
		/* Need the extra ptesync to ensure we don't reorder tlbie */
		asm volatile("ptesync" : : : "memory");
		___tlbie(vpn, psize, apsize, ssize);
	}
}

static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
{
	unsigned long rb;

	rb = ___tlbie(vpn, psize, apsize, ssize);
	trace_tlbie(0, 0, rb, 0, 0, 0, 0);
}

static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
{
	unsigned long va;
	unsigned int penc;
	unsigned long sllp;

	/* VPN_SHIFT can be at most 12 */
	va = vpn << VPN_SHIFT;
	/*
	 * clear top 16 bits of 64 bit va, non SLS segment
	 * Older versions of the architecture (2.02 and earlier) require the
	 * masking of the top 16 bits.
	 */
	if (mmu_has_feature(MMU_FTR_TLBIE_CROP_VA))
		va &= ~(0xffffULL << 48);

	switch (psize) {
	case MMU_PAGE_4K:
		/* clear out bits after (52) [0....52.....63] */
		va &= ~((1ul << (64 - 52)) - 1);
		va |= ssize << 8;
		sllp = get_sllp_encoding(apsize);
		va |= sllp << 5;
		asm volatile(ASM_FTR_IFSET("tlbiel %0", PPC_TLBIEL_v205(%0, 0), %1)
			     : : "r" (va), "i" (CPU_FTR_ARCH_206)
			     : "memory");
		break;
	default:
		/* We need 14 to 14 + i bits of va */
		penc = mmu_psize_defs[psize].penc[apsize];
		va &= ~((1ul << mmu_psize_defs[apsize].shift) - 1);
		va |= penc << 12;
		va |= ssize << 8;
		/*
		 * AVAL bits:
		 * We don't need all the bits, but the rest of the bits
		 * must be ignored by the processor.
		 * vpn covers up to 65 bits of va. (0...65) and we need
		 * 58..64 bits of va.
		 */
		va |= (vpn & 0xfe);
		va |= 1; /* L */
		asm volatile(ASM_FTR_IFSET("tlbiel %0", PPC_TLBIEL_v205(%0, 1), %1)
			     : : "r" (va), "i" (CPU_FTR_ARCH_206)
			     : "memory");
		break;
	}
	trace_tlbie(0, 1, va, 0, 0, 0, 0);
}

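/*
 * Wrapper choosing between a CPU-local tlbiel and a global tlbie. CPUs
 * without MMU_FTR_LOCKLESS_TLBIE can only have one tlbie in flight
 * system-wide, so the global path is serialised with native_tlbie_lock;
 * tlbiel never needs the lock.
 */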
static inline void tlbie(unsigned long vpn, int psize, int apsize,
			 int ssize, int local)
{
	unsigned int use_local;
	int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);

	use_local = local && mmu_has_feature(MMU_FTR_TLBIEL) && !cxl_ctx_in_use();

	if (use_local)
		use_local = mmu_psize_defs[psize].tlbiel;
	if (lock_tlbie && !use_local)
		raw_spin_lock(&native_tlbie_lock);
	asm volatile("ptesync" : : : "memory");
	if (use_local) {
		__tlbiel(vpn, psize, apsize, ssize);
		ppc_after_tlbiel_barrier();
	} else {
		__tlbie(vpn, psize, apsize, ssize);
		fixup_tlbie_vpn(vpn, psize, apsize, ssize);
		asm volatile("eieio; tlbsync; ptesync" : : : "memory");
	}
	if (lock_tlbie && !use_local)
		raw_spin_unlock(&native_tlbie_lock);
}

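/*
 * Per-entry locking uses a software bit inside the HPTE's own first
 * doubleword, so no separate lock array is needed. A holder that
 * invalidates the entry can clear the whole doubleword (hptep->v = 0),
 * dropping the valid bit and the lock bit in one store; those paths call
 * release_hpte_lock() directly instead of native_unlock_hpte().
 */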
static inline void native_lock_hpte(struct hash_pte *hptep)
{
	unsigned long *word = (unsigned long *)&hptep->v;

	acquire_hpte_lock();
	while (1) {
		if (!test_and_set_bit_lock(HPTE_LOCK_BIT, word))
			break;
		spin_begin();
		while (test_bit(HPTE_LOCK_BIT, word))
			spin_cpu_relax();
		spin_end();
	}
}

static inline void native_unlock_hpte(struct hash_pte *hptep)
{
	unsigned long *word = (unsigned long *)&hptep->v;

	release_hpte_lock();
	clear_bit_unlock(HPTE_LOCK_BIT, word);
}

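/*
 * The return value below encodes where the entry landed: the low 3 bits
 * are the slot within the 8-entry group, and bit 3 is set when the
 * caller asked for the secondary hash (HPTE_V_SECONDARY in vflags);
 * -1 means the group is full. A sketch of how such a hidx-style value is
 * turned back into a global slot (mirroring the lookup code elsewhere in
 * this file):
 *
 *	slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
 *	slot += hidx & _PTEIDX_GROUP_IX;
 */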
static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
			       unsigned long pa, unsigned long rflags,
			       unsigned long vflags, int psize, int apsize, int ssize)
{
	struct hash_pte *hptep = htab_address + hpte_group;
	unsigned long hpte_v, hpte_r;
	unsigned long flags;
	int i;

	local_irq_save(flags);

	if (!(vflags & HPTE_V_BOLTED)) {
		DBG_LOW(" insert(group=%lx, vpn=%016lx, pa=%016lx,"
			" rflags=%lx, vflags=%lx, psize=%d)\n",
			hpte_group, vpn, pa, rflags, vflags, psize);
	}

	for (i = 0; i < HPTES_PER_GROUP; i++) {
		if (!(be64_to_cpu(hptep->v) & HPTE_V_VALID)) {
			/* retry with lock held */
			native_lock_hpte(hptep);
			if (!(be64_to_cpu(hptep->v) & HPTE_V_VALID))
				break;
			native_unlock_hpte(hptep);
		}

		hptep++;
	}

	if (i == HPTES_PER_GROUP) {
		local_irq_restore(flags);
		return -1;
	}

	hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
	hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;

	if (!(vflags & HPTE_V_BOLTED)) {
		DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
			i, hpte_v, hpte_r);
	}

	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		hpte_r = hpte_old_to_new_r(hpte_v, hpte_r);
		hpte_v = hpte_old_to_new_v(hpte_v);
	}

	hptep->r = cpu_to_be64(hpte_r);
	/* Guarantee the second dword is visible before the valid bit */
	eieio();
	/*
	 * Now set the first dword including the valid bit
	 * NOTE: this also unlocks the hpte
	 */
	release_hpte_lock();
	hptep->v = cpu_to_be64(hpte_v);

	__asm__ __volatile__ ("ptesync" : : : "memory");

	local_irq_restore(flags);

	return i | (!!(vflags & HPTE_V_SECONDARY) << 3);
}

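/*
 * Evict one non-bolted entry from a full group to make room for a new
 * one. Note this does not tlbie: the evicted translation may still be
 * cached, which is why the update/invalidate paths always flush the TLB
 * even when the HPTE lookup misses.
 */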
static long native_hpte_remove(unsigned long hpte_group)
{
	unsigned long hpte_v, flags;
	struct hash_pte *hptep;
	int i;
	int slot_offset;

	local_irq_save(flags);

	DBG_LOW(" remove(group=%lx)\n", hpte_group);

	/* pick a random entry to start at */
	slot_offset = mftb() & 0x7;

	for (i = 0; i < HPTES_PER_GROUP; i++) {
		hptep = htab_address + hpte_group + slot_offset;
		hpte_v = be64_to_cpu(hptep->v);

		if ((hpte_v & HPTE_V_VALID) && !(hpte_v & HPTE_V_BOLTED)) {
			/* retry with lock held */
			native_lock_hpte(hptep);
			hpte_v = be64_to_cpu(hptep->v);
			if ((hpte_v & HPTE_V_VALID)
			    && !(hpte_v & HPTE_V_BOLTED))
				break;
			native_unlock_hpte(hptep);
		}

		slot_offset++;
		slot_offset &= 0x7;
	}

	if (i == HPTES_PER_GROUP) {
		i = -1;
		goto out;
	}

	/* Invalidate the hpte. NOTE: this also unlocks it */
	release_hpte_lock();
	hptep->v = 0;
out:
	local_irq_restore(flags);
	return i;
}

static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
				 unsigned long vpn, int bpsize,
				 int apsize, int ssize, unsigned long flags)
{
	struct hash_pte *hptep = htab_address + slot;
	unsigned long hpte_v, want_v;
	int ret = 0, local = 0;
	unsigned long irqflags;

	local_irq_save(irqflags);

	want_v = hpte_encode_avpn(vpn, bpsize, ssize);

	DBG_LOW(" update(vpn=%016lx, avpnv=%016lx, group=%lx, newpp=%lx)",
		vpn, want_v & HPTE_V_AVPN, slot, newpp);

	hpte_v = hpte_get_old_v(hptep);
	/*
	 * We need to invalidate the TLB always because hpte_remove doesn't do
	 * a tlb invalidate. If a hash bucket gets full, we "evict" a more or
	 * less random entry from it. When we do that we don't invalidate the
	 * TLB (hpte_remove) because we assume the old translation is still
	 * technically "valid".
	 */
	if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) {
		DBG_LOW(" -> miss\n");
		ret = -1;
	} else {
		native_lock_hpte(hptep);
		/* recheck with locks held */
		hpte_v = hpte_get_old_v(hptep);
		if (unlikely(!HPTE_V_COMPARE(hpte_v, want_v) ||
			     !(hpte_v & HPTE_V_VALID))) {
			ret = -1;
		} else {
			DBG_LOW(" -> hit\n");
			/* Update the HPTE */
			hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) &
						~(HPTE_R_PPP | HPTE_R_N)) |
					       (newpp & (HPTE_R_PPP | HPTE_R_N |
							 HPTE_R_C)));
		}
		native_unlock_hpte(hptep);
	}

	if (flags & HPTE_LOCAL_UPDATE)
		local = 1;
	/*
	 * Ensure it is out of the tlb too if it is not a nohpte fault
	 */
	if (!(flags & HPTE_NOHPTE_UPDATE))
		tlbie(vpn, bpsize, apsize, ssize, local);

	local_irq_restore(irqflags);

	return ret;
}

static long __native_hpte_find(unsigned long want_v, unsigned long slot)
{
	struct hash_pte *hptep;
	unsigned long hpte_v;
	unsigned long i;

	for (i = 0; i < HPTES_PER_GROUP; i++) {

		hptep = htab_address + slot;
		hpte_v = hpte_get_old_v(hptep);
		if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
			/* HPTE matches */
			return slot;
		++slot;
	}

	return -1;
}

static long native_hpte_find(unsigned long vpn, int psize, int ssize)
{
	unsigned long hpte_group;
	unsigned long want_v;
	unsigned long hash;
	long slot;

	hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
	want_v = hpte_encode_avpn(vpn, psize, ssize);

	/*
	 * We try to keep bolted entries always in the primary hash,
	 * but in some cases we can find them in the secondary too.
	 */
	hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
	slot = __native_hpte_find(want_v, hpte_group);
	if (slot < 0) {
		/* Try in secondary */
		hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP;
		slot = __native_hpte_find(want_v, hpte_group);
		if (slot < 0)
			return -1;
	}

	return slot;
}

/*
 * Update the page protection bits. Intended to be used to create
 * guard pages for kernel data structures on pages which are bolted
 * in the HPT. Assumes pages being operated on will not be stolen.
 *
 * No need to lock here because we should be the only user.
 */
static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
				       int psize, int ssize)
{
	unsigned long vpn;
	unsigned long vsid;
	long slot;
	struct hash_pte *hptep;
	unsigned long flags;

	local_irq_save(flags);

	vsid = get_kernel_vsid(ea, ssize);
	vpn = hpt_vpn(ea, vsid, ssize);

	slot = native_hpte_find(vpn, psize, ssize);
	if (slot == -1)
		panic("could not find page to bolt\n");
	hptep = htab_address + slot;

	/* Update the HPTE */
	hptep->r = cpu_to_be64((be64_to_cpu(hptep->r) &
				~(HPTE_R_PPP | HPTE_R_N)) |
			       (newpp & (HPTE_R_PPP | HPTE_R_N)));
	/*
	 * Ensure it is out of the tlb too. For bolted entries the base
	 * and actual page sizes are the same.
	 */
	tlbie(vpn, psize, psize, ssize, 0);

	local_irq_restore(flags);
}

/*
 * Remove a bolted kernel entry. Memory hotplug uses this.
 *
 * No need to lock here because we should be the only user.
 */
static int native_hpte_removebolted(unsigned long ea, int psize, int ssize)
{
	unsigned long vpn;
	unsigned long vsid;
	long slot;
	struct hash_pte *hptep;
	unsigned long flags;

	local_irq_save(flags);

	vsid = get_kernel_vsid(ea, ssize);
	vpn = hpt_vpn(ea, vsid, ssize);

	slot = native_hpte_find(vpn, psize, ssize);
	if (slot == -1)
		return -ENOENT;

	hptep = htab_address + slot;

	VM_WARN_ON(!(be64_to_cpu(hptep->v) & HPTE_V_BOLTED));

	/* Invalidate the hpte */
	hptep->v = 0;

	/* Invalidate the TLB */
	tlbie(vpn, psize, psize, ssize, 0);

	local_irq_restore(flags);

	return 0;
}

static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
				   int bpsize, int apsize, int ssize, int local)
{
	struct hash_pte *hptep = htab_address + slot;
	unsigned long hpte_v;
	unsigned long want_v;
	unsigned long flags;

	local_irq_save(flags);

	DBG_LOW(" invalidate(vpn=%016lx, hash: %lx)\n", vpn, slot);

	want_v = hpte_encode_avpn(vpn, bpsize, ssize);
	hpte_v = hpte_get_old_v(hptep);

	if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
		native_lock_hpte(hptep);
		/* recheck with locks held */
		hpte_v = hpte_get_old_v(hptep);

		if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
			/* Invalidate the hpte. NOTE: this also unlocks it */
			release_hpte_lock();
			hptep->v = 0;
		} else
			native_unlock_hpte(hptep);
	}
	/*
	 * We need to invalidate the TLB always because hpte_remove doesn't do
	 * a tlb invalidate. If a hash bucket gets full, we "evict" a more or
	 * less random entry from it. When we do that we don't invalidate the
	 * TLB (hpte_remove) because we assume the old translation is still
	 * technically "valid".
	 */
	tlbie(vpn, bpsize, apsize, ssize, local);

	local_irq_restore(flags);
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static void native_hugepage_invalidate(unsigned long vsid,
				       unsigned long addr,
				       unsigned char *hpte_slot_array,
				       int psize, int ssize, int local)
{
	int i;
	struct hash_pte *hptep;
	int actual_psize = MMU_PAGE_16M;
	unsigned int max_hpte_count, valid;
	unsigned long flags, s_addr = addr;
	unsigned long hpte_v, want_v, shift;
	unsigned long hidx, vpn = 0, hash, slot;

	shift = mmu_psize_defs[psize].shift;
	max_hpte_count = 1U << (PMD_SHIFT - shift);

	local_irq_save(flags);
	for (i = 0; i < max_hpte_count; i++) {
		valid = hpte_valid(hpte_slot_array, i);
		if (!valid)
			continue;
		hidx = hpte_hash_index(hpte_slot_array, i);

		/* get the vpn */
		addr = s_addr + (i * (1ul << shift));
		vpn = hpt_vpn(addr, vsid, ssize);
		hash = hpt_hash(vpn, shift, ssize);
		if (hidx & _PTEIDX_SECONDARY)
			hash = ~hash;

		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
		slot += hidx & _PTEIDX_GROUP_IX;

		hptep = htab_address + slot;
		want_v = hpte_encode_avpn(vpn, psize, ssize);
		hpte_v = hpte_get_old_v(hptep);

		/* Even if we miss, we need to invalidate the TLB */
		if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
			/* recheck with locks held */
			native_lock_hpte(hptep);
			hpte_v = hpte_get_old_v(hptep);

			if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID)) {
				/* Invalidate the hpte. NOTE: this also unlocks it */
				release_hpte_lock();
				hptep->v = 0;
			} else
				native_unlock_hpte(hptep);
		}
		/*
		 * We need a TLB invalidate for each address: the tlbie
		 * instruction compares the entry's VA in the TLB with the
		 * VA specified here.
		 */
		tlbie(vpn, psize, actual_psize, ssize, local);
	}
	local_irq_restore(flags);
}
#else
static void native_hugepage_invalidate(unsigned long vsid,
				       unsigned long addr,
				       unsigned char *hpte_slot_array,
				       int psize, int ssize, int local)
{
	WARN(1, "%s called without THP support\n", __func__);
}
#endif

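/*
 * Reconstruct (vpn, psize, apsize, ssize) from a raw HPTE and its slot,
 * used when clearing the whole table. The AVPN field only keeps VA bits
 * above bit 23 of the segment offset; for smaller page sizes the missing
 * low bits are recovered from the slot, since the primary hash mixes
 * vsid with (seg_off >> shift): xoring the pteg number with vsid (and,
 * for 1T segments, with vsid << 25) gives those bits back, modulo the
 * hash table size.
 */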
static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
			int *psize, int *apsize, int *ssize, unsigned long *vpn)
{
	unsigned long avpn, pteg, vpi;
	unsigned long hpte_v = be64_to_cpu(hpte->v);
	unsigned long hpte_r = be64_to_cpu(hpte->r);
	unsigned long vsid, seg_off;
	int size, a_size, shift;
	/* Look at the 8 bit LP value */
	unsigned int lp = (hpte_r >> LP_SHIFT) & ((1 << LP_BITS) - 1);

	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		hpte_v = hpte_new_to_old_v(hpte_v, hpte_r);
		hpte_r = hpte_new_to_old_r(hpte_r);
	}
	if (!(hpte_v & HPTE_V_LARGE)) {
		size = MMU_PAGE_4K;
		a_size = MMU_PAGE_4K;
	} else {
		size = hpte_page_sizes[lp] & 0xf;
		a_size = hpte_page_sizes[lp] >> 4;
	}
	/* This works for all page sizes, and for 256M and 1T segments */
	*ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
	shift = mmu_psize_defs[size].shift;

	avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm);
	pteg = slot / HPTES_PER_GROUP;
	if (hpte_v & HPTE_V_SECONDARY)
		pteg = ~pteg;

	switch (*ssize) {
	case MMU_SEGSIZE_256M:
		/* We only have 28 - 23 bits of seg_off in avpn */
		seg_off = (avpn & 0x1f) << 23;
		vsid = avpn >> 5;
		/* We can find more bits from the pteg value */
		if (shift < 23) {
			vpi = (vsid ^ pteg) & htab_hash_mask;
			seg_off |= vpi << shift;
		}
		*vpn = vsid << (SID_SHIFT - VPN_SHIFT) | seg_off >> VPN_SHIFT;
		break;
	case MMU_SEGSIZE_1T:
		/* We only have 40 - 23 bits of seg_off in avpn */
		seg_off = (avpn & 0x1ffff) << 23;
		vsid = avpn >> 17;
		if (shift < 23) {
			vpi = (vsid ^ (vsid << 25) ^ pteg) & htab_hash_mask;
			seg_off |= vpi << shift;
		}
		*vpn = vsid << (SID_SHIFT_1T - VPN_SHIFT) | seg_off >> VPN_SHIFT;
		break;
	default:
		*vpn = size = 0;
	}
	*psize = size;
	*apsize = a_size;
}

/*
 * Clear all mappings on kexec. All CPUs are in real mode (or they will
 * be when they isi), and we are the only one left. We rely on our kernel
 * mapping being 0xC0's and the hardware ignoring those two real bits.
 *
 * This must be called with interrupts disabled.
 *
 * Taking the native_tlbie_lock is unsafe here due to the possibility of
 * lockdep being on. On pre-POWER5 hardware, not taking the lock could
 * cause deadlock; on POWER5 and newer, not taking it is fine. This only
 * gets called during boot before secondary CPUs have come up and during
 * crashdump, and all bets are off anyway.
 *
 * TODO: add batching support when enabled. remember, no dynamic memory here,
 * although there is the control page available...
 */
static notrace void native_hpte_clear(void)
{
	unsigned long vpn = 0;
	unsigned long slot, slots;
	struct hash_pte *hptep = htab_address;
	unsigned long hpte_v;
	unsigned long pteg_count;
	int psize, apsize, ssize;

	pteg_count = htab_hash_mask + 1;

	slots = pteg_count * HPTES_PER_GROUP;

	for (slot = 0; slot < slots; slot++, hptep++) {
		/*
		 * we could lock the pte here, but we are the only cpu
		 * running, right? and for crash dump, we probably
		 * don't want to wait for a maybe bad cpu.
		 */
		hpte_v = be64_to_cpu(hptep->v);

		/*
		 * Call ___tlbie() here rather than tlbie() since we can't
		 * take the native_tlbie_lock.
		 */
		if (hpte_v & HPTE_V_VALID) {
			hpte_decode(hptep, slot, &psize, &apsize, &ssize, &vpn);
			hptep->v = 0;
			___tlbie(vpn, psize, apsize, ssize);
		}
	}

	asm volatile("eieio; tlbsync; ptesync" : : : "memory");
}

/*
 * Batched hash table flush: we batch the tlbies to avoid taking/releasing
 * the lock all the time.
 */
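/*
 * All matching HPTEs are invalidated first, each under its own HPTE
 * lock; the TLB entries are then flushed in one pass under a single
 * ptesync/tlbsync bracket, with one trailing fixup_tlbie_vpn() (reusing
 * the last VA) to satisfy the POWER9 workarounds.
 */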
static void native_flush_hash_range(unsigned long number, int local)
{
	unsigned long vpn = 0;
	unsigned long hash, index, hidx, shift, slot;
	struct hash_pte *hptep;
	unsigned long hpte_v;
	unsigned long want_v;
	unsigned long flags;
	real_pte_t pte;
	struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);
	unsigned long psize = batch->psize;
	int ssize = batch->ssize;
	int i;
	unsigned int use_local;

	use_local = local && mmu_has_feature(MMU_FTR_TLBIEL) &&
		    mmu_psize_defs[psize].tlbiel && !cxl_ctx_in_use();

	local_irq_save(flags);

	for (i = 0; i < number; i++) {
		vpn = batch->vpn[i];
		pte = batch->pte[i];

		pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
			hash = hpt_hash(vpn, shift, ssize);
			hidx = __rpte_to_hidx(pte, index);
			if (hidx & _PTEIDX_SECONDARY)
				hash = ~hash;
			slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
			slot += hidx & _PTEIDX_GROUP_IX;
			hptep = htab_address + slot;
			want_v = hpte_encode_avpn(vpn, psize, ssize);
			hpte_v = hpte_get_old_v(hptep);

			if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
				continue;
			/* lock and try again */
			native_lock_hpte(hptep);
			hpte_v = hpte_get_old_v(hptep);

			if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
				native_unlock_hpte(hptep);
			else {
				release_hpte_lock();
				hptep->v = 0;
			}

		} pte_iterate_hashed_end();
	}

	if (use_local) {
		asm volatile("ptesync" : : : "memory");
		for (i = 0; i < number; i++) {
			vpn = batch->vpn[i];
			pte = batch->pte[i];

			pte_iterate_hashed_subpages(pte, psize,
						    vpn, index, shift) {
				__tlbiel(vpn, psize, psize, ssize);
			} pte_iterate_hashed_end();
		}
		ppc_after_tlbiel_barrier();
	} else {
		int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);

		if (lock_tlbie)
			raw_spin_lock(&native_tlbie_lock);

		asm volatile("ptesync" : : : "memory");
		for (i = 0; i < number; i++) {
			vpn = batch->vpn[i];
			pte = batch->pte[i];

			pte_iterate_hashed_subpages(pte, psize,
						    vpn, index, shift) {
				__tlbie(vpn, psize, psize, ssize);
			} pte_iterate_hashed_end();
		}
		/*
		 * Just do one more with the last used values.
		 */
		fixup_tlbie_vpn(vpn, psize, psize, ssize);
		asm volatile("eieio; tlbsync; ptesync" : : : "memory");

		if (lock_tlbie)
			raw_spin_unlock(&native_tlbie_lock);
	}

	local_irq_restore(flags);
}

void __init hpte_init_native(void)
{
	mmu_hash_ops.hpte_invalidate = native_hpte_invalidate;
	mmu_hash_ops.hpte_updatepp = native_hpte_updatepp;
	mmu_hash_ops.hpte_updateboltedpp = native_hpte_updateboltedpp;
	mmu_hash_ops.hpte_removebolted = native_hpte_removebolted;
	mmu_hash_ops.hpte_insert = native_hpte_insert;
	mmu_hash_ops.hpte_remove = native_hpte_remove;
	mmu_hash_ops.hpte_clear_all = native_hpte_clear;
	mmu_hash_ops.flush_hash_range = native_flush_hash_range;
	mmu_hash_ops.hugepage_invalidate = native_hugepage_invalidate;
}