1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Copyright 2017 Benjamin Herrenschmidt, IBM Corporation. |
4 | */ |
5 | |
6 | #define pr_fmt(fmt) "xive-kvm: " fmt |
7 | |
8 | #include <linux/kernel.h> |
9 | #include <linux/kvm_host.h> |
10 | #include <linux/err.h> |
11 | #include <linux/gfp.h> |
12 | #include <linux/spinlock.h> |
13 | #include <linux/delay.h> |
14 | #include <linux/percpu.h> |
15 | #include <linux/cpumask.h> |
16 | #include <linux/uaccess.h> |
17 | #include <linux/irqdomain.h> |
18 | #include <asm/kvm_book3s.h> |
19 | #include <asm/kvm_ppc.h> |
20 | #include <asm/hvcall.h> |
21 | #include <asm/xics.h> |
22 | #include <asm/xive.h> |
23 | #include <asm/xive-regs.h> |
24 | #include <asm/debug.h> |
25 | #include <asm/time.h> |
26 | #include <asm/opal.h> |
27 | |
28 | #include <linux/debugfs.h> |
29 | #include <linux/seq_file.h> |
30 | |
31 | #include "book3s_xive.h" |
32 | |
33 | #define __x_eoi_page(xd) ((void __iomem *)((xd)->eoi_mmio)) |
34 | #define __x_trig_page(xd) ((void __iomem *)((xd)->trig_mmio)) |
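
/*
 * Note (added for clarity): these accessors resolve to the per-source
 * ESB MMIO pages kept in struct xive_irq_data. The EOI page is used
 * for EOI and PQ manipulation loads, the trigger page for trigger
 * stores.
 */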
35 | |
36 | /* Dummy interrupt used when taking interrupts out of a queue in H_CPPR */ |
37 | #define XICS_DUMMY 1 |
38 | |
39 | static void xive_vm_ack_pending(struct kvmppc_xive_vcpu *xc) |
40 | { |
41 | u8 cppr; |
42 | u16 ack; |
43 | |
44 | /* |
45 | * Ensure any previous store to CPPR is ordered vs. |
46 | * the subsequent loads from PIPR or ACK. |
47 | */ |
48 | eieio(); |
49 | |
50 | /* Perform the acknowledge OS to register cycle. */ |
51 | ack = be16_to_cpu(__raw_readw(xive_tima + TM_SPC_ACK_OS_REG)); |
52 | |
53 | /* Synchronize subsequent queue accesses */ |
54 | mb(); |
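
	/*
	 * Layout note (added for clarity, matching the decoding below):
	 * the 16-bit OS ack carries the NSR byte in bits 15:8 and the
	 * acknowledged priority (CPPR) in bits 7:0. Priorities 0-7 each
	 * map to one bit of the xc->pending bitmask.
	 */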
55 | |
56 | /* XXX Check grouping level */ |
57 | |
58 | /* Anything ? */ |
59 | if (!((ack >> 8) & TM_QW1_NSR_EO)) |
60 | return; |
61 | |
62 | /* Grab CPPR of the most favored pending interrupt */ |
63 | cppr = ack & 0xff; |
64 | if (cppr < 8) |
65 | xc->pending |= 1 << cppr; |
66 | |
67 | /* Check consistency */ |
68 | if (cppr >= xc->hw_cppr) |
		pr_warn("KVM-XIVE: CPU %d odd ack CPPR, got %d at %d\n",
70 | smp_processor_id(), cppr, xc->hw_cppr); |
71 | |
72 | /* |
73 | * Update our image of the HW CPPR. We don't yet modify |
74 | * xc->cppr, this will be done as we scan for interrupts |
75 | * in the queues. |
76 | */ |
77 | xc->hw_cppr = cppr; |
78 | } |
79 | |
80 | static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset) |
81 | { |
82 | u64 val; |
83 | |
84 | if (offset == XIVE_ESB_SET_PQ_10 && xd->flags & XIVE_IRQ_FLAG_STORE_EOI) |
85 | offset |= XIVE_ESB_LD_ST_MO; |
86 | |
87 | val = __raw_readq(__x_eoi_page(xd) + offset); |
88 | #ifdef __LITTLE_ENDIAN__ |
89 | val >>= 64-8; |
90 | #endif |
91 | return (u8)val; |
92 | } |
93 | |
94 | |
95 | static void xive_vm_source_eoi(u32 hw_irq, struct xive_irq_data *xd) |
96 | { |
	/* If the XIVE supports the new "store EOI" facility, use it */
	if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
		__raw_writeq(0, __x_eoi_page(xd) + XIVE_ESB_STORE_EOI);
100 | else if (xd->flags & XIVE_IRQ_FLAG_LSI) { |
101 | /* |
102 | * For LSIs the HW EOI cycle is used rather than PQ bits, |
		 * as they are automatically re-triggered in HW when still
104 | * pending. |
105 | */ |
106 | __raw_readq(__x_eoi_page(xd) + XIVE_ESB_LOAD_EOI); |
107 | } else { |
108 | uint64_t eoi_val; |
109 | |
110 | /* |
111 | * Otherwise for EOI, we use the special MMIO that does |
112 | * a clear of both P and Q and returns the old Q, |
113 | * except for LSIs where we use the "EOI cycle" special |
114 | * load. |
115 | * |
116 | * This allows us to then do a re-trigger if Q was set |
		 * rather than synthesizing an interrupt in software
		 */
		eoi_val = xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_00);
120 | |
121 | /* Re-trigger if needed */ |
122 | if ((eoi_val & 1) && __x_trig_page(xd)) |
			__raw_writeq(0, __x_trig_page(xd));
124 | } |
125 | } |
126 | |
127 | enum { |
128 | scan_fetch, |
129 | scan_poll, |
130 | scan_eoi, |
131 | }; |
132 | |
133 | static u32 xive_vm_scan_interrupts(struct kvmppc_xive_vcpu *xc, |
134 | u8 pending, int scan_type) |
135 | { |
136 | u32 hirq = 0; |
137 | u8 prio = 0xff; |
138 | |
139 | /* Find highest pending priority */ |
140 | while ((xc->mfrr != 0xff || pending != 0) && hirq == 0) { |
141 | struct xive_q *q; |
142 | u32 idx, toggle; |
143 | __be32 *qpage; |
144 | |
145 | /* |
146 | * If pending is 0 this will return 0xff which is what |
147 | * we want |
148 | */ |
149 | prio = ffs(pending) - 1; |
150 | |
151 | /* Don't scan past the guest cppr */ |
152 | if (prio >= xc->cppr || prio > 7) { |
153 | if (xc->mfrr < xc->cppr) { |
154 | prio = xc->mfrr; |
155 | hirq = XICS_IPI; |
156 | } |
157 | break; |
158 | } |
159 | |
160 | /* Grab queue and pointers */ |
161 | q = &xc->queues[prio]; |
162 | idx = q->idx; |
163 | toggle = q->toggle; |
164 | |
165 | /* |
166 | * Snapshot the queue page. The test further down for EOI |
167 | * must use the same "copy" that was used by __xive_read_eq |
168 | * since qpage can be set concurrently and we don't want |
169 | * to miss an EOI. |
170 | */ |
171 | qpage = READ_ONCE(q->qpage); |
172 | |
173 | skip_ipi: |
174 | /* |
175 | * Try to fetch from the queue. Will return 0 for a |
176 | * non-queueing priority (ie, qpage = 0). |
177 | */ |
178 | hirq = __xive_read_eq(qpage, q->msk, &idx, &toggle); |
179 | |
180 | /* |
		 * If this was a signal for an MFRR change done by
182 | * H_IPI we skip it. Additionally, if we were fetching |
183 | * we EOI it now, thus re-enabling reception of a new |
184 | * such signal. |
185 | * |
186 | * We also need to do that if prio is 0 and we had no |
		 * page for the queue. In this case, we have a non-queued
		 * IPI that needs to be EOId.
189 | * |
190 | * This is safe because if we have another pending MFRR |
191 | * change that wasn't observed above, the Q bit will have |
192 | * been set and another occurrence of the IPI will trigger. |
193 | */ |
194 | if (hirq == XICS_IPI || (prio == 0 && !qpage)) { |
195 | if (scan_type == scan_fetch) { |
				xive_vm_source_eoi(xc->vp_ipi,
						   &xc->vp_ipi_data);
198 | q->idx = idx; |
199 | q->toggle = toggle; |
200 | } |
201 | /* Loop back on same queue with updated idx/toggle */ |
202 | WARN_ON(hirq && hirq != XICS_IPI); |
203 | if (hirq) |
204 | goto skip_ipi; |
205 | } |
206 | |
207 | /* If it's the dummy interrupt, continue searching */ |
208 | if (hirq == XICS_DUMMY) |
209 | goto skip_ipi; |
210 | |
211 | /* Clear the pending bit if the queue is now empty */ |
212 | if (!hirq) { |
213 | pending &= ~(1 << prio); |
214 | |
215 | /* |
216 | * Check if the queue count needs adjusting due to |
217 | * interrupts being moved away. |
218 | */ |
			if (atomic_read(&q->pending_count)) {
				int p = atomic_xchg(&q->pending_count, 0);

				if (p) {
					WARN_ON(p > atomic_read(&q->count));
					atomic_sub(p, &q->count);
225 | } |
226 | } |
227 | } |
228 | |
229 | /* |
		 * If the most favored prio we found pending is less
		 * (or equally) favored than a pending IPI, we return
		 * the IPI instead.
233 | */ |
234 | if (prio >= xc->mfrr && xc->mfrr < xc->cppr) { |
235 | prio = xc->mfrr; |
236 | hirq = XICS_IPI; |
237 | break; |
238 | } |
239 | |
240 | /* If fetching, update queue pointers */ |
241 | if (scan_type == scan_fetch) { |
242 | q->idx = idx; |
243 | q->toggle = toggle; |
244 | } |
245 | } |
246 | |
247 | /* If we are just taking a "peek", do nothing else */ |
248 | if (scan_type == scan_poll) |
249 | return hirq; |
250 | |
251 | /* Update the pending bits */ |
252 | xc->pending = pending; |
253 | |
254 | /* |
255 | * If this is an EOI that's it, no CPPR adjustment done here, |
	 * all we needed was to clean up the stale pending bits and check
257 | * if there's anything left. |
258 | */ |
259 | if (scan_type == scan_eoi) |
260 | return hirq; |
261 | |
262 | /* |
263 | * If we found an interrupt, adjust what the guest CPPR should |
264 | * be as if we had just fetched that interrupt from HW. |
265 | * |
266 | * Note: This can only make xc->cppr smaller as the previous |
267 | * loop will only exit with hirq != 0 if prio is lower than |
268 | * the current xc->cppr. Thus we don't need to re-check xc->mfrr |
269 | * for pending IPIs. |
270 | */ |
271 | if (hirq) |
272 | xc->cppr = prio; |
273 | /* |
274 | * If it was an IPI the HW CPPR might have been lowered too much |
275 | * as the HW interrupt we use for IPIs is routed to priority 0. |
276 | * |
277 | * We re-sync it here. |
278 | */ |
279 | if (xc->cppr != xc->hw_cppr) { |
280 | xc->hw_cppr = xc->cppr; |
		__raw_writeb(xc->cppr, xive_tima + TM_QW1_OS + TM_CPPR);
282 | } |
283 | |
284 | return hirq; |
285 | } |
286 | |
287 | static unsigned long xive_vm_h_xirr(struct kvm_vcpu *vcpu) |
288 | { |
289 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; |
290 | u8 old_cppr; |
291 | u32 hirq; |
292 | |
	pr_devel("H_XIRR\n");
294 | |
295 | xc->stat_vm_h_xirr++; |
296 | |
297 | /* First collect pending bits from HW */ |
298 | xive_vm_ack_pending(xc); |
299 | |
	pr_devel(" new pending=0x%02x hw_cppr=%d cppr=%d\n",
		 xc->pending, xc->hw_cppr, xc->cppr);

	/* Grab previous CPPR and reverse map it */
	old_cppr = xive_prio_to_guest(xc->cppr);

	/* Scan for actual interrupts */
	hirq = xive_vm_scan_interrupts(xc, xc->pending, scan_fetch);

	pr_devel(" got hirq=0x%x hw_cppr=%d cppr=%d\n",
310 | hirq, xc->hw_cppr, xc->cppr); |
311 | |
312 | /* That should never hit */ |
313 | if (hirq & 0xff000000) |
		pr_warn("XIVE: Weird guest interrupt number 0x%08x\n", hirq);
315 | |
316 | /* |
317 | * XXX We could check if the interrupt is masked here and |
318 | * filter it. If we chose to do so, we would need to do: |
319 | * |
320 | * if (masked) { |
321 | * lock(); |
322 | * if (masked) { |
323 | * old_Q = true; |
324 | * hirq = 0; |
325 | * } |
326 | * unlock(); |
327 | * } |
328 | */ |
329 | |
330 | /* Return interrupt and old CPPR in GPR4 */ |
331 | kvmppc_set_gpr(vcpu, 4, hirq | (old_cppr << 24)); |
332 | |
333 | return H_SUCCESS; |
334 | } |
335 | |
336 | static unsigned long xive_vm_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server) |
337 | { |
338 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; |
339 | u8 pending = xc->pending; |
340 | u32 hirq; |
341 | |
	pr_devel("H_IPOLL(server=%ld)\n", server);
343 | |
344 | xc->stat_vm_h_ipoll++; |
345 | |
346 | /* Grab the target VCPU if not the current one */ |
347 | if (xc->server_num != server) { |
348 | vcpu = kvmppc_xive_find_server(vcpu->kvm, server); |
349 | if (!vcpu) |
350 | return H_PARAMETER; |
351 | xc = vcpu->arch.xive_vcpu; |
352 | |
353 | /* Scan all priorities */ |
354 | pending = 0xff; |
355 | } else { |
356 | /* Grab pending interrupt if any */ |
		__be64 qw1 = __raw_readq(xive_tima + TM_QW1_OS);
358 | u8 pipr = be64_to_cpu(qw1) & 0xff; |
359 | |
360 | if (pipr < 8) |
361 | pending |= 1 << pipr; |
362 | } |
363 | |
	hirq = xive_vm_scan_interrupts(xc, pending, scan_poll);
365 | |
366 | /* Return interrupt and old CPPR in GPR4 */ |
367 | kvmppc_set_gpr(vcpu, 4, hirq | (xc->cppr << 24)); |
368 | |
369 | return H_SUCCESS; |
370 | } |
371 | |
372 | static void xive_vm_push_pending_to_hw(struct kvmppc_xive_vcpu *xc) |
373 | { |
374 | u8 pending, prio; |
375 | |
376 | pending = xc->pending; |
377 | if (xc->mfrr != 0xff) { |
378 | if (xc->mfrr < 8) |
379 | pending |= 1 << xc->mfrr; |
380 | else |
381 | pending |= 0x80; |
382 | } |
383 | if (!pending) |
384 | return; |
385 | prio = ffs(pending) - 1; |
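
	/*
	 * Note (added for clarity): writing a priority to the
	 * TM_SPC_SET_OS_PENDING special area is expected to set the
	 * corresponding IPB bit in the OS thread context, making the
	 * VP see a pending interrupt at that priority.
	 */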
386 | |
	__raw_writeb(prio, xive_tima + TM_SPC_SET_OS_PENDING);
388 | } |
389 | |
390 | static void xive_vm_scan_for_rerouted_irqs(struct kvmppc_xive *xive, |
391 | struct kvmppc_xive_vcpu *xc) |
392 | { |
393 | unsigned int prio; |
394 | |
395 | /* For each priority that is now masked */ |
396 | for (prio = xc->cppr; prio < KVMPPC_XIVE_Q_COUNT; prio++) { |
397 | struct xive_q *q = &xc->queues[prio]; |
398 | struct kvmppc_xive_irq_state *state; |
399 | struct kvmppc_xive_src_block *sb; |
400 | u32 idx, toggle, entry, irq, hw_num; |
401 | struct xive_irq_data *xd; |
402 | __be32 *qpage; |
403 | u16 src; |
404 | |
405 | idx = q->idx; |
406 | toggle = q->toggle; |
407 | qpage = READ_ONCE(q->qpage); |
408 | if (!qpage) |
409 | continue; |
410 | |
411 | /* For each interrupt in the queue */ |
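		/*
		 * Queue entry format (noted here for clarity, matching the
		 * decoding below): each EQ entry is a big-endian 32-bit word;
		 * bit 31 is the generation bit compared against the queue
		 * toggle, and the low 31 bits carry the interrupt number.
		 */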
412 | for (;;) { |
			entry = be32_to_cpup(qpage + idx);
414 | |
415 | /* No more ? */ |
416 | if ((entry >> 31) == toggle) |
417 | break; |
418 | irq = entry & 0x7fffffff; |
419 | |
420 | /* Skip dummies and IPIs */ |
421 | if (irq == XICS_DUMMY || irq == XICS_IPI) |
422 | goto next; |
423 | sb = kvmppc_xive_find_source(xive, irq, &src); |
424 | if (!sb) |
425 | goto next; |
426 | state = &sb->irq_state[src]; |
427 | |
428 | /* Has it been rerouted ? */ |
429 | if (xc->server_num == state->act_server) |
430 | goto next; |
431 | |
432 | /* |
			 * All right, it *has* been re-routed, kill it from
434 | * the queue. |
435 | */ |
436 | qpage[idx] = cpu_to_be32((entry & 0x80000000) | XICS_DUMMY); |
437 | |
438 | /* Find the HW interrupt */ |
439 | kvmppc_xive_select_irq(state, &hw_num, &xd); |
440 | |
			/* If it's not an LSI, set PQ to 11 so the EOI will force a resend */
			if (!(xd->flags & XIVE_IRQ_FLAG_LSI))
				xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_11);

			/* EOI the source */
			xive_vm_source_eoi(hw_num, xd);
447 | |
448 | next: |
449 | idx = (idx + 1) & q->msk; |
450 | if (idx == 0) |
451 | toggle ^= 1; |
452 | } |
453 | } |
454 | } |
455 | |
456 | static int xive_vm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr) |
457 | { |
458 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; |
459 | struct kvmppc_xive *xive = vcpu->kvm->arch.xive; |
460 | u8 old_cppr; |
461 | |
	pr_devel("H_CPPR(cppr=%ld)\n", cppr);
463 | |
464 | xc->stat_vm_h_cppr++; |
465 | |
466 | /* Map CPPR */ |
467 | cppr = xive_prio_from_guest(cppr); |
468 | |
469 | /* Remember old and update SW state */ |
470 | old_cppr = xc->cppr; |
471 | xc->cppr = cppr; |
472 | |
473 | /* |
474 | * Order the above update of xc->cppr with the subsequent |
475 | * read of xc->mfrr inside push_pending_to_hw() |
476 | */ |
477 | smp_mb(); |
478 | |
479 | if (cppr > old_cppr) { |
480 | /* |
481 | * We are masking less, we need to look for pending things |
482 | * to deliver and set VP pending bits accordingly to trigger |
483 | * a new interrupt otherwise we might miss MFRR changes for |
484 | * which we have optimized out sending an IPI signal. |
485 | */ |
486 | xive_vm_push_pending_to_hw(xc); |
487 | } else { |
488 | /* |
489 | * We are masking more, we need to check the queue for any |
490 | * interrupt that has been routed to another CPU, take |
491 | * it out (replace it with the dummy) and retrigger it. |
492 | * |
493 | * This is necessary since those interrupts may otherwise |
494 | * never be processed, at least not until this CPU restores |
495 | * its CPPR. |
496 | * |
497 | * This is in theory racy vs. HW adding new interrupts to |
498 | * the queue. In practice this works because the interesting |
499 | * cases are when the guest has done a set_xive() to move the |
500 | * interrupt away, which flushes the xive, followed by the |
501 | * target CPU doing a H_CPPR. So any new interrupt coming into |
502 | * the queue must still be routed to us and isn't a source |
503 | * of concern. |
504 | */ |
505 | xive_vm_scan_for_rerouted_irqs(xive, xc); |
506 | } |
507 | |
508 | /* Apply new CPPR */ |
509 | xc->hw_cppr = cppr; |
510 | __raw_writeb(cppr, xive_tima + TM_QW1_OS + TM_CPPR); |
511 | |
512 | return H_SUCCESS; |
513 | } |
514 | |
515 | static int xive_vm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr) |
516 | { |
517 | struct kvmppc_xive *xive = vcpu->kvm->arch.xive; |
518 | struct kvmppc_xive_src_block *sb; |
519 | struct kvmppc_xive_irq_state *state; |
520 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; |
521 | struct xive_irq_data *xd; |
522 | u8 new_cppr = xirr >> 24; |
523 | u32 irq = xirr & 0x00ffffff, hw_num; |
524 | u16 src; |
525 | int rc = 0; |
526 | |
	pr_devel("H_EOI(xirr=%08lx)\n", xirr);
528 | |
529 | xc->stat_vm_h_eoi++; |
530 | |
531 | xc->cppr = xive_prio_from_guest(new_cppr); |
532 | |
533 | /* |
	 * IPIs are synthesized from MFRR and thus don't need
535 | * any special EOI handling. The underlying interrupt |
536 | * used to signal MFRR changes is EOId when fetched from |
537 | * the queue. |
538 | */ |
539 | if (irq == XICS_IPI || irq == 0) { |
540 | /* |
541 | * This barrier orders the setting of xc->cppr vs. |
542 | * subsequent test of xc->mfrr done inside |
543 | * scan_interrupts and push_pending_to_hw |
544 | */ |
545 | smp_mb(); |
546 | goto bail; |
547 | } |
548 | |
549 | /* Find interrupt source */ |
550 | sb = kvmppc_xive_find_source(xive, irq, &src); |
551 | if (!sb) { |
		pr_devel(" source not found !\n");
553 | rc = H_PARAMETER; |
554 | /* Same as above */ |
555 | smp_mb(); |
556 | goto bail; |
557 | } |
558 | state = &sb->irq_state[src]; |
559 | kvmppc_xive_select_irq(state, &hw_num, &xd); |
560 | |
561 | state->in_eoi = true; |
562 | |
563 | /* |
	 * This barrier orders both the setting of in_eoi above vs.
565 | * subsequent test of guest_priority, and the setting |
566 | * of xc->cppr vs. subsequent test of xc->mfrr done inside |
567 | * scan_interrupts and push_pending_to_hw |
568 | */ |
569 | smp_mb(); |
570 | |
571 | again: |
572 | if (state->guest_priority == MASKED) { |
573 | arch_spin_lock(&sb->lock); |
574 | if (state->guest_priority != MASKED) { |
575 | arch_spin_unlock(&sb->lock); |
576 | goto again; |
577 | } |
		pr_devel(" EOI on saved P...\n");
579 | |
580 | /* Clear old_p, that will cause unmask to perform an EOI */ |
581 | state->old_p = false; |
582 | |
583 | arch_spin_unlock(&sb->lock); |
584 | } else { |
		pr_devel(" EOI on source...\n");

		/* Perform EOI on the source */
		xive_vm_source_eoi(hw_num, xd);

		/* If it's an emulated LSI, check level and resend */
		if (state->lsi && state->asserted)
			__raw_writeq(0, __x_trig_page(xd));
593 | |
594 | } |
595 | |
596 | /* |
597 | * This barrier orders the above guest_priority check |
598 | * and spin_lock/unlock with clearing in_eoi below. |
599 | * |
600 | * It also has to be a full mb() as it must ensure |
601 | * the MMIOs done in source_eoi() are completed before |
602 | * state->in_eoi is visible. |
603 | */ |
604 | mb(); |
605 | state->in_eoi = false; |
606 | bail: |
607 | |
608 | /* Re-evaluate pending IRQs and update HW */ |
	xive_vm_scan_interrupts(xc, xc->pending, scan_eoi);
	xive_vm_push_pending_to_hw(xc);
	pr_devel(" after scan pending=%02x\n", xc->pending);
612 | |
613 | /* Apply new CPPR */ |
614 | xc->hw_cppr = xc->cppr; |
615 | __raw_writeb(xc->cppr, xive_tima + TM_QW1_OS + TM_CPPR); |
616 | |
617 | return rc; |
618 | } |
619 | |
620 | static int xive_vm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, |
621 | unsigned long mfrr) |
622 | { |
623 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; |
624 | |
	pr_devel("H_IPI(server=%08lx,mfrr=%ld)\n", server, mfrr);
626 | |
627 | xc->stat_vm_h_ipi++; |
628 | |
629 | /* Find target */ |
630 | vcpu = kvmppc_xive_find_server(vcpu->kvm, server); |
631 | if (!vcpu) |
632 | return H_PARAMETER; |
633 | xc = vcpu->arch.xive_vcpu; |
634 | |
635 | /* Locklessly write over MFRR */ |
636 | xc->mfrr = mfrr; |
637 | |
638 | /* |
639 | * The load of xc->cppr below and the subsequent MMIO store |
640 | * to the IPI must happen after the above mfrr update is |
641 | * globally visible so that: |
642 | * |
643 | * - Synchronize with another CPU doing an H_EOI or a H_CPPR |
644 | * updating xc->cppr then reading xc->mfrr. |
645 | * |
646 | * - The target of the IPI sees the xc->mfrr update |
647 | */ |
648 | mb(); |
649 | |
	/* Shoot the IPI if more favored than the target CPPR */
	if (mfrr < xc->cppr)
		__raw_writeq(0, __x_trig_page(&xc->vp_ipi_data));
653 | |
654 | return H_SUCCESS; |
655 | } |
656 | |
657 | /* |
658 | * We leave a gap of a couple of interrupts in the queue to |
659 | * account for the IPI and additional safety guard. |
660 | */ |
661 | #define XIVE_Q_GAP 2 |
662 | |
663 | static bool kvmppc_xive_vcpu_has_save_restore(struct kvm_vcpu *vcpu) |
664 | { |
665 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; |
666 | |
667 | /* Check enablement at VP level */ |
668 | return xc->vp_cam & TM_QW1W2_HO; |
669 | } |
670 | |
671 | bool kvmppc_xive_check_save_restore(struct kvm_vcpu *vcpu) |
672 | { |
673 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; |
674 | struct kvmppc_xive *xive = xc->xive; |
675 | |
676 | if (xive->flags & KVMPPC_XIVE_FLAG_SAVE_RESTORE) |
677 | return kvmppc_xive_vcpu_has_save_restore(vcpu); |
678 | |
679 | return true; |
680 | } |
681 | |
682 | /* |
683 | * Push a vcpu's context to the XIVE on guest entry. |
684 | * This assumes we are in virtual mode (MMU on) |
685 | */ |
686 | void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) |
687 | { |
688 | void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt; |
689 | u64 pq; |
690 | |
691 | /* |
692 | * Nothing to do if the platform doesn't have a XIVE |
693 | * or this vCPU doesn't have its own XIVE context |
694 | * (e.g. because it's not using an in-kernel interrupt controller). |
695 | */ |
696 | if (!tima || !vcpu->arch.xive_cam_word) |
697 | return; |
698 | |
699 | eieio(); |
700 | if (!kvmppc_xive_vcpu_has_save_restore(vcpu)) |
701 | __raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS); |
702 | __raw_writel(vcpu->arch.xive_cam_word, tima + TM_QW1_OS + TM_WORD2); |
703 | vcpu->arch.xive_pushed = 1; |
704 | eieio(); |
705 | |
706 | /* |
707 | * We clear the irq_pending flag. There is a small chance of a |
708 | * race vs. the escalation interrupt happening on another |
709 | * processor setting it again, but the only consequence is to |
710 | * cause a spurious wakeup on the next H_CEDE, which is not an |
711 | * issue. |
712 | */ |
713 | vcpu->arch.irq_pending = 0; |
714 | |
715 | /* |
716 | * In single escalation mode, if the escalation interrupt is |
717 | * on, we mask it. |
718 | */ |
719 | if (vcpu->arch.xive_esc_on) { |
720 | pq = __raw_readq((void __iomem *)(vcpu->arch.xive_esc_vaddr + |
721 | XIVE_ESB_SET_PQ_01)); |
722 | mb(); |
723 | |
724 | /* |
725 | * We have a possible subtle race here: The escalation |
726 | * interrupt might have fired and be on its way to the |
727 | * host queue while we mask it, and if we unmask it |
728 | * early enough (re-cede right away), there is a |
729 | * theoretical possibility that it fires again, thus |
730 | * landing in the target queue more than once which is |
731 | * a big no-no. |
732 | * |
733 | * Fortunately, solving this is rather easy. If the |
734 | * above load setting PQ to 01 returns a previous |
735 | * value where P is set, then we know the escalation |
736 | * interrupt is somewhere on its way to the host. In |
737 | * that case we simply don't clear the xive_esc_on |
738 | * flag below. It will be eventually cleared by the |
739 | * handler for the escalation interrupt. |
740 | * |
741 | * Then, when doing a cede, we check that flag again |
742 | * before re-enabling the escalation interrupt, and if |
743 | * set, we abort the cede. |
744 | */ |
745 | if (!(pq & XIVE_ESB_VAL_P)) |
746 | /* Now P is 0, we can clear the flag */ |
747 | vcpu->arch.xive_esc_on = 0; |
748 | } |
749 | } |
750 | EXPORT_SYMBOL_GPL(kvmppc_xive_push_vcpu); |
751 | |
752 | /* |
753 | * Pull a vcpu's context from the XIVE on guest exit. |
754 | * This assumes we are in virtual mode (MMU on) |
755 | */ |
756 | void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu) |
757 | { |
758 | void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt; |
759 | |
760 | if (!vcpu->arch.xive_pushed) |
761 | return; |
762 | |
763 | /* |
764 | * Should not have been pushed if there is no tima |
765 | */ |
766 | if (WARN_ON(!tima)) |
767 | return; |
768 | |
769 | eieio(); |
770 | /* First load to pull the context, we ignore the value */ |
771 | __raw_readl(tima + TM_SPC_PULL_OS_CTX); |
772 | /* Second load to recover the context state (Words 0 and 1) */ |
773 | if (!kvmppc_xive_vcpu_has_save_restore(vcpu)) |
774 | vcpu->arch.xive_saved_state.w01 = __raw_readq(tima + TM_QW1_OS); |
775 | |
776 | /* Fixup some of the state for the next load */ |
777 | vcpu->arch.xive_saved_state.lsmfb = 0; |
778 | vcpu->arch.xive_saved_state.ack = 0xff; |
779 | vcpu->arch.xive_pushed = 0; |
780 | eieio(); |
781 | } |
782 | EXPORT_SYMBOL_GPL(kvmppc_xive_pull_vcpu); |
783 | |
784 | bool kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu) |
785 | { |
786 | void __iomem *esc_vaddr = (void __iomem *)vcpu->arch.xive_esc_vaddr; |
787 | bool ret = true; |
788 | |
789 | if (!esc_vaddr) |
790 | return ret; |
791 | |
792 | /* we are using XIVE with single escalation */ |
793 | |
794 | if (vcpu->arch.xive_esc_on) { |
795 | /* |
796 | * If we still have a pending escalation, abort the cede, |
797 | * and we must set PQ to 10 rather than 00 so that we don't |
798 | * potentially end up with two entries for the escalation |
799 | * interrupt in the XIVE interrupt queue. In that case |
800 | * we also don't want to set xive_esc_on to 1 here in |
801 | * case we race with xive_esc_irq(). |
802 | */ |
803 | ret = false; |
804 | /* |
805 | * The escalation interrupts are special as we don't EOI them. |
806 | * There is no need to use the load-after-store ordering offset |
807 | * to set PQ to 10 as we won't use StoreEOI. |
808 | */ |
809 | __raw_readq(esc_vaddr + XIVE_ESB_SET_PQ_10); |
810 | } else { |
811 | vcpu->arch.xive_esc_on = true; |
812 | mb(); |
813 | __raw_readq(esc_vaddr + XIVE_ESB_SET_PQ_00); |
814 | } |
815 | mb(); |
816 | |
817 | return ret; |
818 | } |
819 | EXPORT_SYMBOL_GPL(kvmppc_xive_rearm_escalation); |
820 | |
821 | /* |
822 | * This is a simple trigger for a generic XIVE IRQ. This must |
823 | * only be called for interrupts that support a trigger page |
824 | */ |
825 | static bool xive_irq_trigger(struct xive_irq_data *xd) |
826 | { |
827 | /* This should be only for MSIs */ |
828 | if (WARN_ON(xd->flags & XIVE_IRQ_FLAG_LSI)) |
829 | return false; |
830 | |
831 | /* Those interrupts should always have a trigger page */ |
832 | if (WARN_ON(!xd->trig_mmio)) |
833 | return false; |
834 | |
835 | out_be64(xd->trig_mmio, 0); |
836 | |
837 | return true; |
838 | } |
839 | |
840 | static irqreturn_t xive_esc_irq(int irq, void *data) |
841 | { |
842 | struct kvm_vcpu *vcpu = data; |
843 | |
844 | vcpu->arch.irq_pending = 1; |
845 | smp_mb(); |
846 | if (vcpu->arch.ceded || vcpu->arch.nested) |
847 | kvmppc_fast_vcpu_kick(vcpu); |
848 | |
849 | /* Since we have the no-EOI flag, the interrupt is effectively |
850 | * disabled now. Clearing xive_esc_on means we won't bother |
851 | * doing so on the next entry. |
852 | * |
853 | * This also allows the entry code to know that if a PQ combination |
854 | * of 10 is observed while xive_esc_on is true, it means the queue |
855 | * contains an unprocessed escalation interrupt. We don't make use of |
	 * that knowledge today but might (see comment in book3s_hv_rmhandlers.S)
857 | */ |
858 | vcpu->arch.xive_esc_on = false; |
859 | |
860 | /* This orders xive_esc_on = false vs. subsequent stale_p = true */ |
861 | smp_wmb(); /* goes with smp_mb() in cleanup_single_escalation */ |
862 | |
863 | return IRQ_HANDLED; |
864 | } |
865 | |
866 | int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio, |
867 | bool single_escalation) |
868 | { |
869 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; |
870 | struct xive_q *q = &xc->queues[prio]; |
871 | char *name = NULL; |
872 | int rc; |
873 | |
874 | /* Already there ? */ |
875 | if (xc->esc_virq[prio]) |
876 | return 0; |
877 | |
878 | /* Hook up the escalation interrupt */ |
	xc->esc_virq[prio] = irq_create_mapping(NULL, q->esc_irq);
	if (!xc->esc_virq[prio]) {
		pr_err("Failed to map escalation interrupt for queue %d of VCPU %d\n",
882 | prio, xc->server_num); |
883 | return -EIO; |
884 | } |
885 | |
886 | if (single_escalation) |
		name = kasprintf(GFP_KERNEL, "kvm-%lld-%d",
				 vcpu->kvm->arch.lpid, xc->server_num);
	else
		name = kasprintf(GFP_KERNEL, "kvm-%lld-%d-%d",
				 vcpu->kvm->arch.lpid, xc->server_num, prio);
892 | if (!name) { |
		pr_err("Failed to allocate escalation irq name for queue %d of VCPU %d\n",
894 | prio, xc->server_num); |
895 | rc = -ENOMEM; |
896 | goto error; |
897 | } |
898 | |
	pr_devel("Escalation %s irq %d (prio %d)\n", name, xc->esc_virq[prio], prio);

	rc = request_irq(xc->esc_virq[prio], xive_esc_irq,
			 IRQF_NO_THREAD, name, vcpu);
	if (rc) {
		pr_err("Failed to request escalation interrupt for queue %d of VCPU %d\n",
905 | prio, xc->server_num); |
906 | goto error; |
907 | } |
908 | xc->esc_virq_names[prio] = name; |
909 | |
910 | /* In single escalation mode, we grab the ESB MMIO of the |
911 | * interrupt and mask it. Also populate the VCPU v/raddr |
912 | * of the ESB page for use by asm entry/exit code. Finally |
913 | * set the XIVE_IRQ_FLAG_NO_EOI flag which will prevent the |
914 | * core code from performing an EOI on the escalation |
915 | * interrupt, thus leaving it effectively masked after |
916 | * it fires once. |
917 | */ |
918 | if (single_escalation) { |
		struct irq_data *d = irq_get_irq_data(xc->esc_virq[prio]);
920 | struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); |
921 | |
922 | xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01); |
923 | vcpu->arch.xive_esc_raddr = xd->eoi_page; |
924 | vcpu->arch.xive_esc_vaddr = (__force u64)xd->eoi_mmio; |
925 | xd->flags |= XIVE_IRQ_FLAG_NO_EOI; |
926 | } |
927 | |
928 | return 0; |
929 | error: |
	irq_dispose_mapping(xc->esc_virq[prio]);
	xc->esc_virq[prio] = 0;
	kfree(name);
933 | return rc; |
934 | } |
935 | |
936 | static int xive_provision_queue(struct kvm_vcpu *vcpu, u8 prio) |
937 | { |
938 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; |
939 | struct kvmppc_xive *xive = xc->xive; |
940 | struct xive_q *q = &xc->queues[prio]; |
941 | void *qpage; |
942 | int rc; |
943 | |
944 | if (WARN_ON(q->qpage)) |
945 | return 0; |
946 | |
947 | /* Allocate the queue and retrieve infos on current node for now */ |
	qpage = (__be32 *)__get_free_pages(GFP_KERNEL, xive->q_page_order);
	if (!qpage) {
		pr_err("Failed to allocate queue %d for VCPU %d\n",
951 | prio, xc->server_num); |
952 | return -ENOMEM; |
953 | } |
954 | memset(qpage, 0, 1 << xive->q_order); |
955 | |
956 | /* |
957 | * Reconfigure the queue. This will set q->qpage only once the |
958 | * queue is fully configured. This is a requirement for prio 0 |
959 | * as we will stop doing EOIs for every IPI as soon as we observe |
960 | * qpage being non-NULL, and instead will only EOI when we receive |
961 | * corresponding queue 0 entries |
962 | */ |
963 | rc = xive_native_configure_queue(xc->vp_id, q, prio, qpage, |
964 | xive->q_order, true); |
965 | if (rc) |
		pr_err("Failed to configure queue %d for VCPU %d\n",
967 | prio, xc->server_num); |
968 | return rc; |
969 | } |
970 | |
971 | /* Called with xive->lock held */ |
972 | static int xive_check_provisioning(struct kvm *kvm, u8 prio) |
973 | { |
974 | struct kvmppc_xive *xive = kvm->arch.xive; |
975 | struct kvm_vcpu *vcpu; |
976 | unsigned long i; |
977 | int rc; |
978 | |
979 | lockdep_assert_held(&xive->lock); |
980 | |
981 | /* Already provisioned ? */ |
982 | if (xive->qmap & (1 << prio)) |
983 | return 0; |
984 | |
	pr_devel("Provisioning prio... %d\n", prio);
986 | |
987 | /* Provision each VCPU and enable escalations if needed */ |
988 | kvm_for_each_vcpu(i, vcpu, kvm) { |
989 | if (!vcpu->arch.xive_vcpu) |
990 | continue; |
991 | rc = xive_provision_queue(vcpu, prio); |
992 | if (rc == 0 && !kvmppc_xive_has_single_escalation(xive)) |
993 | kvmppc_xive_attach_escalation(vcpu, prio, |
				kvmppc_xive_has_single_escalation(xive));
995 | if (rc) |
996 | return rc; |
997 | } |
998 | |
999 | /* Order previous stores and mark it as provisioned */ |
1000 | mb(); |
1001 | xive->qmap |= (1 << prio); |
1002 | return 0; |
1003 | } |
1004 | |
1005 | static void xive_inc_q_pending(struct kvm *kvm, u32 server, u8 prio) |
1006 | { |
1007 | struct kvm_vcpu *vcpu; |
1008 | struct kvmppc_xive_vcpu *xc; |
1009 | struct xive_q *q; |
1010 | |
1011 | /* Locate target server */ |
1012 | vcpu = kvmppc_xive_find_server(kvm, server); |
1013 | if (!vcpu) { |
		pr_warn("%s: Can't find server %d\n", __func__, server);
1015 | return; |
1016 | } |
1017 | xc = vcpu->arch.xive_vcpu; |
1018 | if (WARN_ON(!xc)) |
1019 | return; |
1020 | |
1021 | q = &xc->queues[prio]; |
	atomic_inc(&q->pending_count);
1023 | } |
1024 | |
1025 | static int xive_try_pick_queue(struct kvm_vcpu *vcpu, u8 prio) |
1026 | { |
1027 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; |
1028 | struct xive_q *q; |
1029 | u32 max; |
1030 | |
1031 | if (WARN_ON(!xc)) |
1032 | return -ENXIO; |
1033 | if (!xc->valid) |
1034 | return -ENXIO; |
1035 | |
1036 | q = &xc->queues[prio]; |
1037 | if (WARN_ON(!q->qpage)) |
1038 | return -ENXIO; |
1039 | |
1040 | /* Calculate max number of interrupts in that queue. */ |
1041 | max = (q->msk + 1) - XIVE_Q_GAP; |
	return atomic_add_unless(&q->count, 1, max) ? 0 : -EBUSY;
1043 | } |
1044 | |
1045 | int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio) |
1046 | { |
1047 | struct kvm_vcpu *vcpu; |
1048 | unsigned long i; |
1049 | int rc; |
1050 | |
1051 | /* Locate target server */ |
1052 | vcpu = kvmppc_xive_find_server(kvm, *server); |
1053 | if (!vcpu) { |
		pr_devel("Can't find server %d\n", *server);
1055 | return -EINVAL; |
1056 | } |
1057 | |
	pr_devel("Finding irq target on 0x%x/%d...\n", *server, prio);
1059 | |
1060 | /* Try pick it */ |
1061 | rc = xive_try_pick_queue(vcpu, prio); |
1062 | if (rc == 0) |
1063 | return rc; |
1064 | |
	pr_devel(" .. failed, looking up candidate...\n");
1066 | |
1067 | /* Failed, pick another VCPU */ |
1068 | kvm_for_each_vcpu(i, vcpu, kvm) { |
1069 | if (!vcpu->arch.xive_vcpu) |
1070 | continue; |
1071 | rc = xive_try_pick_queue(vcpu, prio); |
1072 | if (rc == 0) { |
1073 | *server = vcpu->arch.xive_vcpu->server_num; |
			pr_devel(" found on 0x%x/%d\n", *server, prio);
1075 | return rc; |
1076 | } |
1077 | } |
	pr_devel(" no available target !\n");
1079 | |
1080 | /* No available target ! */ |
1081 | return -EBUSY; |
1082 | } |
1083 | |
1084 | static u8 xive_lock_and_mask(struct kvmppc_xive *xive, |
1085 | struct kvmppc_xive_src_block *sb, |
1086 | struct kvmppc_xive_irq_state *state) |
1087 | { |
1088 | struct xive_irq_data *xd; |
1089 | u32 hw_num; |
1090 | u8 old_prio; |
1091 | u64 val; |
1092 | |
1093 | /* |
1094 | * Take the lock, set masked, try again if racing |
1095 | * with H_EOI |
1096 | */ |
1097 | for (;;) { |
1098 | arch_spin_lock(&sb->lock); |
1099 | old_prio = state->guest_priority; |
1100 | state->guest_priority = MASKED; |
1101 | mb(); |
1102 | if (!state->in_eoi) |
1103 | break; |
1104 | state->guest_priority = old_prio; |
1105 | arch_spin_unlock(&sb->lock); |
1106 | } |
1107 | |
1108 | /* No change ? Bail */ |
1109 | if (old_prio == MASKED) |
1110 | return old_prio; |
1111 | |
1112 | /* Get the right irq */ |
1113 | kvmppc_xive_select_irq(state, &hw_num, &xd); |
1114 | |
1115 | /* Set PQ to 10, return old P and old Q and remember them */ |
1116 | val = xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_10); |
1117 | state->old_p = !!(val & 2); |
1118 | state->old_q = !!(val & 1); |
1119 | |
1120 | /* |
	 * Synchronize hardware to ensure the queues are updated when
1122 | * masking |
1123 | */ |
1124 | xive_native_sync_source(hw_num); |
1125 | |
1126 | return old_prio; |
1127 | } |
1128 | |
1129 | static void xive_lock_for_unmask(struct kvmppc_xive_src_block *sb, |
1130 | struct kvmppc_xive_irq_state *state) |
1131 | { |
1132 | /* |
	 * Take the lock, try again if racing with H_EOI
1134 | */ |
1135 | for (;;) { |
1136 | arch_spin_lock(&sb->lock); |
1137 | if (!state->in_eoi) |
1138 | break; |
1139 | arch_spin_unlock(&sb->lock); |
1140 | } |
1141 | } |
1142 | |
1143 | static void xive_finish_unmask(struct kvmppc_xive *xive, |
1144 | struct kvmppc_xive_src_block *sb, |
1145 | struct kvmppc_xive_irq_state *state, |
1146 | u8 prio) |
1147 | { |
1148 | struct xive_irq_data *xd; |
1149 | u32 hw_num; |
1150 | |
1151 | /* If we aren't changing a thing, move on */ |
1152 | if (state->guest_priority != MASKED) |
1153 | goto bail; |
1154 | |
1155 | /* Get the right irq */ |
1156 | kvmppc_xive_select_irq(state, &hw_num, &xd); |
1157 | |
1158 | /* Old Q set, set PQ to 11 */ |
1159 | if (state->old_q) |
1160 | xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_11); |
1161 | |
1162 | /* |
1163 | * If not old P, then perform an "effective" EOI, |
1164 | * on the source. This will handle the cases where |
1165 | * FW EOI is needed. |
1166 | */ |
1167 | if (!state->old_p) |
		xive_vm_source_eoi(hw_num, xd);
1169 | |
1170 | /* Synchronize ordering and mark unmasked */ |
1171 | mb(); |
1172 | bail: |
1173 | state->guest_priority = prio; |
1174 | } |
1175 | |
1176 | /* |
1177 | * Target an interrupt to a given server/prio, this will fallback |
1178 | * to another server if necessary and perform the HW targetting |
1179 | * updates as needed |
1180 | * |
1181 | * NOTE: Must be called with the state lock held |
1182 | */ |
1183 | static int xive_target_interrupt(struct kvm *kvm, |
1184 | struct kvmppc_xive_irq_state *state, |
1185 | u32 server, u8 prio) |
1186 | { |
1187 | struct kvmppc_xive *xive = kvm->arch.xive; |
1188 | u32 hw_num; |
1189 | int rc; |
1190 | |
1191 | /* |
1192 | * This will return a tentative server and actual |
1193 | * priority. The count for that new target will have |
1194 | * already been incremented. |
1195 | */ |
	rc = kvmppc_xive_select_target(kvm, &server, prio);
1197 | |
1198 | /* |
1199 | * We failed to find a target ? Not much we can do |
1200 | * at least until we support the GIQ. |
1201 | */ |
1202 | if (rc) |
1203 | return rc; |
1204 | |
1205 | /* |
1206 | * Increment the old queue pending count if there |
1207 | * was one so that the old queue count gets adjusted later |
1208 | * when observed to be empty. |
1209 | */ |
1210 | if (state->act_priority != MASKED) |
		xive_inc_q_pending(kvm,
				   state->act_server,
				   state->act_priority);
1214 | /* |
1215 | * Update state and HW |
1216 | */ |
1217 | state->act_priority = prio; |
1218 | state->act_server = server; |
1219 | |
1220 | /* Get the right irq */ |
1221 | kvmppc_xive_select_irq(state, &hw_num, NULL); |
1222 | |
1223 | return xive_native_configure_irq(hw_num, |
1224 | kvmppc_xive_vp(xive, server), |
1225 | prio, state->number); |
1226 | } |
1227 | |
1228 | /* |
1229 | * Targetting rules: In order to avoid losing track of |
1230 | * pending interrupts across mask and unmask, which would |
1231 | * allow queue overflows, we implement the following rules: |
1232 | * |
1233 | * - Unless it was never enabled (or we run out of capacity) |
1234 | * an interrupt is always targetted at a valid server/queue |
1235 | * pair even when "masked" by the guest. This pair tends to |
1236 | * be the last one used but it can be changed under some |
1237 | * circumstances. That allows us to separate targetting |
1238 | * from masking, we only handle accounting during (re)targetting, |
1239 | * this also allows us to let an interrupt drain into its target |
1240 | * queue after masking, avoiding complex schemes to remove |
1241 | * interrupts out of remote processor queues. |
1242 | * |
1243 | * - When masking, we set PQ to 10 and save the previous value |
1244 | * of P and Q. |
1245 | * |
1246 | * - When unmasking, if saved Q was set, we set PQ to 11 |
1247 | * otherwise we leave PQ to the HW state which will be either |
1248 | * 10 if nothing happened or 11 if the interrupt fired while |
1249 | * masked. Effectively we are OR'ing the previous Q into the |
1250 | * HW Q. |
1251 | * |
1252 | * Then if saved P is clear, we do an effective EOI (Q->P->Trigger) |
1253 | * which will unmask the interrupt and shoot a new one if Q was |
1254 | * set. |
1255 | * |
1256 | * Otherwise (saved P is set) we leave PQ unchanged (so 10 or 11, |
1257 | * effectively meaning an H_EOI from the guest is still expected |
1258 | * for that interrupt). |
1259 | * |
1260 | * - If H_EOI occurs while masked, we clear the saved P. |
1261 | * |
1262 | * - When changing target, we account on the new target and |
1263 | * increment a separate "pending" counter on the old one. |
1264 | * This pending counter will be used to decrement the old |
1265 | * target's count when its queue has been observed empty. |
1266 | */ |
1267 | |
1268 | int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server, |
1269 | u32 priority) |
1270 | { |
1271 | struct kvmppc_xive *xive = kvm->arch.xive; |
1272 | struct kvmppc_xive_src_block *sb; |
1273 | struct kvmppc_xive_irq_state *state; |
1274 | u8 new_act_prio; |
1275 | int rc = 0; |
1276 | u16 idx; |
1277 | |
1278 | if (!xive) |
1279 | return -ENODEV; |
1280 | |
	pr_devel("set_xive ! irq 0x%x server 0x%x prio %d\n",
1282 | irq, server, priority); |
1283 | |
1284 | /* First, check provisioning of queues */ |
1285 | if (priority != MASKED) { |
1286 | mutex_lock(&xive->lock); |
		rc = xive_check_provisioning(xive->kvm,
					     xive_prio_from_guest(priority));
		mutex_unlock(&xive->lock);
1290 | } |
1291 | if (rc) { |
		pr_devel(" provisioning failure %d !\n", rc);
1293 | return rc; |
1294 | } |
1295 | |
1296 | sb = kvmppc_xive_find_source(xive, irq, &idx); |
1297 | if (!sb) |
1298 | return -EINVAL; |
1299 | state = &sb->irq_state[idx]; |
1300 | |
1301 | /* |
1302 | * We first handle masking/unmasking since the locking |
1303 | * might need to be retried due to EOIs, we'll handle |
1304 | * targetting changes later. These functions will return |
1305 | * with the SB lock held. |
1306 | * |
1307 | * xive_lock_and_mask() will also set state->guest_priority |
1308 | * but won't otherwise change other fields of the state. |
1309 | * |
1310 | * xive_lock_for_unmask will not actually unmask, this will |
1311 | * be done later by xive_finish_unmask() once the targetting |
1312 | * has been done, so we don't try to unmask an interrupt |
1313 | * that hasn't yet been targetted. |
1314 | */ |
1315 | if (priority == MASKED) |
1316 | xive_lock_and_mask(xive, sb, state); |
1317 | else |
1318 | xive_lock_for_unmask(sb, state); |
1319 | |
1320 | |
1321 | /* |
1322 | * Then we handle targetting. |
1323 | * |
1324 | * First calculate a new "actual priority" |
1325 | */ |
1326 | new_act_prio = state->act_priority; |
1327 | if (priority != MASKED) |
1328 | new_act_prio = xive_prio_from_guest(priority); |
1329 | |
	pr_devel(" new_act_prio=%x act_server=%x act_prio=%x\n",
1331 | new_act_prio, state->act_server, state->act_priority); |
1332 | |
1333 | /* |
1334 | * Then check if we actually need to change anything, |
1335 | * |
1336 | * The condition for re-targetting the interrupt is that |
1337 | * we have a valid new priority (new_act_prio is not 0xff) |
1338 | * and either the server or the priority changed. |
1339 | * |
1340 | * Note: If act_priority was ff and the new priority is |
1341 | * also ff, we don't do anything and leave the interrupt |
	 * untargetted. An attempt to do an int_on on an
	 * untargetted interrupt will fail. If that is a problem
	 * we could initialize interrupts with a valid default priority.
1345 | */ |
1346 | |
1347 | if (new_act_prio != MASKED && |
1348 | (state->act_server != server || |
1349 | state->act_priority != new_act_prio)) |
		rc = xive_target_interrupt(kvm, state, server, new_act_prio);
1351 | |
1352 | /* |
1353 | * Perform the final unmasking of the interrupt source |
1354 | * if necessary |
1355 | */ |
1356 | if (priority != MASKED) |
		xive_finish_unmask(xive, sb, state, priority);
1358 | |
1359 | /* |
1360 | * Finally Update saved_priority to match. Only int_on/off |
1361 | * set this field to a different value. |
1362 | */ |
1363 | state->saved_priority = priority; |
1364 | |
1365 | arch_spin_unlock(&sb->lock); |
1366 | return rc; |
1367 | } |
1368 | |
1369 | int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server, |
1370 | u32 *priority) |
1371 | { |
1372 | struct kvmppc_xive *xive = kvm->arch.xive; |
1373 | struct kvmppc_xive_src_block *sb; |
1374 | struct kvmppc_xive_irq_state *state; |
1375 | u16 idx; |
1376 | |
1377 | if (!xive) |
1378 | return -ENODEV; |
1379 | |
1380 | sb = kvmppc_xive_find_source(xive, irq, &idx); |
1381 | if (!sb) |
1382 | return -EINVAL; |
1383 | state = &sb->irq_state[idx]; |
1384 | arch_spin_lock(&sb->lock); |
1385 | *server = state->act_server; |
1386 | *priority = state->guest_priority; |
1387 | arch_spin_unlock(&sb->lock); |
1388 | |
1389 | return 0; |
1390 | } |
1391 | |
1392 | int kvmppc_xive_int_on(struct kvm *kvm, u32 irq) |
1393 | { |
1394 | struct kvmppc_xive *xive = kvm->arch.xive; |
1395 | struct kvmppc_xive_src_block *sb; |
1396 | struct kvmppc_xive_irq_state *state; |
1397 | u16 idx; |
1398 | |
1399 | if (!xive) |
1400 | return -ENODEV; |
1401 | |
1402 | sb = kvmppc_xive_find_source(xive, irq, &idx); |
1403 | if (!sb) |
1404 | return -EINVAL; |
1405 | state = &sb->irq_state[idx]; |
1406 | |
	pr_devel("int_on(irq=0x%x)\n", irq);
1408 | |
1409 | /* |
1410 | * Check if interrupt was not targetted |
1411 | */ |
1412 | if (state->act_priority == MASKED) { |
		pr_devel("int_on on untargetted interrupt\n");
1414 | return -EINVAL; |
1415 | } |
1416 | |
1417 | /* If saved_priority is 0xff, do nothing */ |
1418 | if (state->saved_priority == MASKED) |
1419 | return 0; |
1420 | |
1421 | /* |
1422 | * Lock and unmask it. |
1423 | */ |
1424 | xive_lock_for_unmask(sb, state); |
	xive_finish_unmask(xive, sb, state, state->saved_priority);
1426 | arch_spin_unlock(&sb->lock); |
1427 | |
1428 | return 0; |
1429 | } |
1430 | |
1431 | int kvmppc_xive_int_off(struct kvm *kvm, u32 irq) |
1432 | { |
1433 | struct kvmppc_xive *xive = kvm->arch.xive; |
1434 | struct kvmppc_xive_src_block *sb; |
1435 | struct kvmppc_xive_irq_state *state; |
1436 | u16 idx; |
1437 | |
1438 | if (!xive) |
1439 | return -ENODEV; |
1440 | |
1441 | sb = kvmppc_xive_find_source(xive, irq, &idx); |
1442 | if (!sb) |
1443 | return -EINVAL; |
1444 | state = &sb->irq_state[idx]; |
1445 | |
	pr_devel("int_off(irq=0x%x)\n", irq);
1447 | |
1448 | /* |
1449 | * Lock and mask |
1450 | */ |
1451 | state->saved_priority = xive_lock_and_mask(xive, sb, state); |
1452 | arch_spin_unlock(&sb->lock); |
1453 | |
1454 | return 0; |
1455 | } |
1456 | |
1457 | static bool xive_restore_pending_irq(struct kvmppc_xive *xive, u32 irq) |
1458 | { |
1459 | struct kvmppc_xive_src_block *sb; |
1460 | struct kvmppc_xive_irq_state *state; |
1461 | u16 idx; |
1462 | |
1463 | sb = kvmppc_xive_find_source(xive, irq, &idx); |
1464 | if (!sb) |
1465 | return false; |
1466 | state = &sb->irq_state[idx]; |
1467 | if (!state->valid) |
1468 | return false; |
1469 | |
1470 | /* |
1471 | * Trigger the IPI. This assumes we never restore a pass-through |
1472 | * interrupt which should be safe enough |
1473 | */ |
	xive_irq_trigger(&state->ipi_data);
1475 | |
1476 | return true; |
1477 | } |
1478 | |
1479 | u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu) |
1480 | { |
1481 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; |
1482 | |
1483 | if (!xc) |
1484 | return 0; |
1485 | |
1486 | /* Return the per-cpu state for state saving/migration */ |
1487 | return (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT | |
1488 | (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT | |
1489 | (u64)0xff << KVM_REG_PPC_ICP_PPRI_SHIFT; |
1490 | } |
1491 | |
1492 | int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval) |
1493 | { |
1494 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; |
1495 | struct kvmppc_xive *xive = vcpu->kvm->arch.xive; |
1496 | u8 cppr, mfrr; |
1497 | u32 xisr; |
1498 | |
1499 | if (!xc || !xive) |
1500 | return -ENOENT; |
1501 | |
1502 | /* Grab individual state fields. We don't use pending_pri */ |
1503 | cppr = icpval >> KVM_REG_PPC_ICP_CPPR_SHIFT; |
1504 | xisr = (icpval >> KVM_REG_PPC_ICP_XISR_SHIFT) & |
1505 | KVM_REG_PPC_ICP_XISR_MASK; |
1506 | mfrr = icpval >> KVM_REG_PPC_ICP_MFRR_SHIFT; |
1507 | |
	pr_devel("set_icp vcpu %d cppr=0x%x mfrr=0x%x xisr=0x%x\n",
1509 | xc->server_num, cppr, mfrr, xisr); |
1510 | |
1511 | /* |
1512 | * We can't update the state of a "pushed" VCPU, but that |
1513 | * shouldn't happen because the vcpu->mutex makes running a |
1514 | * vcpu mutually exclusive with doing one_reg get/set on it. |
1515 | */ |
1516 | if (WARN_ON(vcpu->arch.xive_pushed)) |
1517 | return -EIO; |
1518 | |
1519 | /* Update VCPU HW saved state */ |
1520 | vcpu->arch.xive_saved_state.cppr = cppr; |
1521 | xc->hw_cppr = xc->cppr = cppr; |
1522 | |
1523 | /* |
1524 | * Update MFRR state. If it's not 0xff, we mark the VCPU as |
1525 | * having a pending MFRR change, which will re-evaluate the |
1526 | * target. The VCPU will thus potentially get a spurious |
1527 | * interrupt but that's not a big deal. |
1528 | */ |
1529 | xc->mfrr = mfrr; |
1530 | if (mfrr < cppr) |
		xive_irq_trigger(&xc->vp_ipi_data);
1532 | |
1533 | /* |
1534 | * Now saved XIRR is "interesting". It means there's something in |
1535 | * the legacy "1 element" queue... for an IPI we simply ignore it, |
1536 | * as the MFRR restore will handle that. For anything else we need |
1537 | * to force a resend of the source. |
	 * However the source may not have been set up yet. If that's the
1539 | * case, we keep that info and increment a counter in the xive to |
1540 | * tell subsequent xive_set_source() to go look. |
1541 | */ |
1542 | if (xisr > XICS_IPI && !xive_restore_pending_irq(xive, xisr)) { |
1543 | xc->delayed_irq = xisr; |
1544 | xive->delayed_irqs++; |
		pr_devel(" xisr restore delayed\n");
1546 | } |
1547 | |
1548 | return 0; |
1549 | } |
1550 | |
1551 | int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq, |
1552 | unsigned long host_irq) |
1553 | { |
1554 | struct kvmppc_xive *xive = kvm->arch.xive; |
1555 | struct kvmppc_xive_src_block *sb; |
1556 | struct kvmppc_xive_irq_state *state; |
1557 | struct irq_data *host_data = |
		irq_domain_get_irq_data(irq_get_default_host(), host_irq);
	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(host_data);
1560 | u16 idx; |
1561 | u8 prio; |
1562 | int rc; |
1563 | |
1564 | if (!xive) |
1565 | return -ENODEV; |
1566 | |
	pr_debug("%s: GIRQ 0x%lx host IRQ %ld XIVE HW IRQ 0x%x\n",
1568 | __func__, guest_irq, host_irq, hw_irq); |
1569 | |
1570 | sb = kvmppc_xive_find_source(xive, guest_irq, &idx); |
1571 | if (!sb) |
1572 | return -EINVAL; |
1573 | state = &sb->irq_state[idx]; |
1574 | |
1575 | /* |
1576 | * Mark the passed-through interrupt as going to a VCPU, |
1577 | * this will prevent further EOIs and similar operations |
1578 | * from the XIVE code. It will also mask the interrupt |
1579 | * to either PQ=10 or 11 state, the latter if the interrupt |
1580 | * is pending. This will allow us to unmask or retrigger it |
1581 | * after routing it to the guest with a simple EOI. |
1582 | * |
1583 | * The "state" argument is a "token", all it needs is to be |
1584 | * non-NULL to switch to passed-through or NULL for the |
1585 | * other way around. We may not yet have an actual VCPU |
1586 | * target here and we don't really care. |
1587 | */ |
	rc = irq_set_vcpu_affinity(host_irq, state);
	if (rc) {
		pr_err("Failed to set VCPU affinity for host IRQ %ld\n", host_irq);
1591 | return rc; |
1592 | } |
1593 | |
1594 | /* |
1595 | * Mask and read state of IPI. We need to know if its P bit |
1596 | * is set as that means it's potentially already using a |
1597 | * queue entry in the target |
1598 | */ |
1599 | prio = xive_lock_and_mask(xive, sb, state); |
	pr_devel(" old IPI prio %02x P:%d Q:%d\n", prio,
1601 | state->old_p, state->old_q); |
1602 | |
1603 | /* Turn the IPI hard off */ |
1604 | xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01); |
1605 | |
1606 | /* |
1607 | * Reset ESB guest mapping. Needed when ESB pages are exposed |
1608 | * to the guest in XIVE native mode |
1609 | */ |
1610 | if (xive->ops && xive->ops->reset_mapped) |
1611 | xive->ops->reset_mapped(kvm, guest_irq); |
1612 | |
1613 | /* Grab info about irq */ |
1614 | state->pt_number = hw_irq; |
	state->pt_data = irq_data_get_irq_handler_data(host_data);
1616 | |
1617 | /* |
1618 | * Configure the IRQ to match the existing configuration of |
1619 | * the IPI if it was already targetted. Otherwise this will |
1620 | * mask the interrupt in a lossy way (act_priority is 0xff) |
	 * which is fine for an interrupt that was never started.
1622 | */ |
1623 | xive_native_configure_irq(hw_irq, |
1624 | kvmppc_xive_vp(xive, state->act_server), |
1625 | state->act_priority, state->number); |
1626 | |
1627 | /* |
1628 | * We do an EOI to enable the interrupt (and retrigger if needed) |
1629 | * if the guest has the interrupt unmasked and the P bit was *not* |
1630 | * set in the IPI. If it was set, we know a slot may still be in |
1631 | * use in the target queue thus we have to wait for a guest |
1632 | * originated EOI |
1633 | */ |
1634 | if (prio != MASKED && !state->old_p) |
		xive_vm_source_eoi(hw_irq, state->pt_data);
1636 | |
1637 | /* Clear old_p/old_q as they are no longer relevant */ |
1638 | state->old_p = state->old_q = false; |
1639 | |
1640 | /* Restore guest prio (unlocks EOI) */ |
1641 | mb(); |
1642 | state->guest_priority = prio; |
1643 | arch_spin_unlock(&sb->lock); |
1644 | |
1645 | return 0; |
1646 | } |
1647 | EXPORT_SYMBOL_GPL(kvmppc_xive_set_mapped); |
1648 | |
1649 | int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq, |
1650 | unsigned long host_irq) |
1651 | { |
1652 | struct kvmppc_xive *xive = kvm->arch.xive; |
1653 | struct kvmppc_xive_src_block *sb; |
1654 | struct kvmppc_xive_irq_state *state; |
1655 | u16 idx; |
1656 | u8 prio; |
1657 | int rc; |
1658 | |
1659 | if (!xive) |
1660 | return -ENODEV; |
1661 | |
	pr_debug("%s: GIRQ 0x%lx host IRQ %ld\n", __func__, guest_irq, host_irq);
1663 | |
1664 | sb = kvmppc_xive_find_source(xive, guest_irq, &idx); |
1665 | if (!sb) |
1666 | return -EINVAL; |
1667 | state = &sb->irq_state[idx]; |
1668 | |
1669 | /* |
1670 | * Mask and read state of IRQ. We need to know if its P bit |
1671 | * is set as that means it's potentially already using a |
1672 | * queue entry in the target |
1673 | */ |
1674 | prio = xive_lock_and_mask(xive, sb, state); |
	pr_devel(" old IRQ prio %02x P:%d Q:%d\n", prio,
1676 | state->old_p, state->old_q); |
1677 | |
1678 | /* |
1679 | * If old_p is set, the interrupt is pending, we switch it to |
1680 | * PQ=11. This will force a resend in the host so the interrupt |
1681 | * isn't lost to whatever host driver may pick it up |
1682 | */ |
1683 | if (state->old_p) |
1684 | xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_11); |
1685 | |
1686 | /* Release the passed-through interrupt to the host */ |
	rc = irq_set_vcpu_affinity(host_irq, NULL);
	if (rc) {
		pr_err("Failed to clr VCPU affinity for host IRQ %ld\n", host_irq);
1690 | return rc; |
1691 | } |
1692 | |
1693 | /* Forget about the IRQ */ |
1694 | state->pt_number = 0; |
1695 | state->pt_data = NULL; |
1696 | |
1697 | /* |
1698 | * Reset ESB guest mapping. Needed when ESB pages are exposed |
1699 | * to the guest in XIVE native mode |
1700 | */ |
1701 | if (xive->ops && xive->ops->reset_mapped) { |
1702 | xive->ops->reset_mapped(kvm, guest_irq); |
1703 | } |
1704 | |
1705 | /* Reconfigure the IPI */ |
1706 | xive_native_configure_irq(state->ipi_number, |
1707 | kvmppc_xive_vp(xive, state->act_server), |
1708 | state->act_priority, state->number); |
1709 | |
1710 | /* |
1711 | * If old_p is set (we have a queue entry potentially |
1712 | * occupied) or the interrupt is masked, we set the IPI |
1713 | * to PQ=10 state. Otherwise we just re-enable it (PQ=00). |
1714 | */ |
1715 | if (prio == MASKED || state->old_p) |
1716 | xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_10); |
1717 | else |
1718 | xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_00); |
1719 | |
1720 | /* Restore guest prio (unlocks EOI) */ |
1721 | mb(); |
1722 | state->guest_priority = prio; |
1723 | arch_spin_unlock(&sb->lock); |
1724 | |
1725 | return 0; |
1726 | } |
1727 | EXPORT_SYMBOL_GPL(kvmppc_xive_clr_mapped); |
1728 | |
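/*
 * Mask and detarget every source (IPI and passed-through) that is
 * currently routed to this vCPU, then quiesce its escalation
 * interrupt and drop the cached escalation ESB pointers.
 */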
1729 | void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu) |
1730 | { |
1731 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; |
1732 | struct kvm *kvm = vcpu->kvm; |
1733 | struct kvmppc_xive *xive = kvm->arch.xive; |
1734 | int i, j; |
1735 | |
1736 | for (i = 0; i <= xive->max_sbid; i++) { |
1737 | struct kvmppc_xive_src_block *sb = xive->src_blocks[i]; |
1738 | |
1739 | if (!sb) |
1740 | continue; |
1741 | for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) { |
1742 | struct kvmppc_xive_irq_state *state = &sb->irq_state[j]; |
1743 | |
1744 | if (!state->valid) |
1745 | continue; |
1746 | if (state->act_priority == MASKED) |
1747 | continue; |
1748 | if (state->act_server != xc->server_num) |
1749 | continue; |
1750 | |
1751 | /* Clean it up */ |
1752 | arch_spin_lock(&sb->lock); |
1753 | state->act_priority = MASKED; |
1754 | xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01); |
1755 | xive_native_configure_irq(state->ipi_number, 0, MASKED, 0); |
1756 | if (state->pt_number) { |
1757 | xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01); |
1758 | xive_native_configure_irq(state->pt_number, 0, MASKED, 0); |
1759 | } |
1760 | arch_spin_unlock(&sb->lock); |
1761 | } |
1762 | } |
1763 | |
1764 | /* Disable vcpu's escalation interrupt */ |
1765 | if (vcpu->arch.xive_esc_on) { |
1766 | __raw_readq((void __iomem *)(vcpu->arch.xive_esc_vaddr + |
1767 | XIVE_ESB_SET_PQ_01)); |
1768 | vcpu->arch.xive_esc_on = false; |
1769 | } |
1770 | |
1771 | /* |
1772 | * Clear pointers to escalation interrupt ESB. |
1773 | * This is safe because the vcpu->mutex is held, preventing |
1774 | * any other CPU from concurrently executing a KVM_RUN ioctl. |
1775 | */ |
1776 | vcpu->arch.xive_esc_vaddr = 0; |
1777 | vcpu->arch.xive_esc_raddr = 0; |
1778 | } |
1779 | |
1780 | /* |
1781 | * In single escalation mode, the escalation interrupt is marked so |
1782 | * that EOI doesn't re-enable it, but just sets the stale_p flag to |
1783 | * indicate that the P bit has already been dealt with. However, the |
1784 | * assembly code that enters the guest sets PQ to 00 without clearing |
1785 | * stale_p (because it has no easy way to address it). Hence we have |
1786 | * to adjust stale_p before shutting down the interrupt. |
1787 | */ |
1788 | void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, int irq) |
1789 | { |
1790 | struct irq_data *d = irq_get_irq_data(irq); |
1791 | struct xive_irq_data *xd = irq_data_get_irq_handler_data(d); |
1792 | |
1793 | /* |
1794 | * This slightly odd sequence gives the right result |
1795 | * (i.e. stale_p set if xive_esc_on is false) even if |
1796 | * we race with xive_esc_irq() and xive_irq_eoi(). |
1797 | */ |
1798 | xd->stale_p = false; |
	smp_mb();		/* paired with smp_wmb() in xive_esc_irq */
1800 | if (!vcpu->arch.xive_esc_on) |
1801 | xd->stale_p = true; |
1802 | } |
1803 | |
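/*
 * Tear down the XIVE presenter of a vCPU: detach its interrupts, free
 * the escalation interrupts and event queues, disable the VP and
 * release the internal IPI.
 */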
1804 | void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu) |
1805 | { |
1806 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; |
1807 | struct kvmppc_xive *xive = vcpu->kvm->arch.xive; |
1808 | int i; |
1809 | |
1810 | if (!kvmppc_xics_enabled(vcpu)) |
1811 | return; |
1812 | |
1813 | if (!xc) |
1814 | return; |
1815 | |
1816 | pr_devel("cleanup_vcpu(cpu=%d)\n" , xc->server_num); |
1817 | |
1818 | /* Ensure no interrupt is still routed to that VP */ |
1819 | xc->valid = false; |
1820 | kvmppc_xive_disable_vcpu_interrupts(vcpu); |
1821 | |
1822 | /* Mask the VP IPI */ |
1823 | xive_vm_esb_load(&xc->vp_ipi_data, XIVE_ESB_SET_PQ_01); |
1824 | |
1825 | /* Free escalations */ |
1826 | for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { |
1827 | if (xc->esc_virq[i]) { |
1828 | if (kvmppc_xive_has_single_escalation(xc->xive)) |
1829 | xive_cleanup_single_escalation(vcpu, xc->esc_virq[i]); |
1830 | free_irq(xc->esc_virq[i], vcpu); |
1831 | irq_dispose_mapping(xc->esc_virq[i]); |
1832 | kfree(xc->esc_virq_names[i]); |
1833 | } |
1834 | } |
1835 | |
1836 | /* Disable the VP */ |
1837 | xive_native_disable_vp(xc->vp_id); |
1838 | |
1839 | /* Clear the cam word so guest entry won't try to push context */ |
1840 | vcpu->arch.xive_cam_word = 0; |
1841 | |
1842 | /* Free the queues */ |
1843 | for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { |
1844 | struct xive_q *q = &xc->queues[i]; |
1845 | |
1846 | xive_native_disable_queue(xc->vp_id, q, i); |
1847 | if (q->qpage) { |
1848 | free_pages((unsigned long)q->qpage, |
1849 | xive->q_page_order); |
1850 | q->qpage = NULL; |
1851 | } |
1852 | } |
1853 | |
1854 | /* Free the IPI */ |
1855 | if (xc->vp_ipi) { |
1856 | xive_cleanup_irq_data(&xc->vp_ipi_data); |
1857 | xive_native_free_irq(xc->vp_ipi); |
1858 | } |
1859 | /* Free the VP */ |
	kfree(xc);
1861 | |
1862 | /* Cleanup the vcpu */ |
1863 | vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT; |
1864 | vcpu->arch.xive_vcpu = NULL; |
1865 | } |
1866 | |
1867 | static bool kvmppc_xive_vcpu_id_valid(struct kvmppc_xive *xive, u32 cpu) |
1868 | { |
1869 | /* We have a block of xive->nr_servers VPs. We just need to check |
1870 | * packed vCPU ids are below that. |
1871 | */ |
1872 | return kvmppc_pack_vcpu_id(xive->kvm, cpu) < xive->nr_servers; |
1873 | } |
1874 | |
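/*
 * Map a guest server number to a VP id, allocating the VP block on
 * first use. Returns -EINVAL for an out-of-range server, -ENOSPC if
 * no VP block can be allocated and -EEXIST if the VP is already used.
 */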
1875 | int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp) |
1876 | { |
1877 | u32 vp_id; |
1878 | |
1879 | if (!kvmppc_xive_vcpu_id_valid(xive, cpu)) { |
1880 | pr_devel("Out of bounds !\n" ); |
1881 | return -EINVAL; |
1882 | } |
1883 | |
1884 | if (xive->vp_base == XIVE_INVALID_VP) { |
1885 | xive->vp_base = xive_native_alloc_vp_block(xive->nr_servers); |
1886 | pr_devel("VP_Base=%x nr_servers=%d\n" , xive->vp_base, xive->nr_servers); |
1887 | |
1888 | if (xive->vp_base == XIVE_INVALID_VP) |
1889 | return -ENOSPC; |
1890 | } |
1891 | |
1892 | vp_id = kvmppc_xive_vp(xive, cpu); |
1893 | if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) { |
1894 | pr_devel("Duplicate !\n" ); |
1895 | return -EEXIST; |
1896 | } |
1897 | |
1898 | *vp = vp_id; |
1899 | |
1900 | return 0; |
1901 | } |
1902 | |
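/*
 * Connect a vCPU to the device: allocate its VP and internal IPI,
 * enable the VP, provision the queues already in use by the guest
 * and attach the priority 0 escalation interrupt.
 */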
1903 | int kvmppc_xive_connect_vcpu(struct kvm_device *dev, |
1904 | struct kvm_vcpu *vcpu, u32 cpu) |
1905 | { |
1906 | struct kvmppc_xive *xive = dev->private; |
1907 | struct kvmppc_xive_vcpu *xc; |
1908 | int i, r = -EBUSY; |
1909 | u32 vp_id; |
1910 | |
1911 | pr_devel("connect_vcpu(cpu=%d)\n" , cpu); |
1912 | |
1913 | if (dev->ops != &kvm_xive_ops) { |
1914 | pr_devel("Wrong ops !\n" ); |
1915 | return -EPERM; |
1916 | } |
1917 | if (xive->kvm != vcpu->kvm) |
1918 | return -EPERM; |
1919 | if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT) |
1920 | return -EBUSY; |
1921 | |
1922 | /* We need to synchronize with queue provisioning */ |
1923 | mutex_lock(&xive->lock); |
1924 | |
	r = kvmppc_xive_compute_vp_id(xive, cpu, &vp_id);
1926 | if (r) |
1927 | goto bail; |
1928 | |
1929 | xc = kzalloc(sizeof(*xc), GFP_KERNEL); |
1930 | if (!xc) { |
1931 | r = -ENOMEM; |
1932 | goto bail; |
1933 | } |
1934 | |
1935 | vcpu->arch.xive_vcpu = xc; |
1936 | xc->xive = xive; |
1937 | xc->vcpu = vcpu; |
1938 | xc->server_num = cpu; |
1939 | xc->vp_id = vp_id; |
1940 | xc->mfrr = 0xff; |
1941 | xc->valid = true; |
1942 | |
1943 | r = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id); |
1944 | if (r) |
1945 | goto bail; |
1946 | |
1947 | if (!kvmppc_xive_check_save_restore(vcpu)) { |
1948 | pr_err("inconsistent save-restore setup for VCPU %d\n" , cpu); |
1949 | r = -EIO; |
1950 | goto bail; |
1951 | } |
1952 | |
1953 | /* Configure VCPU fields for use by assembly push/pull */ |
1954 | vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000); |
1955 | vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO); |
1956 | |
1957 | /* Allocate IPI */ |
1958 | xc->vp_ipi = xive_native_alloc_irq(); |
1959 | if (!xc->vp_ipi) { |
1960 | pr_err("Failed to allocate xive irq for VCPU IPI\n" ); |
1961 | r = -EIO; |
1962 | goto bail; |
1963 | } |
1964 | pr_devel(" IPI=0x%x\n" , xc->vp_ipi); |
1965 | |
1966 | r = xive_native_populate_irq_data(xc->vp_ipi, &xc->vp_ipi_data); |
1967 | if (r) |
1968 | goto bail; |
1969 | |
1970 | /* |
1971 | * Enable the VP first as the single escalation mode will |
1972 | * affect escalation interrupts numbering |
1973 | */ |
1974 | r = xive_native_enable_vp(xc->vp_id, kvmppc_xive_has_single_escalation(xive)); |
1975 | if (r) { |
1976 | pr_err("Failed to enable VP in OPAL, err %d\n" , r); |
1977 | goto bail; |
1978 | } |
1979 | |
1980 | /* |
1981 | * Initialize queues. Initially we set them all for no queueing |
	 * and we enable escalation for queue 0 only, which we'll use for
	 * our mfrr change notifications. If the VCPU is hot-plugged, we
	 * do handle provisioning, however, based on the existing "map"
1985 | * of enabled queues. |
1986 | */ |
1987 | for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { |
1988 | struct xive_q *q = &xc->queues[i]; |
1989 | |
1990 | /* Single escalation, no queue 7 */ |
1991 | if (i == 7 && kvmppc_xive_has_single_escalation(xive)) |
1992 | break; |
1993 | |
1994 | /* Is queue already enabled ? Provision it */ |
1995 | if (xive->qmap & (1 << i)) { |
1996 | r = xive_provision_queue(vcpu, i); |
1997 | if (r == 0 && !kvmppc_xive_has_single_escalation(xive)) |
1998 | kvmppc_xive_attach_escalation( |
1999 | vcpu, i, kvmppc_xive_has_single_escalation(xive)); |
2000 | if (r) |
2001 | goto bail; |
2002 | } else { |
2003 | r = xive_native_configure_queue(xc->vp_id, |
2004 | q, i, NULL, 0, true); |
2005 | if (r) { |
2006 | pr_err("Failed to configure queue %d for VCPU %d\n" , |
2007 | i, cpu); |
2008 | goto bail; |
2009 | } |
2010 | } |
2011 | } |
2012 | |
2013 | /* If not done above, attach priority 0 escalation */ |
	r = kvmppc_xive_attach_escalation(vcpu, 0, kvmppc_xive_has_single_escalation(xive));
2015 | if (r) |
2016 | goto bail; |
2017 | |
2018 | /* Route the IPI */ |
2019 | r = xive_native_configure_irq(xc->vp_ipi, xc->vp_id, 0, XICS_IPI); |
2020 | if (!r) |
2021 | xive_vm_esb_load(&xc->vp_ipi_data, XIVE_ESB_SET_PQ_00); |
2022 | |
2023 | bail: |
	mutex_unlock(&xive->lock);
2025 | if (r) { |
2026 | kvmppc_xive_cleanup_vcpu(vcpu); |
2027 | return r; |
2028 | } |
2029 | |
2030 | vcpu->arch.irq_type = KVMPPC_IRQ_XICS; |
2031 | return 0; |
2032 | } |
2033 | |
2034 | /* |
2035 | * Scanning of queues before/after migration save |
2036 | */ |
2037 | static void xive_pre_save_set_queued(struct kvmppc_xive *xive, u32 irq) |
2038 | { |
2039 | struct kvmppc_xive_src_block *sb; |
2040 | struct kvmppc_xive_irq_state *state; |
2041 | u16 idx; |
2042 | |
2043 | sb = kvmppc_xive_find_source(xive, irq, &idx); |
2044 | if (!sb) |
2045 | return; |
2046 | |
2047 | state = &sb->irq_state[idx]; |
2048 | |
2049 | /* Some sanity checking */ |
2050 | if (!state->valid) { |
2051 | pr_err("invalid irq 0x%x in cpu queue!\n" , irq); |
2052 | return; |
2053 | } |
2054 | |
2055 | /* |
2056 | * If the interrupt is in a queue it should have P set. |
	 * We warn so that it gets reported. A backtrace isn't useful
2058 | * so no need to use a WARN_ON. |
2059 | */ |
2060 | if (!state->saved_p) |
2061 | pr_err("Interrupt 0x%x is marked in a queue but P not set !\n" , irq); |
2062 | |
2063 | /* Set flag */ |
2064 | state->in_queue = true; |
2065 | } |
2066 | |
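/* Mask a source and capture its priority and P/Q bits for the save scan */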
2067 | static void xive_pre_save_mask_irq(struct kvmppc_xive *xive, |
2068 | struct kvmppc_xive_src_block *sb, |
2069 | u32 irq) |
2070 | { |
2071 | struct kvmppc_xive_irq_state *state = &sb->irq_state[irq]; |
2072 | |
2073 | if (!state->valid) |
2074 | return; |
2075 | |
2076 | /* Mask and save state, this will also sync HW queues */ |
2077 | state->saved_scan_prio = xive_lock_and_mask(xive, sb, state); |
2078 | |
2079 | /* Transfer P and Q */ |
2080 | state->saved_p = state->old_p; |
2081 | state->saved_q = state->old_q; |
2082 | |
2083 | /* Unlock */ |
2084 | arch_spin_unlock(&sb->lock); |
2085 | } |
2086 | |
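/* Restore a source to its pre-scan mask/priority state */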
2087 | static void xive_pre_save_unmask_irq(struct kvmppc_xive *xive, |
2088 | struct kvmppc_xive_src_block *sb, |
2089 | u32 irq) |
2090 | { |
2091 | struct kvmppc_xive_irq_state *state = &sb->irq_state[irq]; |
2092 | |
2093 | if (!state->valid) |
2094 | return; |
2095 | |
2096 | /* |
	 * Lock / exclude EOI (not technically necessary if the
	 * guest isn't running concurrently). If this becomes a
2099 | * performance issue we can probably remove the lock. |
2100 | */ |
2101 | xive_lock_for_unmask(sb, state); |
2102 | |
2103 | /* Restore mask/prio if it wasn't masked */ |
2104 | if (state->saved_scan_prio != MASKED) |
		xive_finish_unmask(xive, sb, state, state->saved_scan_prio);
2106 | |
2107 | /* Unlock */ |
2108 | arch_spin_unlock(&sb->lock); |
2109 | } |
2110 | |
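/* Walk a queue page and flag every guest interrupt found in it as queued */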
2111 | static void xive_pre_save_queue(struct kvmppc_xive *xive, struct xive_q *q) |
2112 | { |
2113 | u32 idx = q->idx; |
2114 | u32 toggle = q->toggle; |
2115 | u32 irq; |
2116 | |
2117 | do { |
2118 | irq = __xive_read_eq(q->qpage, q->msk, &idx, &toggle); |
2119 | if (irq > XICS_IPI) |
2120 | xive_pre_save_set_queued(xive, irq); |
	} while (irq);
2122 | } |
2123 | |
2124 | static void xive_pre_save_scan(struct kvmppc_xive *xive) |
2125 | { |
2126 | struct kvm_vcpu *vcpu = NULL; |
2127 | unsigned long i; |
2128 | int j; |
2129 | |
2130 | /* |
	 * See comment in xive_get_source() about how this
	 * works. Collect a stable state for all interrupts.
2133 | */ |
2134 | for (i = 0; i <= xive->max_sbid; i++) { |
2135 | struct kvmppc_xive_src_block *sb = xive->src_blocks[i]; |
2136 | if (!sb) |
2137 | continue; |
2138 | for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) |
2139 | xive_pre_save_mask_irq(xive, sb, j); |
2140 | } |
2141 | |
2142 | /* Then scan the queues and update the "in_queue" flag */ |
2143 | kvm_for_each_vcpu(i, vcpu, xive->kvm) { |
2144 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; |
2145 | if (!xc) |
2146 | continue; |
2147 | for (j = 0; j < KVMPPC_XIVE_Q_COUNT; j++) { |
2148 | if (xc->queues[j].qpage) |
2149 | xive_pre_save_queue(xive, &xc->queues[j]); |
2150 | } |
2151 | } |
2152 | |
2153 | /* Finally restore interrupt states */ |
2154 | for (i = 0; i <= xive->max_sbid; i++) { |
2155 | struct kvmppc_xive_src_block *sb = xive->src_blocks[i]; |
2156 | if (!sb) |
2157 | continue; |
2158 | for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) |
2159 | xive_pre_save_unmask_irq(xive, sb, j); |
2160 | } |
2161 | } |
2162 | |
2163 | static void xive_post_save_scan(struct kvmppc_xive *xive) |
2164 | { |
2165 | u32 i, j; |
2166 | |
2167 | /* Clear all the in_queue flags */ |
2168 | for (i = 0; i <= xive->max_sbid; i++) { |
2169 | struct kvmppc_xive_src_block *sb = xive->src_blocks[i]; |
2170 | if (!sb) |
2171 | continue; |
2172 | for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) |
2173 | sb->irq_state[j].in_queue = false; |
2174 | } |
2175 | |
2176 | /* Next get_source() will do a new scan */ |
2177 | xive->saved_src_count = 0; |
2178 | } |
2179 | |
2180 | /* |
2181 | * This returns the source configuration and state to user space. |
2182 | */ |
2183 | static int xive_get_source(struct kvmppc_xive *xive, long irq, u64 addr) |
2184 | { |
2185 | struct kvmppc_xive_src_block *sb; |
2186 | struct kvmppc_xive_irq_state *state; |
2187 | u64 __user *ubufp = (u64 __user *) addr; |
2188 | u64 val, prio; |
2189 | u16 idx; |
2190 | |
2191 | sb = kvmppc_xive_find_source(xive, irq, &idx); |
2192 | if (!sb) |
2193 | return -ENOENT; |
2194 | |
2195 | state = &sb->irq_state[idx]; |
2196 | |
2197 | if (!state->valid) |
2198 | return -ENOENT; |
2199 | |
2200 | pr_devel("get_source(%ld)...\n" , irq); |
2201 | |
2202 | /* |
2203 | * So to properly save the state into something that looks like a |
2204 | * XICS migration stream we cannot treat interrupts individually. |
2205 | * |
2206 | * We need, instead, mask them all (& save their previous PQ state) |
2207 | * to get a stable state in the HW, then sync them to ensure that |
2208 | * any interrupt that had already fired hits its queue, and finally |
2209 | * scan all the queues to collect which interrupts are still present |
2210 | * in the queues, so we can set the "pending" flag on them and |
2211 | * they can be resent on restore. |
2212 | * |
2213 | * So we do it all when the "first" interrupt gets saved, all the |
2214 | * state is collected at that point, the rest of xive_get_source() |
2215 | * will merely collect and convert that state to the expected |
2216 | * userspace bit mask. |
2217 | */ |
2218 | if (xive->saved_src_count == 0) |
2219 | xive_pre_save_scan(xive); |
2220 | xive->saved_src_count++; |
2221 | |
2222 | /* Convert saved state into something compatible with xics */ |
2223 | val = state->act_server; |
2224 | prio = state->saved_scan_prio; |
2225 | |
2226 | if (prio == MASKED) { |
2227 | val |= KVM_XICS_MASKED; |
2228 | prio = state->saved_priority; |
2229 | } |
2230 | val |= prio << KVM_XICS_PRIORITY_SHIFT; |
2231 | if (state->lsi) { |
2232 | val |= KVM_XICS_LEVEL_SENSITIVE; |
2233 | if (state->saved_p) |
2234 | val |= KVM_XICS_PENDING; |
2235 | } else { |
2236 | if (state->saved_p) |
2237 | val |= KVM_XICS_PRESENTED; |
2238 | |
2239 | if (state->saved_q) |
2240 | val |= KVM_XICS_QUEUED; |
2241 | |
2242 | /* |
2243 | * We mark it pending (which will attempt a re-delivery) |
2244 | * if we are in a queue *or* we were masked and had |
2245 | * Q set which is equivalent to the XICS "masked pending" |
2246 | * state |
2247 | */ |
2248 | if (state->in_queue || (prio == MASKED && state->saved_q)) |
2249 | val |= KVM_XICS_PENDING; |
2250 | } |
2251 | |
2252 | /* |
2253 | * If that was the last interrupt saved, reset the |
2254 | * in_queue flags |
2255 | */ |
2256 | if (xive->saved_src_count == xive->src_count) |
2257 | xive_post_save_scan(xive); |
2258 | |
2259 | /* Copy the result to userspace */ |
2260 | if (put_user(val, ubufp)) |
2261 | return -EFAULT; |
2262 | |
2263 | return 0; |
2264 | } |
2265 | |
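/* Allocate and initialize a source block (ICS) with all sources masked */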
2266 | struct kvmppc_xive_src_block *kvmppc_xive_create_src_block( |
2267 | struct kvmppc_xive *xive, int irq) |
2268 | { |
2269 | struct kvmppc_xive_src_block *sb; |
2270 | int i, bid; |
2271 | |
2272 | bid = irq >> KVMPPC_XICS_ICS_SHIFT; |
2273 | |
2274 | mutex_lock(&xive->lock); |
2275 | |
2276 | /* block already exists - somebody else got here first */ |
2277 | if (xive->src_blocks[bid]) |
2278 | goto out; |
2279 | |
2280 | /* Create the ICS */ |
2281 | sb = kzalloc(sizeof(*sb), GFP_KERNEL); |
2282 | if (!sb) |
2283 | goto out; |
2284 | |
2285 | sb->id = bid; |
2286 | |
2287 | for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { |
2288 | sb->irq_state[i].number = (bid << KVMPPC_XICS_ICS_SHIFT) | i; |
2289 | sb->irq_state[i].eisn = 0; |
2290 | sb->irq_state[i].guest_priority = MASKED; |
2291 | sb->irq_state[i].saved_priority = MASKED; |
2292 | sb->irq_state[i].act_priority = MASKED; |
2293 | } |
2294 | smp_wmb(); |
2295 | xive->src_blocks[bid] = sb; |
2296 | |
2297 | if (bid > xive->max_sbid) |
2298 | xive->max_sbid = bid; |
2299 | |
2300 | out: |
	mutex_unlock(&xive->lock);
2302 | return xive->src_blocks[bid]; |
2303 | } |
2304 | |
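/*
 * Check whether "irq" was stashed as a delayed interrupt in one of the
 * vCPU presenters; if so, clear it so the caller can treat the
 * interrupt as pending.
 */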
2305 | static bool xive_check_delayed_irq(struct kvmppc_xive *xive, u32 irq) |
2306 | { |
2307 | struct kvm *kvm = xive->kvm; |
2308 | struct kvm_vcpu *vcpu = NULL; |
2309 | unsigned long i; |
2310 | |
2311 | kvm_for_each_vcpu(i, vcpu, kvm) { |
2312 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; |
2313 | |
2314 | if (!xc) |
2315 | continue; |
2316 | |
2317 | if (xc->delayed_irq == irq) { |
2318 | xc->delayed_irq = 0; |
2319 | xive->delayed_irqs--; |
2320 | return true; |
2321 | } |
2322 | } |
2323 | return false; |
2324 | } |
2325 | |
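/*
 * Restore the configuration and state of a source from a XICS
 * migration stream: allocate its backing IPI if needed, re-target
 * the interrupt, then rebuild the mask and P/Q state from the
 * saved flags.
 */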
2326 | static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr) |
2327 | { |
2328 | struct kvmppc_xive_src_block *sb; |
2329 | struct kvmppc_xive_irq_state *state; |
2330 | u64 __user *ubufp = (u64 __user *) addr; |
2331 | u16 idx; |
2332 | u64 val; |
2333 | u8 act_prio, guest_prio; |
2334 | u32 server; |
2335 | int rc = 0; |
2336 | |
2337 | if (irq < KVMPPC_XICS_FIRST_IRQ || irq >= KVMPPC_XICS_NR_IRQS) |
2338 | return -ENOENT; |
2339 | |
2340 | pr_devel("set_source(irq=0x%lx)\n" , irq); |
2341 | |
2342 | /* Find the source */ |
2343 | sb = kvmppc_xive_find_source(xive, irq, &idx); |
2344 | if (!sb) { |
2345 | pr_devel("No source, creating source block...\n" ); |
2346 | sb = kvmppc_xive_create_src_block(xive, irq); |
2347 | if (!sb) { |
2348 | pr_devel("Failed to create block...\n" ); |
2349 | return -ENOMEM; |
2350 | } |
2351 | } |
2352 | state = &sb->irq_state[idx]; |
2353 | |
2354 | /* Read user passed data */ |
2355 | if (get_user(val, ubufp)) { |
2356 | pr_devel("fault getting user info !\n" ); |
2357 | return -EFAULT; |
2358 | } |
2359 | |
2360 | server = val & KVM_XICS_DESTINATION_MASK; |
2361 | guest_prio = val >> KVM_XICS_PRIORITY_SHIFT; |
2362 | |
2363 | pr_devel(" val=0x016%llx (server=0x%x, guest_prio=%d)\n" , |
2364 | val, server, guest_prio); |
2365 | |
2366 | /* |
2367 | * If the source doesn't already have an IPI, allocate |
2368 | * one and get the corresponding data |
2369 | */ |
2370 | if (!state->ipi_number) { |
2371 | state->ipi_number = xive_native_alloc_irq(); |
2372 | if (state->ipi_number == 0) { |
2373 | pr_devel("Failed to allocate IPI !\n" ); |
2374 | return -ENOMEM; |
2375 | } |
2376 | xive_native_populate_irq_data(state->ipi_number, &state->ipi_data); |
2377 | pr_devel(" src_ipi=0x%x\n" , state->ipi_number); |
2378 | } |
2379 | |
2380 | /* |
2381 | * We use lock_and_mask() to set us in the right masked |
2382 | * state. We will override that state from the saved state |
2383 | * further down, but this will handle the cases of interrupts |
2384 | * that need FW masking. We set the initial guest_priority to |
2385 | * 0 before calling it to ensure it actually performs the masking. |
2386 | */ |
2387 | state->guest_priority = 0; |
2388 | xive_lock_and_mask(xive, sb, state); |
2389 | |
2390 | /* |
2391 | * Now, we select a target if we have one. If we don't we |
	 * leave the interrupt untargeted. It means that an interrupt
	 * can become "untargeted" across migration if it was masked
2394 | * by set_xive() but there is little we can do about it. |
2395 | */ |
2396 | |
	/* First convert prio and mark interrupt as untargeted */
2398 | act_prio = xive_prio_from_guest(guest_prio); |
2399 | state->act_priority = MASKED; |
2400 | |
2401 | /* |
2402 | * We need to drop the lock due to the mutex below. Hopefully |
	 * nothing is touching that interrupt yet since it hasn't been
	 * advertised to a running guest yet.
2405 | */ |
2406 | arch_spin_unlock(&sb->lock); |
2407 | |
2408 | /* If we have a priority target the interrupt */ |
2409 | if (act_prio != MASKED) { |
2410 | /* First, check provisioning of queues */ |
2411 | mutex_lock(&xive->lock); |
		rc = xive_check_provisioning(xive->kvm, act_prio);
		mutex_unlock(&xive->lock);
2414 | |
2415 | /* Target interrupt */ |
2416 | if (rc == 0) |
			rc = xive_target_interrupt(xive->kvm, state,
						   server, act_prio);
2419 | /* |
		 * If provisioning or targeting failed, leave it
2421 | * alone and masked. It will remain disabled until |
2422 | * the guest re-targets it. |
2423 | */ |
2424 | } |
2425 | |
2426 | /* |
2427 | * Find out if this was a delayed irq stashed in an ICP, |
2428 | * in which case, treat it as pending |
2429 | */ |
2430 | if (xive->delayed_irqs && xive_check_delayed_irq(xive, irq)) { |
2431 | val |= KVM_XICS_PENDING; |
2432 | pr_devel(" Found delayed ! forcing PENDING !\n" ); |
2433 | } |
2434 | |
2435 | /* Cleanup the SW state */ |
2436 | state->old_p = false; |
2437 | state->old_q = false; |
2438 | state->lsi = false; |
2439 | state->asserted = false; |
2440 | |
2441 | /* Restore LSI state */ |
2442 | if (val & KVM_XICS_LEVEL_SENSITIVE) { |
2443 | state->lsi = true; |
2444 | if (val & KVM_XICS_PENDING) |
2445 | state->asserted = true; |
2446 | pr_devel(" LSI ! Asserted=%d\n" , state->asserted); |
2447 | } |
2448 | |
2449 | /* |
2450 | * Restore P and Q. If the interrupt was pending, we |
2451 | * force Q and !P, which will trigger a resend. |
2452 | * |
2453 | * That means that a guest that had both an interrupt |
2454 | * pending (queued) and Q set will restore with only |
2455 | * one instance of that interrupt instead of 2, but that |
2456 | * is perfectly fine as coalescing interrupts that haven't |
2457 | * been presented yet is always allowed. |
2458 | */ |
2459 | if (val & KVM_XICS_PRESENTED && !(val & KVM_XICS_PENDING)) |
2460 | state->old_p = true; |
2461 | if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING) |
2462 | state->old_q = true; |
2463 | |
2464 | pr_devel(" P=%d, Q=%d\n" , state->old_p, state->old_q); |
2465 | |
2466 | /* |
2467 | * If the interrupt was unmasked, update guest priority and |
2468 | * perform the appropriate state transition and do a |
2469 | * re-trigger if necessary. |
2470 | */ |
2471 | if (val & KVM_XICS_MASKED) { |
2472 | pr_devel(" masked, saving prio\n" ); |
2473 | state->guest_priority = MASKED; |
2474 | state->saved_priority = guest_prio; |
2475 | } else { |
2476 | pr_devel(" unmasked, restoring to prio %d\n" , guest_prio); |
2477 | xive_finish_unmask(xive, sb, state, prio: guest_prio); |
2478 | state->saved_priority = guest_prio; |
2479 | } |
2480 | |
2481 | /* Increment the number of valid sources and mark this one valid */ |
2482 | if (!state->valid) |
2483 | xive->src_count++; |
2484 | state->valid = true; |
2485 | |
2486 | return 0; |
2487 | } |
2488 | |
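/*
 * Inject an interrupt from the irqchip/irqfd path. Level-sensitive
 * sources track the asserted state, edge sources simply trigger the
 * backing IPI. Passed-through sources cannot be triggered this way.
 */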
2489 | int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, |
2490 | bool line_status) |
2491 | { |
2492 | struct kvmppc_xive *xive = kvm->arch.xive; |
2493 | struct kvmppc_xive_src_block *sb; |
2494 | struct kvmppc_xive_irq_state *state; |
2495 | u16 idx; |
2496 | |
2497 | if (!xive) |
2498 | return -ENODEV; |
2499 | |
2500 | sb = kvmppc_xive_find_source(xive, irq, &idx); |
2501 | if (!sb) |
2502 | return -EINVAL; |
2503 | |
2504 | /* Perform locklessly .... (we need to do some RCUisms here...) */ |
2505 | state = &sb->irq_state[idx]; |
2506 | if (!state->valid) |
2507 | return -EINVAL; |
2508 | |
2509 | /* We don't allow a trigger on a passed-through interrupt */ |
2510 | if (state->pt_number) |
2511 | return -EINVAL; |
2512 | |
2513 | if ((level == 1 && state->lsi) || level == KVM_INTERRUPT_SET_LEVEL) |
2514 | state->asserted = true; |
2515 | else if (level == 0 || level == KVM_INTERRUPT_UNSET) { |
2516 | state->asserted = false; |
2517 | return 0; |
2518 | } |
2519 | |
2520 | /* Trigger the IPI */ |
	xive_irq_trigger(&state->ipi_data);
2522 | |
2523 | return 0; |
2524 | } |
2525 | |
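/*
 * Set the number of interrupt servers (KVM_DEV_XICS_NR_SERVERS). This
 * sizes the VP block, so it can only be changed before the block is
 * allocated by the first vCPU connection.
 */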
2526 | int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr) |
2527 | { |
2528 | u32 __user *ubufp = (u32 __user *) addr; |
2529 | u32 nr_servers; |
2530 | int rc = 0; |
2531 | |
2532 | if (get_user(nr_servers, ubufp)) |
2533 | return -EFAULT; |
2534 | |
2535 | pr_devel("%s nr_servers=%u\n" , __func__, nr_servers); |
2536 | |
2537 | if (!nr_servers || nr_servers > KVM_MAX_VCPU_IDS) |
2538 | return -EINVAL; |
2539 | |
2540 | mutex_lock(&xive->lock); |
2541 | if (xive->vp_base != XIVE_INVALID_VP) |
2542 | /* The VP block is allocated once and freed when the device |
		 * is released. Better not allow changing its size since it's
		 * used by connect_vcpu to validate that vCPU ids are valid (e.g.
2545 | * setting it back to a higher value could allow connect_vcpu |
2546 | * to come up with a VP id that goes beyond the VP block, which |
2547 | * is likely to cause a crash in OPAL). |
2548 | */ |
2549 | rc = -EBUSY; |
2550 | else if (nr_servers > KVM_MAX_VCPUS) |
2551 | /* We don't need more servers. Higher vCPU ids get packed |
2552 | * down below KVM_MAX_VCPUS by kvmppc_pack_vcpu_id(). |
2553 | */ |
2554 | xive->nr_servers = KVM_MAX_VCPUS; |
2555 | else |
2556 | xive->nr_servers = nr_servers; |
2557 | |
	mutex_unlock(&xive->lock);
2559 | |
2560 | return rc; |
2561 | } |
2562 | |
2563 | static int xive_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) |
2564 | { |
2565 | struct kvmppc_xive *xive = dev->private; |
2566 | |
2567 | /* We honor the existing XICS ioctl */ |
2568 | switch (attr->group) { |
2569 | case KVM_DEV_XICS_GRP_SOURCES: |
		return xive_set_source(xive, attr->attr, attr->addr);
2571 | case KVM_DEV_XICS_GRP_CTRL: |
2572 | switch (attr->attr) { |
2573 | case KVM_DEV_XICS_NR_SERVERS: |
			return kvmppc_xive_set_nr_servers(xive, attr->addr);
2575 | } |
2576 | } |
2577 | return -ENXIO; |
2578 | } |
2579 | |
2580 | static int xive_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) |
2581 | { |
2582 | struct kvmppc_xive *xive = dev->private; |
2583 | |
2584 | /* We honor the existing XICS ioctl */ |
2585 | switch (attr->group) { |
2586 | case KVM_DEV_XICS_GRP_SOURCES: |
		return xive_get_source(xive, attr->attr, attr->addr);
2588 | } |
2589 | return -ENXIO; |
2590 | } |
2591 | |
2592 | static int xive_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) |
2593 | { |
2594 | /* We honor the same limits as XICS, at least for now */ |
2595 | switch (attr->group) { |
2596 | case KVM_DEV_XICS_GRP_SOURCES: |
2597 | if (attr->attr >= KVMPPC_XICS_FIRST_IRQ && |
2598 | attr->attr < KVMPPC_XICS_NR_IRQS) |
2599 | return 0; |
2600 | break; |
2601 | case KVM_DEV_XICS_GRP_CTRL: |
2602 | switch (attr->attr) { |
2603 | case KVM_DEV_XICS_NR_SERVERS: |
2604 | return 0; |
2605 | } |
2606 | } |
2607 | return -ENXIO; |
2608 | } |
2609 | |
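/* Park an interrupt: set it to PQ=01 (masked) and detarget it in HW */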
2610 | static void kvmppc_xive_cleanup_irq(u32 hw_num, struct xive_irq_data *xd) |
2611 | { |
2612 | xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01); |
2613 | xive_native_configure_irq(hw_num, 0, MASKED, 0); |
2614 | } |
2615 | |
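/* Mask and free the interrupts backing all valid sources of a block */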
2616 | void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb) |
2617 | { |
2618 | int i; |
2619 | |
2620 | for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { |
2621 | struct kvmppc_xive_irq_state *state = &sb->irq_state[i]; |
2622 | |
2623 | if (!state->valid) |
2624 | continue; |
2625 | |
2626 | kvmppc_xive_cleanup_irq(state->ipi_number, &state->ipi_data); |
2627 | xive_cleanup_irq_data(&state->ipi_data); |
2628 | xive_native_free_irq(state->ipi_number); |
2629 | |
2630 | /* Pass-through, cleanup too but keep IRQ hw data */ |
2631 | if (state->pt_number) |
2632 | kvmppc_xive_cleanup_irq(state->pt_number, state->pt_data); |
2633 | |
2634 | state->valid = false; |
2635 | } |
2636 | } |
2637 | |
2638 | /* |
2639 | * Called when device fd is closed. kvm->lock is held. |
2640 | */ |
2641 | static void kvmppc_xive_release(struct kvm_device *dev) |
2642 | { |
2643 | struct kvmppc_xive *xive = dev->private; |
2644 | struct kvm *kvm = xive->kvm; |
2645 | struct kvm_vcpu *vcpu; |
2646 | unsigned long i; |
2647 | |
2648 | pr_devel("Releasing xive device\n" ); |
2649 | |
2650 | /* |
2651 | * Since this is the device release function, we know that |
2652 | * userspace does not have any open fd referring to the |
2653 | * device. Therefore there can not be any of the device |
2654 | * attribute set/get functions being executed concurrently, |
2655 | * and similarly, the connect_vcpu and set/clr_mapped |
2656 | * functions also cannot be being executed. |
2657 | */ |
2658 | |
	debugfs_remove(xive->dentry);
2660 | |
2661 | /* |
2662 | * We should clean up the vCPU interrupt presenters first. |
2663 | */ |
2664 | kvm_for_each_vcpu(i, vcpu, kvm) { |
2665 | /* |
2666 | * Take vcpu->mutex to ensure that no one_reg get/set ioctl |
2667 | * (i.e. kvmppc_xive_[gs]et_icp) can be done concurrently. |
2668 | * Holding the vcpu->mutex also means that the vcpu cannot |
2669 | * be executing the KVM_RUN ioctl, and therefore it cannot |
2670 | * be executing the XIVE push or pull code or accessing |
2671 | * the XIVE MMIO regions. |
2672 | */ |
2673 | mutex_lock(&vcpu->mutex); |
2674 | kvmppc_xive_cleanup_vcpu(vcpu); |
		mutex_unlock(&vcpu->mutex);
2676 | } |
2677 | |
2678 | /* |
2679 | * Now that we have cleared vcpu->arch.xive_vcpu, vcpu->arch.irq_type |
2680 | * and vcpu->arch.xive_esc_[vr]addr on each vcpu, we are safe |
2681 | * against xive code getting called during vcpu execution or |
2682 | * set/get one_reg operations. |
2683 | */ |
2684 | kvm->arch.xive = NULL; |
2685 | |
2686 | /* Mask and free interrupts */ |
2687 | for (i = 0; i <= xive->max_sbid; i++) { |
2688 | if (xive->src_blocks[i]) |
			kvmppc_xive_free_sources(xive->src_blocks[i]);
		kfree(xive->src_blocks[i]);
2691 | xive->src_blocks[i] = NULL; |
2692 | } |
2693 | |
2694 | if (xive->vp_base != XIVE_INVALID_VP) |
2695 | xive_native_free_vp_block(xive->vp_base); |
2696 | |
2697 | /* |
2698 | * A reference of the kvmppc_xive pointer is now kept under |
2699 | * the xive_devices struct of the machine for reuse. It is |
2700 | * freed when the VM is destroyed for now until we fix all the |
2701 | * execution paths. |
2702 | */ |
2703 | |
	kfree(dev);
2705 | } |
2706 | |
2707 | /* |
2708 | * When the guest chooses the interrupt mode (XICS legacy or XIVE |
 * native), the VM will switch KVM devices. The previous device will
2710 | * be "released" before the new one is created. |
2711 | * |
2712 | * Until we are sure all execution paths are well protected, provide a |
2713 | * fail safe (transitional) method for device destruction, in which |
2714 | * the XIVE device pointer is recycled and not directly freed. |
2715 | */ |
2716 | struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type) |
2717 | { |
2718 | struct kvmppc_xive **kvm_xive_device = type == KVM_DEV_TYPE_XIVE ? |
2719 | &kvm->arch.xive_devices.native : |
2720 | &kvm->arch.xive_devices.xics_on_xive; |
2721 | struct kvmppc_xive *xive = *kvm_xive_device; |
2722 | |
2723 | if (!xive) { |
2724 | xive = kzalloc(sizeof(*xive), GFP_KERNEL); |
2725 | *kvm_xive_device = xive; |
2726 | } else { |
2727 | memset(xive, 0, sizeof(*xive)); |
2728 | } |
2729 | |
2730 | return xive; |
2731 | } |
2732 | |
2733 | /* |
2734 | * Create a XICS device with XIVE backend. kvm->lock is held. |
2735 | */ |
2736 | static int kvmppc_xive_create(struct kvm_device *dev, u32 type) |
2737 | { |
2738 | struct kvmppc_xive *xive; |
2739 | struct kvm *kvm = dev->kvm; |
2740 | |
2741 | pr_devel("Creating xive for partition\n" ); |
2742 | |
2743 | /* Already there ? */ |
2744 | if (kvm->arch.xive) |
2745 | return -EEXIST; |
2746 | |
2747 | xive = kvmppc_xive_get_device(kvm, type); |
2748 | if (!xive) |
2749 | return -ENOMEM; |
2750 | |
2751 | dev->private = xive; |
2752 | xive->dev = dev; |
2753 | xive->kvm = kvm; |
2754 | mutex_init(&xive->lock); |
2755 | |
2756 | /* We use the default queue size set by the host */ |
2757 | xive->q_order = xive_native_default_eq_shift(); |
2758 | if (xive->q_order < PAGE_SHIFT) |
2759 | xive->q_page_order = 0; |
2760 | else |
2761 | xive->q_page_order = xive->q_order - PAGE_SHIFT; |
2762 | |
2763 | /* VP allocation is delayed to the first call to connect_vcpu */ |
2764 | xive->vp_base = XIVE_INVALID_VP; |
	/* KVM_MAX_VCPUS limits the number of VMs to roughly 64 per socket
2766 | * on a POWER9 system. |
2767 | */ |
2768 | xive->nr_servers = KVM_MAX_VCPUS; |
2769 | |
2770 | if (xive_native_has_single_escalation()) |
2771 | xive->flags |= KVMPPC_XIVE_FLAG_SINGLE_ESCALATION; |
2772 | |
2773 | if (xive_native_has_save_restore()) |
2774 | xive->flags |= KVMPPC_XIVE_FLAG_SAVE_RESTORE; |
2775 | |
2776 | kvm->arch.xive = xive; |
2777 | return 0; |
2778 | } |
2779 | |
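/*
 * Dispatch a XICS hypercall from the guest to the corresponding
 * virtual mode handler. Returns H_TOO_HARD if the vCPU has no XICS
 * configured and H_UNSUPPORTED for unknown requests.
 */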
2780 | int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req) |
2781 | { |
2782 | /* The VM should have configured XICS mode before doing XICS hcalls. */ |
2783 | if (!kvmppc_xics_enabled(vcpu)) |
2784 | return H_TOO_HARD; |
2785 | |
2786 | switch (req) { |
2787 | case H_XIRR: |
2788 | return xive_vm_h_xirr(vcpu); |
2789 | case H_CPPR: |
		return xive_vm_h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4));
	case H_EOI:
		return xive_vm_h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4));
	case H_IPI:
		return xive_vm_h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4),
				     kvmppc_get_gpr(vcpu, 5));
	case H_IPOLL:
		return xive_vm_h_ipoll(vcpu, kvmppc_get_gpr(vcpu, 4));
2798 | case H_XIRR_X: |
2799 | xive_vm_h_xirr(vcpu); |
2800 | kvmppc_set_gpr(vcpu, 5, get_tb() + kvmppc_get_tb_offset(vcpu)); |
2801 | return H_SUCCESS; |
2802 | } |
2803 | |
2804 | return H_UNSUPPORTED; |
2805 | } |
2806 | EXPORT_SYMBOL_GPL(kvmppc_xive_xics_hcall); |
2807 | |
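/* Dump the queues and escalation interrupts of a vCPU to debugfs */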
2808 | int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu) |
2809 | { |
2810 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; |
2811 | unsigned int i; |
2812 | |
2813 | for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) { |
2814 | struct xive_q *q = &xc->queues[i]; |
2815 | u32 i0, i1, idx; |
2816 | |
2817 | if (!q->qpage && !xc->esc_virq[i]) |
2818 | continue; |
2819 | |
2820 | if (q->qpage) { |
2821 | seq_printf(m, " q[%d]: " , i); |
2822 | idx = q->idx; |
2823 | i0 = be32_to_cpup(q->qpage + idx); |
2824 | idx = (idx + 1) & q->msk; |
2825 | i1 = be32_to_cpup(q->qpage + idx); |
2826 | seq_printf(m, "T=%d %08x %08x...\n" , q->toggle, |
2827 | i0, i1); |
2828 | } |
2829 | if (xc->esc_virq[i]) { |
2830 | struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]); |
2831 | struct xive_irq_data *xd = |
2832 | irq_data_get_irq_handler_data(d); |
2833 | u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET); |
2834 | |
2835 | seq_printf(m, " ESC %d %c%c EOI @%llx" , |
2836 | xc->esc_virq[i], |
2837 | (pq & XIVE_ESB_VAL_P) ? 'P' : '-', |
2838 | (pq & XIVE_ESB_VAL_Q) ? 'Q' : '-', |
2839 | xd->eoi_page); |
2840 | seq_puts(m, "\n" ); |
2841 | } |
2842 | } |
2843 | return 0; |
2844 | } |
2845 | |
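/* Dump the state of every valid source of a block to debugfs */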
2846 | void kvmppc_xive_debug_show_sources(struct seq_file *m, |
2847 | struct kvmppc_xive_src_block *sb) |
2848 | { |
2849 | int i; |
2850 | |
2851 | seq_puts(m, s: " LISN HW/CHIP TYPE PQ EISN CPU/PRIO\n" ); |
2852 | for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) { |
2853 | struct kvmppc_xive_irq_state *state = &sb->irq_state[i]; |
2854 | struct xive_irq_data *xd; |
2855 | u64 pq; |
2856 | u32 hw_num; |
2857 | |
2858 | if (!state->valid) |
2859 | continue; |
2860 | |
2861 | kvmppc_xive_select_irq(state, &hw_num, &xd); |
2862 | |
2863 | pq = xive_vm_esb_load(xd, XIVE_ESB_GET); |
2864 | |
2865 | seq_printf(m, "%08x %08x/%02x" , state->number, hw_num, |
2866 | xd->src_chip); |
2867 | if (state->lsi) |
2868 | seq_printf(m, " %cLSI" , state->asserted ? '^' : ' '); |
2869 | else |
2870 | seq_puts(m, " MSI" ); |
2871 | |
2872 | seq_printf(m, " %s %c%c %08x % 4d/%d" , |
2873 | state->ipi_number == hw_num ? "IPI" : " PT" , |
2874 | pq & XIVE_ESB_VAL_P ? 'P' : '-', |
2875 | pq & XIVE_ESB_VAL_Q ? 'Q' : '-', |
2876 | state->eisn, state->act_server, |
2877 | state->act_priority); |
2878 | |
2879 | seq_puts(m, "\n" ); |
2880 | } |
2881 | } |
2882 | |
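/*
 * Main debugfs dump: per-vCPU presenter state, real-mode vs. virtual
 * mode hcall statistics, and the state of all sources.
 */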
2883 | static int xive_debug_show(struct seq_file *m, void *private) |
2884 | { |
2885 | struct kvmppc_xive *xive = m->private; |
2886 | struct kvm *kvm = xive->kvm; |
2887 | struct kvm_vcpu *vcpu; |
2888 | u64 t_rm_h_xirr = 0; |
2889 | u64 t_rm_h_ipoll = 0; |
2890 | u64 t_rm_h_cppr = 0; |
2891 | u64 t_rm_h_eoi = 0; |
2892 | u64 t_rm_h_ipi = 0; |
2893 | u64 t_vm_h_xirr = 0; |
2894 | u64 t_vm_h_ipoll = 0; |
2895 | u64 t_vm_h_cppr = 0; |
2896 | u64 t_vm_h_eoi = 0; |
2897 | u64 t_vm_h_ipi = 0; |
2898 | unsigned long i; |
2899 | |
2900 | if (!kvm) |
2901 | return 0; |
2902 | |
2903 | seq_puts(m, s: "=========\nVCPU state\n=========\n" ); |
2904 | |
2905 | kvm_for_each_vcpu(i, vcpu, kvm) { |
2906 | struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; |
2907 | |
2908 | if (!xc) |
2909 | continue; |
2910 | |
2911 | seq_printf(m, fmt: "VCPU %d: VP:%#x/%02x\n" |
2912 | " CPPR:%#x HWCPPR:%#x MFRR:%#x PEND:%#x h_xirr: R=%lld V=%lld\n" , |
2913 | xc->server_num, xc->vp_id, xc->vp_chip_id, |
2914 | xc->cppr, xc->hw_cppr, |
2915 | xc->mfrr, xc->pending, |
2916 | xc->stat_rm_h_xirr, xc->stat_vm_h_xirr); |
2917 | |
2918 | kvmppc_xive_debug_show_queues(m, vcpu); |
2919 | |
2920 | t_rm_h_xirr += xc->stat_rm_h_xirr; |
2921 | t_rm_h_ipoll += xc->stat_rm_h_ipoll; |
2922 | t_rm_h_cppr += xc->stat_rm_h_cppr; |
2923 | t_rm_h_eoi += xc->stat_rm_h_eoi; |
2924 | t_rm_h_ipi += xc->stat_rm_h_ipi; |
2925 | t_vm_h_xirr += xc->stat_vm_h_xirr; |
2926 | t_vm_h_ipoll += xc->stat_vm_h_ipoll; |
2927 | t_vm_h_cppr += xc->stat_vm_h_cppr; |
2928 | t_vm_h_eoi += xc->stat_vm_h_eoi; |
2929 | t_vm_h_ipi += xc->stat_vm_h_ipi; |
2930 | } |
2931 | |
2932 | seq_puts(m, s: "Hcalls totals\n" ); |
2933 | seq_printf(m, fmt: " H_XIRR R=%10lld V=%10lld\n" , t_rm_h_xirr, t_vm_h_xirr); |
2934 | seq_printf(m, fmt: " H_IPOLL R=%10lld V=%10lld\n" , t_rm_h_ipoll, t_vm_h_ipoll); |
2935 | seq_printf(m, fmt: " H_CPPR R=%10lld V=%10lld\n" , t_rm_h_cppr, t_vm_h_cppr); |
2936 | seq_printf(m, fmt: " H_EOI R=%10lld V=%10lld\n" , t_rm_h_eoi, t_vm_h_eoi); |
2937 | seq_printf(m, fmt: " H_IPI R=%10lld V=%10lld\n" , t_rm_h_ipi, t_vm_h_ipi); |
2938 | |
2939 | seq_puts(m, s: "=========\nSources\n=========\n" ); |
2940 | |
2941 | for (i = 0; i <= xive->max_sbid; i++) { |
2942 | struct kvmppc_xive_src_block *sb = xive->src_blocks[i]; |
2943 | |
2944 | if (sb) { |
2945 | arch_spin_lock(&sb->lock); |
2946 | kvmppc_xive_debug_show_sources(m, sb); |
2947 | arch_spin_unlock(&sb->lock); |
2948 | } |
2949 | } |
2950 | |
2951 | return 0; |
2952 | } |
2953 | |
2954 | DEFINE_SHOW_ATTRIBUTE(xive_debug); |
2955 | |
2956 | static void xive_debugfs_init(struct kvmppc_xive *xive) |
2957 | { |
	xive->dentry = debugfs_create_file("xive", S_IRUGO, xive->kvm->debugfs_dentry,
					   xive, &xive_debug_fops);

	pr_debug("%s: created\n", __func__);
2962 | } |
2963 | |
2964 | static void kvmppc_xive_init(struct kvm_device *dev) |
2965 | { |
2966 | struct kvmppc_xive *xive = dev->private; |
2967 | |
2968 | /* Register some debug interfaces */ |
2969 | xive_debugfs_init(xive); |
2970 | } |
2971 | |
2972 | struct kvm_device_ops kvm_xive_ops = { |
2973 | .name = "kvm-xive" , |
2974 | .create = kvmppc_xive_create, |
2975 | .init = kvmppc_xive_init, |
2976 | .release = kvmppc_xive_release, |
2977 | .set_attr = xive_set_attr, |
2978 | .get_attr = xive_get_attr, |
2979 | .has_attr = xive_has_attr, |
2980 | }; |
2981 | |