// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2012 Michael Ellerman, IBM Corporation.
 * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation.
 */

#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/anon_inodes.h>
#include <linux/spinlock.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>

#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
#include <asm/hvcall.h>
#include <asm/xics.h>
#include <asm/time.h>

#include <linux/seq_file.h>

#include "book3s_xics.h"

#if 1
#define XICS_DBG(fmt...) do { } while (0)
#else
#define XICS_DBG(fmt...) trace_printk(fmt)
#endif

#define ENABLE_REALMODE	true
#define DEBUG_REALMODE	false

/*
 * LOCKING
 * =======
 *
 * Each ICS has a spin lock protecting the information about the IRQ
 * sources and avoiding simultaneous deliveries of the same interrupt.
 *
 * ICP operations are done via a single compare & swap transaction
 * (most ICP state fits in the union kvmppc_icp_state)
 */
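
/*
 * For reference, the ICP state used in those transactions packs into a
 * single word so that one cmpxchg64() commits a whole transition. A
 * sketch of the fields this file relies on (the authoritative bitfield
 * layout is union kvmppc_icp_state in book3s_xics.h):
 *
 *	raw		- the whole state as one unsigned long
 *	out_ee		- interrupt output (EE) currently asserted
 *	need_resend	- a failed delivery asked for a resend check
 *	cppr		- current processor priority
 *	mfrr		- IPI priority set via Set_MFRR
 *	pending_pri	- priority of the pending interrupt in xisr
 *	xisr		- source number of the pending interrupt
 */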

/*
 * TODO
 * ====
 *
 * - To speed up resends, keep a bitmap of "resend" set bits in the
 *   ICS
 *
 * - Speed up server# -> ICP lookup (array? hash table?)
 *
 * - Make ICS lockless as well, or at least a per-interrupt lock or hashed
 *   locks array to improve scalability
 */

/* -- ICS routines -- */

static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
			    u32 new_irq, bool check_resend);

/*
 * Return value ideally indicates how the interrupt was handled, but no
 * callers look at it (given that we don't implement KVM_IRQ_LINE_STATUS),
 * so just return 0.
 */
static int ics_deliver_irq(struct kvmppc_xics *xics, u32 irq, u32 level)
{
	struct ics_irq_state *state;
	struct kvmppc_ics *ics;
	u16 src;
	u32 pq_old, pq_new;

	XICS_DBG("ics deliver %#x (level: %d)\n", irq, level);

	ics = kvmppc_xics_find_ics(xics, irq, &src);
	if (!ics) {
		XICS_DBG("ics_deliver_irq: IRQ 0x%06x not found !\n", irq);
		return -EINVAL;
	}
	state = &ics->irq_state[src];
	if (!state->exists)
		return -EINVAL;

	if (level == KVM_INTERRUPT_SET_LEVEL || level == KVM_INTERRUPT_SET)
		level = 1;
	else if (level == KVM_INTERRUPT_UNSET)
		level = 0;
	/*
	 * Treat other values the same as 1, consistent with the original
	 * code. Maybe WARN here?
	 */

	if (!state->lsi && level == 0) /* noop for MSI */
		return 0;
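
	/*
	 * Fold the event into the 2-bit P/Q state. A sketch of the
	 * transitions implemented below, with P = PQ_PRESENTED and
	 * Q = PQ_QUEUED:
	 *
	 *	LSI, level 1: -> P	(early return if P already set)
	 *	LSI, level 0: -> 0
	 *	MSI:	      the old P bit shifts into Q, then P is set
	 */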

	do {
		pq_old = state->pq_state;
		if (state->lsi) {
			if (level) {
				if (pq_old & PQ_PRESENTED)
					/* Setting already set LSI ... */
					return 0;

				pq_new = PQ_PRESENTED;
			} else
				pq_new = 0;
		} else
			pq_new = ((pq_old << 1) & 3) | PQ_PRESENTED;
	} while (cmpxchg(&state->pq_state, pq_old, pq_new) != pq_old);

	/* Test P=1, Q=0, this is the only case where we present */
	if (pq_new == PQ_PRESENTED)
		icp_deliver_irq(xics, NULL, irq, false);

	/* Record which CPU this arrived on for passed-through interrupts */
	if (state->host_irq)
		state->intr_cpu = raw_smp_processor_id();

	return 0;
}

static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
			     struct kvmppc_icp *icp)
{
	int i;

	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
		struct ics_irq_state *state = &ics->irq_state[i];
		if (state->resend) {
			XICS_DBG("resend %#x prio %#x\n", state->number,
				 state->priority);
			icp_deliver_irq(xics, icp, state->number, true);
		}
	}
}

static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics,
		       struct ics_irq_state *state,
		       u32 server, u32 priority, u32 saved_priority)
{
	bool deliver;
	unsigned long flags;

	local_irq_save(flags);
	arch_spin_lock(&ics->lock);

	state->server = server;
	state->priority = priority;
	state->saved_priority = saved_priority;
	deliver = false;
	if ((state->masked_pending || state->resend) && priority != MASKED) {
		state->masked_pending = 0;
		state->resend = 0;
		deliver = true;
	}

	arch_spin_unlock(&ics->lock);
	local_irq_restore(flags);

	return deliver;
}

int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority)
{
	struct kvmppc_xics *xics = kvm->arch.xics;
	struct kvmppc_icp *icp;
	struct kvmppc_ics *ics;
	struct ics_irq_state *state;
	u16 src;

	if (!xics)
		return -ENODEV;

	ics = kvmppc_xics_find_ics(xics, irq, &src);
	if (!ics)
		return -EINVAL;
	state = &ics->irq_state[src];

	icp = kvmppc_xics_find_server(kvm, server);
	if (!icp)
		return -EINVAL;

	XICS_DBG("set_xive %#x server %#x prio %#x MP:%d RS:%d\n",
		 irq, server, priority,
		 state->masked_pending, state->resend);

	if (write_xive(xics, ics, state, server, priority, priority))
		icp_deliver_irq(xics, icp, irq, false);

	return 0;
}

int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority)
{
	struct kvmppc_xics *xics = kvm->arch.xics;
	struct kvmppc_ics *ics;
	struct ics_irq_state *state;
	u16 src;
	unsigned long flags;

	if (!xics)
		return -ENODEV;

	ics = kvmppc_xics_find_ics(xics, irq, &src);
	if (!ics)
		return -EINVAL;
	state = &ics->irq_state[src];

	local_irq_save(flags);
	arch_spin_lock(&ics->lock);
	*server = state->server;
	*priority = state->priority;
	arch_spin_unlock(&ics->lock);
	local_irq_restore(flags);

	return 0;
}

int kvmppc_xics_int_on(struct kvm *kvm, u32 irq)
{
	struct kvmppc_xics *xics = kvm->arch.xics;
	struct kvmppc_icp *icp;
	struct kvmppc_ics *ics;
	struct ics_irq_state *state;
	u16 src;

	if (!xics)
		return -ENODEV;

	ics = kvmppc_xics_find_ics(xics, irq, &src);
	if (!ics)
		return -EINVAL;
	state = &ics->irq_state[src];

	icp = kvmppc_xics_find_server(kvm, state->server);
	if (!icp)
		return -EINVAL;

	if (write_xive(xics, ics, state, state->server, state->saved_priority,
		       state->saved_priority))
		icp_deliver_irq(xics, icp, irq, false);

	return 0;
}

int kvmppc_xics_int_off(struct kvm *kvm, u32 irq)
{
	struct kvmppc_xics *xics = kvm->arch.xics;
	struct kvmppc_ics *ics;
	struct ics_irq_state *state;
	u16 src;

	if (!xics)
		return -ENODEV;

	ics = kvmppc_xics_find_ics(xics, irq, &src);
	if (!ics)
		return -EINVAL;
	state = &ics->irq_state[src];

	write_xive(xics, ics, state, state->server, MASKED, state->priority);

	return 0;
}

/* -- ICP routines, including hcalls -- */
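
/*
 * icp_try_update() commits one ICP transition as a single cmpxchg64()
 * of the packed state word. Callers run it in a lockless
 * read-modify-write loop; the pattern, as used throughout this file:
 *
 *	do {
 *		old_state = new_state = READ_ONCE(icp->state);
 *		... compute new_state from old_state ...
 *	} while (!icp_try_update(icp, old_state, new_state, change_self));
 */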

static inline bool icp_try_update(struct kvmppc_icp *icp,
				  union kvmppc_icp_state old,
				  union kvmppc_icp_state new,
				  bool change_self)
{
	bool success;

	/* Calculate new output value */
	new.out_ee = (new.xisr && (new.pending_pri < new.cppr));

	/* Attempt atomic update */
	success = cmpxchg64(&icp->state.raw, old.raw, new.raw) == old.raw;
	if (!success)
		goto bail;

	XICS_DBG("UPD [%04lx] - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n",
		 icp->server_num,
		 old.cppr, old.mfrr, old.pending_pri, old.xisr,
		 old.need_resend, old.out_ee);
	XICS_DBG("UPD - C:%02x M:%02x PP: %02x PI:%06x R:%d O:%d\n",
		 new.cppr, new.mfrr, new.pending_pri, new.xisr,
		 new.need_resend, new.out_ee);
	/*
	 * Check for output state update
	 *
	 * Note that this is racy since another processor could be updating
	 * the state already. This is why we never clear the interrupt output
	 * here, we only ever set it. The clear only happens prior to doing
	 * an update and only by the processor itself. Currently we do it
	 * in Accept (H_XIRR) and Up_CPPR (H_CPPR).
	 *
	 * We also do not try to figure out whether the EE state has changed,
	 * we unconditionally set it if the new state calls for it. The reason
	 * for that is that we opportunistically remove the pending interrupt
	 * flag when raising CPPR, so we need to set it back here if an
	 * interrupt is still pending.
	 */
	if (new.out_ee) {
		kvmppc_book3s_queue_irqprio(icp->vcpu,
					    BOOK3S_INTERRUPT_EXTERNAL);
		if (!change_self)
			kvmppc_fast_vcpu_kick(icp->vcpu);
	}
 bail:
	return success;
}

static void icp_check_resend(struct kvmppc_xics *xics,
			     struct kvmppc_icp *icp)
{
	u32 icsid;

	/* Order this load with the test for need_resend in the caller */
	smp_rmb();
	for_each_set_bit(icsid, icp->resend_map, xics->max_icsid + 1) {
		struct kvmppc_ics *ics = xics->ics[icsid];

		if (!test_and_clear_bit(icsid, icp->resend_map))
			continue;
		if (!ics)
			continue;
		ics_check_resend(xics, ics, icp);
	}
}

static bool icp_try_to_deliver(struct kvmppc_icp *icp, u32 irq, u8 priority,
			       u32 *reject)
{
	union kvmppc_icp_state old_state, new_state;
	bool success;

	XICS_DBG("try deliver %#x(P:%#x) to server %#lx\n", irq, priority,
		 icp->server_num);

	do {
		old_state = new_state = READ_ONCE(icp->state);

		*reject = 0;

		/* See if we can deliver */
		success = new_state.cppr > priority &&
			  new_state.mfrr > priority &&
			  new_state.pending_pri > priority;

		/*
		 * If we can, check for a rejection and perform the
		 * delivery
		 */
		if (success) {
			*reject = new_state.xisr;
			new_state.xisr = irq;
			new_state.pending_pri = priority;
		} else {
			/*
			 * If we failed to deliver we set need_resend
			 * so a subsequent CPPR state change causes us
			 * to try a new delivery.
			 */
			new_state.need_resend = true;
		}

	} while (!icp_try_update(icp, old_state, new_state, false));

	return success;
}

static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
			    u32 new_irq, bool check_resend)
{
	struct ics_irq_state *state;
	struct kvmppc_ics *ics;
	u32 reject;
	u16 src;
	unsigned long flags;

	/*
	 * This is used both for initial delivery of an interrupt and
	 * for subsequent rejection.
	 *
	 * Rejection can be racy vs. resends. We have evaluated the
	 * rejection in an atomic ICP transaction which is now complete,
	 * so potentially the ICP can already accept the interrupt again.
	 *
	 * So we need to retry the delivery. Essentially the reject path
	 * boils down to a failed delivery. Always.
	 *
	 * Now the interrupt could also have moved to a different target,
	 * thus we may need to re-do the ICP lookup as well
	 */

 again:
	/* Get the ICS state and lock it */
	ics = kvmppc_xics_find_ics(xics, new_irq, &src);
	if (!ics) {
		XICS_DBG("icp_deliver_irq: IRQ 0x%06x not found !\n", new_irq);
		return;
	}
	state = &ics->irq_state[src];

	/* Get a lock on the ICS */
	local_irq_save(flags);
	arch_spin_lock(&ics->lock);

	/* Get our server */
	if (!icp || state->server != icp->server_num) {
		icp = kvmppc_xics_find_server(xics->kvm, state->server);
		if (!icp) {
			pr_warn("icp_deliver_irq: IRQ 0x%06x server 0x%x not found !\n",
				new_irq, state->server);
			goto out;
		}
	}

	if (check_resend)
		if (!state->resend)
			goto out;

	/* Clear the resend bit of that interrupt */
	state->resend = 0;

	/*
	 * If masked, bail out
	 *
	 * Note: PAPR doesn't mention anything about masked pending
	 * when doing a resend, only when doing a delivery.
	 *
	 * However that would have the effect of losing a masked
	 * interrupt that was rejected and isn't consistent with
	 * the whole masked_pending business which is about not
	 * losing interrupts that occur while masked.
	 *
	 * I don't differentiate normal deliveries and resends, this
	 * implementation will differ from PAPR and not lose such
	 * interrupts.
	 */
	if (state->priority == MASKED) {
		XICS_DBG("irq %#x masked pending\n", new_irq);
		state->masked_pending = 1;
		goto out;
	}

	/*
	 * Try the delivery, this will set the need_resend flag
	 * in the ICP as part of the atomic transaction if the
	 * delivery is not possible.
	 *
	 * Note that if successful, the new delivery might have itself
	 * rejected an interrupt that was "delivered" before we took the
	 * ics spin lock.
	 *
	 * In this case we do the whole sequence all over again for the
	 * new guy. We cannot assume that the rejected interrupt is less
	 * favored than the new one, and thus doesn't need to be delivered,
	 * because by the time we exit icp_try_to_deliver() the target
	 * processor may well have already consumed & completed it, and thus
	 * the rejected interrupt might actually be already acceptable.
	 */
	if (icp_try_to_deliver(icp, new_irq, state->priority, &reject)) {
		/*
		 * Delivery was successful, did we reject somebody else ?
		 */
		if (reject && reject != XICS_IPI) {
			arch_spin_unlock(&ics->lock);
			local_irq_restore(flags);
			new_irq = reject;
			check_resend = false;
			goto again;
		}
	} else {
		/*
		 * We failed to deliver the interrupt, so we need to set
		 * the resend map bit and mark the ICS state as needing a
		 * resend.
		 */
		state->resend = 1;

		/*
		 * Make sure when checking resend, we don't miss the resend
		 * if resend_map bit is seen and cleared.
		 */
		smp_wmb();
		set_bit(ics->icsid, icp->resend_map);

		/*
		 * If the need_resend flag got cleared in the ICP some time
		 * between icp_try_to_deliver() atomic update and now, then
		 * we know it might have missed the resend_map bit. So we
		 * retry
		 */
		smp_mb();
		if (!icp->state.need_resend) {
			state->resend = 0;
			arch_spin_unlock(&ics->lock);
			local_irq_restore(flags);
			check_resend = false;
			goto again;
		}
	}
 out:
	arch_spin_unlock(&ics->lock);
	local_irq_restore(flags);
}

static void icp_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
			  u8 new_cppr)
{
	union kvmppc_icp_state old_state, new_state;
	bool resend;

	/*
	 * This handles several related states in one operation:
	 *
	 * ICP State: Down_CPPR
	 *
	 * Load CPPR with new value and if the XISR is 0
	 * then check for resends:
	 *
	 * ICP State: Resend
	 *
	 * If MFRR is more favored than CPPR, check for IPIs
	 * and notify ICS of a potential resend. This is done
	 * asynchronously (when used in real mode, we will have
	 * to exit here).
	 *
	 * We do not handle the complete Check_IPI as documented
	 * here. In the PAPR, this state will be used for both
	 * Set_MFRR and Down_CPPR. However, we know that we aren't
	 * changing the MFRR state here so we don't need to handle
	 * the case of an MFRR causing a reject of a pending irq,
	 * this will have been handled when the MFRR was set in the
	 * first place.
	 *
	 * Thus we don't have to handle rejects, only resends.
	 *
	 * When implementing real mode for HV KVM, resend will lead to
	 * a H_TOO_HARD return and the whole transaction will be handled
	 * in virtual mode.
	 */
	do {
		old_state = new_state = READ_ONCE(icp->state);

		/* Down_CPPR */
		new_state.cppr = new_cppr;

		/*
		 * Cut down Resend / Check_IPI / IPI
		 *
		 * The logic is that we cannot have a pending interrupt
		 * trumped by an IPI at this point (see above), so we
		 * know that either the pending interrupt is already an
		 * IPI (in which case we don't care to override it) or
		 * it's either more favored than us or non-existent
		 */
		if (new_state.mfrr < new_cppr &&
		    new_state.mfrr <= new_state.pending_pri) {
			WARN_ON(new_state.xisr != XICS_IPI &&
				new_state.xisr != 0);
			new_state.pending_pri = new_state.mfrr;
			new_state.xisr = XICS_IPI;
		}

		/* Latch/clear resend bit */
		resend = new_state.need_resend;
		new_state.need_resend = 0;

	} while (!icp_try_update(icp, old_state, new_state, true));

	/*
	 * Now handle resend checks. Those are asynchronous to the ICP
	 * state update in HW (ie bus transactions) so we can handle them
	 * separately here too
	 */
	if (resend)
		icp_check_resend(xics, icp);
}

static noinline unsigned long kvmppc_h_xirr(struct kvm_vcpu *vcpu)
{
	union kvmppc_icp_state old_state, new_state;
	struct kvmppc_icp *icp = vcpu->arch.icp;
	u32 xirr;

	/* First, remove EE from the processor */
	kvmppc_book3s_dequeue_irqprio(icp->vcpu, BOOK3S_INTERRUPT_EXTERNAL);

	/*
	 * ICP State: Accept_Interrupt
	 *
	 * Return the pending interrupt (if any) along with the
	 * current CPPR, then clear the XISR & set CPPR to the
	 * pending priority
	 */
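	/* The returned XIRR packs CPPR in the top byte, XISR in the low 24 bits */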
	do {
		old_state = new_state = READ_ONCE(icp->state);

		xirr = old_state.xisr | (((u32)old_state.cppr) << 24);
		if (!old_state.xisr)
			break;
		new_state.cppr = new_state.pending_pri;
		new_state.pending_pri = 0xff;
		new_state.xisr = 0;

	} while (!icp_try_update(icp, old_state, new_state, true));

	XICS_DBG("h_xirr vcpu %d xirr %#x\n", vcpu->vcpu_id, xirr);

	return xirr;
}

static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
				 unsigned long mfrr)
{
	union kvmppc_icp_state old_state, new_state;
	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
	struct kvmppc_icp *icp;
	u32 reject;
	bool resend;
	bool local;

	XICS_DBG("h_ipi vcpu %d to server %lu mfrr %#lx\n",
		 vcpu->vcpu_id, server, mfrr);

	icp = vcpu->arch.icp;
	local = icp->server_num == server;
	if (!local) {
		icp = kvmppc_xics_find_server(vcpu->kvm, server);
		if (!icp)
			return H_PARAMETER;
	}

	/*
	 * ICP state: Set_MFRR
	 *
	 * If the CPPR is more favored than the new MFRR, then
	 * nothing needs to be rejected as there can be no XISR to
	 * reject. If the MFRR is being made less favored then
	 * there might be a previously-rejected interrupt needing
	 * to be resent.
	 *
	 * ICP state: Check_IPI
	 *
	 * If the CPPR is less favored, then we might be replacing
	 * an interrupt, and thus need to possibly reject it.
	 *
	 * ICP State: IPI
	 *
	 * Besides rejecting any pending interrupts, we also
	 * update XISR and pending_pri to mark IPI as pending.
	 *
	 * PAPR does not describe this state, but if the MFRR is being
	 * made less favored than its earlier value, there might be
	 * a previously-rejected interrupt needing to be resent.
	 * Ideally, we would want to resend only if
	 *	prio(pending_interrupt) < mfrr &&
	 *	prio(pending_interrupt) < cppr
	 * where pending interrupt is the one that was rejected. But
	 * we don't have that state, so we simply trigger a resend
	 * whenever the MFRR is made less favored.
	 */
	do {
		old_state = new_state = READ_ONCE(icp->state);

		/* Set_MFRR */
		new_state.mfrr = mfrr;

		/* Check_IPI */
		reject = 0;
		resend = false;
		if (mfrr < new_state.cppr) {
			/* Reject a pending interrupt if not an IPI */
			if (mfrr <= new_state.pending_pri) {
				reject = new_state.xisr;
				new_state.pending_pri = mfrr;
				new_state.xisr = XICS_IPI;
			}
		}

		if (mfrr > old_state.mfrr) {
			resend = new_state.need_resend;
			new_state.need_resend = 0;
		}
	} while (!icp_try_update(icp, old_state, new_state, local));

	/* Handle reject */
	if (reject && reject != XICS_IPI)
		icp_deliver_irq(xics, icp, reject, false);

	/* Handle resend */
	if (resend)
		icp_check_resend(xics, icp);

	return H_SUCCESS;
}

static int kvmppc_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server)
{
	union kvmppc_icp_state state;
	struct kvmppc_icp *icp;

	icp = vcpu->arch.icp;
	if (icp->server_num != server) {
		icp = kvmppc_xics_find_server(vcpu->kvm, server);
		if (!icp)
			return H_PARAMETER;
	}
	state = READ_ONCE(icp->state);
	kvmppc_set_gpr(vcpu, 4, ((u32)state.cppr << 24) | state.xisr);
	kvmppc_set_gpr(vcpu, 5, state.mfrr);
	return H_SUCCESS;
}

static noinline void kvmppc_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
{
	union kvmppc_icp_state old_state, new_state;
	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
	struct kvmppc_icp *icp = vcpu->arch.icp;
	u32 reject;

	XICS_DBG("h_cppr vcpu %d cppr %#lx\n", vcpu->vcpu_id, cppr);

	/*
	 * ICP State: Set_CPPR
	 *
	 * We can safely compare the new value with the current
	 * value outside of the transaction as the CPPR is only
	 * ever changed by the processor on itself
	 */
	if (cppr > icp->state.cppr)
		icp_down_cppr(xics, icp, cppr);
	else if (cppr == icp->state.cppr)
		return;

	/*
	 * ICP State: Up_CPPR
	 *
	 * The processor is raising its priority, this can result
	 * in a rejection of a pending interrupt:
	 *
	 * ICP State: Reject_Current
	 *
	 * We can remove EE from the current processor, the update
	 * transaction will set it again if needed
	 */
	kvmppc_book3s_dequeue_irqprio(icp->vcpu, BOOK3S_INTERRUPT_EXTERNAL);

	do {
		old_state = new_state = READ_ONCE(icp->state);

		reject = 0;
		new_state.cppr = cppr;

		if (cppr <= new_state.pending_pri) {
			reject = new_state.xisr;
			new_state.xisr = 0;
			new_state.pending_pri = 0xff;
		}

	} while (!icp_try_update(icp, old_state, new_state, true));

	/*
	 * Check for rejects. They are handled by doing a new delivery
	 * attempt (see comments in icp_deliver_irq).
	 */
	if (reject && reject != XICS_IPI)
		icp_deliver_irq(xics, icp, reject, false);
}

static int ics_eoi(struct kvm_vcpu *vcpu, u32 irq)
{
	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
	struct kvmppc_icp *icp = vcpu->arch.icp;
	struct kvmppc_ics *ics;
	struct ics_irq_state *state;
	u16 src;
	u32 pq_old, pq_new;

	/*
	 * ICS EOI handling: For LSI, if P bit is still set, we need to
	 * resend it.
	 *
	 * For MSI, we move Q bit into P (and clear Q). If it is set,
	 * resend it.
	 */

	ics = kvmppc_xics_find_ics(xics, irq, &src);
	if (!ics) {
796 | XICS_DBG("ios_eoi: IRQ 0x%06x not found !\n" , irq); |
		return H_PARAMETER;
	}
	state = &ics->irq_state[src];

	if (state->lsi)
		pq_new = state->pq_state;
	else
		do {
			pq_old = state->pq_state;
			pq_new = pq_old >> 1;
		} while (cmpxchg(&state->pq_state, pq_old, pq_new) != pq_old);

	if (pq_new & PQ_PRESENTED)
		icp_deliver_irq(xics, icp, irq, false);

	kvm_notify_acked_irq(vcpu->kvm, 0, irq);

	return H_SUCCESS;
}

static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
{
	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
	struct kvmppc_icp *icp = vcpu->arch.icp;
	u32 irq = xirr & 0x00ffffff;

	XICS_DBG("h_eoi vcpu %d eoi %#lx\n", vcpu->vcpu_id, xirr);

	/*
	 * ICP State: EOI
	 *
	 * Note: If EOI is incorrectly used by SW to lower the CPPR
	 * value (ie more favored), we do not check for rejection of
	 * a pending interrupt, this is a SW error and PAPR specifies
	 * that we don't have to deal with it.
	 *
	 * The sending of an EOI to the ICS is handled after the
	 * CPPR update
	 *
	 * ICP State: Down_CPPR which we handle
	 * in a separate function as it's shared with H_CPPR.
	 */
	icp_down_cppr(xics, icp, xirr >> 24);

	/* IPIs have no EOI */
	if (irq == XICS_IPI)
		return H_SUCCESS;

	return ics_eoi(vcpu, irq);
}

int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
{
	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
	struct kvmppc_icp *icp = vcpu->arch.icp;

	XICS_DBG("XICS_RM: H_%x completing, act: %x state: %lx tgt: %p\n",
		 hcall, icp->rm_action, icp->rm_dbgstate.raw, icp->rm_dbgtgt);

	if (icp->rm_action & XICS_RM_KICK_VCPU) {
		icp->n_rm_kick_vcpu++;
		kvmppc_fast_vcpu_kick(icp->rm_kick_target);
	}
	if (icp->rm_action & XICS_RM_CHECK_RESEND) {
		icp->n_rm_check_resend++;
		icp_check_resend(xics, icp->rm_resend_icp);
	}
	if (icp->rm_action & XICS_RM_NOTIFY_EOI) {
		icp->n_rm_notify_eoi++;
		kvm_notify_acked_irq(vcpu->kvm, 0, icp->rm_eoied_irq);
	}

	icp->rm_action = 0;

	return H_SUCCESS;
}
EXPORT_SYMBOL_GPL(kvmppc_xics_rm_complete);

int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
{
	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
	unsigned long res;
	int rc = H_SUCCESS;

	/* Check if we have an ICP */
	if (!xics || !vcpu->arch.icp)
		return H_HARDWARE;

	/* These requests don't have real-mode implementations at present */
	switch (req) {
	case H_XIRR_X:
		res = kvmppc_h_xirr(vcpu);
		kvmppc_set_gpr(vcpu, 4, res);
		kvmppc_set_gpr(vcpu, 5, get_tb());
		return rc;
	case H_IPOLL:
		rc = kvmppc_h_ipoll(vcpu, kvmppc_get_gpr(vcpu, 4));
		return rc;
	}

	/* Check for real mode returning too hard */
	if (xics->real_mode && is_kvmppc_hv_enabled(vcpu->kvm))
		return kvmppc_xics_rm_complete(vcpu, req);

	switch (req) {
	case H_XIRR:
		res = kvmppc_h_xirr(vcpu);
		kvmppc_set_gpr(vcpu, 4, res);
		break;
	case H_CPPR:
		kvmppc_h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4));
		break;
	case H_EOI:
		rc = kvmppc_h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4));
		break;
	case H_IPI:
		rc = kvmppc_h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4),
				  kvmppc_get_gpr(vcpu, 5));
		break;
	}

	return rc;
}
EXPORT_SYMBOL_GPL(kvmppc_xics_hcall);


/* -- Initialisation code etc. -- */

static void xics_debugfs_irqmap(struct seq_file *m,
				struct kvmppc_passthru_irqmap *pimap)
{
	int i;

	if (!pimap)
		return;
	seq_printf(m, "========\nPIRQ mappings: %d maps\n===========\n",
		   pimap->n_mapped);
	for (i = 0; i < pimap->n_mapped; i++) {
		seq_printf(m, "r_hwirq=%x, v_hwirq=%x\n",
			   pimap->mapped[i].r_hwirq, pimap->mapped[i].v_hwirq);
	}
}

static int xics_debug_show(struct seq_file *m, void *private)
{
	struct kvmppc_xics *xics = m->private;
	struct kvm *kvm = xics->kvm;
	struct kvm_vcpu *vcpu;
	int icsid;
	unsigned long flags, i;
	unsigned long t_rm_kick_vcpu, t_rm_check_resend;
	unsigned long t_rm_notify_eoi;
	unsigned long t_reject, t_check_resend;

	if (!kvm)
		return 0;

	t_rm_kick_vcpu = 0;
	t_rm_notify_eoi = 0;
	t_rm_check_resend = 0;
	t_check_resend = 0;
	t_reject = 0;

	xics_debugfs_irqmap(m, kvm->arch.pimap);

	seq_printf(m, "=========\nICP state\n=========\n");

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvmppc_icp *icp = vcpu->arch.icp;
		union kvmppc_icp_state state;

		if (!icp)
			continue;

		state.raw = READ_ONCE(icp->state.raw);
		seq_printf(m, "cpu server %#lx XIRR:%#x PPRI:%#x CPPR:%#x MFRR:%#x OUT:%d NR:%d\n",
			   icp->server_num, state.xisr,
			   state.pending_pri, state.cppr, state.mfrr,
			   state.out_ee, state.need_resend);
		t_rm_kick_vcpu += icp->n_rm_kick_vcpu;
		t_rm_notify_eoi += icp->n_rm_notify_eoi;
		t_rm_check_resend += icp->n_rm_check_resend;
		t_check_resend += icp->n_check_resend;
		t_reject += icp->n_reject;
	}

	seq_printf(m, "ICP Guest->Host totals: kick_vcpu=%lu check_resend=%lu notify_eoi=%lu\n",
		   t_rm_kick_vcpu, t_rm_check_resend,
		   t_rm_notify_eoi);
986 | seq_printf(m, fmt: "ICP Real Mode totals: check_resend=%lu resend=%lu\n" , |
987 | t_check_resend, t_reject); |
	for (icsid = 0; icsid <= KVMPPC_XICS_MAX_ICS_ID; icsid++) {
		struct kvmppc_ics *ics = xics->ics[icsid];

		if (!ics)
			continue;

		seq_printf(m, "=========\nICS state for ICS 0x%x\n=========\n",
			   icsid);

		local_irq_save(flags);
		arch_spin_lock(&ics->lock);

		for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
			struct ics_irq_state *irq = &ics->irq_state[i];

			seq_printf(m, "irq 0x%06x: server %#x prio %#x save prio %#x pq_state %d resend %d masked pending %d\n",
				   irq->number, irq->server, irq->priority,
				   irq->saved_priority, irq->pq_state,
				   irq->resend, irq->masked_pending);

		}
		arch_spin_unlock(&ics->lock);
		local_irq_restore(flags);
	}
	return 0;
}

DEFINE_SHOW_ATTRIBUTE(xics_debug);

static void xics_debugfs_init(struct kvmppc_xics *xics)
{
	xics->dentry = debugfs_create_file("xics", 0444, xics->kvm->debugfs_dentry,
					   xics, &xics_debug_fops);

	pr_debug("%s: created\n", __func__);
}

static struct kvmppc_ics *kvmppc_xics_create_ics(struct kvm *kvm,
						 struct kvmppc_xics *xics, int irq)
{
	struct kvmppc_ics *ics;
	int i, icsid;

	icsid = irq >> KVMPPC_XICS_ICS_SHIFT;

	mutex_lock(&kvm->lock);

	/* ICS already exists - somebody else got here first */
	if (xics->ics[icsid])
		goto out;

	/* Create the ICS */
	ics = kzalloc(sizeof(struct kvmppc_ics), GFP_KERNEL);
	if (!ics)
		goto out;

	ics->icsid = icsid;

	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
		ics->irq_state[i].number = (icsid << KVMPPC_XICS_ICS_SHIFT) | i;
		ics->irq_state[i].priority = MASKED;
		ics->irq_state[i].saved_priority = MASKED;
	}
	smp_wmb();
	xics->ics[icsid] = ics;

	if (icsid > xics->max_icsid)
		xics->max_icsid = icsid;

 out:
	mutex_unlock(&kvm->lock);
	return xics->ics[icsid];
}

static int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server_num)
{
	struct kvmppc_icp *icp;

	if (!vcpu->kvm->arch.xics)
		return -ENODEV;

	if (kvmppc_xics_find_server(vcpu->kvm, server_num))
		return -EEXIST;

	icp = kzalloc(sizeof(struct kvmppc_icp), GFP_KERNEL);
	if (!icp)
		return -ENOMEM;

	icp->vcpu = vcpu;
	icp->server_num = server_num;
	icp->state.mfrr = MASKED;
	icp->state.pending_pri = MASKED;
	vcpu->arch.icp = icp;

	XICS_DBG("created server for vcpu %d\n", vcpu->vcpu_id);

	return 0;
}
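
/*
 * Pack the current ICP state into the 64-bit one_reg value (the ICP
 * state register in the uapi header): CPPR, XISR, MFRR and the pending
 * priority sit at the uapi-defined shifts. kvmppc_xics_set_icp() below
 * is the inverse; it revalidates the state before installing it.
 */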

u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu)
{
	struct kvmppc_icp *icp = vcpu->arch.icp;
	union kvmppc_icp_state state;

	if (!icp)
		return 0;
	state = icp->state;
	return ((u64)state.cppr << KVM_REG_PPC_ICP_CPPR_SHIFT) |
		((u64)state.xisr << KVM_REG_PPC_ICP_XISR_SHIFT) |
		((u64)state.mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT) |
		((u64)state.pending_pri << KVM_REG_PPC_ICP_PPRI_SHIFT);
}

int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
{
	struct kvmppc_icp *icp = vcpu->arch.icp;
	struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
	union kvmppc_icp_state old_state, new_state;
	struct kvmppc_ics *ics;
	u8 cppr, mfrr, pending_pri;
	u32 xisr;
	u16 src;
	bool resend;

	if (!icp || !xics)
		return -ENOENT;

	cppr = icpval >> KVM_REG_PPC_ICP_CPPR_SHIFT;
	xisr = (icpval >> KVM_REG_PPC_ICP_XISR_SHIFT) &
		KVM_REG_PPC_ICP_XISR_MASK;
	mfrr = icpval >> KVM_REG_PPC_ICP_MFRR_SHIFT;
	pending_pri = icpval >> KVM_REG_PPC_ICP_PPRI_SHIFT;

	/* Require the new state to be internally consistent */
	if (xisr == 0) {
		if (pending_pri != 0xff)
			return -EINVAL;
	} else if (xisr == XICS_IPI) {
		if (pending_pri != mfrr || pending_pri >= cppr)
			return -EINVAL;
	} else {
		if (pending_pri >= mfrr || pending_pri >= cppr)
			return -EINVAL;
		ics = kvmppc_xics_find_ics(xics, xisr, &src);
		if (!ics)
			return -EINVAL;
	}

	new_state.raw = 0;
	new_state.cppr = cppr;
	new_state.xisr = xisr;
	new_state.mfrr = mfrr;
	new_state.pending_pri = pending_pri;

	/*
	 * Deassert the CPU interrupt request.
	 * icp_try_update will reassert it if necessary.
	 */
	kvmppc_book3s_dequeue_irqprio(icp->vcpu, BOOK3S_INTERRUPT_EXTERNAL);

	/*
	 * Note that if we displace an interrupt from old_state.xisr,
	 * we don't mark it as rejected. We expect userspace to set
	 * the state of the interrupt sources to be consistent with
	 * the ICP states (either before or afterwards, which doesn't
	 * matter). We do handle resends due to CPPR becoming less
	 * favoured because that is necessary to end up with a
	 * consistent state in the situation where userspace restores
	 * the ICS states before the ICP states.
	 */
	do {
		old_state = READ_ONCE(icp->state);

		if (new_state.mfrr <= old_state.mfrr) {
			resend = false;
			new_state.need_resend = old_state.need_resend;
		} else {
			resend = old_state.need_resend;
			new_state.need_resend = 0;
		}
	} while (!icp_try_update(icp, old_state, new_state, false));

	if (resend)
		icp_check_resend(xics, icp);

	return 0;
}
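
/*
 * xics_get_source()/xics_set_source() below back the
 * KVM_DEV_XICS_GRP_SOURCES device attribute group: attr->attr selects
 * the global IRQ number and attr->addr points at a u64 encoding server,
 * priority and flags. A hedged sketch of how userspace would save and
 * restore one source with the standard KVM device ioctls (xics_fd and
 * irq are assumed to be set up by the caller):
 *
 *	u64 val;
 *	struct kvm_device_attr attr = {
 *		.group = KVM_DEV_XICS_GRP_SOURCES,
 *		.attr = irq,
 *		.addr = (u64)(uintptr_t)&val,
 *	};
 *	ioctl(xics_fd, KVM_GET_DEVICE_ATTR, &attr);	(save)
 *	ioctl(xics_fd, KVM_SET_DEVICE_ATTR, &attr);	(restore)
 */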

static int xics_get_source(struct kvmppc_xics *xics, long irq, u64 addr)
{
	int ret;
	struct kvmppc_ics *ics;
	struct ics_irq_state *irqp;
	u64 __user *ubufp = (u64 __user *) addr;
	u16 idx;
	u64 val, prio;
	unsigned long flags;

	ics = kvmppc_xics_find_ics(xics, irq, &idx);
	if (!ics)
		return -ENOENT;

	irqp = &ics->irq_state[idx];
	local_irq_save(flags);
	arch_spin_lock(&ics->lock);
	ret = -ENOENT;
	if (irqp->exists) {
		val = irqp->server;
		prio = irqp->priority;
		if (prio == MASKED) {
			val |= KVM_XICS_MASKED;
			prio = irqp->saved_priority;
		}
		val |= prio << KVM_XICS_PRIORITY_SHIFT;
		if (irqp->lsi) {
			val |= KVM_XICS_LEVEL_SENSITIVE;
			if (irqp->pq_state & PQ_PRESENTED)
				val |= KVM_XICS_PENDING;
		} else if (irqp->masked_pending || irqp->resend)
			val |= KVM_XICS_PENDING;

		if (irqp->pq_state & PQ_PRESENTED)
			val |= KVM_XICS_PRESENTED;

		if (irqp->pq_state & PQ_QUEUED)
			val |= KVM_XICS_QUEUED;

		ret = 0;
	}
	arch_spin_unlock(&ics->lock);
	local_irq_restore(flags);

	if (!ret && put_user(val, ubufp))
		ret = -EFAULT;

	return ret;
}

static int xics_set_source(struct kvmppc_xics *xics, long irq, u64 addr)
{
	struct kvmppc_ics *ics;
	struct ics_irq_state *irqp;
	u64 __user *ubufp = (u64 __user *) addr;
	u16 idx;
	u64 val;
	u8 prio;
	u32 server;
	unsigned long flags;

	if (irq < KVMPPC_XICS_FIRST_IRQ || irq >= KVMPPC_XICS_NR_IRQS)
		return -ENOENT;

	ics = kvmppc_xics_find_ics(xics, irq, &idx);
	if (!ics) {
		ics = kvmppc_xics_create_ics(xics->kvm, xics, irq);
		if (!ics)
			return -ENOMEM;
	}
	irqp = &ics->irq_state[idx];
	if (get_user(val, ubufp))
		return -EFAULT;

	server = val & KVM_XICS_DESTINATION_MASK;
	prio = val >> KVM_XICS_PRIORITY_SHIFT;
	if (prio != MASKED &&
	    kvmppc_xics_find_server(xics->kvm, server) == NULL)
		return -EINVAL;

	local_irq_save(flags);
	arch_spin_lock(&ics->lock);
	irqp->server = server;
	irqp->saved_priority = prio;
	if (val & KVM_XICS_MASKED)
		prio = MASKED;
	irqp->priority = prio;
	irqp->resend = 0;
	irqp->masked_pending = 0;
	irqp->lsi = 0;
	irqp->pq_state = 0;
	if (val & KVM_XICS_LEVEL_SENSITIVE)
		irqp->lsi = 1;
	/* If PENDING, set P in case P is not saved because of old code */
	if (val & KVM_XICS_PRESENTED || val & KVM_XICS_PENDING)
		irqp->pq_state |= PQ_PRESENTED;
	if (val & KVM_XICS_QUEUED)
		irqp->pq_state |= PQ_QUEUED;
	irqp->exists = 1;
	arch_spin_unlock(&ics->lock);
	local_irq_restore(flags);

	if (val & KVM_XICS_PENDING)
		icp_deliver_irq(xics, NULL, irqp->number, false);

	return 0;
}

int kvmppc_xics_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
			bool line_status)
{
	struct kvmppc_xics *xics = kvm->arch.xics;

	if (!xics)
		return -ENODEV;
	return ics_deliver_irq(xics, irq, level);
}

static int xics_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
	struct kvmppc_xics *xics = dev->private;

	switch (attr->group) {
	case KVM_DEV_XICS_GRP_SOURCES:
		return xics_set_source(xics, attr->attr, attr->addr);
	}
	return -ENXIO;
}

static int xics_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
	struct kvmppc_xics *xics = dev->private;

	switch (attr->group) {
	case KVM_DEV_XICS_GRP_SOURCES:
		return xics_get_source(xics, attr->attr, attr->addr);
	}
	return -ENXIO;
}

static int xics_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
	switch (attr->group) {
	case KVM_DEV_XICS_GRP_SOURCES:
		if (attr->attr >= KVMPPC_XICS_FIRST_IRQ &&
		    attr->attr < KVMPPC_XICS_NR_IRQS)
			return 0;
		break;
	}
	return -ENXIO;
}

/*
 * Called when device fd is closed. kvm->lock is held.
 */
static void kvmppc_xics_release(struct kvm_device *dev)
{
	struct kvmppc_xics *xics = dev->private;
	unsigned long i;
	struct kvm *kvm = xics->kvm;
	struct kvm_vcpu *vcpu;

	pr_devel("Releasing xics device\n");

	/*
	 * Since this is the device release function, we know that
	 * userspace does not have any open fd referring to the
	 * device. Therefore there can not be any of the device
	 * attribute set/get functions being executed concurrently,
	 * and similarly, the connect_vcpu and set/clr_mapped
	 * functions also cannot be being executed.
	 */

	debugfs_remove(xics->dentry);

	/*
	 * We should clean up the vCPU interrupt presenters first.
	 */
	kvm_for_each_vcpu(i, vcpu, kvm) {
		/*
		 * Take vcpu->mutex to ensure that no one_reg get/set ioctl
		 * (i.e. kvmppc_xics_[gs]et_icp) can be done concurrently.
		 * Holding the vcpu->mutex also means that execution is
		 * excluded for the vcpu until the ICP was freed. When the vcpu
		 * can execute again, vcpu->arch.icp and vcpu->arch.irq_type
		 * have been cleared and the vcpu will not be going into the
		 * XICS code anymore.
		 */
		mutex_lock(&vcpu->mutex);
		kvmppc_xics_free_icp(vcpu);
		mutex_unlock(&vcpu->mutex);
	}

	if (kvm)
		kvm->arch.xics = NULL;

	for (i = 0; i <= xics->max_icsid; i++) {
		kfree(xics->ics[i]);
		xics->ics[i] = NULL;
	}
	/*
	 * A reference of the kvmppc_xics pointer is now kept under
	 * the xics_device pointer of the machine for reuse. It is
	 * freed when the VM is destroyed for now until we fix all the
	 * execution paths.
	 */
	kfree(dev);
}

static struct kvmppc_xics *kvmppc_xics_get_device(struct kvm *kvm)
{
	struct kvmppc_xics **kvm_xics_device = &kvm->arch.xics_device;
	struct kvmppc_xics *xics = *kvm_xics_device;

	if (!xics) {
		xics = kzalloc(sizeof(*xics), GFP_KERNEL);
		*kvm_xics_device = xics;
	} else {
		memset(xics, 0, sizeof(*xics));
	}

	return xics;
}

static int kvmppc_xics_create(struct kvm_device *dev, u32 type)
{
	struct kvmppc_xics *xics;
	struct kvm *kvm = dev->kvm;

	pr_devel("Creating xics for partition\n");

	/* Already there ? */
	if (kvm->arch.xics)
		return -EEXIST;

	xics = kvmppc_xics_get_device(kvm);
	if (!xics)
		return -ENOMEM;

	dev->private = xics;
	xics->dev = dev;
	xics->kvm = kvm;
	kvm->arch.xics = xics;

#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
	if (cpu_has_feature(CPU_FTR_ARCH_206) &&
	    cpu_has_feature(CPU_FTR_HVMODE)) {
		/* Enable real mode support */
		xics->real_mode = ENABLE_REALMODE;
		xics->real_mode_dbg = DEBUG_REALMODE;
	}
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */

	return 0;
}

static void kvmppc_xics_init(struct kvm_device *dev)
{
	struct kvmppc_xics *xics = dev->private;

	xics_debugfs_init(xics);
}

struct kvm_device_ops kvm_xics_ops = {
	.name = "kvm-xics",
	.create = kvmppc_xics_create,
	.init = kvmppc_xics_init,
	.release = kvmppc_xics_release,
	.set_attr = xics_set_attr,
	.get_attr = xics_get_attr,
	.has_attr = xics_has_attr,
};

int kvmppc_xics_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu,
			     u32 xcpu)
{
	struct kvmppc_xics *xics = dev->private;
	int r = -EBUSY;

	if (dev->ops != &kvm_xics_ops)
		return -EPERM;
	if (xics->kvm != vcpu->kvm)
		return -EPERM;
	if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
		return -EBUSY;

	r = kvmppc_xics_create_icp(vcpu, xcpu);
	if (!r)
		vcpu->arch.irq_type = KVMPPC_IRQ_XICS;

	return r;
}

void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu)
{
	if (!vcpu->arch.icp)
		return;
	kfree(vcpu->arch.icp);
	vcpu->arch.icp = NULL;
	vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
}

void kvmppc_xics_set_mapped(struct kvm *kvm, unsigned long irq,
			    unsigned long host_irq)
{
	struct kvmppc_xics *xics = kvm->arch.xics;
	struct kvmppc_ics *ics;
	u16 idx;

	ics = kvmppc_xics_find_ics(xics, irq, &idx);
	if (!ics)
		return;

	ics->irq_state[idx].host_irq = host_irq;
	ics->irq_state[idx].intr_cpu = -1;
}
EXPORT_SYMBOL_GPL(kvmppc_xics_set_mapped);

void kvmppc_xics_clr_mapped(struct kvm *kvm, unsigned long irq,
			    unsigned long host_irq)
{
	struct kvmppc_xics *xics = kvm->arch.xics;
	struct kvmppc_ics *ics;
	u16 idx;

	ics = kvmppc_xics_find_ics(xics, irq, &idx);
	if (!ics)
		return;

	ics->irq_state[idx].host_irq = 0;
}
EXPORT_SYMBOL_GPL(kvmppc_xics_clr_mapped);