1 | /* |
2 | * Copyright (C) 2001 MandrakeSoft S.A. |
3 | * Copyright 2010 Red Hat, Inc. and/or its affiliates. |
4 | * |
5 | * MandrakeSoft S.A. |
6 | * 43, rue d'Aboukir |
7 | * 75002 Paris - France |
8 | * http://www.linux-mandrake.com/ |
9 | * http://www.mandrakesoft.com/ |
10 | * |
11 | * This library is free software; you can redistribute it and/or |
12 | * modify it under the terms of the GNU Lesser General Public |
13 | * License as published by the Free Software Foundation; either |
14 | * version 2 of the License, or (at your option) any later version. |
15 | * |
16 | * This library is distributed in the hope that it will be useful, |
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
19 | * Lesser General Public License for more details. |
20 | * |
21 | * You should have received a copy of the GNU Lesser General Public |
22 | * License along with this library; if not, write to the Free Software |
23 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
24 | * |
25 | * Yunhong Jiang <yunhong.jiang@intel.com> |
26 | * Yaozu (Eddie) Dong <eddie.dong@intel.com> |
27 | * Based on Xen 3.1 code. |
28 | */ |
29 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
30 | |
31 | #include <linux/kvm_host.h> |
32 | #include <linux/kvm.h> |
33 | #include <linux/mm.h> |
34 | #include <linux/highmem.h> |
35 | #include <linux/smp.h> |
36 | #include <linux/hrtimer.h> |
37 | #include <linux/io.h> |
38 | #include <linux/slab.h> |
39 | #include <linux/export.h> |
40 | #include <linux/nospec.h> |
41 | #include <asm/processor.h> |
42 | #include <asm/page.h> |
43 | #include <asm/current.h> |
44 | #include <trace/events/kvm.h> |
45 | |
46 | #include "ioapic.h" |
47 | #include "lapic.h" |
48 | #include "irq.h" |
49 | |
50 | static int ioapic_service(struct kvm_ioapic *vioapic, int irq, |
51 | bool line_status); |
52 | |
53 | static void kvm_ioapic_update_eoi_one(struct kvm_vcpu *vcpu, |
54 | struct kvm_ioapic *ioapic, |
55 | int trigger_mode, |
56 | int pin); |
57 | |
58 | static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic) |
59 | { |
60 | unsigned long result = 0; |
61 | |
62 | switch (ioapic->ioregsel) { |
63 | case IOAPIC_REG_VERSION: |
64 | result = ((((IOAPIC_NUM_PINS - 1) & 0xff) << 16) |
65 | | (IOAPIC_VERSION_ID & 0xff)); |
66 | break; |
67 | |
68 | case IOAPIC_REG_APIC_ID: |
69 | case IOAPIC_REG_ARB_ID: |
70 | result = ((ioapic->id & 0xf) << 24); |
71 | break; |
72 | |
73 | default: |
74 | { |
75 | u32 redir_index = (ioapic->ioregsel - 0x10) >> 1; |
76 | u64 redir_content = ~0ULL; |
77 | |
78 | if (redir_index < IOAPIC_NUM_PINS) { |
79 | u32 index = array_index_nospec( |
80 | redir_index, IOAPIC_NUM_PINS); |
81 | |
82 | redir_content = ioapic->redirtbl[index].bits; |
83 | } |
84 | |
85 | result = (ioapic->ioregsel & 0x1) ? |
86 | (redir_content >> 32) & 0xffffffff : |
87 | redir_content & 0xffffffff; |
88 | break; |
89 | } |
90 | } |
91 | |
92 | return result; |
93 | } |
94 | |
95 | static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic) |
96 | { |
97 | ioapic->rtc_status.pending_eoi = 0; |
98 | bitmap_zero(dst: ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_IDS); |
99 | } |
100 | |
101 | static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic); |
102 | |
103 | static void rtc_status_pending_eoi_check_valid(struct kvm_ioapic *ioapic) |
104 | { |
105 | if (WARN_ON(ioapic->rtc_status.pending_eoi < 0)) |
106 | kvm_rtc_eoi_tracking_restore_all(ioapic); |
107 | } |
108 | |
109 | static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu) |
110 | { |
111 | bool new_val, old_val; |
112 | struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; |
113 | struct dest_map *dest_map = &ioapic->rtc_status.dest_map; |
114 | union kvm_ioapic_redirect_entry *e; |
115 | |
116 | e = &ioapic->redirtbl[RTC_GSI]; |
117 | if (!kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT, |
118 | dest: e->fields.dest_id, |
119 | dest_mode: kvm_lapic_irq_dest_mode(dest_mode_logical: !!e->fields.dest_mode))) |
120 | return; |
121 | |
122 | new_val = kvm_apic_pending_eoi(vcpu, vector: e->fields.vector); |
123 | old_val = test_bit(vcpu->vcpu_id, dest_map->map); |
124 | |
125 | if (new_val == old_val) |
126 | return; |
127 | |
128 | if (new_val) { |
129 | __set_bit(vcpu->vcpu_id, dest_map->map); |
130 | dest_map->vectors[vcpu->vcpu_id] = e->fields.vector; |
131 | ioapic->rtc_status.pending_eoi++; |
132 | } else { |
133 | __clear_bit(vcpu->vcpu_id, dest_map->map); |
134 | ioapic->rtc_status.pending_eoi--; |
135 | rtc_status_pending_eoi_check_valid(ioapic); |
136 | } |
137 | } |
138 | |
139 | void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu) |
140 | { |
141 | struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; |
142 | |
143 | spin_lock(lock: &ioapic->lock); |
144 | __rtc_irq_eoi_tracking_restore_one(vcpu); |
145 | spin_unlock(lock: &ioapic->lock); |
146 | } |
147 | |
148 | static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic) |
149 | { |
150 | struct kvm_vcpu *vcpu; |
151 | unsigned long i; |
152 | |
153 | if (RTC_GSI >= IOAPIC_NUM_PINS) |
154 | return; |
155 | |
156 | rtc_irq_eoi_tracking_reset(ioapic); |
157 | kvm_for_each_vcpu(i, vcpu, ioapic->kvm) |
158 | __rtc_irq_eoi_tracking_restore_one(vcpu); |
159 | } |
160 | |
161 | static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu, |
162 | int vector) |
163 | { |
164 | struct dest_map *dest_map = &ioapic->rtc_status.dest_map; |
165 | |
166 | /* RTC special handling */ |
167 | if (test_bit(vcpu->vcpu_id, dest_map->map) && |
168 | (vector == dest_map->vectors[vcpu->vcpu_id]) && |
169 | (test_and_clear_bit(nr: vcpu->vcpu_id, |
170 | addr: ioapic->rtc_status.dest_map.map))) { |
171 | --ioapic->rtc_status.pending_eoi; |
172 | rtc_status_pending_eoi_check_valid(ioapic); |
173 | } |
174 | } |
175 | |
176 | static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic) |
177 | { |
178 | if (ioapic->rtc_status.pending_eoi > 0) |
179 | return true; /* coalesced */ |
180 | |
181 | return false; |
182 | } |
183 | |
184 | static void ioapic_lazy_update_eoi(struct kvm_ioapic *ioapic, int irq) |
185 | { |
186 | unsigned long i; |
187 | struct kvm_vcpu *vcpu; |
188 | union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq]; |
189 | |
190 | kvm_for_each_vcpu(i, vcpu, ioapic->kvm) { |
191 | if (!kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT, |
192 | dest: entry->fields.dest_id, |
193 | dest_mode: entry->fields.dest_mode) || |
194 | kvm_apic_pending_eoi(vcpu, vector: entry->fields.vector)) |
195 | continue; |
196 | |
197 | /* |
198 | * If no longer has pending EOI in LAPICs, update |
199 | * EOI for this vector. |
200 | */ |
201 | rtc_irq_eoi(ioapic, vcpu, vector: entry->fields.vector); |
202 | break; |
203 | } |
204 | } |
205 | |
206 | static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq, |
207 | int irq_level, bool line_status) |
208 | { |
209 | union kvm_ioapic_redirect_entry entry; |
210 | u32 mask = 1 << irq; |
211 | u32 old_irr; |
212 | int edge, ret; |
213 | |
214 | entry = ioapic->redirtbl[irq]; |
215 | edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG); |
216 | |
217 | if (!irq_level) { |
218 | ioapic->irr &= ~mask; |
219 | ret = 1; |
220 | goto out; |
221 | } |
222 | |
223 | /* |
224 | * AMD SVM AVIC accelerate EOI write iff the interrupt is edge |
225 | * triggered, in which case the in-kernel IOAPIC will not be able |
226 | * to receive the EOI. In this case, we do a lazy update of the |
227 | * pending EOI when trying to set IOAPIC irq. |
228 | */ |
229 | if (edge && kvm_apicv_activated(kvm: ioapic->kvm)) |
230 | ioapic_lazy_update_eoi(ioapic, irq); |
231 | |
232 | /* |
233 | * Return 0 for coalesced interrupts; for edge-triggered interrupts, |
234 | * this only happens if a previous edge has not been delivered due |
235 | * to masking. For level interrupts, the remote_irr field tells |
236 | * us if the interrupt is waiting for an EOI. |
237 | * |
238 | * RTC is special: it is edge-triggered, but userspace likes to know |
239 | * if it has been already ack-ed via EOI because coalesced RTC |
240 | * interrupts lead to time drift in Windows guests. So we track |
241 | * EOI manually for the RTC interrupt. |
242 | */ |
243 | if (irq == RTC_GSI && line_status && |
244 | rtc_irq_check_coalesced(ioapic)) { |
245 | ret = 0; |
246 | goto out; |
247 | } |
248 | |
249 | old_irr = ioapic->irr; |
250 | ioapic->irr |= mask; |
251 | if (edge) { |
252 | ioapic->irr_delivered &= ~mask; |
253 | if (old_irr == ioapic->irr) { |
254 | ret = 0; |
255 | goto out; |
256 | } |
257 | } |
258 | |
259 | ret = ioapic_service(vioapic: ioapic, irq, line_status); |
260 | |
261 | out: |
262 | trace_kvm_ioapic_set_irq(e: entry.bits, pin: irq, coalesced: ret == 0); |
263 | return ret; |
264 | } |
265 | |
266 | static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr) |
267 | { |
268 | u32 idx; |
269 | |
270 | rtc_irq_eoi_tracking_reset(ioapic); |
271 | for_each_set_bit(idx, &irr, IOAPIC_NUM_PINS) |
272 | ioapic_set_irq(ioapic, irq: idx, irq_level: 1, line_status: true); |
273 | |
274 | kvm_rtc_eoi_tracking_restore_all(ioapic); |
275 | } |
276 | |
277 | |
278 | void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors) |
279 | { |
280 | struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; |
281 | struct dest_map *dest_map = &ioapic->rtc_status.dest_map; |
282 | union kvm_ioapic_redirect_entry *e; |
283 | int index; |
284 | |
285 | spin_lock(lock: &ioapic->lock); |
286 | |
287 | /* Make sure we see any missing RTC EOI */ |
288 | if (test_bit(vcpu->vcpu_id, dest_map->map)) |
289 | __set_bit(dest_map->vectors[vcpu->vcpu_id], |
290 | ioapic_handled_vectors); |
291 | |
292 | for (index = 0; index < IOAPIC_NUM_PINS; index++) { |
293 | e = &ioapic->redirtbl[index]; |
294 | if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG || |
295 | kvm_irq_has_notifier(kvm: ioapic->kvm, KVM_IRQCHIP_IOAPIC, pin: index) || |
296 | index == RTC_GSI) { |
297 | u16 dm = kvm_lapic_irq_dest_mode(dest_mode_logical: !!e->fields.dest_mode); |
298 | |
299 | if (kvm_apic_match_dest(vcpu, NULL, APIC_DEST_NOSHORT, |
300 | dest: e->fields.dest_id, dest_mode: dm) || |
301 | kvm_apic_pending_eoi(vcpu, vector: e->fields.vector)) |
302 | __set_bit(e->fields.vector, |
303 | ioapic_handled_vectors); |
304 | } |
305 | } |
306 | spin_unlock(lock: &ioapic->lock); |
307 | } |
308 | |
/*
 * Request an I/O APIC rescan for the VM, but only when the I/O APIC is
 * emulated in the kernel.
 */
void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm)
{
	if (ioapic_in_kernel(kvm))
		kvm_make_scan_ioapic_request(kvm);
}
315 | |
316 | static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) |
317 | { |
318 | unsigned index; |
319 | bool mask_before, mask_after; |
320 | union kvm_ioapic_redirect_entry *e; |
321 | int old_remote_irr, old_delivery_status, old_dest_id, old_dest_mode; |
322 | DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS); |
323 | |
324 | switch (ioapic->ioregsel) { |
325 | case IOAPIC_REG_VERSION: |
326 | /* Writes are ignored. */ |
327 | break; |
328 | |
329 | case IOAPIC_REG_APIC_ID: |
330 | ioapic->id = (val >> 24) & 0xf; |
331 | break; |
332 | |
333 | case IOAPIC_REG_ARB_ID: |
334 | break; |
335 | |
336 | default: |
337 | index = (ioapic->ioregsel - 0x10) >> 1; |
338 | |
339 | if (index >= IOAPIC_NUM_PINS) |
340 | return; |
341 | index = array_index_nospec(index, IOAPIC_NUM_PINS); |
342 | e = &ioapic->redirtbl[index]; |
343 | mask_before = e->fields.mask; |
344 | /* Preserve read-only fields */ |
345 | old_remote_irr = e->fields.remote_irr; |
346 | old_delivery_status = e->fields.delivery_status; |
347 | old_dest_id = e->fields.dest_id; |
348 | old_dest_mode = e->fields.dest_mode; |
349 | if (ioapic->ioregsel & 1) { |
350 | e->bits &= 0xffffffff; |
351 | e->bits |= (u64) val << 32; |
352 | } else { |
353 | e->bits &= ~0xffffffffULL; |
354 | e->bits |= (u32) val; |
355 | } |
356 | e->fields.remote_irr = old_remote_irr; |
357 | e->fields.delivery_status = old_delivery_status; |
358 | |
359 | /* |
360 | * Some OSes (Linux, Xen) assume that Remote IRR bit will |
361 | * be cleared by IOAPIC hardware when the entry is configured |
362 | * as edge-triggered. This behavior is used to simulate an |
363 | * explicit EOI on IOAPICs that don't have the EOI register. |
364 | */ |
365 | if (e->fields.trig_mode == IOAPIC_EDGE_TRIG) |
366 | e->fields.remote_irr = 0; |
367 | |
368 | mask_after = e->fields.mask; |
369 | if (mask_before != mask_after) |
370 | kvm_fire_mask_notifiers(kvm: ioapic->kvm, KVM_IRQCHIP_IOAPIC, pin: index, mask: mask_after); |
371 | if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG && |
372 | ioapic->irr & (1 << index) && !e->fields.mask && !e->fields.remote_irr) { |
373 | /* |
374 | * Pending status in irr may be outdated: the IRQ line may have |
375 | * already been deasserted by a device while the IRQ was masked. |
376 | * This occurs, for instance, if the interrupt is handled in a |
377 | * Linux guest as a oneshot interrupt (IRQF_ONESHOT). In this |
378 | * case the guest acknowledges the interrupt to the device in |
379 | * its threaded irq handler, i.e. after the EOI but before |
380 | * unmasking, so at the time of unmasking the IRQ line is |
381 | * already down but our pending irr bit is still set. In such |
382 | * cases, injecting this pending interrupt to the guest is |
383 | * buggy: the guest will receive an extra unwanted interrupt. |
384 | * |
385 | * So we need to check here if the IRQ is actually still pending. |
386 | * As we are generally not able to probe the IRQ line status |
387 | * directly, we do it through irqfd resampler. Namely, we clear |
388 | * the pending status and notify the resampler that this interrupt |
389 | * is done, without actually injecting it into the guest. If the |
390 | * IRQ line is actually already deasserted, we are done. If it is |
391 | * still asserted, a new interrupt will be shortly triggered |
392 | * through irqfd and injected into the guest. |
393 | * |
394 | * If, however, it's not possible to resample (no irqfd resampler |
395 | * registered for this irq), then unconditionally inject this |
396 | * pending interrupt into the guest, so the guest will not miss |
397 | * an interrupt, although may get an extra unwanted interrupt. |
398 | */ |
399 | if (kvm_notify_irqfd_resampler(kvm: ioapic->kvm, KVM_IRQCHIP_IOAPIC, pin: index)) |
400 | ioapic->irr &= ~(1 << index); |
401 | else |
402 | ioapic_service(vioapic: ioapic, irq: index, line_status: false); |
403 | } |
404 | if (e->fields.delivery_mode == APIC_DM_FIXED) { |
405 | struct kvm_lapic_irq irq; |
406 | |
407 | irq.vector = e->fields.vector; |
408 | irq.delivery_mode = e->fields.delivery_mode << 8; |
409 | irq.dest_mode = |
410 | kvm_lapic_irq_dest_mode(dest_mode_logical: !!e->fields.dest_mode); |
411 | irq.level = false; |
412 | irq.trig_mode = e->fields.trig_mode; |
413 | irq.shorthand = APIC_DEST_NOSHORT; |
414 | irq.dest_id = e->fields.dest_id; |
415 | irq.msi_redir_hint = false; |
416 | bitmap_zero(dst: vcpu_bitmap, KVM_MAX_VCPUS); |
417 | kvm_bitmap_or_dest_vcpus(kvm: ioapic->kvm, irq: &irq, |
418 | vcpu_bitmap); |
419 | if (old_dest_mode != e->fields.dest_mode || |
420 | old_dest_id != e->fields.dest_id) { |
421 | /* |
422 | * Update vcpu_bitmap with vcpus specified in |
423 | * the previous request as well. This is done to |
424 | * keep ioapic_handled_vectors synchronized. |
425 | */ |
426 | irq.dest_id = old_dest_id; |
427 | irq.dest_mode = |
428 | kvm_lapic_irq_dest_mode( |
429 | dest_mode_logical: !!e->fields.dest_mode); |
430 | kvm_bitmap_or_dest_vcpus(kvm: ioapic->kvm, irq: &irq, |
431 | vcpu_bitmap); |
432 | } |
433 | kvm_make_scan_ioapic_request_mask(kvm: ioapic->kvm, |
434 | vcpu_bitmap); |
435 | } else { |
436 | kvm_make_scan_ioapic_request(kvm: ioapic->kvm); |
437 | } |
438 | break; |
439 | } |
440 | } |
441 | |
442 | static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status) |
443 | { |
444 | union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq]; |
445 | struct kvm_lapic_irq irqe; |
446 | int ret; |
447 | |
448 | if (entry->fields.mask || |
449 | (entry->fields.trig_mode == IOAPIC_LEVEL_TRIG && |
450 | entry->fields.remote_irr)) |
451 | return -1; |
452 | |
453 | irqe.dest_id = entry->fields.dest_id; |
454 | irqe.vector = entry->fields.vector; |
455 | irqe.dest_mode = kvm_lapic_irq_dest_mode(dest_mode_logical: !!entry->fields.dest_mode); |
456 | irqe.trig_mode = entry->fields.trig_mode; |
457 | irqe.delivery_mode = entry->fields.delivery_mode << 8; |
458 | irqe.level = 1; |
459 | irqe.shorthand = APIC_DEST_NOSHORT; |
460 | irqe.msi_redir_hint = false; |
461 | |
462 | if (irqe.trig_mode == IOAPIC_EDGE_TRIG) |
463 | ioapic->irr_delivered |= 1 << irq; |
464 | |
465 | if (irq == RTC_GSI && line_status) { |
466 | /* |
467 | * pending_eoi cannot ever become negative (see |
468 | * rtc_status_pending_eoi_check_valid) and the caller |
469 | * ensures that it is only called if it is >= zero, namely |
470 | * if rtc_irq_check_coalesced returns false). |
471 | */ |
472 | BUG_ON(ioapic->rtc_status.pending_eoi != 0); |
473 | ret = kvm_irq_delivery_to_apic(kvm: ioapic->kvm, NULL, irq: &irqe, |
474 | dest_map: &ioapic->rtc_status.dest_map); |
475 | ioapic->rtc_status.pending_eoi = (ret < 0 ? 0 : ret); |
476 | } else |
477 | ret = kvm_irq_delivery_to_apic(kvm: ioapic->kvm, NULL, irq: &irqe, NULL); |
478 | |
479 | if (ret && irqe.trig_mode == IOAPIC_LEVEL_TRIG) |
480 | entry->fields.remote_irr = 1; |
481 | |
482 | return ret; |
483 | } |
484 | |
485 | int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, |
486 | int level, bool line_status) |
487 | { |
488 | int ret, irq_level; |
489 | |
490 | BUG_ON(irq < 0 || irq >= IOAPIC_NUM_PINS); |
491 | |
492 | spin_lock(lock: &ioapic->lock); |
493 | irq_level = __kvm_irq_line_state(irq_state: &ioapic->irq_states[irq], |
494 | irq_source_id, level); |
495 | ret = ioapic_set_irq(ioapic, irq, irq_level, line_status); |
496 | |
497 | spin_unlock(lock: &ioapic->lock); |
498 | |
499 | return ret; |
500 | } |
501 | |
502 | void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id) |
503 | { |
504 | int i; |
505 | |
506 | spin_lock(lock: &ioapic->lock); |
507 | for (i = 0; i < KVM_IOAPIC_NUM_PINS; i++) |
508 | __clear_bit(irq_source_id, &ioapic->irq_states[i]); |
509 | spin_unlock(lock: &ioapic->lock); |
510 | } |
511 | |
512 | static void kvm_ioapic_eoi_inject_work(struct work_struct *work) |
513 | { |
514 | int i; |
515 | struct kvm_ioapic *ioapic = container_of(work, struct kvm_ioapic, |
516 | eoi_inject.work); |
517 | spin_lock(lock: &ioapic->lock); |
518 | for (i = 0; i < IOAPIC_NUM_PINS; i++) { |
519 | union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i]; |
520 | |
521 | if (ent->fields.trig_mode != IOAPIC_LEVEL_TRIG) |
522 | continue; |
523 | |
524 | if (ioapic->irr & (1 << i) && !ent->fields.remote_irr) |
525 | ioapic_service(ioapic, irq: i, line_status: false); |
526 | } |
527 | spin_unlock(lock: &ioapic->lock); |
528 | } |
529 | |
530 | #define IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT 10000 |
531 | static void kvm_ioapic_update_eoi_one(struct kvm_vcpu *vcpu, |
532 | struct kvm_ioapic *ioapic, |
533 | int trigger_mode, |
534 | int pin) |
535 | { |
536 | struct kvm_lapic *apic = vcpu->arch.apic; |
537 | union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[pin]; |
538 | |
539 | /* |
540 | * We are dropping lock while calling ack notifiers because ack |
541 | * notifier callbacks for assigned devices call into IOAPIC |
542 | * recursively. Since remote_irr is cleared only after call |
543 | * to notifiers if the same vector will be delivered while lock |
544 | * is dropped it will be put into irr and will be delivered |
545 | * after ack notifier returns. |
546 | */ |
547 | spin_unlock(lock: &ioapic->lock); |
548 | kvm_notify_acked_irq(kvm: ioapic->kvm, KVM_IRQCHIP_IOAPIC, pin); |
549 | spin_lock(lock: &ioapic->lock); |
550 | |
551 | if (trigger_mode != IOAPIC_LEVEL_TRIG || |
552 | kvm_lapic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) |
553 | return; |
554 | |
555 | ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); |
556 | ent->fields.remote_irr = 0; |
557 | if (!ent->fields.mask && (ioapic->irr & (1 << pin))) { |
558 | ++ioapic->irq_eoi[pin]; |
559 | if (ioapic->irq_eoi[pin] == IOAPIC_SUCCESSIVE_IRQ_MAX_COUNT) { |
560 | /* |
561 | * Real hardware does not deliver the interrupt |
562 | * immediately during eoi broadcast, and this |
563 | * lets a buggy guest make slow progress |
564 | * even if it does not correctly handle a |
565 | * level-triggered interrupt. Emulate this |
566 | * behavior if we detect an interrupt storm. |
567 | */ |
568 | schedule_delayed_work(dwork: &ioapic->eoi_inject, HZ / 100); |
569 | ioapic->irq_eoi[pin] = 0; |
570 | trace_kvm_ioapic_delayed_eoi_inj(e: ent->bits); |
571 | } else { |
572 | ioapic_service(ioapic, irq: pin, line_status: false); |
573 | } |
574 | } else { |
575 | ioapic->irq_eoi[pin] = 0; |
576 | } |
577 | } |
578 | |
579 | void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode) |
580 | { |
581 | int i; |
582 | struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; |
583 | |
584 | spin_lock(lock: &ioapic->lock); |
585 | rtc_irq_eoi(ioapic, vcpu, vector); |
586 | for (i = 0; i < IOAPIC_NUM_PINS; i++) { |
587 | union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i]; |
588 | |
589 | if (ent->fields.vector != vector) |
590 | continue; |
591 | kvm_ioapic_update_eoi_one(vcpu, ioapic, trigger_mode, pin: i); |
592 | } |
593 | spin_unlock(lock: &ioapic->lock); |
594 | } |
595 | |
596 | static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev) |
597 | { |
598 | return container_of(dev, struct kvm_ioapic, dev); |
599 | } |
600 | |
601 | static inline int ioapic_in_range(struct kvm_ioapic *ioapic, gpa_t addr) |
602 | { |
603 | return ((addr >= ioapic->base_address && |
604 | (addr < ioapic->base_address + IOAPIC_MEM_LENGTH))); |
605 | } |
606 | |
607 | static int ioapic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, |
608 | gpa_t addr, int len, void *val) |
609 | { |
610 | struct kvm_ioapic *ioapic = to_ioapic(dev: this); |
611 | u32 result; |
612 | if (!ioapic_in_range(ioapic, addr)) |
613 | return -EOPNOTSUPP; |
614 | |
615 | ASSERT(!(addr & 0xf)); /* check alignment */ |
616 | |
617 | addr &= 0xff; |
618 | spin_lock(lock: &ioapic->lock); |
619 | switch (addr) { |
620 | case IOAPIC_REG_SELECT: |
621 | result = ioapic->ioregsel; |
622 | break; |
623 | |
624 | case IOAPIC_REG_WINDOW: |
625 | result = ioapic_read_indirect(ioapic); |
626 | break; |
627 | |
628 | default: |
629 | result = 0; |
630 | break; |
631 | } |
632 | spin_unlock(lock: &ioapic->lock); |
633 | |
634 | switch (len) { |
635 | case 8: |
636 | *(u64 *) val = result; |
637 | break; |
638 | case 1: |
639 | case 2: |
640 | case 4: |
641 | memcpy(val, (char *)&result, len); |
642 | break; |
643 | default: |
644 | printk(KERN_WARNING "ioapic: wrong length %d\n" , len); |
645 | } |
646 | return 0; |
647 | } |
648 | |
649 | static int ioapic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, |
650 | gpa_t addr, int len, const void *val) |
651 | { |
652 | struct kvm_ioapic *ioapic = to_ioapic(dev: this); |
653 | u32 data; |
654 | if (!ioapic_in_range(ioapic, addr)) |
655 | return -EOPNOTSUPP; |
656 | |
657 | ASSERT(!(addr & 0xf)); /* check alignment */ |
658 | |
659 | switch (len) { |
660 | case 8: |
661 | case 4: |
662 | data = *(u32 *) val; |
663 | break; |
664 | case 2: |
665 | data = *(u16 *) val; |
666 | break; |
667 | case 1: |
668 | data = *(u8 *) val; |
669 | break; |
670 | default: |
671 | printk(KERN_WARNING "ioapic: Unsupported size %d\n" , len); |
672 | return 0; |
673 | } |
674 | |
675 | addr &= 0xff; |
676 | spin_lock(lock: &ioapic->lock); |
677 | switch (addr) { |
678 | case IOAPIC_REG_SELECT: |
679 | ioapic->ioregsel = data & 0xFF; /* 8-bit register */ |
680 | break; |
681 | |
682 | case IOAPIC_REG_WINDOW: |
683 | ioapic_write_indirect(ioapic, val: data); |
684 | break; |
685 | |
686 | default: |
687 | break; |
688 | } |
689 | spin_unlock(lock: &ioapic->lock); |
690 | return 0; |
691 | } |
692 | |
693 | static void kvm_ioapic_reset(struct kvm_ioapic *ioapic) |
694 | { |
695 | int i; |
696 | |
697 | cancel_delayed_work_sync(dwork: &ioapic->eoi_inject); |
698 | for (i = 0; i < IOAPIC_NUM_PINS; i++) |
699 | ioapic->redirtbl[i].fields.mask = 1; |
700 | ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS; |
701 | ioapic->ioregsel = 0; |
702 | ioapic->irr = 0; |
703 | ioapic->irr_delivered = 0; |
704 | ioapic->id = 0; |
705 | memset(ioapic->irq_eoi, 0x00, sizeof(ioapic->irq_eoi)); |
706 | rtc_irq_eoi_tracking_reset(ioapic); |
707 | } |
708 | |
709 | static const struct kvm_io_device_ops ioapic_mmio_ops = { |
710 | .read = ioapic_mmio_read, |
711 | .write = ioapic_mmio_write, |
712 | }; |
713 | |
714 | int kvm_ioapic_init(struct kvm *kvm) |
715 | { |
716 | struct kvm_ioapic *ioapic; |
717 | int ret; |
718 | |
719 | ioapic = kzalloc(size: sizeof(struct kvm_ioapic), GFP_KERNEL_ACCOUNT); |
720 | if (!ioapic) |
721 | return -ENOMEM; |
722 | spin_lock_init(&ioapic->lock); |
723 | INIT_DELAYED_WORK(&ioapic->eoi_inject, kvm_ioapic_eoi_inject_work); |
724 | kvm->arch.vioapic = ioapic; |
725 | kvm_ioapic_reset(ioapic); |
726 | kvm_iodevice_init(dev: &ioapic->dev, ops: &ioapic_mmio_ops); |
727 | ioapic->kvm = kvm; |
728 | mutex_lock(&kvm->slots_lock); |
729 | ret = kvm_io_bus_register_dev(kvm, bus_idx: KVM_MMIO_BUS, addr: ioapic->base_address, |
730 | IOAPIC_MEM_LENGTH, dev: &ioapic->dev); |
731 | mutex_unlock(lock: &kvm->slots_lock); |
732 | if (ret < 0) { |
733 | kvm->arch.vioapic = NULL; |
734 | kfree(objp: ioapic); |
735 | } |
736 | |
737 | return ret; |
738 | } |
739 | |
740 | void kvm_ioapic_destroy(struct kvm *kvm) |
741 | { |
742 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; |
743 | |
744 | if (!ioapic) |
745 | return; |
746 | |
747 | cancel_delayed_work_sync(dwork: &ioapic->eoi_inject); |
748 | mutex_lock(&kvm->slots_lock); |
749 | kvm_io_bus_unregister_dev(kvm, bus_idx: KVM_MMIO_BUS, dev: &ioapic->dev); |
750 | mutex_unlock(lock: &kvm->slots_lock); |
751 | kvm->arch.vioapic = NULL; |
752 | kfree(objp: ioapic); |
753 | } |
754 | |
755 | void kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) |
756 | { |
757 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; |
758 | |
759 | spin_lock(lock: &ioapic->lock); |
760 | memcpy(state, ioapic, sizeof(struct kvm_ioapic_state)); |
761 | state->irr &= ~ioapic->irr_delivered; |
762 | spin_unlock(lock: &ioapic->lock); |
763 | } |
764 | |
765 | void kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) |
766 | { |
767 | struct kvm_ioapic *ioapic = kvm->arch.vioapic; |
768 | |
769 | spin_lock(lock: &ioapic->lock); |
770 | memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); |
771 | ioapic->irr = 0; |
772 | ioapic->irr_delivered = 0; |
773 | kvm_make_scan_ioapic_request(kvm); |
774 | kvm_ioapic_inject_all(ioapic, irr: state->irr); |
775 | spin_unlock(lock: &ioapic->lock); |
776 | } |
777 | |