1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Xen event channels |
4 | * |
5 | * Xen models interrupts with abstract event channels. Because each |
 * domain gets 1024 event channels, but NR_IRQS is not that large, we
7 | * must dynamically map irqs<->event channels. The event channels |
8 | * interface with the rest of the kernel by defining a xen interrupt |
9 | * chip. When an event is received, it is mapped to an irq and sent |
10 | * through the normal interrupt processing path. |
11 | * |
12 | * There are four kinds of events which can be mapped to an event |
13 | * channel: |
14 | * |
15 | * 1. Inter-domain notifications. This includes all the virtual |
16 | * device events, since they're driven by front-ends in another domain |
17 | * (typically dom0). |
18 | * 2. VIRQs, typically used for timers. These are per-cpu events. |
19 | * 3. IPIs. |
20 | * 4. PIRQs - Hardware interrupts. |
21 | * |
22 | * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007 |
23 | */ |
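
/*
 * Minimal usage sketch (illustrative only): a frontend driver that already
 * holds an event channel port typically binds it and installs a handler
 * roughly as below; "my_handler" and "my_dev" are hypothetical driver-side
 * names, not part of this file.
 *
 *	irq = bind_evtchn_to_irqhandler(evtchn, my_handler, 0,
 *					"my-frontend", my_dev);
 *	if (irq < 0)
 *		return irq;
 *	...
 *	unbind_from_irqhandler(irq, my_dev);
 */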
24 | |
25 | #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt |
26 | |
27 | #include <linux/linkage.h> |
28 | #include <linux/interrupt.h> |
29 | #include <linux/irq.h> |
30 | #include <linux/moduleparam.h> |
31 | #include <linux/string.h> |
32 | #include <linux/memblock.h> |
33 | #include <linux/slab.h> |
34 | #include <linux/irqnr.h> |
35 | #include <linux/pci.h> |
36 | #include <linux/rcupdate.h> |
37 | #include <linux/spinlock.h> |
38 | #include <linux/cpuhotplug.h> |
39 | #include <linux/atomic.h> |
40 | #include <linux/ktime.h> |
41 | |
42 | #ifdef CONFIG_X86 |
43 | #include <asm/desc.h> |
44 | #include <asm/ptrace.h> |
45 | #include <asm/idtentry.h> |
46 | #include <asm/irq.h> |
47 | #include <asm/io_apic.h> |
48 | #include <asm/i8259.h> |
49 | #include <asm/xen/cpuid.h> |
50 | #include <asm/xen/pci.h> |
51 | #endif |
52 | #include <asm/sync_bitops.h> |
53 | #include <asm/xen/hypercall.h> |
54 | #include <asm/xen/hypervisor.h> |
55 | #include <xen/page.h> |
56 | |
57 | #include <xen/xen.h> |
58 | #include <xen/hvm.h> |
59 | #include <xen/xen-ops.h> |
60 | #include <xen/events.h> |
61 | #include <xen/interface/xen.h> |
62 | #include <xen/interface/event_channel.h> |
63 | #include <xen/interface/hvm/hvm_op.h> |
64 | #include <xen/interface/hvm/params.h> |
65 | #include <xen/interface/physdev.h> |
66 | #include <xen/interface/sched.h> |
67 | #include <xen/interface/vcpu.h> |
68 | #include <xen/xenbus.h> |
69 | #include <asm/hw_irq.h> |
70 | |
71 | #include "events_internal.h" |
72 | |
73 | #undef MODULE_PARAM_PREFIX |
74 | #define MODULE_PARAM_PREFIX "xen." |
75 | |
76 | /* Interrupt types. */ |
77 | enum xen_irq_type { |
78 | IRQT_UNBOUND = 0, |
79 | IRQT_PIRQ, |
80 | IRQT_VIRQ, |
81 | IRQT_IPI, |
82 | IRQT_EVTCHN |
83 | }; |
84 | |
85 | /* |
86 | * Packed IRQ information: |
87 | * type - enum xen_irq_type |
88 | * event channel - irq->event channel mapping |
89 | * cpu - cpu this event channel is bound to |
90 | * index - type-specific information: |
 *    PIRQ - vector, with MSB being "needs EOI", or physical IRQ of the HVM
 *           guest, or GSI (real passthrough IRQ) of the device.
 *    VIRQ - virq number
 *    IPI - IPI vector
 *    EVTCHN - xenbus device of the interdomain peer, if any
96 | */ |
97 | struct irq_info { |
98 | struct list_head list; |
99 | struct list_head eoi_list; |
100 | struct rcu_work rwork; |
101 | short refcnt; |
102 | u8 spurious_cnt; |
103 | u8 is_accounted; |
104 | short type; /* type: IRQT_* */ |
105 | u8 mask_reason; /* Why is event channel masked */ |
106 | #define EVT_MASK_REASON_EXPLICIT 0x01 |
107 | #define EVT_MASK_REASON_TEMPORARY 0x02 |
108 | #define EVT_MASK_REASON_EOI_PENDING 0x04 |
109 | u8 is_active; /* Is event just being handled? */ |
110 | unsigned irq; |
111 | evtchn_port_t evtchn; /* event channel */ |
112 | unsigned short cpu; /* cpu bound */ |
113 | unsigned short eoi_cpu; /* EOI must happen on this cpu-1 */ |
114 | unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */ |
115 | u64 eoi_time; /* Time in jiffies when to EOI. */ |
116 | raw_spinlock_t lock; |
117 | bool is_static; /* Is event channel static */ |
118 | |
119 | union { |
120 | unsigned short virq; |
121 | enum ipi_vector ipi; |
122 | struct { |
123 | unsigned short pirq; |
124 | unsigned short gsi; |
125 | unsigned char vector; |
126 | unsigned char flags; |
127 | uint16_t domid; |
128 | } pirq; |
129 | struct xenbus_device *interdomain; |
130 | } u; |
131 | }; |
132 | |
133 | #define PIRQ_NEEDS_EOI (1 << 0) |
134 | #define PIRQ_SHAREABLE (1 << 1) |
135 | #define PIRQ_MSI_GROUP (1 << 2) |
136 | |
137 | static uint __read_mostly event_loop_timeout = 2; |
138 | module_param(event_loop_timeout, uint, 0644); |
139 | |
140 | static uint __read_mostly event_eoi_delay = 10; |
141 | module_param(event_eoi_delay, uint, 0644); |
142 | |
143 | const struct evtchn_ops *evtchn_ops; |
144 | |
145 | /* |
146 | * This lock protects updates to the following mapping and reference-count |
147 | * arrays. The lock does not need to be acquired to read the mapping tables. |
148 | */ |
149 | static DEFINE_MUTEX(irq_mapping_update_lock); |
150 | |
151 | /* |
152 | * Lock hierarchy: |
153 | * |
154 | * irq_mapping_update_lock |
155 | * IRQ-desc lock |
156 | * percpu eoi_list_lock |
157 | * irq_info->lock |
158 | */ |
159 | |
160 | static LIST_HEAD(xen_irq_list_head); |
161 | |
162 | /* IRQ <-> VIRQ mapping. */ |
163 | static DEFINE_PER_CPU(int [NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1}; |
164 | |
165 | /* IRQ <-> IPI mapping */ |
166 | static DEFINE_PER_CPU(int [XEN_NR_IPIS], ipi_to_irq) = {[0 ... XEN_NR_IPIS-1] = -1}; |
167 | |
168 | /* Event channel distribution data */ |
169 | static atomic_t channels_on_cpu[NR_CPUS]; |
170 | |
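/*
 * The evtchn -> irq lookup is a two-level table: each row is a page-sized
 * array of ints, allocated on demand in set_evtchn_to_irq(). Unallocated
 * rows and unset entries both read back as -1.
 */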
171 | static int **evtchn_to_irq; |
172 | #ifdef CONFIG_X86 |
173 | static unsigned long *pirq_eoi_map; |
174 | #endif |
175 | static bool (*pirq_needs_eoi)(unsigned irq); |
176 | |
177 | #define EVTCHN_ROW(e) (e / (PAGE_SIZE/sizeof(**evtchn_to_irq))) |
178 | #define EVTCHN_COL(e) (e % (PAGE_SIZE/sizeof(**evtchn_to_irq))) |
179 | #define EVTCHN_PER_ROW (PAGE_SIZE / sizeof(**evtchn_to_irq)) |
180 | |
181 | /* Xen will never allocate port zero for any purpose. */ |
182 | #define VALID_EVTCHN(chn) ((chn) != 0) |
183 | |
184 | static struct irq_info *legacy_info_ptrs[NR_IRQS_LEGACY]; |
185 | |
186 | static struct irq_chip xen_dynamic_chip; |
187 | static struct irq_chip xen_lateeoi_chip; |
188 | static struct irq_chip xen_percpu_chip; |
189 | static struct irq_chip xen_pirq_chip; |
190 | static void enable_dynirq(struct irq_data *data); |
191 | static void disable_dynirq(struct irq_data *data); |
192 | |
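/*
 * Per-cpu epoch counter, incremented at the end of each upcall in
 * xen_evtchn_do_upcall(). xen_irq_lateeoi_locked() compares it against the
 * epoch recorded in handle_irq_for_port() to decide whether a deferred EOI
 * still belongs to the current event-handling pass.
 */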
193 | static DEFINE_PER_CPU(unsigned int, irq_epoch); |
194 | |
195 | static void clear_evtchn_to_irq_row(int *evtchn_row) |
196 | { |
197 | unsigned col; |
198 | |
199 | for (col = 0; col < EVTCHN_PER_ROW; col++) |
200 | WRITE_ONCE(evtchn_row[col], -1); |
201 | } |
202 | |
203 | static void clear_evtchn_to_irq_all(void) |
204 | { |
205 | unsigned row; |
206 | |
207 | for (row = 0; row < EVTCHN_ROW(xen_evtchn_max_channels()); row++) { |
208 | if (evtchn_to_irq[row] == NULL) |
209 | continue; |
		clear_evtchn_to_irq_row(evtchn_to_irq[row]);
211 | } |
212 | } |
213 | |
214 | static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq) |
215 | { |
216 | unsigned row; |
217 | unsigned col; |
218 | int *evtchn_row; |
219 | |
220 | if (evtchn >= xen_evtchn_max_channels()) |
221 | return -EINVAL; |
222 | |
223 | row = EVTCHN_ROW(evtchn); |
224 | col = EVTCHN_COL(evtchn); |
225 | |
226 | if (evtchn_to_irq[row] == NULL) { |
227 | /* Unallocated irq entries return -1 anyway */ |
228 | if (irq == -1) |
229 | return 0; |
230 | |
		evtchn_row = (int *) __get_free_pages(GFP_KERNEL, 0);
232 | if (evtchn_row == NULL) |
233 | return -ENOMEM; |
234 | |
235 | clear_evtchn_to_irq_row(evtchn_row); |
236 | |
237 | /* |
238 | * We've prepared an empty row for the mapping. If a different |
239 | * thread was faster inserting it, we can drop ours. |
240 | */ |
241 | if (cmpxchg(&evtchn_to_irq[row], NULL, evtchn_row) != NULL) |
242 | free_page((unsigned long) evtchn_row); |
243 | } |
244 | |
245 | WRITE_ONCE(evtchn_to_irq[row][col], irq); |
246 | return 0; |
247 | } |
248 | |
249 | int get_evtchn_to_irq(evtchn_port_t evtchn) |
250 | { |
251 | if (evtchn >= xen_evtchn_max_channels()) |
252 | return -1; |
253 | if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL) |
254 | return -1; |
255 | return READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]); |
256 | } |
257 | |
258 | /* Get info for IRQ */ |
259 | static struct irq_info *info_for_irq(unsigned irq) |
260 | { |
261 | if (irq < nr_legacy_irqs()) |
262 | return legacy_info_ptrs[irq]; |
263 | else |
264 | return irq_get_chip_data(irq); |
265 | } |
266 | |
267 | static void set_info_for_irq(unsigned int irq, struct irq_info *info) |
268 | { |
269 | if (irq < nr_legacy_irqs()) |
270 | legacy_info_ptrs[irq] = info; |
271 | else |
		irq_set_chip_data(irq, info);
273 | } |
274 | |
275 | /* Per CPU channel accounting */ |
276 | static void channels_on_cpu_dec(struct irq_info *info) |
277 | { |
278 | if (!info->is_accounted) |
279 | return; |
280 | |
281 | info->is_accounted = 0; |
282 | |
283 | if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids)) |
284 | return; |
285 | |
	WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], -1, 0));
287 | } |
288 | |
289 | static void channels_on_cpu_inc(struct irq_info *info) |
290 | { |
291 | if (WARN_ON_ONCE(info->cpu >= nr_cpu_ids)) |
292 | return; |
293 | |
294 | if (WARN_ON_ONCE(!atomic_add_unless(&channels_on_cpu[info->cpu], 1, |
295 | INT_MAX))) |
296 | return; |
297 | |
298 | info->is_accounted = 1; |
299 | } |
300 | |
301 | static void delayed_free_irq(struct work_struct *work) |
302 | { |
303 | struct irq_info *info = container_of(to_rcu_work(work), struct irq_info, |
304 | rwork); |
305 | unsigned int irq = info->irq; |
306 | |
307 | /* Remove the info pointer only now, with no potential users left. */ |
308 | set_info_for_irq(irq, NULL); |
309 | |
	kfree(info);
311 | |
312 | /* Legacy IRQ descriptors are managed by the arch. */ |
313 | if (irq >= nr_legacy_irqs()) |
314 | irq_free_desc(irq); |
315 | } |
316 | |
317 | /* Constructors for packed IRQ information. */ |
318 | static int xen_irq_info_common_setup(struct irq_info *info, |
319 | unsigned irq, |
320 | enum xen_irq_type type, |
321 | evtchn_port_t evtchn, |
322 | unsigned short cpu) |
323 | { |
324 | int ret; |
325 | |
326 | BUG_ON(info->type != IRQT_UNBOUND && info->type != type); |
327 | |
328 | info->type = type; |
329 | info->irq = irq; |
330 | info->evtchn = evtchn; |
331 | info->cpu = cpu; |
332 | info->mask_reason = EVT_MASK_REASON_EXPLICIT; |
333 | raw_spin_lock_init(&info->lock); |
334 | |
335 | ret = set_evtchn_to_irq(evtchn, irq); |
336 | if (ret < 0) |
337 | return ret; |
338 | |
	irq_clear_status_flags(irq, IRQ_NOREQUEST|IRQ_NOAUTOEN);
340 | |
341 | return xen_evtchn_port_setup(evtchn); |
342 | } |
343 | |
344 | static int xen_irq_info_evtchn_setup(unsigned irq, |
345 | evtchn_port_t evtchn, |
346 | struct xenbus_device *dev) |
347 | { |
348 | struct irq_info *info = info_for_irq(irq); |
349 | int ret; |
350 | |
	ret = xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0);
	info->u.interdomain = dev;
	if (dev)
		atomic_inc(&dev->event_channels);
355 | |
356 | return ret; |
357 | } |
358 | |
359 | static int xen_irq_info_ipi_setup(unsigned cpu, |
360 | unsigned irq, |
361 | evtchn_port_t evtchn, |
362 | enum ipi_vector ipi) |
363 | { |
364 | struct irq_info *info = info_for_irq(irq); |
365 | |
366 | info->u.ipi = ipi; |
367 | |
368 | per_cpu(ipi_to_irq, cpu)[ipi] = irq; |
369 | |
	return xen_irq_info_common_setup(info, irq, IRQT_IPI, evtchn, 0);
371 | } |
372 | |
373 | static int xen_irq_info_virq_setup(unsigned cpu, |
374 | unsigned irq, |
375 | evtchn_port_t evtchn, |
376 | unsigned virq) |
377 | { |
378 | struct irq_info *info = info_for_irq(irq); |
379 | |
380 | info->u.virq = virq; |
381 | |
382 | per_cpu(virq_to_irq, cpu)[virq] = irq; |
383 | |
	return xen_irq_info_common_setup(info, irq, IRQT_VIRQ, evtchn, 0);
385 | } |
386 | |
387 | static int xen_irq_info_pirq_setup(unsigned irq, |
388 | evtchn_port_t evtchn, |
389 | unsigned pirq, |
390 | unsigned gsi, |
391 | uint16_t domid, |
392 | unsigned char flags) |
393 | { |
394 | struct irq_info *info = info_for_irq(irq); |
395 | |
396 | info->u.pirq.pirq = pirq; |
397 | info->u.pirq.gsi = gsi; |
398 | info->u.pirq.domid = domid; |
399 | info->u.pirq.flags = flags; |
400 | |
	return xen_irq_info_common_setup(info, irq, IRQT_PIRQ, evtchn, 0);
402 | } |
403 | |
404 | static void xen_irq_info_cleanup(struct irq_info *info) |
405 | { |
	set_evtchn_to_irq(info->evtchn, -1);
	xen_evtchn_port_remove(info->evtchn, info->cpu);
408 | info->evtchn = 0; |
409 | channels_on_cpu_dec(info); |
410 | } |
411 | |
412 | /* |
413 | * Accessors for packed IRQ information. |
414 | */ |
415 | evtchn_port_t evtchn_from_irq(unsigned irq) |
416 | { |
417 | const struct irq_info *info = NULL; |
418 | |
419 | if (likely(irq < nr_irqs)) |
420 | info = info_for_irq(irq); |
421 | if (!info) |
422 | return 0; |
423 | |
424 | return info->evtchn; |
425 | } |
426 | |
427 | unsigned int irq_from_evtchn(evtchn_port_t evtchn) |
428 | { |
429 | return get_evtchn_to_irq(evtchn); |
430 | } |
431 | EXPORT_SYMBOL_GPL(irq_from_evtchn); |
432 | |
433 | int irq_from_virq(unsigned int cpu, unsigned int virq) |
434 | { |
435 | return per_cpu(virq_to_irq, cpu)[virq]; |
436 | } |
437 | |
438 | static enum ipi_vector ipi_from_irq(unsigned irq) |
439 | { |
440 | struct irq_info *info = info_for_irq(irq); |
441 | |
442 | BUG_ON(info == NULL); |
443 | BUG_ON(info->type != IRQT_IPI); |
444 | |
445 | return info->u.ipi; |
446 | } |
447 | |
448 | static unsigned virq_from_irq(unsigned irq) |
449 | { |
450 | struct irq_info *info = info_for_irq(irq); |
451 | |
452 | BUG_ON(info == NULL); |
453 | BUG_ON(info->type != IRQT_VIRQ); |
454 | |
455 | return info->u.virq; |
456 | } |
457 | |
458 | static unsigned pirq_from_irq(unsigned irq) |
459 | { |
460 | struct irq_info *info = info_for_irq(irq); |
461 | |
462 | BUG_ON(info == NULL); |
463 | BUG_ON(info->type != IRQT_PIRQ); |
464 | |
465 | return info->u.pirq.pirq; |
466 | } |
467 | |
468 | static enum xen_irq_type type_from_irq(unsigned irq) |
469 | { |
470 | return info_for_irq(irq)->type; |
471 | } |
472 | |
473 | static unsigned cpu_from_irq(unsigned irq) |
474 | { |
475 | return info_for_irq(irq)->cpu; |
476 | } |
477 | |
478 | unsigned int cpu_from_evtchn(evtchn_port_t evtchn) |
479 | { |
480 | int irq = get_evtchn_to_irq(evtchn); |
481 | unsigned ret = 0; |
482 | |
483 | if (irq != -1) |
484 | ret = cpu_from_irq(irq); |
485 | |
486 | return ret; |
487 | } |
488 | |
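/*
 * Masking is reference-style: the event channel is really masked while any
 * bit in mask_reason is set, and only unmasked again once the last reason
 * has been cleared via do_unmask().
 */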
489 | static void do_mask(struct irq_info *info, u8 reason) |
490 | { |
491 | unsigned long flags; |
492 | |
493 | raw_spin_lock_irqsave(&info->lock, flags); |
494 | |
495 | if (!info->mask_reason) |
		mask_evtchn(info->evtchn);
497 | |
498 | info->mask_reason |= reason; |
499 | |
500 | raw_spin_unlock_irqrestore(&info->lock, flags); |
501 | } |
502 | |
503 | static void do_unmask(struct irq_info *info, u8 reason) |
504 | { |
505 | unsigned long flags; |
506 | |
507 | raw_spin_lock_irqsave(&info->lock, flags); |
508 | |
509 | info->mask_reason &= ~reason; |
510 | |
511 | if (!info->mask_reason) |
		unmask_evtchn(info->evtchn);
513 | |
514 | raw_spin_unlock_irqrestore(&info->lock, flags); |
515 | } |
516 | |
517 | #ifdef CONFIG_X86 |
518 | static bool pirq_check_eoi_map(unsigned irq) |
519 | { |
520 | return test_bit(pirq_from_irq(irq), pirq_eoi_map); |
521 | } |
522 | #endif |
523 | |
524 | static bool pirq_needs_eoi_flag(unsigned irq) |
525 | { |
526 | struct irq_info *info = info_for_irq(irq); |
527 | BUG_ON(info->type != IRQT_PIRQ); |
528 | |
529 | return info->u.pirq.flags & PIRQ_NEEDS_EOI; |
530 | } |
531 | |
532 | static void bind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int cpu, |
533 | bool force_affinity) |
534 | { |
535 | int irq = get_evtchn_to_irq(evtchn); |
536 | struct irq_info *info = info_for_irq(irq); |
537 | |
538 | BUG_ON(irq == -1); |
539 | |
540 | if (IS_ENABLED(CONFIG_SMP) && force_affinity) { |
541 | struct irq_data *data = irq_get_irq_data(irq); |
542 | |
		irq_data_update_affinity(data, cpumask_of(cpu));
		irq_data_update_effective_affinity(data, cpumask_of(cpu));
545 | } |
546 | |
	xen_evtchn_port_bind_to_cpu(evtchn, cpu, info->cpu);
548 | |
549 | channels_on_cpu_dec(info); |
550 | info->cpu = cpu; |
551 | channels_on_cpu_inc(info); |
552 | } |
553 | |
554 | /** |
555 | * notify_remote_via_irq - send event to remote end of event channel via irq |
556 | * @irq: irq of event channel to send event to |
557 | * |
558 | * Unlike notify_remote_via_evtchn(), this is safe to use across |
559 | * save/restore. Notifications on a broken connection are silently |
560 | * dropped. |
561 | */ |
562 | void notify_remote_via_irq(int irq) |
563 | { |
564 | evtchn_port_t evtchn = evtchn_from_irq(irq); |
565 | |
566 | if (VALID_EVTCHN(evtchn)) |
		notify_remote_via_evtchn(evtchn);
568 | } |
569 | EXPORT_SYMBOL_GPL(notify_remote_via_irq); |
570 | |
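/*
 * Late-EOI handling: events whose EOI must be delayed (spurious-event
 * backoff or event-loop timeout deferral) are queued on a per-cpu list
 * sorted by their target EOI time and EOI'd from delayed work.
 */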
571 | struct lateeoi_work { |
572 | struct delayed_work delayed; |
573 | spinlock_t eoi_list_lock; |
574 | struct list_head eoi_list; |
575 | }; |
576 | |
577 | static DEFINE_PER_CPU(struct lateeoi_work, lateeoi); |
578 | |
579 | static void lateeoi_list_del(struct irq_info *info) |
580 | { |
581 | struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu); |
582 | unsigned long flags; |
583 | |
584 | spin_lock_irqsave(&eoi->eoi_list_lock, flags); |
	list_del_init(&info->eoi_list);
	spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
587 | } |
588 | |
589 | static void lateeoi_list_add(struct irq_info *info) |
590 | { |
591 | struct lateeoi_work *eoi = &per_cpu(lateeoi, info->eoi_cpu); |
592 | struct irq_info *elem; |
593 | u64 now = get_jiffies_64(); |
594 | unsigned long delay; |
595 | unsigned long flags; |
596 | |
597 | if (now < info->eoi_time) |
598 | delay = info->eoi_time - now; |
599 | else |
600 | delay = 1; |
601 | |
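	/*
	 * Insert sorted by eoi_time. Only a first entry needs the delayed
	 * work to be (re)scheduled; later entries are picked up when the
	 * worker walks the list.
	 */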
602 | spin_lock_irqsave(&eoi->eoi_list_lock, flags); |
603 | |
	if (list_empty(&eoi->eoi_list)) {
		list_add(&info->eoi_list, &eoi->eoi_list);
		mod_delayed_work_on(info->eoi_cpu, system_wq,
				    &eoi->delayed, delay);
608 | } else { |
609 | list_for_each_entry_reverse(elem, &eoi->eoi_list, eoi_list) { |
610 | if (elem->eoi_time <= info->eoi_time) |
611 | break; |
612 | } |
		list_add(&info->eoi_list, &elem->eoi_list);
614 | } |
615 | |
	spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
617 | } |
618 | |
619 | static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious) |
620 | { |
621 | evtchn_port_t evtchn; |
622 | unsigned int cpu; |
623 | unsigned int delay = 0; |
624 | |
625 | evtchn = info->evtchn; |
	if (!VALID_EVTCHN(evtchn) || !list_empty(&info->eoi_list))
627 | return; |
628 | |
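	/*
	 * For spurious events, back off exponentially: once more than
	 * "threshold" spurious events have been seen in a row, delay the
	 * EOI by 1 << (spurious_cnt - 1 - threshold) jiffies, capped at HZ.
	 */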
629 | if (spurious) { |
630 | struct xenbus_device *dev = info->u.interdomain; |
631 | unsigned int threshold = 1; |
632 | |
633 | if (dev && dev->spurious_threshold) |
634 | threshold = dev->spurious_threshold; |
635 | |
636 | if ((1 << info->spurious_cnt) < (HZ << 2)) { |
637 | if (info->spurious_cnt != 0xFF) |
638 | info->spurious_cnt++; |
639 | } |
640 | if (info->spurious_cnt > threshold) { |
641 | delay = 1 << (info->spurious_cnt - 1 - threshold); |
642 | if (delay > HZ) |
643 | delay = HZ; |
644 | if (!info->eoi_time) |
645 | info->eoi_cpu = smp_processor_id(); |
646 | info->eoi_time = get_jiffies_64() + delay; |
647 | if (dev) |
				atomic_add(delay, &dev->jiffies_eoi_delayed);
649 | } |
650 | if (dev) |
			atomic_inc(&dev->spurious_events);
652 | } else { |
653 | info->spurious_cnt = 0; |
654 | } |
655 | |
656 | cpu = info->eoi_cpu; |
657 | if (info->eoi_time && |
658 | (info->irq_epoch == per_cpu(irq_epoch, cpu) || delay)) { |
659 | lateeoi_list_add(info); |
660 | return; |
661 | } |
662 | |
663 | info->eoi_time = 0; |
664 | |
665 | /* is_active hasn't been reset yet, do it now. */ |
666 | smp_store_release(&info->is_active, 0); |
667 | do_unmask(info, EVT_MASK_REASON_EOI_PENDING); |
668 | } |
669 | |
670 | static void xen_irq_lateeoi_worker(struct work_struct *work) |
671 | { |
672 | struct lateeoi_work *eoi; |
673 | struct irq_info *info; |
674 | u64 now = get_jiffies_64(); |
675 | unsigned long flags; |
676 | |
677 | eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed); |
678 | |
679 | rcu_read_lock(); |
680 | |
681 | while (true) { |
682 | spin_lock_irqsave(&eoi->eoi_list_lock, flags); |
683 | |
684 | info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info, |
685 | eoi_list); |
686 | |
687 | if (info == NULL) |
688 | break; |
689 | |
690 | if (now < info->eoi_time) { |
			mod_delayed_work_on(info->eoi_cpu, system_wq,
					    &eoi->delayed,
					    info->eoi_time - now);
694 | break; |
695 | } |
696 | |
		list_del_init(&info->eoi_list);

		spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
700 | |
701 | info->eoi_time = 0; |
702 | |
		xen_irq_lateeoi_locked(info, false);
704 | } |
705 | |
	spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
707 | |
708 | rcu_read_unlock(); |
709 | } |
710 | |
711 | static void xen_cpu_init_eoi(unsigned int cpu) |
712 | { |
713 | struct lateeoi_work *eoi = &per_cpu(lateeoi, cpu); |
714 | |
715 | INIT_DELAYED_WORK(&eoi->delayed, xen_irq_lateeoi_worker); |
716 | spin_lock_init(&eoi->eoi_list_lock); |
	INIT_LIST_HEAD(&eoi->eoi_list);
718 | } |
719 | |
720 | void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags) |
721 | { |
722 | struct irq_info *info; |
723 | |
724 | rcu_read_lock(); |
725 | |
726 | info = info_for_irq(irq); |
727 | |
728 | if (info) |
		xen_irq_lateeoi_locked(info, eoi_flags & XEN_EOI_FLAG_SPURIOUS);
730 | |
731 | rcu_read_unlock(); |
732 | } |
733 | EXPORT_SYMBOL_GPL(xen_irq_lateeoi); |
734 | |
735 | static void xen_irq_init(unsigned irq) |
736 | { |
737 | struct irq_info *info; |
738 | |
	info = kzalloc(sizeof(*info), GFP_KERNEL);
	if (info == NULL)
		panic("Unable to allocate metadata for IRQ%d\n", irq);
742 | |
743 | info->type = IRQT_UNBOUND; |
744 | info->refcnt = -1; |
745 | INIT_RCU_WORK(&info->rwork, delayed_free_irq); |
746 | |
747 | set_info_for_irq(irq, info); |
748 | /* |
749 | * Interrupt affinity setting can be immediate. No point |
750 | * in delaying it until an interrupt is handled. |
751 | */ |
	irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);

	INIT_LIST_HEAD(&info->eoi_list);
	list_add_tail(&info->list, &xen_irq_list_head);
756 | } |
757 | |
758 | static int __must_check xen_allocate_irqs_dynamic(int nvec) |
759 | { |
760 | int i, irq = irq_alloc_descs(-1, 0, nvec, -1); |
761 | |
762 | if (irq >= 0) { |
763 | for (i = 0; i < nvec; i++) |
			xen_irq_init(irq + i);
765 | } |
766 | |
767 | return irq; |
768 | } |
769 | |
770 | static inline int __must_check xen_allocate_irq_dynamic(void) |
771 | { |
	return xen_allocate_irqs_dynamic(1);
774 | } |
775 | |
776 | static int __must_check xen_allocate_irq_gsi(unsigned gsi) |
777 | { |
778 | int irq; |
779 | |
780 | /* |
781 | * A PV guest has no concept of a GSI (since it has no ACPI |
782 | * nor access to/knowledge of the physical APICs). Therefore |
783 | * all IRQs are dynamically allocated from the entire IRQ |
784 | * space. |
785 | */ |
786 | if (xen_pv_domain() && !xen_initial_domain()) |
787 | return xen_allocate_irq_dynamic(); |
788 | |
789 | /* Legacy IRQ descriptors are already allocated by the arch. */ |
790 | if (gsi < nr_legacy_irqs()) |
791 | irq = gsi; |
792 | else |
793 | irq = irq_alloc_desc_at(gsi, -1); |
794 | |
795 | xen_irq_init(irq); |
796 | |
797 | return irq; |
798 | } |
799 | |
800 | static void xen_free_irq(unsigned irq) |
801 | { |
802 | struct irq_info *info = info_for_irq(irq); |
803 | |
804 | if (WARN_ON(!info)) |
805 | return; |
806 | |
	if (!list_empty(&info->eoi_list))
		lateeoi_list_del(info);

	list_del(&info->list);
811 | |
812 | WARN_ON(info->refcnt > 0); |
813 | |
	queue_rcu_work(system_wq, &info->rwork);
815 | } |
816 | |
817 | /* Not called for lateeoi events. */ |
818 | static void event_handler_exit(struct irq_info *info) |
819 | { |
820 | smp_store_release(&info->is_active, 0); |
	clear_evtchn(info->evtchn);
822 | } |
823 | |
824 | static void pirq_query_unmask(int irq) |
825 | { |
826 | struct physdev_irq_status_query irq_status; |
827 | struct irq_info *info = info_for_irq(irq); |
828 | |
829 | BUG_ON(info->type != IRQT_PIRQ); |
830 | |
831 | irq_status.irq = pirq_from_irq(irq); |
	if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
833 | irq_status.flags = 0; |
834 | |
835 | info->u.pirq.flags &= ~PIRQ_NEEDS_EOI; |
836 | if (irq_status.flags & XENIRQSTAT_needs_eoi) |
837 | info->u.pirq.flags |= PIRQ_NEEDS_EOI; |
838 | } |
839 | |
840 | static void eoi_pirq(struct irq_data *data) |
841 | { |
	struct irq_info *info = info_for_irq(data->irq);
	evtchn_port_t evtchn = info ? info->evtchn : 0;
	struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) };
845 | int rc = 0; |
846 | |
847 | if (!VALID_EVTCHN(evtchn)) |
848 | return; |
849 | |
850 | event_handler_exit(info); |
851 | |
852 | if (pirq_needs_eoi(data->irq)) { |
		rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
854 | WARN_ON(rc); |
855 | } |
856 | } |
857 | |
858 | static void mask_ack_pirq(struct irq_data *data) |
859 | { |
860 | disable_dynirq(data); |
861 | eoi_pirq(data); |
862 | } |
863 | |
864 | static unsigned int __startup_pirq(unsigned int irq) |
865 | { |
866 | struct evtchn_bind_pirq bind_pirq; |
867 | struct irq_info *info = info_for_irq(irq); |
868 | evtchn_port_t evtchn = evtchn_from_irq(irq); |
869 | int rc; |
870 | |
871 | BUG_ON(info->type != IRQT_PIRQ); |
872 | |
873 | if (VALID_EVTCHN(evtchn)) |
874 | goto out; |
875 | |
876 | bind_pirq.pirq = pirq_from_irq(irq); |
877 | /* NB. We are happy to share unless we are probing. */ |
878 | bind_pirq.flags = info->u.pirq.flags & PIRQ_SHAREABLE ? |
879 | BIND_PIRQ__WILL_SHARE : 0; |
	rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq);
	if (rc != 0) {
		pr_warn("Failed to obtain physical IRQ %d\n", irq);
883 | return 0; |
884 | } |
885 | evtchn = bind_pirq.port; |
886 | |
887 | pirq_query_unmask(irq); |
888 | |
889 | rc = set_evtchn_to_irq(evtchn, irq); |
890 | if (rc) |
891 | goto err; |
892 | |
893 | info->evtchn = evtchn; |
	bind_evtchn_to_cpu(evtchn, 0, false);
895 | |
896 | rc = xen_evtchn_port_setup(evtchn); |
897 | if (rc) |
898 | goto err; |
899 | |
900 | out: |
901 | do_unmask(info, EVT_MASK_REASON_EXPLICIT); |
902 | |
	eoi_pirq(irq_get_irq_data(irq));
904 | |
905 | return 0; |
906 | |
907 | err: |
	pr_err("irq%d: Failed to set port to irq mapping (%d)\n", irq, rc);
	xen_evtchn_close(evtchn);
910 | return 0; |
911 | } |
912 | |
913 | static unsigned int startup_pirq(struct irq_data *data) |
914 | { |
	return __startup_pirq(data->irq);
916 | } |
917 | |
918 | static void shutdown_pirq(struct irq_data *data) |
919 | { |
920 | unsigned int irq = data->irq; |
921 | struct irq_info *info = info_for_irq(irq); |
922 | evtchn_port_t evtchn = evtchn_from_irq(irq); |
923 | |
924 | BUG_ON(info->type != IRQT_PIRQ); |
925 | |
926 | if (!VALID_EVTCHN(evtchn)) |
927 | return; |
928 | |
929 | do_mask(info, EVT_MASK_REASON_EXPLICIT); |
	xen_evtchn_close(evtchn);
931 | xen_irq_info_cleanup(info); |
932 | } |
933 | |
934 | static void enable_pirq(struct irq_data *data) |
935 | { |
936 | enable_dynirq(data); |
937 | } |
938 | |
939 | static void disable_pirq(struct irq_data *data) |
940 | { |
941 | disable_dynirq(data); |
942 | } |
943 | |
944 | int xen_irq_from_gsi(unsigned gsi) |
945 | { |
946 | struct irq_info *info; |
947 | |
948 | list_for_each_entry(info, &xen_irq_list_head, list) { |
949 | if (info->type != IRQT_PIRQ) |
950 | continue; |
951 | |
952 | if (info->u.pirq.gsi == gsi) |
953 | return info->irq; |
954 | } |
955 | |
956 | return -1; |
957 | } |
958 | EXPORT_SYMBOL_GPL(xen_irq_from_gsi); |
959 | |
960 | static void __unbind_from_irq(unsigned int irq) |
961 | { |
962 | evtchn_port_t evtchn = evtchn_from_irq(irq); |
963 | struct irq_info *info = info_for_irq(irq); |
964 | |
965 | if (info->refcnt > 0) { |
966 | info->refcnt--; |
967 | if (info->refcnt != 0) |
968 | return; |
969 | } |
970 | |
971 | if (VALID_EVTCHN(evtchn)) { |
972 | unsigned int cpu = cpu_from_irq(irq); |
973 | struct xenbus_device *dev; |
974 | |
975 | if (!info->is_static) |
976 | xen_evtchn_close(port: evtchn); |
977 | |
978 | switch (type_from_irq(irq)) { |
979 | case IRQT_VIRQ: |
980 | per_cpu(virq_to_irq, cpu)[virq_from_irq(irq)] = -1; |
981 | break; |
982 | case IRQT_IPI: |
983 | per_cpu(ipi_to_irq, cpu)[ipi_from_irq(irq)] = -1; |
984 | break; |
985 | case IRQT_EVTCHN: |
986 | dev = info->u.interdomain; |
987 | if (dev) |
988 | atomic_dec(v: &dev->event_channels); |
989 | break; |
990 | default: |
991 | break; |
992 | } |
993 | |
994 | xen_irq_info_cleanup(info); |
995 | } |
996 | |
997 | xen_free_irq(irq); |
998 | } |
999 | |
1000 | /* |
1001 | * Do not make any assumptions regarding the relationship between the |
1002 | * IRQ number returned here and the Xen pirq argument. |
1003 | * |
1004 | * Note: We don't assign an event channel until the irq actually started |
1005 | * up. Return an existing irq if we've already got one for the gsi. |
1006 | * |
1007 | * Shareable implies level triggered, not shareable implies edge |
1008 | * triggered here. |
1009 | */ |
1010 | int xen_bind_pirq_gsi_to_irq(unsigned gsi, |
1011 | unsigned pirq, int shareable, char *name) |
1012 | { |
1013 | int irq; |
1014 | struct physdev_irq irq_op; |
1015 | int ret; |
1016 | |
1017 | mutex_lock(&irq_mapping_update_lock); |
1018 | |
1019 | irq = xen_irq_from_gsi(gsi); |
1020 | if (irq != -1) { |
		pr_info("%s: returning irq %d for gsi %u\n",
			__func__, irq, gsi);
1023 | goto out; |
1024 | } |
1025 | |
1026 | irq = xen_allocate_irq_gsi(gsi); |
1027 | if (irq < 0) |
1028 | goto out; |
1029 | |
1030 | irq_op.irq = irq; |
1031 | irq_op.vector = 0; |
1032 | |
1033 | /* Only the privileged domain can do this. For non-priv, the pcifront |
1034 | * driver provides a PCI bus that does the call to do exactly |
1035 | * this in the priv domain. */ |
1036 | if (xen_initial_domain() && |
	    HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
1038 | xen_free_irq(irq); |
1039 | irq = -ENOSPC; |
1040 | goto out; |
1041 | } |
1042 | |
	ret = xen_irq_info_pirq_setup(irq, 0, pirq, gsi, DOMID_SELF,
				      shareable ? PIRQ_SHAREABLE : 0);
1045 | if (ret < 0) { |
1046 | __unbind_from_irq(irq); |
1047 | irq = ret; |
1048 | goto out; |
1049 | } |
1050 | |
1051 | pirq_query_unmask(irq); |
1052 | /* We try to use the handler with the appropriate semantic for the |
1053 | * type of interrupt: if the interrupt is an edge triggered |
1054 | * interrupt we use handle_edge_irq. |
1055 | * |
1056 | * On the other hand if the interrupt is level triggered we use |
1057 | * handle_fasteoi_irq like the native code does for this kind of |
1058 | * interrupts. |
1059 | * |
1060 | * Depending on the Xen version, pirq_needs_eoi might return true |
1061 | * not only for level triggered interrupts but for edge triggered |
1062 | * interrupts too. In any case Xen always honors the eoi mechanism, |
1063 | * not injecting any more pirqs of the same kind if the first one |
1064 | * hasn't received an eoi yet. Therefore using the fasteoi handler |
1065 | * is the right choice either way. |
1066 | */ |
1067 | if (shareable) |
		irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
					      handle_fasteoi_irq, name);
	else
		irq_set_chip_and_handler_name(irq, &xen_pirq_chip,
					      handle_edge_irq, name);
1073 | |
1074 | out: |
	mutex_unlock(&irq_mapping_update_lock);
1076 | |
1077 | return irq; |
1078 | } |
1079 | |
1080 | #ifdef CONFIG_PCI_MSI |
1081 | int xen_allocate_pirq_msi(struct pci_dev *dev, struct msi_desc *msidesc) |
1082 | { |
1083 | int rc; |
1084 | struct physdev_get_free_pirq op_get_free_pirq; |
1085 | |
1086 | op_get_free_pirq.type = MAP_PIRQ_TYPE_MSI; |
	rc = HYPERVISOR_physdev_op(PHYSDEVOP_get_free_pirq, &op_get_free_pirq);

	WARN_ONCE(rc == -ENOSYS,
		  "hypervisor does not support the PHYSDEVOP_get_free_pirq interface\n");
1091 | |
1092 | return rc ? -1 : op_get_free_pirq.pirq; |
1093 | } |
1094 | |
1095 | int xen_bind_pirq_msi_to_irq(struct pci_dev *dev, struct msi_desc *msidesc, |
1096 | int pirq, int nvec, const char *name, domid_t domid) |
1097 | { |
1098 | int i, irq, ret; |
1099 | |
1100 | mutex_lock(&irq_mapping_update_lock); |
1101 | |
1102 | irq = xen_allocate_irqs_dynamic(nvec); |
1103 | if (irq < 0) |
1104 | goto out; |
1105 | |
1106 | for (i = 0; i < nvec; i++) { |
		irq_set_chip_and_handler_name(irq + i, &xen_pirq_chip, handle_edge_irq, name);

		ret = xen_irq_info_pirq_setup(irq + i, 0, pirq + i, 0, domid,
					      i == 0 ? 0 : PIRQ_MSI_GROUP);
1111 | if (ret < 0) |
1112 | goto error_irq; |
1113 | } |
1114 | |
	ret = irq_set_msi_desc(irq, msidesc);
1116 | if (ret < 0) |
1117 | goto error_irq; |
1118 | out: |
	mutex_unlock(&irq_mapping_update_lock);
1120 | return irq; |
1121 | error_irq: |
1122 | while (nvec--) |
		__unbind_from_irq(irq + nvec);
	mutex_unlock(&irq_mapping_update_lock);
1125 | return ret; |
1126 | } |
1127 | #endif |
1128 | |
1129 | int xen_destroy_irq(int irq) |
1130 | { |
1131 | struct physdev_unmap_pirq unmap_irq; |
1132 | struct irq_info *info = info_for_irq(irq); |
1133 | int rc = -ENOENT; |
1134 | |
1135 | mutex_lock(&irq_mapping_update_lock); |
1136 | |
1137 | /* |
1138 | * If trying to remove a vector in a MSI group different |
1139 | * than the first one skip the PIRQ unmap unless this vector |
1140 | * is the first one in the group. |
1141 | */ |
1142 | if (xen_initial_domain() && !(info->u.pirq.flags & PIRQ_MSI_GROUP)) { |
1143 | unmap_irq.pirq = info->u.pirq.pirq; |
1144 | unmap_irq.domid = info->u.pirq.domid; |
		rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap_irq);
1146 | /* If another domain quits without making the pci_disable_msix |
1147 | * call, the Xen hypervisor takes care of freeing the PIRQs |
1148 | * (free_domain_pirqs). |
1149 | */ |
1150 | if ((rc == -ESRCH && info->u.pirq.domid != DOMID_SELF)) |
			pr_info("domain %d does not have %d anymore\n",
				info->u.pirq.domid, info->u.pirq.pirq);
		else if (rc) {
			pr_warn("unmap irq failed %d\n", rc);
1155 | goto out; |
1156 | } |
1157 | } |
1158 | |
1159 | xen_free_irq(irq); |
1160 | |
1161 | out: |
	mutex_unlock(&irq_mapping_update_lock);
1163 | return rc; |
1164 | } |
1165 | |
1166 | int xen_irq_from_pirq(unsigned pirq) |
1167 | { |
1168 | int irq; |
1169 | |
1170 | struct irq_info *info; |
1171 | |
1172 | mutex_lock(&irq_mapping_update_lock); |
1173 | |
1174 | list_for_each_entry(info, &xen_irq_list_head, list) { |
1175 | if (info->type != IRQT_PIRQ) |
1176 | continue; |
1177 | irq = info->irq; |
1178 | if (info->u.pirq.pirq == pirq) |
1179 | goto out; |
1180 | } |
1181 | irq = -1; |
1182 | out: |
	mutex_unlock(&irq_mapping_update_lock);
1184 | |
1185 | return irq; |
1186 | } |
1187 | |
1188 | |
1189 | int xen_pirq_from_irq(unsigned irq) |
1190 | { |
1191 | return pirq_from_irq(irq); |
1192 | } |
1193 | EXPORT_SYMBOL_GPL(xen_pirq_from_irq); |
1194 | |
1195 | static int bind_evtchn_to_irq_chip(evtchn_port_t evtchn, struct irq_chip *chip, |
1196 | struct xenbus_device *dev) |
1197 | { |
1198 | int irq; |
1199 | int ret; |
1200 | |
1201 | if (evtchn >= xen_evtchn_max_channels()) |
1202 | return -ENOMEM; |
1203 | |
1204 | mutex_lock(&irq_mapping_update_lock); |
1205 | |
1206 | irq = get_evtchn_to_irq(evtchn); |
1207 | |
1208 | if (irq == -1) { |
1209 | irq = xen_allocate_irq_dynamic(); |
1210 | if (irq < 0) |
1211 | goto out; |
1212 | |
		irq_set_chip_and_handler_name(irq, chip,
					      handle_edge_irq, "event");
1215 | |
1216 | ret = xen_irq_info_evtchn_setup(irq, evtchn, dev); |
1217 | if (ret < 0) { |
1218 | __unbind_from_irq(irq); |
1219 | irq = ret; |
1220 | goto out; |
1221 | } |
1222 | /* |
1223 | * New interdomain events are initially bound to vCPU0 This |
1224 | * is required to setup the event channel in the first |
1225 | * place and also important for UP guests because the |
1226 | * affinity setting is not invoked on them so nothing would |
1227 | * bind the channel. |
1228 | */ |
		bind_evtchn_to_cpu(evtchn, 0, false);
1230 | } else { |
1231 | struct irq_info *info = info_for_irq(irq); |
1232 | if (!WARN_ON(!info || info->type != IRQT_EVTCHN)) |
1233 | info->refcnt++; |
1234 | } |
1235 | |
1236 | out: |
	mutex_unlock(&irq_mapping_update_lock);
1238 | |
1239 | return irq; |
1240 | } |
1241 | |
1242 | int bind_evtchn_to_irq(evtchn_port_t evtchn) |
1243 | { |
	return bind_evtchn_to_irq_chip(evtchn, &xen_dynamic_chip, NULL);
1245 | } |
1246 | EXPORT_SYMBOL_GPL(bind_evtchn_to_irq); |
1247 | |
1248 | int bind_evtchn_to_irq_lateeoi(evtchn_port_t evtchn) |
1249 | { |
	return bind_evtchn_to_irq_chip(evtchn, &xen_lateeoi_chip, NULL);
1251 | } |
1252 | EXPORT_SYMBOL_GPL(bind_evtchn_to_irq_lateeoi); |
1253 | |
1254 | static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) |
1255 | { |
1256 | struct evtchn_bind_ipi bind_ipi; |
1257 | evtchn_port_t evtchn; |
1258 | int ret, irq; |
1259 | |
1260 | mutex_lock(&irq_mapping_update_lock); |
1261 | |
1262 | irq = per_cpu(ipi_to_irq, cpu)[ipi]; |
1263 | |
1264 | if (irq == -1) { |
1265 | irq = xen_allocate_irq_dynamic(); |
1266 | if (irq < 0) |
1267 | goto out; |
1268 | |
		irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
					      handle_percpu_irq, "ipi");

		bind_ipi.vcpu = xen_vcpu_nr(cpu);
		if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
						&bind_ipi) != 0)
1275 | BUG(); |
1276 | evtchn = bind_ipi.port; |
1277 | |
1278 | ret = xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi); |
1279 | if (ret < 0) { |
1280 | __unbind_from_irq(irq); |
1281 | irq = ret; |
1282 | goto out; |
1283 | } |
1284 | /* |
1285 | * Force the affinity mask to the target CPU so proc shows |
1286 | * the correct target. |
1287 | */ |
		bind_evtchn_to_cpu(evtchn, cpu, true);
1289 | } else { |
1290 | struct irq_info *info = info_for_irq(irq); |
1291 | WARN_ON(info == NULL || info->type != IRQT_IPI); |
1292 | } |
1293 | |
1294 | out: |
	mutex_unlock(&irq_mapping_update_lock);
1296 | return irq; |
1297 | } |
1298 | |
1299 | static int bind_interdomain_evtchn_to_irq_chip(struct xenbus_device *dev, |
1300 | evtchn_port_t remote_port, |
1301 | struct irq_chip *chip) |
1302 | { |
1303 | struct evtchn_bind_interdomain bind_interdomain; |
1304 | int err; |
1305 | |
1306 | bind_interdomain.remote_dom = dev->otherend_id; |
1307 | bind_interdomain.remote_port = remote_port; |
1308 | |
	err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
					  &bind_interdomain);

	return err ? : bind_evtchn_to_irq_chip(bind_interdomain.local_port,
					       chip, dev);
1314 | } |
1315 | |
1316 | int bind_interdomain_evtchn_to_irq_lateeoi(struct xenbus_device *dev, |
1317 | evtchn_port_t remote_port) |
1318 | { |
	return bind_interdomain_evtchn_to_irq_chip(dev, remote_port,
						   &xen_lateeoi_chip);
1321 | } |
1322 | EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irq_lateeoi); |
1323 | |
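/*
 * Look up the event channel already bound to @virq on @cpu by scanning all
 * ports via EVTCHNOP_status. Used when EVTCHNOP_bind_virq reports the VIRQ
 * as already bound (-EEXIST).
 */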
1324 | static int find_virq(unsigned int virq, unsigned int cpu, evtchn_port_t *evtchn) |
1325 | { |
1326 | struct evtchn_status status; |
1327 | evtchn_port_t port; |
1328 | int rc = -ENOENT; |
1329 | |
1330 | memset(&status, 0, sizeof(status)); |
1331 | for (port = 0; port < xen_evtchn_max_channels(); port++) { |
1332 | status.dom = DOMID_SELF; |
1333 | status.port = port; |
		rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status);
1335 | if (rc < 0) |
1336 | continue; |
1337 | if (status.status != EVTCHNSTAT_virq) |
1338 | continue; |
1339 | if (status.u.virq == virq && status.vcpu == xen_vcpu_nr(cpu)) { |
1340 | *evtchn = port; |
1341 | break; |
1342 | } |
1343 | } |
1344 | return rc; |
1345 | } |
1346 | |
1347 | /** |
1348 | * xen_evtchn_nr_channels - number of usable event channel ports |
1349 | * |
1350 | * This may be less than the maximum supported by the current |
1351 | * hypervisor ABI. Use xen_evtchn_max_channels() for the maximum |
1352 | * supported. |
1353 | */ |
1354 | unsigned xen_evtchn_nr_channels(void) |
1355 | { |
1356 | return evtchn_ops->nr_channels(); |
1357 | } |
1358 | EXPORT_SYMBOL_GPL(xen_evtchn_nr_channels); |
1359 | |
1360 | int bind_virq_to_irq(unsigned int virq, unsigned int cpu, bool percpu) |
1361 | { |
1362 | struct evtchn_bind_virq bind_virq; |
1363 | evtchn_port_t evtchn = 0; |
1364 | int irq, ret; |
1365 | |
1366 | mutex_lock(&irq_mapping_update_lock); |
1367 | |
1368 | irq = per_cpu(virq_to_irq, cpu)[virq]; |
1369 | |
1370 | if (irq == -1) { |
1371 | irq = xen_allocate_irq_dynamic(); |
1372 | if (irq < 0) |
1373 | goto out; |
1374 | |
1375 | if (percpu) |
			irq_set_chip_and_handler_name(irq, &xen_percpu_chip,
						      handle_percpu_irq, "virq");
		else
			irq_set_chip_and_handler_name(irq, &xen_dynamic_chip,
						      handle_edge_irq, "virq");
1381 | |
1382 | bind_virq.virq = virq; |
1383 | bind_virq.vcpu = xen_vcpu_nr(cpu); |
		ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
						  &bind_virq);
1386 | if (ret == 0) |
1387 | evtchn = bind_virq.port; |
1388 | else { |
1389 | if (ret == -EEXIST) |
				ret = find_virq(virq, cpu, &evtchn);
1391 | BUG_ON(ret < 0); |
1392 | } |
1393 | |
1394 | ret = xen_irq_info_virq_setup(cpu, irq, evtchn, virq); |
1395 | if (ret < 0) { |
1396 | __unbind_from_irq(irq); |
1397 | irq = ret; |
1398 | goto out; |
1399 | } |
1400 | |
1401 | /* |
1402 | * Force the affinity mask for percpu interrupts so proc |
1403 | * shows the correct target. |
1404 | */ |
		bind_evtchn_to_cpu(evtchn, cpu, percpu);
1406 | } else { |
1407 | struct irq_info *info = info_for_irq(irq); |
1408 | WARN_ON(info == NULL || info->type != IRQT_VIRQ); |
1409 | } |
1410 | |
1411 | out: |
	mutex_unlock(&irq_mapping_update_lock);
1413 | |
1414 | return irq; |
1415 | } |
1416 | |
1417 | static void unbind_from_irq(unsigned int irq) |
1418 | { |
1419 | mutex_lock(&irq_mapping_update_lock); |
1420 | __unbind_from_irq(irq); |
	mutex_unlock(&irq_mapping_update_lock);
1422 | } |
1423 | |
1424 | static int bind_evtchn_to_irqhandler_chip(evtchn_port_t evtchn, |
1425 | irq_handler_t handler, |
1426 | unsigned long irqflags, |
1427 | const char *devname, void *dev_id, |
1428 | struct irq_chip *chip) |
1429 | { |
1430 | int irq, retval; |
1431 | |
1432 | irq = bind_evtchn_to_irq_chip(evtchn, chip, NULL); |
1433 | if (irq < 0) |
1434 | return irq; |
	retval = request_irq(irq, handler, irqflags, devname, dev_id);
1436 | if (retval != 0) { |
1437 | unbind_from_irq(irq); |
1438 | return retval; |
1439 | } |
1440 | |
1441 | return irq; |
1442 | } |
1443 | |
1444 | int bind_evtchn_to_irqhandler(evtchn_port_t evtchn, |
1445 | irq_handler_t handler, |
1446 | unsigned long irqflags, |
1447 | const char *devname, void *dev_id) |
1448 | { |
1449 | return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags, |
1450 | devname, dev_id, |
					      &xen_dynamic_chip);
1452 | } |
1453 | EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler); |
1454 | |
1455 | int bind_evtchn_to_irqhandler_lateeoi(evtchn_port_t evtchn, |
1456 | irq_handler_t handler, |
1457 | unsigned long irqflags, |
1458 | const char *devname, void *dev_id) |
1459 | { |
1460 | return bind_evtchn_to_irqhandler_chip(evtchn, handler, irqflags, |
1461 | devname, dev_id, |
					      &xen_lateeoi_chip);
1463 | } |
1464 | EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler_lateeoi); |
1465 | |
1466 | static int bind_interdomain_evtchn_to_irqhandler_chip( |
1467 | struct xenbus_device *dev, evtchn_port_t remote_port, |
1468 | irq_handler_t handler, unsigned long irqflags, |
1469 | const char *devname, void *dev_id, struct irq_chip *chip) |
1470 | { |
1471 | int irq, retval; |
1472 | |
1473 | irq = bind_interdomain_evtchn_to_irq_chip(dev, remote_port, chip); |
1474 | if (irq < 0) |
1475 | return irq; |
1476 | |
	retval = request_irq(irq, handler, irqflags, devname, dev_id);
1478 | if (retval != 0) { |
1479 | unbind_from_irq(irq); |
1480 | return retval; |
1481 | } |
1482 | |
1483 | return irq; |
1484 | } |
1485 | |
1486 | int bind_interdomain_evtchn_to_irqhandler_lateeoi(struct xenbus_device *dev, |
1487 | evtchn_port_t remote_port, |
1488 | irq_handler_t handler, |
1489 | unsigned long irqflags, |
1490 | const char *devname, |
1491 | void *dev_id) |
1492 | { |
1493 | return bind_interdomain_evtchn_to_irqhandler_chip(dev, |
1494 | remote_port, handler, irqflags, devname, |
			dev_id, &xen_lateeoi_chip);
1496 | } |
1497 | EXPORT_SYMBOL_GPL(bind_interdomain_evtchn_to_irqhandler_lateeoi); |
1498 | |
1499 | int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, |
1500 | irq_handler_t handler, |
1501 | unsigned long irqflags, const char *devname, void *dev_id) |
1502 | { |
1503 | int irq, retval; |
1504 | |
	irq = bind_virq_to_irq(virq, cpu, irqflags & IRQF_PERCPU);
1506 | if (irq < 0) |
1507 | return irq; |
	retval = request_irq(irq, handler, irqflags, devname, dev_id);
1509 | if (retval != 0) { |
1510 | unbind_from_irq(irq); |
1511 | return retval; |
1512 | } |
1513 | |
1514 | return irq; |
1515 | } |
1516 | EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler); |
1517 | |
1518 | int bind_ipi_to_irqhandler(enum ipi_vector ipi, |
1519 | unsigned int cpu, |
1520 | irq_handler_t handler, |
1521 | unsigned long irqflags, |
1522 | const char *devname, |
1523 | void *dev_id) |
1524 | { |
1525 | int irq, retval; |
1526 | |
1527 | irq = bind_ipi_to_irq(ipi, cpu); |
1528 | if (irq < 0) |
1529 | return irq; |
1530 | |
1531 | irqflags |= IRQF_NO_SUSPEND | IRQF_FORCE_RESUME | IRQF_EARLY_RESUME; |
	retval = request_irq(irq, handler, irqflags, devname, dev_id);
1533 | if (retval != 0) { |
1534 | unbind_from_irq(irq); |
1535 | return retval; |
1536 | } |
1537 | |
1538 | return irq; |
1539 | } |
1540 | |
1541 | void unbind_from_irqhandler(unsigned int irq, void *dev_id) |
1542 | { |
1543 | struct irq_info *info = info_for_irq(irq); |
1544 | |
1545 | if (WARN_ON(!info)) |
1546 | return; |
1547 | free_irq(irq, dev_id); |
1548 | unbind_from_irq(irq); |
1549 | } |
1550 | EXPORT_SYMBOL_GPL(unbind_from_irqhandler); |
1551 | |
1552 | /** |
1553 | * xen_set_irq_priority() - set an event channel priority. |
 * @irq: irq bound to an event channel.
1555 | * @priority: priority between XEN_IRQ_PRIORITY_MAX and XEN_IRQ_PRIORITY_MIN. |
1556 | */ |
1557 | int xen_set_irq_priority(unsigned irq, unsigned priority) |
1558 | { |
1559 | struct evtchn_set_priority set_priority; |
1560 | |
1561 | set_priority.port = evtchn_from_irq(irq); |
1562 | set_priority.priority = priority; |
1563 | |
	return HYPERVISOR_event_channel_op(EVTCHNOP_set_priority,
					   &set_priority);
1566 | } |
1567 | EXPORT_SYMBOL_GPL(xen_set_irq_priority); |
1568 | |
1569 | int evtchn_make_refcounted(evtchn_port_t evtchn, bool is_static) |
1570 | { |
1571 | int irq = get_evtchn_to_irq(evtchn); |
1572 | struct irq_info *info; |
1573 | |
1574 | if (irq == -1) |
1575 | return -ENOENT; |
1576 | |
1577 | info = info_for_irq(irq); |
1578 | |
1579 | if (!info) |
1580 | return -ENOENT; |
1581 | |
1582 | WARN_ON(info->refcnt != -1); |
1583 | |
1584 | info->refcnt = 1; |
1585 | info->is_static = is_static; |
1586 | |
1587 | return 0; |
1588 | } |
1589 | EXPORT_SYMBOL_GPL(evtchn_make_refcounted); |
1590 | |
1591 | int evtchn_get(evtchn_port_t evtchn) |
1592 | { |
1593 | int irq; |
1594 | struct irq_info *info; |
1595 | int err = -ENOENT; |
1596 | |
1597 | if (evtchn >= xen_evtchn_max_channels()) |
1598 | return -EINVAL; |
1599 | |
1600 | mutex_lock(&irq_mapping_update_lock); |
1601 | |
1602 | irq = get_evtchn_to_irq(evtchn); |
1603 | if (irq == -1) |
1604 | goto done; |
1605 | |
1606 | info = info_for_irq(irq); |
1607 | |
1608 | if (!info) |
1609 | goto done; |
1610 | |
1611 | err = -EINVAL; |
1612 | if (info->refcnt <= 0 || info->refcnt == SHRT_MAX) |
1613 | goto done; |
1614 | |
1615 | info->refcnt++; |
1616 | err = 0; |
1617 | done: |
	mutex_unlock(&irq_mapping_update_lock);
1619 | |
1620 | return err; |
1621 | } |
1622 | EXPORT_SYMBOL_GPL(evtchn_get); |
1623 | |
1624 | void evtchn_put(evtchn_port_t evtchn) |
1625 | { |
1626 | int irq = get_evtchn_to_irq(evtchn); |
1627 | if (WARN_ON(irq == -1)) |
1628 | return; |
1629 | unbind_from_irq(irq); |
1630 | } |
1631 | EXPORT_SYMBOL_GPL(evtchn_put); |
1632 | |
1633 | void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector) |
1634 | { |
1635 | int irq; |
1636 | |
1637 | #ifdef CONFIG_X86 |
1638 | if (unlikely(vector == XEN_NMI_VECTOR)) { |
		int rc = HYPERVISOR_vcpu_op(VCPUOP_send_nmi, xen_vcpu_nr(cpu),
					    NULL);
		if (rc < 0)
			printk(KERN_WARNING "Sending nmi to CPU%d failed (rc:%d)\n", cpu, rc);
1643 | return; |
1644 | } |
1645 | #endif |
1646 | irq = per_cpu(ipi_to_irq, cpu)[vector]; |
1647 | BUG_ON(irq < 0); |
1648 | notify_remote_via_irq(irq); |
1649 | } |
1650 | |
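/*
 * Per-invocation state of the event handling loop: once a pass has run for
 * more than event_loop_timeout jiffies (checked every 256 events),
 * defer_eoi is set and all further EOIs go through the lateeoi machinery so
 * the loop can terminate promptly.
 */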
1651 | struct evtchn_loop_ctrl { |
1652 | ktime_t timeout; |
1653 | unsigned count; |
1654 | bool defer_eoi; |
1655 | }; |
1656 | |
1657 | void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl) |
1658 | { |
1659 | int irq; |
1660 | struct irq_info *info; |
1661 | struct xenbus_device *dev; |
1662 | |
	irq = get_evtchn_to_irq(port);
1664 | if (irq == -1) |
1665 | return; |
1666 | |
1667 | /* |
1668 | * Check for timeout every 256 events. |
1669 | * We are setting the timeout value only after the first 256 |
1670 | * events in order to not hurt the common case of few loop |
1671 | * iterations. The 256 is basically an arbitrary value. |
1672 | * |
1673 | * In case we are hitting the timeout we need to defer all further |
1674 | * EOIs in order to ensure to leave the event handling loop rather |
1675 | * sooner than later. |
1676 | */ |
1677 | if (!ctrl->defer_eoi && !(++ctrl->count & 0xff)) { |
1678 | ktime_t kt = ktime_get(); |
1679 | |
1680 | if (!ctrl->timeout) { |
			kt = ktime_add_ms(kt,
					  jiffies_to_msecs(event_loop_timeout));
1683 | ctrl->timeout = kt; |
1684 | } else if (kt > ctrl->timeout) { |
1685 | ctrl->defer_eoi = true; |
1686 | } |
1687 | } |
1688 | |
1689 | info = info_for_irq(irq); |
1690 | if (xchg_acquire(&info->is_active, 1)) |
1691 | return; |
1692 | |
1693 | dev = (info->type == IRQT_EVTCHN) ? info->u.interdomain : NULL; |
1694 | if (dev) |
		atomic_inc(&dev->events);
1696 | |
1697 | if (ctrl->defer_eoi) { |
1698 | info->eoi_cpu = smp_processor_id(); |
1699 | info->irq_epoch = __this_cpu_read(irq_epoch); |
1700 | info->eoi_time = get_jiffies_64() + event_eoi_delay; |
1701 | } |
1702 | |
1703 | generic_handle_irq(irq); |
1704 | } |
1705 | |
1706 | int xen_evtchn_do_upcall(void) |
1707 | { |
1708 | struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); |
1709 | int ret = vcpu_info->evtchn_upcall_pending ? IRQ_HANDLED : IRQ_NONE; |
1710 | int cpu = smp_processor_id(); |
1711 | struct evtchn_loop_ctrl ctrl = { 0 }; |
1712 | |
1713 | /* |
1714 | * When closing an event channel the associated IRQ must not be freed |
1715 | * until all cpus have left the event handling loop. This is ensured |
1716 | * by taking the rcu_read_lock() while handling events, as freeing of |
1717 | * the IRQ is handled via queue_rcu_work() _after_ closing the event |
1718 | * channel. |
1719 | */ |
1720 | rcu_read_lock(); |
1721 | |
1722 | do { |
1723 | vcpu_info->evtchn_upcall_pending = 0; |
1724 | |
		xen_evtchn_handle_events(cpu, &ctrl);
1726 | |
1727 | BUG_ON(!irqs_disabled()); |
1728 | |
1729 | virt_rmb(); /* Hypervisor can set upcall pending. */ |
1730 | |
1731 | } while (vcpu_info->evtchn_upcall_pending); |
1732 | |
1733 | rcu_read_unlock(); |
1734 | |
1735 | /* |
1736 | * Increment irq_epoch only now to defer EOIs only for |
1737 | * xen_irq_lateeoi() invocations occurring from inside the loop |
1738 | * above. |
1739 | */ |
1740 | __this_cpu_inc(irq_epoch); |
1741 | |
1742 | return ret; |
1743 | } |
1744 | EXPORT_SYMBOL_GPL(xen_evtchn_do_upcall); |
1745 | |
1746 | /* Rebind a new event channel to an existing irq. */ |
1747 | void rebind_evtchn_irq(evtchn_port_t evtchn, int irq) |
1748 | { |
1749 | struct irq_info *info = info_for_irq(irq); |
1750 | |
1751 | if (WARN_ON(!info)) |
1752 | return; |
1753 | |
1754 | /* Make sure the irq is masked, since the new event channel |
1755 | will also be masked. */ |
1756 | disable_irq(irq); |
1757 | |
1758 | mutex_lock(&irq_mapping_update_lock); |
1759 | |
1760 | /* After resume the irq<->evtchn mappings are all cleared out */ |
1761 | BUG_ON(get_evtchn_to_irq(evtchn) != -1); |
1762 | /* Expect irq to have been bound before, |
1763 | so there should be a proper type */ |
1764 | BUG_ON(info->type == IRQT_UNBOUND); |
1765 | |
1766 | (void)xen_irq_info_evtchn_setup(irq, evtchn, NULL); |
1767 | |
	mutex_unlock(&irq_mapping_update_lock);

	bind_evtchn_to_cpu(evtchn, info->cpu, false);
1771 | |
1772 | /* Unmask the event channel. */ |
1773 | enable_irq(irq); |
1774 | } |
1775 | |
1776 | /* Rebind an evtchn so that it gets delivered to a specific cpu */ |
1777 | static int xen_rebind_evtchn_to_cpu(struct irq_info *info, unsigned int tcpu) |
1778 | { |
1779 | struct evtchn_bind_vcpu bind_vcpu; |
1780 | evtchn_port_t evtchn = info ? info->evtchn : 0; |
1781 | |
1782 | if (!VALID_EVTCHN(evtchn)) |
1783 | return -1; |
1784 | |
1785 | if (!xen_support_evtchn_rebind()) |
1786 | return -1; |
1787 | |
1788 | /* Send future instances of this interrupt to other vcpu. */ |
1789 | bind_vcpu.port = evtchn; |
	bind_vcpu.vcpu = xen_vcpu_nr(tcpu);
1791 | |
1792 | /* |
1793 | * Mask the event while changing the VCPU binding to prevent |
1794 | * it being delivered on an unexpected VCPU. |
1795 | */ |
1796 | do_mask(info, EVT_MASK_REASON_TEMPORARY); |
1797 | |
1798 | /* |
1799 | * If this fails, it usually just indicates that we're dealing with a |
1800 | * virq or IPI channel, which don't actually need to be rebound. Ignore |
1801 | * it, but don't do the xenlinux-level rebind in that case. |
1802 | */ |
	if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
		bind_evtchn_to_cpu(evtchn, tcpu, false);
1805 | |
1806 | do_unmask(info, EVT_MASK_REASON_TEMPORARY); |
1807 | |
1808 | return 0; |
1809 | } |
1810 | |
1811 | /* |
1812 | * Find the CPU within @dest mask which has the least number of channels |
1813 | * assigned. This is not precise as the per cpu counts can be modified |
1814 | * concurrently. |
1815 | */ |
1816 | static unsigned int select_target_cpu(const struct cpumask *dest) |
1817 | { |
1818 | unsigned int cpu, best_cpu = UINT_MAX, minch = UINT_MAX; |
1819 | |
1820 | for_each_cpu_and(cpu, dest, cpu_online_mask) { |
		unsigned int curch = atomic_read(&channels_on_cpu[cpu]);
1822 | |
1823 | if (curch < minch) { |
1824 | minch = curch; |
1825 | best_cpu = cpu; |
1826 | } |
1827 | } |
1828 | |
1829 | /* |
1830 | * Catch the unlikely case that dest contains no online CPUs. Can't |
1831 | * recurse. |
1832 | */ |
1833 | if (best_cpu == UINT_MAX) |
1834 | return select_target_cpu(cpu_online_mask); |
1835 | |
1836 | return best_cpu; |
1837 | } |
1838 | |
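/*
 * irq_chip affinity callback: pick the least loaded online CPU from the
 * requested mask and rebind the event channel to it.
 */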
1839 | static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest, |
1840 | bool force) |
1841 | { |
1842 | unsigned int tcpu = select_target_cpu(dest); |
1843 | int ret; |
1844 | |
	ret = xen_rebind_evtchn_to_cpu(info_for_irq(data->irq), tcpu);
	if (!ret)
		irq_data_update_effective_affinity(data, cpumask_of(tcpu));
1848 | |
1849 | return ret; |
1850 | } |
1851 | |
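/*
 * irq_chip mask/unmask callbacks: they toggle the EXPLICIT mask reason on
 * the underlying event channel.
 */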
1852 | static void enable_dynirq(struct irq_data *data) |
1853 | { |
	struct irq_info *info = info_for_irq(data->irq);
1855 | evtchn_port_t evtchn = info ? info->evtchn : 0; |
1856 | |
1857 | if (VALID_EVTCHN(evtchn)) |
1858 | do_unmask(info, EVT_MASK_REASON_EXPLICIT); |
1859 | } |
1860 | |
1861 | static void disable_dynirq(struct irq_data *data) |
1862 | { |
	struct irq_info *info = info_for_irq(data->irq);
1864 | evtchn_port_t evtchn = info ? info->evtchn : 0; |
1865 | |
1866 | if (VALID_EVTCHN(evtchn)) |
1867 | do_mask(info, EVT_MASK_REASON_EXPLICIT); |
1868 | } |
1869 | |
1870 | static void ack_dynirq(struct irq_data *data) |
1871 | { |
	struct irq_info *info = info_for_irq(data->irq);
1873 | evtchn_port_t evtchn = info ? info->evtchn : 0; |
1874 | |
1875 | if (VALID_EVTCHN(evtchn)) |
1876 | event_handler_exit(info); |
1877 | } |
1878 | |
1879 | static void mask_ack_dynirq(struct irq_data *data) |
1880 | { |
1881 | disable_dynirq(data); |
1882 | ack_dynirq(data); |
1883 | } |
1884 | |
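/*
 * Ack callback for the lateeoi chip: mask the channel until the interrupt
 * handler signals completion via xen_irq_lateeoi(), which performs the
 * deferred EOI and unmask.
 */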
1885 | static void lateeoi_ack_dynirq(struct irq_data *data) |
1886 | { |
	struct irq_info *info = info_for_irq(data->irq);
1888 | evtchn_port_t evtchn = info ? info->evtchn : 0; |
1889 | |
1890 | if (VALID_EVTCHN(evtchn)) { |
1891 | do_mask(info, EVT_MASK_REASON_EOI_PENDING); |
1892 | /* |
1893 | * Don't call event_handler_exit(). |
1894 | * Need to keep is_active non-zero in order to ignore re-raised |
1895 | * events after cpu affinity changes while a lateeoi is pending. |
1896 | */ |
		clear_evtchn(evtchn);
1898 | } |
1899 | } |
1900 | |
1901 | static void lateeoi_mask_ack_dynirq(struct irq_data *data) |
1902 | { |
	struct irq_info *info = info_for_irq(data->irq);
1904 | evtchn_port_t evtchn = info ? info->evtchn : 0; |
1905 | |
1906 | if (VALID_EVTCHN(evtchn)) { |
1907 | do_mask(info, EVT_MASK_REASON_EXPLICIT); |
1908 | event_handler_exit(info); |
1909 | } |
1910 | } |
1911 | |
1912 | static int retrigger_dynirq(struct irq_data *data) |
1913 | { |
	struct irq_info *info = info_for_irq(data->irq);
1915 | evtchn_port_t evtchn = info ? info->evtchn : 0; |
1916 | |
1917 | if (!VALID_EVTCHN(evtchn)) |
1918 | return 0; |
1919 | |
1920 | do_mask(info, EVT_MASK_REASON_TEMPORARY); |
1921 | set_evtchn(evtchn); |
1922 | do_unmask(info, EVT_MASK_REASON_TEMPORARY); |
1923 | |
1924 | return 1; |
1925 | } |
1926 | |
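/*
 * Resume helpers: after suspend/resume every irq <-> event channel binding
 * is stale, so restore_pirqs(), restore_cpu_virqs() and restore_cpu_ipis()
 * re-establish the bindings with the hypervisor.
 */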
1927 | static void restore_pirqs(void) |
1928 | { |
1929 | int pirq, rc, irq, gsi; |
1930 | struct physdev_map_pirq map_irq; |
1931 | struct irq_info *info; |
1932 | |
1933 | list_for_each_entry(info, &xen_irq_list_head, list) { |
1934 | if (info->type != IRQT_PIRQ) |
1935 | continue; |
1936 | |
1937 | pirq = info->u.pirq.pirq; |
1938 | gsi = info->u.pirq.gsi; |
1939 | irq = info->irq; |
1940 | |
1941 | /* save/restore of PT devices doesn't work, so at this point the |
1942 | * only devices present are GSI based emulated devices */ |
1943 | if (!gsi) |
1944 | continue; |
1945 | |
1946 | map_irq.domid = DOMID_SELF; |
1947 | map_irq.type = MAP_PIRQ_TYPE_GSI; |
1948 | map_irq.index = gsi; |
1949 | map_irq.pirq = pirq; |
1950 | |
		rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
1952 | if (rc) { |
			pr_warn("xen map irq failed gsi=%d irq=%d pirq=%d rc=%d\n",
1954 | gsi, irq, pirq, rc); |
1955 | xen_free_irq(irq); |
1956 | continue; |
1957 | } |
1958 | |
		printk(KERN_DEBUG "xen: --> irq=%d, pirq=%d\n", irq, map_irq.pirq);
1960 | |
1961 | __startup_pirq(irq); |
1962 | } |
1963 | } |
1964 | |
1965 | static void restore_cpu_virqs(unsigned int cpu) |
1966 | { |
1967 | struct evtchn_bind_virq bind_virq; |
1968 | evtchn_port_t evtchn; |
1969 | int virq, irq; |
1970 | |
1971 | for (virq = 0; virq < NR_VIRQS; virq++) { |
1972 | if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) |
1973 | continue; |
1974 | |
1975 | BUG_ON(virq_from_irq(irq) != virq); |
1976 | |
1977 | /* Get a new binding from Xen. */ |
1978 | bind_virq.virq = virq; |
1979 | bind_virq.vcpu = xen_vcpu_nr(cpu); |
		if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
						&bind_virq) != 0)
1982 | BUG(); |
1983 | evtchn = bind_virq.port; |
1984 | |
1985 | /* Record the new mapping. */ |
1986 | (void)xen_irq_info_virq_setup(cpu, irq, evtchn, virq); |
1987 | /* The affinity mask is still valid */ |
		bind_evtchn_to_cpu(evtchn, cpu, false);
1989 | } |
1990 | } |
1991 | |
1992 | static void restore_cpu_ipis(unsigned int cpu) |
1993 | { |
1994 | struct evtchn_bind_ipi bind_ipi; |
1995 | evtchn_port_t evtchn; |
1996 | int ipi, irq; |
1997 | |
1998 | for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) { |
1999 | if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) |
2000 | continue; |
2001 | |
2002 | BUG_ON(ipi_from_irq(irq) != ipi); |
2003 | |
2004 | /* Get a new binding from Xen. */ |
2005 | bind_ipi.vcpu = xen_vcpu_nr(cpu); |
		if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
						&bind_ipi) != 0)
2008 | BUG(); |
2009 | evtchn = bind_ipi.port; |
2010 | |
2011 | /* Record the new mapping. */ |
2012 | (void)xen_irq_info_ipi_setup(cpu, irq, evtchn, ipi); |
2013 | /* The affinity mask is still valid */ |
		bind_evtchn_to_cpu(evtchn, cpu, false);
2015 | } |
2016 | } |
2017 | |
2018 | /* Clear an irq's pending state, in preparation for polling on it */ |
2019 | void xen_clear_irq_pending(int irq) |
2020 | { |
2021 | struct irq_info *info = info_for_irq(irq); |
2022 | evtchn_port_t evtchn = info ? info->evtchn : 0; |
2023 | |
2024 | if (VALID_EVTCHN(evtchn)) |
2025 | event_handler_exit(info); |
2026 | } |
2027 | EXPORT_SYMBOL(xen_clear_irq_pending); |
2028 | void xen_set_irq_pending(int irq) |
2029 | { |
2030 | evtchn_port_t evtchn = evtchn_from_irq(irq); |
2031 | |
2032 | if (VALID_EVTCHN(evtchn)) |
2033 | set_evtchn(evtchn); |
2034 | } |
2035 | |
2036 | bool xen_test_irq_pending(int irq) |
2037 | { |
2038 | evtchn_port_t evtchn = evtchn_from_irq(irq); |
2039 | bool ret = false; |
2040 | |
2041 | if (VALID_EVTCHN(evtchn)) |
		ret = test_evtchn(evtchn);
2043 | |
2044 | return ret; |
2045 | } |
2046 | |
2047 | /* Poll waiting for an irq to become pending with timeout. In the usual case, |
2048 | * the irq will be disabled so it won't deliver an interrupt. */ |
2049 | void xen_poll_irq_timeout(int irq, u64 timeout) |
2050 | { |
2051 | evtchn_port_t evtchn = evtchn_from_irq(irq); |
2052 | |
2053 | if (VALID_EVTCHN(evtchn)) { |
2054 | struct sched_poll poll; |
2055 | |
2056 | poll.nr_ports = 1; |
2057 | poll.timeout = timeout; |
2058 | set_xen_guest_handle(poll.ports, &evtchn); |
2059 | |
		if (HYPERVISOR_sched_op(SCHEDOP_poll, &poll) != 0)
2061 | BUG(); |
2062 | } |
2063 | } |
2064 | EXPORT_SYMBOL(xen_poll_irq_timeout); |
2065 | /* Poll waiting for an irq to become pending. In the usual case, the |
2066 | * irq will be disabled so it won't deliver an interrupt. */ |
2067 | void xen_poll_irq(int irq) |
2068 | { |
2069 | xen_poll_irq_timeout(irq, 0 /* no timeout */); |
2070 | } |
2071 | |
2072 | /* Check whether the IRQ line is shared with other guests. */ |
2073 | int xen_test_irq_shared(int irq) |
2074 | { |
2075 | struct irq_info *info = info_for_irq(irq); |
2076 | struct physdev_irq_status_query irq_status; |
2077 | |
2078 | if (WARN_ON(!info)) |
2079 | return -ENOENT; |
2080 | |
2081 | irq_status.irq = info->u.pirq.pirq; |
2082 | |
	if (HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &irq_status))
2084 | return 0; |
2085 | return !(irq_status.flags & XENIRQSTAT_shared); |
2086 | } |
2087 | EXPORT_SYMBOL_GPL(xen_test_irq_shared); |
2088 | |
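/*
 * Re-establish all event channel state after a suspend/resume cycle: zap the
 * stale bindings, then rebind VIRQs, IPIs and PIRQs against the new
 * hypervisor instance.
 */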
2089 | void xen_irq_resume(void) |
2090 | { |
2091 | unsigned int cpu; |
2092 | struct irq_info *info; |
2093 | |
2094 | /* New event-channel space is not 'live' yet. */ |
2095 | xen_evtchn_resume(); |
2096 | |
2097 | /* No IRQ <-> event-channel mappings. */ |
2098 | list_for_each_entry(info, &xen_irq_list_head, list) { |
2099 | /* Zap event-channel binding */ |
2100 | info->evtchn = 0; |
2101 | /* Adjust accounting */ |
2102 | channels_on_cpu_dec(info); |
2103 | } |
2104 | |
2105 | clear_evtchn_to_irq_all(); |
2106 | |
2107 | for_each_possible_cpu(cpu) { |
2108 | restore_cpu_virqs(cpu); |
2109 | restore_cpu_ipis(cpu); |
2110 | } |
2111 | |
2112 | restore_pirqs(); |
2113 | } |
2114 | |
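/* Default irq_chip for dynamically bound event channels. */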
2115 | static struct irq_chip xen_dynamic_chip __read_mostly = { |
	.name = "xen-dyn",
2117 | |
2118 | .irq_disable = disable_dynirq, |
2119 | .irq_mask = disable_dynirq, |
2120 | .irq_unmask = enable_dynirq, |
2121 | |
2122 | .irq_ack = ack_dynirq, |
2123 | .irq_mask_ack = mask_ack_dynirq, |
2124 | |
2125 | .irq_set_affinity = set_affinity_irq, |
2126 | .irq_retrigger = retrigger_dynirq, |
2127 | }; |
2128 | |
2129 | static struct irq_chip xen_lateeoi_chip __read_mostly = { |
2130 | /* The chip name needs to contain "xen-dyn" for irqbalance to work. */ |
	.name = "xen-dyn-lateeoi",
2132 | |
2133 | .irq_disable = disable_dynirq, |
2134 | .irq_mask = disable_dynirq, |
2135 | .irq_unmask = enable_dynirq, |
2136 | |
2137 | .irq_ack = lateeoi_ack_dynirq, |
2138 | .irq_mask_ack = lateeoi_mask_ack_dynirq, |
2139 | |
2140 | .irq_set_affinity = set_affinity_irq, |
2141 | .irq_retrigger = retrigger_dynirq, |
2142 | }; |
2143 | |
2144 | static struct irq_chip xen_pirq_chip __read_mostly = { |
	.name = "xen-pirq",
2146 | |
2147 | .irq_startup = startup_pirq, |
2148 | .irq_shutdown = shutdown_pirq, |
2149 | .irq_enable = enable_pirq, |
2150 | .irq_disable = disable_pirq, |
2151 | |
2152 | .irq_mask = disable_dynirq, |
2153 | .irq_unmask = enable_dynirq, |
2154 | |
2155 | .irq_ack = eoi_pirq, |
2156 | .irq_eoi = eoi_pirq, |
2157 | .irq_mask_ack = mask_ack_pirq, |
2158 | |
2159 | .irq_set_affinity = set_affinity_irq, |
2160 | |
2161 | .irq_retrigger = retrigger_dynirq, |
2162 | }; |
2163 | |
2164 | static struct irq_chip xen_percpu_chip __read_mostly = { |
	.name = "xen-percpu",
2166 | |
2167 | .irq_disable = disable_dynirq, |
2168 | .irq_mask = disable_dynirq, |
2169 | .irq_unmask = enable_dynirq, |
2170 | |
2171 | .irq_ack = ack_dynirq, |
2172 | }; |
2173 | |
2174 | #ifdef CONFIG_X86 |
2175 | #ifdef CONFIG_XEN_PVHVM |
2176 | /* Vector callbacks are better than PCI interrupts to receive event |
2177 | * channel notifications because we can receive vector callbacks on any |
2178 | * vcpu and we don't need PCI support or APIC interactions. */ |
2179 | void xen_setup_callback_vector(void) |
2180 | { |
2181 | uint64_t callback_via; |
2182 | |
2183 | if (xen_have_vector_callback) { |
2184 | callback_via = HVM_CALLBACK_VECTOR(HYPERVISOR_CALLBACK_VECTOR); |
		if (xen_set_callback_via(callback_via)) {
			pr_err("Request for Xen HVM callback vector failed\n");
2187 | xen_have_vector_callback = false; |
2188 | } |
2189 | } |
2190 | } |
2191 | |
2192 | /* |
2193 | * Setup per-vCPU vector-type callbacks. If this setup is unavailable, |
 * fall back to the global vector-type callback.
2195 | */ |
2196 | static __init void xen_init_setup_upcall_vector(void) |
2197 | { |
2198 | if (!xen_have_vector_callback) |
2199 | return; |
2200 | |
	if ((cpuid_eax(xen_cpuid_base() + 4) & XEN_HVM_CPUID_UPCALL_VECTOR) &&
	    !xen_set_upcall_vector(0))
2203 | xen_percpu_upcall = true; |
2204 | else if (xen_feature(XENFEAT_hvm_callback_vector)) |
2205 | xen_setup_callback_vector(); |
2206 | else |
2207 | xen_have_vector_callback = false; |
2208 | } |
2209 | |
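/* Register HYPERVISOR_CALLBACK_VECTOR as the per-vCPU upcall vector of @cpu. */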
2210 | int xen_set_upcall_vector(unsigned int cpu) |
2211 | { |
2212 | int rc; |
2213 | xen_hvm_evtchn_upcall_vector_t op = { |
2214 | .vector = HYPERVISOR_CALLBACK_VECTOR, |
2215 | .vcpu = per_cpu(xen_vcpu_id, cpu), |
2216 | }; |
2217 | |
	rc = HYPERVISOR_hvm_op(HVMOP_set_evtchn_upcall_vector, &op);
2219 | if (rc) |
2220 | return rc; |
2221 | |
	/* Trick the toolstack into thinking we are enlightened. */
2223 | if (!cpu) |
		rc = xen_set_callback_via(1);
2225 | |
2226 | return rc; |
2227 | } |
2228 | |
2229 | static __init void xen_alloc_callback_vector(void) |
2230 | { |
2231 | if (!xen_have_vector_callback) |
2232 | return; |
2233 | |
	pr_info("Xen HVM callback vector for event delivery is enabled\n");
	alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_xen_hvm_callback);
2236 | } |
2237 | #else |
2238 | void xen_setup_callback_vector(void) {} |
2239 | static inline void xen_init_setup_upcall_vector(void) {} |
2240 | int xen_set_upcall_vector(unsigned int cpu) {} |
2241 | static inline void xen_alloc_callback_vector(void) {} |
2242 | #endif /* CONFIG_XEN_PVHVM */ |
2243 | #endif /* CONFIG_X86 */ |
2244 | |
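/*
 * Prefer the FIFO-based event channel ABI; it can be disabled on the kernel
 * command line with xen.fifo_events=0.
 */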
2245 | bool xen_fifo_events = true; |
2246 | module_param_named(fifo_events, xen_fifo_events, bool, 0); |
2247 | |
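/*
 * CPU hotplug callbacks: let the active event channel ABI set up or tear
 * down its per-cpu state when a CPU comes or goes.
 */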
2248 | static int xen_evtchn_cpu_prepare(unsigned int cpu) |
2249 | { |
2250 | int ret = 0; |
2251 | |
2252 | xen_cpu_init_eoi(cpu); |
2253 | |
2254 | if (evtchn_ops->percpu_init) |
2255 | ret = evtchn_ops->percpu_init(cpu); |
2256 | |
2257 | return ret; |
2258 | } |
2259 | |
2260 | static int xen_evtchn_cpu_dead(unsigned int cpu) |
2261 | { |
2262 | int ret = 0; |
2263 | |
2264 | if (evtchn_ops->percpu_deinit) |
2265 | ret = evtchn_ops->percpu_deinit(cpu); |
2266 | |
2267 | return ret; |
2268 | } |
2269 | |
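/*
 * Event channel initialization: pick the ABI (FIFO if requested and
 * supported, 2-level otherwise), mask all channels and wire up the upcall
 * delivery path for this domain type.
 */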
2270 | void __init xen_init_IRQ(void) |
2271 | { |
2272 | int ret = -EINVAL; |
2273 | evtchn_port_t evtchn; |
2274 | |
2275 | if (xen_fifo_events) |
2276 | ret = xen_evtchn_fifo_init(); |
2277 | if (ret < 0) { |
2278 | xen_evtchn_2l_init(); |
2279 | xen_fifo_events = false; |
2280 | } |
2281 | |
2282 | xen_cpu_init_eoi(smp_processor_id()); |
2283 | |
	cpuhp_setup_state_nocalls(CPUHP_XEN_EVTCHN_PREPARE,
				  "xen/evtchn:prepare",
				  xen_evtchn_cpu_prepare, xen_evtchn_cpu_dead);
2287 | |
	evtchn_to_irq = kcalloc(EVTCHN_ROW(xen_evtchn_max_channels()),
				sizeof(*evtchn_to_irq), GFP_KERNEL);
2290 | BUG_ON(!evtchn_to_irq); |
2291 | |
2292 | /* No event channels are 'live' right now. */ |
2293 | for (evtchn = 0; evtchn < xen_evtchn_nr_channels(); evtchn++) |
		mask_evtchn(evtchn);
2295 | |
2296 | pirq_needs_eoi = pirq_needs_eoi_flag; |
2297 | |
2298 | #ifdef CONFIG_X86 |
2299 | if (xen_pv_domain()) { |
2300 | if (xen_initial_domain()) |
2301 | pci_xen_initial_domain(); |
2302 | } |
2303 | xen_init_setup_upcall_vector(); |
2304 | xen_alloc_callback_vector(); |
2305 | |
2306 | |
2307 | if (xen_hvm_domain()) { |
2308 | native_init_IRQ(); |
2309 | /* pci_xen_hvm_init must be called after native_init_IRQ so that |
2310 | * __acpi_register_gsi can point at the right function */ |
2311 | pci_xen_hvm_init(); |
2312 | } else { |
2313 | int rc; |
2314 | struct physdev_pirq_eoi_gmfn eoi_gmfn; |
2315 | |
2316 | pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO); |
2317 | eoi_gmfn.gmfn = virt_to_gfn(pirq_eoi_map); |
		rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn);
2319 | if (rc != 0) { |
2320 | free_page((unsigned long) pirq_eoi_map); |
2321 | pirq_eoi_map = NULL; |
2322 | } else |
2323 | pirq_needs_eoi = pirq_check_eoi_map; |
2324 | } |
2325 | #endif |
2326 | } |
2327 | |