// SPDX-License-Identifier: GPL-2.0
/*
 * Xen time implementation.
 *
 * This is implemented in terms of a clocksource driver which uses
 * the hypervisor clock as a nanosecond timebase, and a clockevent
 * driver which uses the hypervisor's timer mechanism.
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/clocksource.h>
#include <linux/clockchips.h>
#include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/pvclock_gtod.h>
#include <linux/timekeeper_internal.h>

#include <asm/pvclock.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/cpuid.h>

#include <xen/events.h>
#include <xen/features.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>

#include "xen-ops.h"

/* Minimum amount of time (in nanoseconds) until the next clock event fires */
#define TIMER_SLOP	100000

static u64 xen_sched_clock_offset __read_mostly;

/* Get the TSC speed from Xen */
static unsigned long xen_tsc_khz(void)
{
	struct pvclock_vcpu_time_info *info =
		&HYPERVISOR_shared_info->vcpu_info[0].time;

	setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
	return pvclock_tsc_khz(info);
}

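/*
 * Read the current time from this vcpu's pvclock area. Preemption is
 * disabled around the read so the vcpu (and hence its time info) cannot
 * change underneath us.
 */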
static u64 xen_clocksource_read(void)
{
	struct pvclock_vcpu_time_info *src;
	u64 ret;

	preempt_disable_notrace();
	src = &__this_cpu_read(xen_vcpu)->time;
	ret = pvclock_clocksource_read(src);
	preempt_enable_notrace();
	return ret;
}

static u64 xen_clocksource_get_cycles(struct clocksource *cs)
{
	return xen_clocksource_read();
}

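/*
 * Paravirtualized sched_clock: the pvclock value minus the offset sampled
 * when the clock was set up, so scheduler time starts near zero at boot.
 * Uses the noinstr-friendly pvclock_clocksource_read_nowd() variant since
 * this function itself is noinstr.
 */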
static noinstr u64 xen_sched_clock(void)
{
	struct pvclock_vcpu_time_info *src;
	u64 ret;

	src = &__this_cpu_read(xen_vcpu)->time;
	ret = pvclock_clocksource_read_nowd(src);
	ret -= xen_sched_clock_offset;

	return ret;
}

static void xen_read_wallclock(struct timespec64 *ts)
{
	struct shared_info *s = HYPERVISOR_shared_info;
	struct pvclock_wall_clock *wall_clock = &(s->wc);
	struct pvclock_vcpu_time_info *vcpu_time;

	vcpu_time = &get_cpu_var(xen_vcpu)->time;
	pvclock_read_wallclock(wall_clock, vcpu_time, ts);
	put_cpu_var(xen_vcpu);
}

static void xen_get_wallclock(struct timespec64 *now)
{
	xen_read_wallclock(now);
}

static int xen_set_wallclock(const struct timespec64 *now)
{
	return -ENODEV;
}

static int xen_pvclock_gtod_notify(struct notifier_block *nb,
				   unsigned long was_set, void *priv)
{
	/* Protected by the calling core code serialization */
	static struct timespec64 next_sync;

	struct xen_platform_op op;
	struct timespec64 now;
	struct timekeeper *tk = priv;
	static bool settime64_supported = true;
	int ret;

	now.tv_sec = tk->xtime_sec;
	now.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);

	/*
	 * Only make the expensive hypercall when the clock was set or
	 * when the 11 minute RTC synchronization period has elapsed.
	 */
	if (!was_set && timespec64_compare(&now, &next_sync) < 0)
		return NOTIFY_OK;

again:
	if (settime64_supported) {
		op.cmd = XENPF_settime64;
		op.u.settime64.mbz = 0;
		op.u.settime64.secs = now.tv_sec;
		op.u.settime64.nsecs = now.tv_nsec;
		op.u.settime64.system_time = xen_clocksource_read();
	} else {
		op.cmd = XENPF_settime32;
		op.u.settime32.secs = now.tv_sec;
		op.u.settime32.nsecs = now.tv_nsec;
		op.u.settime32.system_time = xen_clocksource_read();
	}

	ret = HYPERVISOR_platform_op(&op);

	if (ret == -ENOSYS && settime64_supported) {
		settime64_supported = false;
		goto again;
	}
	if (ret < 0)
		return NOTIFY_BAD;

	/*
	 * Move the next drift compensation time 11 minutes
	 * ahead. That's emulating the sync_cmos_clock() update for
	 * the hardware RTC.
	 */
	next_sync = now;
	next_sync.tv_sec += 11 * 60;

	return NOTIFY_OK;
}

static struct notifier_block xen_pvclock_gtod_notifier = {
	.notifier_call = xen_pvclock_gtod_notify,
};

static int xen_cs_enable(struct clocksource *cs)
{
	vclocks_set_used(VDSO_CLOCKMODE_PVCLOCK);
	return 0;
}

static struct clocksource xen_clocksource __read_mostly = {
	.name = "xen",
	.rating = 400,
	.read = xen_clocksource_get_cycles,
	.mask = CLOCKSOURCE_MASK(64),
	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
	.enable = xen_cs_enable,
};

/*
	Xen clockevent implementation

	Xen has two clockevent implementations:

	The old timer_op one works with all released versions of Xen prior
	to version 3.0.4.  This version of the hypervisor provides a
	single-shot timer with nanosecond resolution.  However, it shares
	its event channel with a 100Hz tick which is delivered while the
	vcpu is running.  We don't care about or use this tick, but it will
	cause the core time code to think the timer fired too soon, and
	will end up resetting it each time.  It could be filtered, but
	doing so has complications when the ktime clocksource is not yet
	the xen clocksource (ie, at boot time).

	The new vcpu_op-based timer interface allows the tick timer period
	to be changed or turned off.  The tick timer is not useful as a
	periodic timer because events are only delivered to running vcpus.
	The one-shot timer can report when a timeout is in the past, so
	set_next_event is capable of returning -ETIME when appropriate.
	This interface is used when available.
*/


/*
	Get a hypervisor absolute time.  In theory we could maintain an
	offset between the kernel's time and the hypervisor's time, and
	apply that to a kernel's absolute timeout.  Unfortunately the
	hypervisor and kernel times can drift even if the kernel is using
	the Xen clocksource, because ntp can warp the kernel's clocksource.
*/
static s64 get_abs_timeout(unsigned long delta)
{
	return xen_clocksource_read() + delta;
}

static int xen_timerop_shutdown(struct clock_event_device *evt)
{
	/* cancel timeout */
	HYPERVISOR_set_timer_op(0);

	return 0;
}

static int xen_timerop_set_next_event(unsigned long delta,
				      struct clock_event_device *evt)
{
	WARN_ON(!clockevent_state_oneshot(evt));

	if (HYPERVISOR_set_timer_op(get_abs_timeout(delta)) < 0)
		BUG();

	/* We may have missed the deadline, but there's no real way of
	   knowing for sure.  If the event was in the past, then we'll
	   get an immediate interrupt. */

	return 0;
}

static struct clock_event_device xen_timerop_clockevent __ro_after_init = {
	.name = "xen",
	.features = CLOCK_EVT_FEAT_ONESHOT,

	.max_delta_ns = 0xffffffff,
	.max_delta_ticks = 0xffffffff,
	.min_delta_ns = TIMER_SLOP,
	.min_delta_ticks = TIMER_SLOP,

	.mult = 1,
	.shift = 0,
	.rating = 500,

	.set_state_shutdown = xen_timerop_shutdown,
	.set_next_event = xen_timerop_set_next_event,
};

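/*
 * vcpu_op-based clockevent callbacks: the newer per-vcpu timer interface
 * described above, used when the hypervisor supports it.
 */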
static int xen_vcpuop_shutdown(struct clock_event_device *evt)
{
	int cpu = smp_processor_id();

	if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, xen_vcpu_nr(cpu),
			       NULL) ||
	    HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu),
			       NULL))
		BUG();

	return 0;
}

static int xen_vcpuop_set_oneshot(struct clock_event_device *evt)
{
	int cpu = smp_processor_id();

	if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu),
			       NULL))
		BUG();

	return 0;
}

static int xen_vcpuop_set_next_event(unsigned long delta,
				     struct clock_event_device *evt)
{
	int cpu = smp_processor_id();
	struct vcpu_set_singleshot_timer single;
	int ret;

	WARN_ON(!clockevent_state_oneshot(evt));

	single.timeout_abs_ns = get_abs_timeout(delta);
	/* Get an event anyway, even if the timeout is already expired */
	single.flags = 0;

	ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, xen_vcpu_nr(cpu),
				 &single);
	BUG_ON(ret != 0);

	return ret;
}

static struct clock_event_device xen_vcpuop_clockevent __ro_after_init = {
	.name = "xen",
	.features = CLOCK_EVT_FEAT_ONESHOT,

	.max_delta_ns = 0xffffffff,
	.max_delta_ticks = 0xffffffff,
	.min_delta_ns = TIMER_SLOP,
	.min_delta_ticks = TIMER_SLOP,

	.mult = 1,
	.shift = 0,
	.rating = 500,

	.set_state_shutdown = xen_vcpuop_shutdown,
	.set_state_oneshot = xen_vcpuop_set_oneshot,
	.set_next_event = xen_vcpuop_set_next_event,
};

static const struct clock_event_device *xen_clockevent =
	&xen_timerop_clockevent;

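/*
 * Per-cpu clockevent device. irq == -1 means the timer for that cpu has
 * not been set up yet (see xen_setup_timer()/xen_teardown_timer()).
 */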
struct xen_clock_event_device {
	struct clock_event_device evt;
	char name[16];
};
static DEFINE_PER_CPU(struct xen_clock_event_device, xen_clock_events) = { .evt.irq = -1 };

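/* Per-cpu VIRQ_TIMER handler: forward the tick to the clockevent core. */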
static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
{
	struct clock_event_device *evt = this_cpu_ptr(&xen_clock_events.evt);
	irqreturn_t ret;

	ret = IRQ_NONE;
	if (evt->event_handler) {
		evt->event_handler(evt);
		ret = IRQ_HANDLED;
	}

	return ret;
}

void xen_teardown_timer(int cpu)
{
	struct clock_event_device *evt;

	evt = &per_cpu(xen_clock_events, cpu).evt;

	if (evt->irq >= 0) {
		unbind_from_irqhandler(evt->irq, NULL);
		evt->irq = -1;
	}
}

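/*
 * Bind VIRQ_TIMER for @cpu to xen_timer_interrupt() and initialise the
 * per-cpu clockevent device from the selected (timer_op or vcpu_op)
 * template.
 */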
void xen_setup_timer(int cpu)
{
	struct xen_clock_event_device *xevt = &per_cpu(xen_clock_events, cpu);
	struct clock_event_device *evt = &xevt->evt;
	int irq;

	WARN(evt->irq >= 0, "IRQ%d for CPU%d is already allocated\n", evt->irq, cpu);
	if (evt->irq >= 0)
		xen_teardown_timer(cpu);

	printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu);

	snprintf(xevt->name, sizeof(xevt->name), "timer%d", cpu);

	irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
				      IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER|
				      IRQF_FORCE_RESUME|IRQF_EARLY_RESUME,
				      xevt->name, NULL);
	(void)xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX);

	memcpy(evt, xen_clockevent, sizeof(*evt));

	evt->cpumask = cpumask_of(cpu);
	evt->irq = irq;
}


void xen_setup_cpu_clockevents(void)
{
	clockevents_register_device(this_cpu_ptr(&xen_clock_events.evt));
}

void xen_timer_resume(void)
{
	int cpu;

	if (xen_clockevent != &xen_vcpuop_clockevent)
		return;

	for_each_online_cpu(cpu) {
		if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer,
				       xen_vcpu_nr(cpu), NULL))
			BUG();
	}
}

static struct pvclock_vsyscall_time_info *xen_clock __read_mostly;
static u64 xen_clock_value_saved;

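/*
 * Suspend/resume handling for the secondary (vsyscall) time info area:
 * unregister it before the VM is saved so the hypervisor stops updating
 * it, and remember the current clock value so sched_clock stays monotonic
 * across the save/restore cycle.
 */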
void xen_save_time_memory_area(void)
{
	struct vcpu_register_time_memory_area t;
	int ret;

	xen_clock_value_saved = xen_clocksource_read() - xen_sched_clock_offset;

	if (!xen_clock)
		return;

	t.addr.v = NULL;

	ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t);
	if (ret != 0)
		pr_notice("Cannot save secondary vcpu_time_info (err %d)",
			  ret);
	else
		clear_page(xen_clock);
}

void xen_restore_time_memory_area(void)
{
	struct vcpu_register_time_memory_area t;
	int ret;

	if (!xen_clock)
		goto out;

	t.addr.v = &xen_clock->pvti;

	ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t);

	/*
	 * We don't disable VDSO_CLOCKMODE_PVCLOCK entirely if we fail to
	 * register the secondary time info with Xen or if we migrated to a
	 * host without the necessary flags.  In both cases a process will
	 * either see a zeroed out pvti or see PVCLOCK_TSC_STABLE_BIT unset.
	 * Userspace checks the latter and, if it is 0, discards the data in
	 * pvti and falls back to a system call for a reliable timestamp.
	 */
	if (ret != 0)
		pr_notice("Cannot restore secondary vcpu_time_info (err %d)",
			  ret);

out:
	/* Need pvclock_resume() before using xen_clocksource_read(). */
	pvclock_resume();
	xen_sched_clock_offset = xen_clocksource_read() - xen_clock_value_saved;
}

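/*
 * Register a second vcpu_time_info area that can be exposed to the vDSO,
 * enabling VDSO_CLOCKMODE_PVCLOCK for userspace clock reads.
 */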
static void xen_setup_vsyscall_time_info(void)
{
	struct vcpu_register_time_memory_area t;
	struct pvclock_vsyscall_time_info *ti;
	int ret;

	ti = (struct pvclock_vsyscall_time_info *)get_zeroed_page(GFP_KERNEL);
	if (!ti)
		return;

	t.addr.v = &ti->pvti;

	ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t);
	if (ret) {
		pr_notice("xen: VDSO_CLOCKMODE_PVCLOCK not supported (err %d)\n", ret);
		free_page((unsigned long)ti);
		return;
	}

	/*
	 * If the primary time info had this bit set, the secondary should
	 * have it set too, since it is the same data in both, just in
	 * different memory regions.  But check it anyway in case the
	 * hypervisor is buggy.
	 */
	if (!(ti->pvti.flags & PVCLOCK_TSC_STABLE_BIT)) {
		t.addr.v = NULL;
		ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area,
					 0, &t);
		if (!ret)
			free_page((unsigned long)ti);

		pr_notice("xen: VDSO_CLOCKMODE_PVCLOCK not supported (tsc unstable)\n");
		return;
	}

	xen_clock = ti;
	pvclock_set_pvti_cpu0_va(xen_clock);

	xen_clocksource.vdso_clock_mode = VDSO_CLOCKMODE_PVCLOCK;
}

/*
 * Check if it is possible to safely use the tsc as a clocksource.  This is
 * only true if the hypervisor notifies the guest that its tsc is invariant,
 * the tsc is stable, and the tsc instruction will never be emulated.
 */
static int __init xen_tsc_safe_clocksource(void)
{
	u32 eax, ebx, ecx, edx;

	if (!(boot_cpu_has(X86_FEATURE_CONSTANT_TSC)))
		return 0;

	if (!(boot_cpu_has(X86_FEATURE_NONSTOP_TSC)))
		return 0;

	if (check_tsc_unstable())
		return 0;

	/* Leaf 4, sub-leaf 0 (0x40000x03) */
	cpuid_count(xen_cpuid_base() + 3, 0, &eax, &ebx, &ecx, &edx);

	return ebx == XEN_CPUID_TSC_MODE_NEVER_EMULATE;
}

static void __init xen_time_init(void)
{
	struct pvclock_vcpu_time_info *pvti;
	int cpu = smp_processor_id();
	struct timespec64 tp;

	/*
	 * As Dom0 is never migrated, there is no penalty in using the TSC
	 * there.
	 *
	 * If it is possible for the guest to determine that the tsc is a safe
	 * clocksource, then set xen_clocksource's rating below that of the
	 * tsc so that the system prefers the tsc instead.
	 */
	if (xen_initial_domain())
		xen_clocksource.rating = 275;
	else if (xen_tsc_safe_clocksource())
		xen_clocksource.rating = 299;

	clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC);

	if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu),
			       NULL) == 0) {
		/* Successfully turned off 100Hz tick, so we have the
		   vcpuop-based timer interface */
		printk(KERN_DEBUG "Xen: using vcpuop timer interface\n");
		xen_clockevent = &xen_vcpuop_clockevent;
	}

	/* Set initial system time with full resolution */
	xen_read_wallclock(&tp);
	do_settimeofday64(&tp);

	setup_force_cpu_cap(X86_FEATURE_TSC);

	/*
	 * Check ahead of time whether the primary time info has the
	 * TSC-stable bit set; if so, enable the fast clocksource path and
	 * register the vsyscall time info.
	 */
	pvti = &__this_cpu_read(xen_vcpu)->time;
	if (pvti->flags & PVCLOCK_TSC_STABLE_BIT) {
		pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
		xen_setup_vsyscall_time_info();
	}

	xen_setup_runstate_info(cpu);
	xen_setup_timer(cpu);
	xen_setup_cpu_clockevents();

	xen_time_setup_guest();

	if (xen_initial_domain())
		pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
}

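/* Common time setup shared by the PV and PVHVM init paths below. */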
static void __init xen_init_time_common(void)
{
	xen_sched_clock_offset = xen_clocksource_read();
	static_call_update(pv_steal_clock, xen_steal_clock);
	paravirt_set_sched_clock(xen_sched_clock);

	x86_platform.calibrate_tsc = xen_tsc_khz;
	x86_platform.get_wallclock = xen_get_wallclock;
}

void __init xen_init_time_ops(void)
{
	xen_init_time_common();

	x86_init.timers.timer_init = xen_time_init;
	x86_init.timers.setup_percpu_clockev = x86_init_noop;
	x86_cpuinit.setup_percpu_clockev = x86_init_noop;

	/* Dom0 uses the native method to set the hardware RTC. */
	if (!xen_initial_domain())
		x86_platform.set_wallclock = xen_set_wallclock;
}

#ifdef CONFIG_XEN_PVHVM
static void xen_hvm_setup_cpu_clockevents(void)
{
	int cpu = smp_processor_id();

	xen_setup_runstate_info(cpu);
	/*
	 * xen_setup_timer(cpu) is not called here: snprintf is not safe in
	 * atomic context.  It is done instead in xen_hvm_cpu_notify (which
	 * gets called by smp_init during early bootup and also during CPU
	 * hotplug events).
	 */
	xen_setup_cpu_clockevents();
}

void __init xen_hvm_init_time_ops(void)
{
	static bool hvm_time_initialized;

	if (hvm_time_initialized)
		return;

	/*
	 * The vector callback is needed; otherwise we cannot receive
	 * interrupts on cpus > 0, and at this point we don't know how many
	 * cpus are available.
	 */
	if (!xen_have_vector_callback)
		return;

	if (!xen_feature(XENFEAT_hvm_safe_pvclock)) {
		pr_info_once("Xen doesn't support pvclock on HVM, disable pv timer");
		return;
	}

	/*
	 * Only MAX_VIRT_CPUS 'vcpu_info' are embedded inside 'shared_info'.
	 * __this_cpu_read(xen_vcpu) is still NULL when a Xen HVM guest
	 * boots on a vcpu >= MAX_VIRT_CPUS (e.g., after kexec); accessing
	 * __this_cpu_read(xen_vcpu) via xen_clocksource_read() would panic.
	 *
	 * xen_hvm_init_time_ops() should be called again later, once
	 * __this_cpu_read(xen_vcpu) is available.
	 */
	if (!__this_cpu_read(xen_vcpu)) {
		pr_info("Delay xen_init_time_common() as kernel is running on vcpu=%d\n",
			xen_vcpu_nr(0));
		return;
	}

	xen_init_time_common();

	x86_init.timers.setup_percpu_clockev = xen_time_init;
	x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents;

	x86_platform.set_wallclock = xen_set_wallclock;

	hvm_time_initialized = true;
}
#endif

/* Kernel parameter to specify Xen timer slop (in nanoseconds) */
static int __init parse_xen_timer_slop(char *ptr)
{
	unsigned long slop = memparse(ptr, NULL);

	xen_timerop_clockevent.min_delta_ns = slop;
	xen_timerop_clockevent.min_delta_ticks = slop;
	xen_vcpuop_clockevent.min_delta_ns = slop;
	xen_vcpuop_clockevent.min_delta_ticks = slop;

	return 0;
}
early_param("xen_timer_slop", parse_xen_timer_slop);
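
/*
 * Example (hypothetical value): booting with "xen_timer_slop=50000" lowers
 * the minimum programmed clockevent delta from the default 100 us
 * (TIMER_SLOP) to 50 us.
 */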
658 | |