/*
 * Copyright 2006 Andi Kleen, SUSE Labs.
 * Subject to the GNU Public License, v.2
 *
 * Fast user context implementation of clock_gettime, gettimeofday, and time.
 *
 * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net>
 *  sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany
 *
 * The code should have no internal unresolved relocations.
 * Check with readelf after changing.
 */

#include <uapi/linux/time.h>
#include <asm/vgtod.h>
#include <asm/vvar.h>
#include <asm/unistd.h>
#include <asm/msr.h>
#include <asm/pvclock.h>
#include <asm/mshyperv.h>
#include <linux/math64.h>
#include <linux/time.h>
#include <linux/kernel.h>

#define gtod (&VVAR(vsyscall_gtod_data))

extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts);
extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
extern time_t __vdso_time(time_t *t);
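
/*
 * For reference: userspace normally reaches these entry points through the
 * C library, which resolves them from the vDSO at process startup. A
 * minimal sketch of the usual call path (assuming glibc and <time.h>) is:
 *
 *	struct timespec ts;
 *
 *	clock_gettime(CLOCK_MONOTONIC, &ts);
 *
 * When the requested clock is handled here, no syscall is issued; otherwise
 * one of the fallback paths below enters the kernel.
 */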

#ifdef CONFIG_PARAVIRT_CLOCK
extern u8 pvclock_page
	__attribute__((visibility("hidden")));
#endif

#ifdef CONFIG_HYPERV_TSCPAGE
extern u8 hvclock_page
	__attribute__((visibility("hidden")));
#endif

#ifndef BUILD_VDSO32

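/*
 * Fallback for the 64-bit build: clocks that cannot be handled in the vDSO
 * are forwarded to the clock_gettime() syscall. The syscall instruction
 * clobbers rcx and r11, hence the clobber list.
 */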
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
	long ret;
	asm ("syscall" : "=a" (ret), "=m" (*ts) :
	     "0" (__NR_clock_gettime), "D" (clock), "S" (ts) :
	     "rcx", "r11");
	return ret;
}

#else

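/*
 * Fallback for the 32-bit (compat) build: enter the kernel through
 * __kernel_vsyscall. %ebx cannot simply be clobbered in position-independent
 * 32-bit code (it may hold the GOT pointer), so it is saved in %edx around
 * the call while it carries the clock argument.
 */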
notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
	long ret;

	asm (
		"mov %%ebx, %%edx \n"
		"mov %[clock], %%ebx \n"
		"call __kernel_vsyscall \n"
		"mov %%edx, %%ebx \n"
		: "=a" (ret), "=m" (*ts)
		: "0" (__NR_clock_gettime), [clock] "g" (clock), "c" (ts)
		: "edx");
	return ret;
}

#endif

#ifdef CONFIG_PARAVIRT_CLOCK
static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void)
{
	return (const struct pvclock_vsyscall_time_info *)&pvclock_page;
}

static notrace u64 vread_pvclock(void)
{
	const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;
	u32 version;
	u64 ret;

	/*
	 * Note: The kernel and hypervisor must guarantee that cpu ID
	 * number maps 1:1 to per-CPU pvclock time info.
	 *
	 * Because the hypervisor is entirely unaware of guest userspace
	 * preemption, it cannot guarantee that per-CPU pvclock time
	 * info is updated when the underlying CPU changes, or that its
	 * version is increased whenever the underlying CPU changes.
	 *
	 * On KVM, we are guaranteed that pvti updates for any vCPU are
	 * atomic as seen by *all* vCPUs. This is an even stronger
	 * guarantee than we get with a normal seqlock.
	 *
	 * On Xen, we don't appear to have that guarantee, but Xen still
	 * supplies a valid seqlock using the version field.
	 *
	 * We only do pvclock vdso timing at all if
	 * PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to
	 * mean that all vCPUs have matching pvti and that the TSC is
	 * synced, so we can just look at vCPU 0's pvti.
	 */

	do {
		version = pvclock_read_begin(pvti);

		if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT)))
			return U64_MAX;

		ret = __pvclock_read_cycles(pvti, rdtsc_ordered());
	} while (pvclock_read_retry(pvti, version));

	return ret;
}
#endif
#ifdef CONFIG_HYPERV_TSCPAGE
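/*
 * Read the Hyper-V TSC reference page, which the kernel maps into the vDSO
 * image as hvclock_page; hv_read_tsc_page() turns it into a clocksource
 * cycle value.
 */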
static notrace u64 vread_hvclock(void)
{
	const struct ms_hyperv_tsc_page *tsc_pg =
		(const struct ms_hyperv_tsc_page *)&hvclock_page;

	return hv_read_tsc_page(tsc_pg);
}
#endif

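/*
 * Read the raw cycle counter for the given clocksource mode. Returning
 * U64_MAX tells the caller that the mode cannot be handled in the vDSO and
 * the syscall fallback must be used.
 */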
notrace static inline u64 vgetcyc(int mode)
{
	if (mode == VCLOCK_TSC)
		return (u64)rdtsc_ordered();
#ifdef CONFIG_PARAVIRT_CLOCK
	else if (mode == VCLOCK_PVCLOCK)
		return vread_pvclock();
#endif
#ifdef CONFIG_HYPERV_TSCPAGE
	else if (mode == VCLOCK_HVCLOCK)
		return vread_hvclock();
#endif
	return U64_MAX;
}

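/*
 * High resolution read: sample the clocksource and the gtod data under the
 * seqcount, then scale the delta since gtod->cycle_last by mult/shift. A
 * negative (s64) cycle value signals an unusable clocksource mode and takes
 * the syscall fallback.
 */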
notrace static int do_hres(clockid_t clk, struct timespec *ts)
{
	struct vgtod_ts *base = &gtod->basetime[clk];
	u64 cycles, last, sec, ns;
	unsigned int seq;

	do {
		seq = gtod_read_begin(gtod);
		cycles = vgetcyc(gtod->vclock_mode);
		ns = base->nsec;
		last = gtod->cycle_last;
		if (unlikely((s64)cycles < 0))
			return vdso_fallback_gettime(clk, ts);
		if (cycles > last)
			ns += (cycles - last) * gtod->mult;
		ns >>= gtod->shift;
		sec = base->sec;
	} while (unlikely(gtod_read_retry(gtod, seq)));

	/*
	 * Do this outside the loop: a race inside the loop could result
	 * in __iter_div_u64_rem() being extremely slow.
	 */
	ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
	ts->tv_nsec = ns;

	return 0;
}

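/*
 * Coarse clocks need no cycle counter read: the last tick value is copied
 * straight from the gtod data under the seqcount.
 */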
notrace static void do_coarse(clockid_t clk, struct timespec *ts)
{
	struct vgtod_ts *base = &gtod->basetime[clk];
	unsigned int seq;

	do {
		seq = gtod_read_begin(gtod);
		ts->tv_sec = base->sec;
		ts->tv_nsec = base->nsec;
	} while (unlikely(gtod_read_retry(gtod, seq)));
}

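/*
 * Main clock_gettime() entry point of the vDSO. Clocks that are neither
 * high resolution nor coarse, and out-of-range clock IDs, go through the
 * syscall fallback.
 */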
notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
{
	unsigned int msk;

	/* Sort out negative (CPU/FD) and invalid clocks */
	if (unlikely((unsigned int) clock >= MAX_CLOCKS))
		return vdso_fallback_gettime(clock, ts);

	/*
	 * Convert the clockid to a bitmask and use it to check which
	 * clocks are handled in the VDSO directly.
	 */
	msk = 1U << clock;
	if (likely(msk & VGTOD_HRES)) {
		return do_hres(clock, ts);
	} else if (msk & VGTOD_COARSE) {
		do_coarse(clock, ts);
		return 0;
	}
	return vdso_fallback_gettime(clock, ts);
}

int clock_gettime(clockid_t, struct timespec *)
	__attribute__((weak, alias("__vdso_clock_gettime")));

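/*
 * gettimeofday() reuses do_hres(): the timeval is filled in as if it were a
 * timespec (the first two fields of the two structures line up on the ABIs
 * built here), and the nanosecond value is then divided down to
 * microseconds.
 */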
notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
{
	if (likely(tv != NULL)) {
		struct timespec *ts = (struct timespec *) tv;

		do_hres(CLOCK_REALTIME, ts);
		tv->tv_usec /= 1000;
	}
	if (unlikely(tz != NULL)) {
		tz->tz_minuteswest = gtod->tz_minuteswest;
		tz->tz_dsttime = gtod->tz_dsttime;
	}

	return 0;
}
int gettimeofday(struct timeval *, struct timezone *)
	__attribute__((weak, alias("__vdso_gettimeofday")));

/*
 * This will break when the xtime seconds get inaccurate, but that is
 * unlikely
 */
notrace time_t __vdso_time(time_t *t)
{
	/* This is atomic on x86 so we don't need any locks. */
	time_t result = READ_ONCE(gtod->basetime[CLOCK_REALTIME].sec);

	if (t)
		*t = result;
	return result;
}
time_t time(time_t *t)
	__attribute__((weak, alias("__vdso_time")));