1 | // SPDX-License-Identifier: GPL-2.0+ |
2 | /* |
3 | * 2002-10-15 Posix Clocks & timers |
4 | * by George Anzinger george@mvista.com |
5 | * Copyright (C) 2002 2003 by MontaVista Software. |
6 | * |
7 | * 2004-06-01 Fix CLOCK_REALTIME clock/timer TIMER_ABSTIME bug. |
8 | * Copyright (C) 2004 Boris Hu |
9 | * |
10 | * These are all the functions necessary to implement POSIX clocks & timers |
11 | */ |
12 | #include <linux/mm.h> |
13 | #include <linux/interrupt.h> |
14 | #include <linux/slab.h> |
15 | #include <linux/time.h> |
16 | #include <linux/mutex.h> |
17 | #include <linux/sched/task.h> |
18 | |
19 | #include <linux/uaccess.h> |
20 | #include <linux/list.h> |
21 | #include <linux/init.h> |
22 | #include <linux/compiler.h> |
23 | #include <linux/hash.h> |
24 | #include <linux/posix-clock.h> |
25 | #include <linux/posix-timers.h> |
26 | #include <linux/syscalls.h> |
27 | #include <linux/wait.h> |
28 | #include <linux/workqueue.h> |
29 | #include <linux/export.h> |
30 | #include <linux/hashtable.h> |
31 | #include <linux/compat.h> |
32 | #include <linux/nospec.h> |
33 | #include <linux/time_namespace.h> |
34 | |
35 | #include "timekeeping.h" |
36 | #include "posix-timers.h" |
37 | |
/* Slab cache for struct k_itimer objects; created in init_posix_timers(). */
static struct kmem_cache *posix_timers_cache;

/*
 * Timers are managed in a hash table for lockless lookup. The hash key is
 * constructed from current::signal and the timer ID and the timer is
 * matched against current::signal and the timer ID when walking the hash
 * bucket list.
 *
 * This allows checkpoint/restore to reconstruct the exact timer IDs for
 * a process.
 */
static DEFINE_HASHTABLE(posix_timers_hashtable, 9);
/* Taken around insertion into and removal from posix_timers_hashtable. */
static DEFINE_SPINLOCK(hash_lock);

/* Forward declarations; the definitions are not in this part of the file. */
static const struct k_clock * const posix_clocks[];
static const struct k_clock *clockid_to_kclock(const clockid_t id);
static const struct k_clock clock_realtime, clock_monotonic;

/* SIGEV_THREAD_ID cannot share a bit with the other SIGEV values. */
#if SIGEV_THREAD_ID != (SIGEV_THREAD_ID & \
			~(SIGEV_SIGNAL | SIGEV_NONE | SIGEV_THREAD))
#error "SIGEV_THREAD_ID must not share bit with other SIGEV values!"
#endif

static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags);

/*
 * lock_timer - Look up and lock the timer with ID @tid
 *
 * Evaluates to the result of __lock_timer(): a timer with timr::it_lock
 * held and the interrupt state saved in @flags, or NULL.
 * NOTE(review): __cond_lock() is presumably only a static checker
 * annotation for the conditional lock acquisition - confirm.
 */
#define lock_timer(tid, flags)						   \
({	struct k_itimer *__timr;					   \
	__cond_lock(&__timr->it_lock, __timr = __lock_timer(tid, flags));  \
	__timr;								   \
})
69 | |
70 | static int hash(struct signal_struct *sig, unsigned int nr) |
71 | { |
72 | return hash_32(val: hash32_ptr(ptr: sig) ^ nr, HASH_BITS(posix_timers_hashtable)); |
73 | } |
74 | |
75 | static struct k_itimer *__posix_timers_find(struct hlist_head *head, |
76 | struct signal_struct *sig, |
77 | timer_t id) |
78 | { |
79 | struct k_itimer *timer; |
80 | |
81 | hlist_for_each_entry_rcu(timer, head, t_hash, lockdep_is_held(&hash_lock)) { |
82 | /* timer->it_signal can be set concurrently */ |
83 | if ((READ_ONCE(timer->it_signal) == sig) && (timer->it_id == id)) |
84 | return timer; |
85 | } |
86 | return NULL; |
87 | } |
88 | |
89 | static struct k_itimer *posix_timer_by_id(timer_t id) |
90 | { |
91 | struct signal_struct *sig = current->signal; |
92 | struct hlist_head *head = &posix_timers_hashtable[hash(sig, nr: id)]; |
93 | |
94 | return __posix_timers_find(head, sig, id); |
95 | } |
96 | |
97 | static int posix_timer_add(struct k_itimer *timer) |
98 | { |
99 | struct signal_struct *sig = current->signal; |
100 | struct hlist_head *head; |
101 | unsigned int cnt, id; |
102 | |
103 | /* |
104 | * FIXME: Replace this by a per signal struct xarray once there is |
105 | * a plan to handle the resulting CRIU regression gracefully. |
106 | */ |
107 | for (cnt = 0; cnt <= INT_MAX; cnt++) { |
108 | spin_lock(lock: &hash_lock); |
109 | id = sig->next_posix_timer_id; |
110 | |
111 | /* Write the next ID back. Clamp it to the positive space */ |
112 | sig->next_posix_timer_id = (id + 1) & INT_MAX; |
113 | |
114 | head = &posix_timers_hashtable[hash(sig, nr: id)]; |
115 | if (!__posix_timers_find(head, sig, id)) { |
116 | hlist_add_head_rcu(n: &timer->t_hash, h: head); |
117 | spin_unlock(lock: &hash_lock); |
118 | return id; |
119 | } |
120 | spin_unlock(lock: &hash_lock); |
121 | } |
122 | /* POSIX return code when no timer ID could be allocated */ |
123 | return -EAGAIN; |
124 | } |
125 | |
126 | static inline void unlock_timer(struct k_itimer *timr, unsigned long flags) |
127 | { |
128 | spin_unlock_irqrestore(lock: &timr->it_lock, flags); |
129 | } |
130 | |
131 | static int posix_get_realtime_timespec(clockid_t which_clock, struct timespec64 *tp) |
132 | { |
133 | ktime_get_real_ts64(tv: tp); |
134 | return 0; |
135 | } |
136 | |
137 | static ktime_t posix_get_realtime_ktime(clockid_t which_clock) |
138 | { |
139 | return ktime_get_real(); |
140 | } |
141 | |
142 | static int posix_clock_realtime_set(const clockid_t which_clock, |
143 | const struct timespec64 *tp) |
144 | { |
145 | return do_sys_settimeofday64(tv: tp, NULL); |
146 | } |
147 | |
148 | static int posix_clock_realtime_adj(const clockid_t which_clock, |
149 | struct __kernel_timex *t) |
150 | { |
151 | return do_adjtimex(t); |
152 | } |
153 | |
154 | static int posix_get_monotonic_timespec(clockid_t which_clock, struct timespec64 *tp) |
155 | { |
156 | ktime_get_ts64(ts: tp); |
157 | timens_add_monotonic(ts: tp); |
158 | return 0; |
159 | } |
160 | |
161 | static ktime_t posix_get_monotonic_ktime(clockid_t which_clock) |
162 | { |
163 | return ktime_get(); |
164 | } |
165 | |
166 | static int posix_get_monotonic_raw(clockid_t which_clock, struct timespec64 *tp) |
167 | { |
168 | ktime_get_raw_ts64(ts: tp); |
169 | timens_add_monotonic(ts: tp); |
170 | return 0; |
171 | } |
172 | |
173 | static int posix_get_realtime_coarse(clockid_t which_clock, struct timespec64 *tp) |
174 | { |
175 | ktime_get_coarse_real_ts64(ts: tp); |
176 | return 0; |
177 | } |
178 | |
179 | static int posix_get_monotonic_coarse(clockid_t which_clock, |
180 | struct timespec64 *tp) |
181 | { |
182 | ktime_get_coarse_ts64(ts: tp); |
183 | timens_add_monotonic(ts: tp); |
184 | return 0; |
185 | } |
186 | |
187 | static int posix_get_coarse_res(const clockid_t which_clock, struct timespec64 *tp) |
188 | { |
189 | *tp = ktime_to_timespec64(KTIME_LOW_RES); |
190 | return 0; |
191 | } |
192 | |
193 | static int posix_get_boottime_timespec(const clockid_t which_clock, struct timespec64 *tp) |
194 | { |
195 | ktime_get_boottime_ts64(ts: tp); |
196 | timens_add_boottime(ts: tp); |
197 | return 0; |
198 | } |
199 | |
200 | static ktime_t posix_get_boottime_ktime(const clockid_t which_clock) |
201 | { |
202 | return ktime_get_boottime(); |
203 | } |
204 | |
205 | static int posix_get_tai_timespec(clockid_t which_clock, struct timespec64 *tp) |
206 | { |
207 | ktime_get_clocktai_ts64(ts: tp); |
208 | return 0; |
209 | } |
210 | |
211 | static ktime_t posix_get_tai_ktime(clockid_t which_clock) |
212 | { |
213 | return ktime_get_clocktai(); |
214 | } |
215 | |
216 | static int posix_get_hrtimer_res(clockid_t which_clock, struct timespec64 *tp) |
217 | { |
218 | tp->tv_sec = 0; |
219 | tp->tv_nsec = hrtimer_resolution; |
220 | return 0; |
221 | } |
222 | |
223 | static __init int init_posix_timers(void) |
224 | { |
225 | posix_timers_cache = kmem_cache_create(name: "posix_timers_cache" , |
226 | size: sizeof(struct k_itimer), align: 0, |
227 | SLAB_PANIC | SLAB_ACCOUNT, NULL); |
228 | return 0; |
229 | } |
230 | __initcall(init_posix_timers); |
231 | |
232 | /* |
233 | * The siginfo si_overrun field and the return value of timer_getoverrun(2) |
234 | * are of type int. Clamp the overrun value to INT_MAX |
235 | */ |
236 | static inline int timer_overrun_to_int(struct k_itimer *timr, int baseval) |
237 | { |
238 | s64 sum = timr->it_overrun_last + (s64)baseval; |
239 | |
240 | return sum > (s64)INT_MAX ? INT_MAX : (int)sum; |
241 | } |
242 | |
243 | static void common_hrtimer_rearm(struct k_itimer *timr) |
244 | { |
245 | struct hrtimer *timer = &timr->it.real.timer; |
246 | |
247 | timr->it_overrun += hrtimer_forward(timer, now: timer->base->get_time(), |
248 | interval: timr->it_interval); |
249 | hrtimer_restart(timer); |
250 | } |
251 | |
252 | /* |
253 | * This function is called from the signal delivery code if |
254 | * info->si_sys_private is not zero, which indicates that the timer has to |
255 | * be rearmed. Restart the timer and update info::si_overrun. |
256 | */ |
257 | void posixtimer_rearm(struct kernel_siginfo *info) |
258 | { |
259 | struct k_itimer *timr; |
260 | unsigned long flags; |
261 | |
262 | timr = lock_timer(info->si_tid, &flags); |
263 | if (!timr) |
264 | return; |
265 | |
266 | if (timr->it_interval && timr->it_requeue_pending == info->si_sys_private) { |
267 | timr->kclock->timer_rearm(timr); |
268 | |
269 | timr->it_active = 1; |
270 | timr->it_overrun_last = timr->it_overrun; |
271 | timr->it_overrun = -1LL; |
272 | ++timr->it_requeue_pending; |
273 | |
274 | info->si_overrun = timer_overrun_to_int(timr, baseval: info->si_overrun); |
275 | } |
276 | |
277 | unlock_timer(timr, flags); |
278 | } |
279 | |
280 | int posix_timer_event(struct k_itimer *timr, int si_private) |
281 | { |
282 | enum pid_type type; |
283 | int ret; |
284 | /* |
285 | * FIXME: if ->sigq is queued we can race with |
286 | * dequeue_signal()->posixtimer_rearm(). |
287 | * |
288 | * If dequeue_signal() sees the "right" value of |
289 | * si_sys_private it calls posixtimer_rearm(). |
290 | * We re-queue ->sigq and drop ->it_lock(). |
291 | * posixtimer_rearm() locks the timer |
292 | * and re-schedules it while ->sigq is pending. |
293 | * Not really bad, but not that we want. |
294 | */ |
295 | timr->sigq->info.si_sys_private = si_private; |
296 | |
297 | type = !(timr->it_sigev_notify & SIGEV_THREAD_ID) ? PIDTYPE_TGID : PIDTYPE_PID; |
298 | ret = send_sigqueue(timr->sigq, timr->it_pid, type); |
299 | /* If we failed to send the signal the timer stops. */ |
300 | return ret > 0; |
301 | } |
302 | |
303 | /* |
304 | * This function gets called when a POSIX.1b interval timer expires from |
305 | * the HRTIMER interrupt (soft interrupt on RT kernels). |
306 | * |
307 | * Handles CLOCK_REALTIME, CLOCK_MONOTONIC, CLOCK_BOOTTIME and CLOCK_TAI |
308 | * based timers. |
309 | */ |
310 | static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer) |
311 | { |
312 | enum hrtimer_restart ret = HRTIMER_NORESTART; |
313 | struct k_itimer *timr; |
314 | unsigned long flags; |
315 | int si_private = 0; |
316 | |
317 | timr = container_of(timer, struct k_itimer, it.real.timer); |
318 | spin_lock_irqsave(&timr->it_lock, flags); |
319 | |
320 | timr->it_active = 0; |
321 | if (timr->it_interval != 0) |
322 | si_private = ++timr->it_requeue_pending; |
323 | |
324 | if (posix_timer_event(timr, si_private)) { |
325 | /* |
326 | * The signal was not queued due to SIG_IGN. As a |
327 | * consequence the timer is not going to be rearmed from |
328 | * the signal delivery path. But as a real signal handler |
329 | * can be installed later the timer must be rearmed here. |
330 | */ |
331 | if (timr->it_interval != 0) { |
332 | ktime_t now = hrtimer_cb_get_time(timer); |
333 | |
334 | /* |
335 | * FIXME: What we really want, is to stop this |
336 | * timer completely and restart it in case the |
337 | * SIG_IGN is removed. This is a non trivial |
338 | * change to the signal handling code. |
339 | * |
340 | * For now let timers with an interval less than a |
341 | * jiffie expire every jiffie and recheck for a |
342 | * valid signal handler. |
343 | * |
344 | * This avoids interrupt starvation in case of a |
345 | * very small interval, which would expire the |
346 | * timer immediately again. |
347 | * |
348 | * Moving now ahead of time by one jiffie tricks |
349 | * hrtimer_forward() to expire the timer later, |
350 | * while it still maintains the overrun accuracy |
351 | * for the price of a slight inconsistency in the |
352 | * timer_gettime() case. This is at least better |
353 | * than a timer storm. |
354 | * |
355 | * Only required when high resolution timers are |
356 | * enabled as the periodic tick based timers are |
357 | * automatically aligned to the next tick. |
358 | */ |
359 | if (IS_ENABLED(CONFIG_HIGH_RES_TIMERS)) { |
360 | ktime_t kj = TICK_NSEC; |
361 | |
362 | if (timr->it_interval < kj) |
363 | now = ktime_add(now, kj); |
364 | } |
365 | |
366 | timr->it_overrun += hrtimer_forward(timer, now, interval: timr->it_interval); |
367 | ret = HRTIMER_RESTART; |
368 | ++timr->it_requeue_pending; |
369 | timr->it_active = 1; |
370 | } |
371 | } |
372 | |
373 | unlock_timer(timr, flags); |
374 | return ret; |
375 | } |
376 | |
377 | static struct pid *good_sigevent(sigevent_t * event) |
378 | { |
379 | struct pid *pid = task_tgid(current); |
380 | struct task_struct *rtn; |
381 | |
382 | switch (event->sigev_notify) { |
383 | case SIGEV_SIGNAL | SIGEV_THREAD_ID: |
384 | pid = find_vpid(nr: event->sigev_notify_thread_id); |
385 | rtn = pid_task(pid, PIDTYPE_PID); |
386 | if (!rtn || !same_thread_group(p1: rtn, current)) |
387 | return NULL; |
388 | fallthrough; |
389 | case SIGEV_SIGNAL: |
390 | case SIGEV_THREAD: |
391 | if (event->sigev_signo <= 0 || event->sigev_signo > SIGRTMAX) |
392 | return NULL; |
393 | fallthrough; |
394 | case SIGEV_NONE: |
395 | return pid; |
396 | default: |
397 | return NULL; |
398 | } |
399 | } |
400 | |
401 | static struct k_itimer * alloc_posix_timer(void) |
402 | { |
403 | struct k_itimer *tmr = kmem_cache_zalloc(k: posix_timers_cache, GFP_KERNEL); |
404 | |
405 | if (!tmr) |
406 | return tmr; |
407 | if (unlikely(!(tmr->sigq = sigqueue_alloc()))) { |
408 | kmem_cache_free(s: posix_timers_cache, objp: tmr); |
409 | return NULL; |
410 | } |
411 | clear_siginfo(info: &tmr->sigq->info); |
412 | return tmr; |
413 | } |
414 | |
415 | static void k_itimer_rcu_free(struct rcu_head *head) |
416 | { |
417 | struct k_itimer *tmr = container_of(head, struct k_itimer, rcu); |
418 | |
419 | kmem_cache_free(s: posix_timers_cache, objp: tmr); |
420 | } |
421 | |
422 | static void posix_timer_free(struct k_itimer *tmr) |
423 | { |
424 | put_pid(pid: tmr->it_pid); |
425 | sigqueue_free(tmr->sigq); |
426 | call_rcu(head: &tmr->rcu, func: k_itimer_rcu_free); |
427 | } |
428 | |
429 | static void posix_timer_unhash_and_free(struct k_itimer *tmr) |
430 | { |
431 | spin_lock(lock: &hash_lock); |
432 | hlist_del_rcu(n: &tmr->t_hash); |
433 | spin_unlock(lock: &hash_lock); |
434 | posix_timer_free(tmr); |
435 | } |
436 | |
437 | static int common_timer_create(struct k_itimer *new_timer) |
438 | { |
439 | hrtimer_init(timer: &new_timer->it.real.timer, which_clock: new_timer->it_clock, mode: 0); |
440 | return 0; |
441 | } |
442 | |
443 | /* Create a POSIX.1b interval timer. */ |
444 | static int do_timer_create(clockid_t which_clock, struct sigevent *event, |
445 | timer_t __user *created_timer_id) |
446 | { |
447 | const struct k_clock *kc = clockid_to_kclock(id: which_clock); |
448 | struct k_itimer *new_timer; |
449 | int error, new_timer_id; |
450 | |
451 | if (!kc) |
452 | return -EINVAL; |
453 | if (!kc->timer_create) |
454 | return -EOPNOTSUPP; |
455 | |
456 | new_timer = alloc_posix_timer(); |
457 | if (unlikely(!new_timer)) |
458 | return -EAGAIN; |
459 | |
460 | spin_lock_init(&new_timer->it_lock); |
461 | |
462 | /* |
463 | * Add the timer to the hash table. The timer is not yet valid |
464 | * because new_timer::it_signal is still NULL. The timer id is also |
465 | * not yet visible to user space. |
466 | */ |
467 | new_timer_id = posix_timer_add(timer: new_timer); |
468 | if (new_timer_id < 0) { |
469 | posix_timer_free(tmr: new_timer); |
470 | return new_timer_id; |
471 | } |
472 | |
473 | new_timer->it_id = (timer_t) new_timer_id; |
474 | new_timer->it_clock = which_clock; |
475 | new_timer->kclock = kc; |
476 | new_timer->it_overrun = -1LL; |
477 | |
478 | if (event) { |
479 | rcu_read_lock(); |
480 | new_timer->it_pid = get_pid(pid: good_sigevent(event)); |
481 | rcu_read_unlock(); |
482 | if (!new_timer->it_pid) { |
483 | error = -EINVAL; |
484 | goto out; |
485 | } |
486 | new_timer->it_sigev_notify = event->sigev_notify; |
487 | new_timer->sigq->info.si_signo = event->sigev_signo; |
488 | new_timer->sigq->info.si_value = event->sigev_value; |
489 | } else { |
490 | new_timer->it_sigev_notify = SIGEV_SIGNAL; |
491 | new_timer->sigq->info.si_signo = SIGALRM; |
492 | memset(&new_timer->sigq->info.si_value, 0, sizeof(sigval_t)); |
493 | new_timer->sigq->info.si_value.sival_int = new_timer->it_id; |
494 | new_timer->it_pid = get_pid(pid: task_tgid(current)); |
495 | } |
496 | |
497 | new_timer->sigq->info.si_tid = new_timer->it_id; |
498 | new_timer->sigq->info.si_code = SI_TIMER; |
499 | |
500 | if (copy_to_user(to: created_timer_id, from: &new_timer_id, n: sizeof (new_timer_id))) { |
501 | error = -EFAULT; |
502 | goto out; |
503 | } |
504 | /* |
505 | * After succesful copy out, the timer ID is visible to user space |
506 | * now but not yet valid because new_timer::signal is still NULL. |
507 | * |
508 | * Complete the initialization with the clock specific create |
509 | * callback. |
510 | */ |
511 | error = kc->timer_create(new_timer); |
512 | if (error) |
513 | goto out; |
514 | |
515 | spin_lock_irq(lock: ¤t->sighand->siglock); |
516 | /* This makes the timer valid in the hash table */ |
517 | WRITE_ONCE(new_timer->it_signal, current->signal); |
518 | list_add(new: &new_timer->list, head: ¤t->signal->posix_timers); |
519 | spin_unlock_irq(lock: ¤t->sighand->siglock); |
520 | /* |
521 | * After unlocking sighand::siglock @new_timer is subject to |
522 | * concurrent removal and cannot be touched anymore |
523 | */ |
524 | return 0; |
525 | out: |
526 | posix_timer_unhash_and_free(tmr: new_timer); |
527 | return error; |
528 | } |
529 | |
530 | SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock, |
531 | struct sigevent __user *, timer_event_spec, |
532 | timer_t __user *, created_timer_id) |
533 | { |
534 | if (timer_event_spec) { |
535 | sigevent_t event; |
536 | |
537 | if (copy_from_user(to: &event, from: timer_event_spec, n: sizeof (event))) |
538 | return -EFAULT; |
539 | return do_timer_create(which_clock, event: &event, created_timer_id); |
540 | } |
541 | return do_timer_create(which_clock, NULL, created_timer_id); |
542 | } |
543 | |
#ifdef CONFIG_COMPAT
/*
 * Compat variant of timer_create(2).
 *
 * Same as the native entry point except that the sigevent is read with
 * get_compat_sigevent(). Returns -EFAULT when that fails, otherwise the
 * result of do_timer_create().
 */
COMPAT_SYSCALL_DEFINE3(timer_create, clockid_t, which_clock,
		       struct compat_sigevent __user *, timer_event_spec,
		       timer_t __user *, created_timer_id)
{
	if (timer_event_spec) {
		sigevent_t event;

		if (get_compat_sigevent(&event, timer_event_spec))
			return -EFAULT;
		return do_timer_create(which_clock, &event, created_timer_id);
	}
	return do_timer_create(which_clock, NULL, created_timer_id);
}
#endif
559 | |
560 | static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags) |
561 | { |
562 | struct k_itimer *timr; |
563 | |
564 | /* |
565 | * timer_t could be any type >= int and we want to make sure any |
566 | * @timer_id outside positive int range fails lookup. |
567 | */ |
568 | if ((unsigned long long)timer_id > INT_MAX) |
569 | return NULL; |
570 | |
571 | /* |
572 | * The hash lookup and the timers are RCU protected. |
573 | * |
574 | * Timers are added to the hash in invalid state where |
575 | * timr::it_signal == NULL. timer::it_signal is only set after the |
576 | * rest of the initialization succeeded. |
577 | * |
578 | * Timer destruction happens in steps: |
579 | * 1) Set timr::it_signal to NULL with timr::it_lock held |
580 | * 2) Release timr::it_lock |
581 | * 3) Remove from the hash under hash_lock |
582 | * 4) Call RCU for removal after the grace period |
583 | * |
584 | * Holding rcu_read_lock() accross the lookup ensures that |
585 | * the timer cannot be freed. |
586 | * |
587 | * The lookup validates locklessly that timr::it_signal == |
588 | * current::it_signal and timr::it_id == @timer_id. timr::it_id |
589 | * can't change, but timr::it_signal becomes NULL during |
590 | * destruction. |
591 | */ |
592 | rcu_read_lock(); |
593 | timr = posix_timer_by_id(id: timer_id); |
594 | if (timr) { |
595 | spin_lock_irqsave(&timr->it_lock, *flags); |
596 | /* |
597 | * Validate under timr::it_lock that timr::it_signal is |
598 | * still valid. Pairs with #1 above. |
599 | */ |
600 | if (timr->it_signal == current->signal) { |
601 | rcu_read_unlock(); |
602 | return timr; |
603 | } |
604 | spin_unlock_irqrestore(lock: &timr->it_lock, flags: *flags); |
605 | } |
606 | rcu_read_unlock(); |
607 | |
608 | return NULL; |
609 | } |
610 | |
611 | static ktime_t common_hrtimer_remaining(struct k_itimer *timr, ktime_t now) |
612 | { |
613 | struct hrtimer *timer = &timr->it.real.timer; |
614 | |
615 | return __hrtimer_expires_remaining_adjusted(timer, now); |
616 | } |
617 | |
618 | static s64 common_hrtimer_forward(struct k_itimer *timr, ktime_t now) |
619 | { |
620 | struct hrtimer *timer = &timr->it.real.timer; |
621 | |
622 | return hrtimer_forward(timer, now, interval: timr->it_interval); |
623 | } |
624 | |
/*
 * Get the time remaining on a POSIX.1b interval timer.
 *
 * @timr:		The timer to query. do_timer_gettime() and
 *			common_timer_set() call this with the timer locked.
 * @cur_setting:	Result storage. Only fields which apply are written,
 *			so the caller has to hand in a zeroed structure
 *			(do_timer_gettime() and do_timer_settime() do that).
 *
 * Two issues to handle here:
 *
 *  1) The timer has a requeue pending. The return value must appear as
 *     if the timer has been requeued right now.
 *
 *  2) The timer is a SIGEV_NONE timer. These timers are never enqueued
 *     into the hrtimer queue and therefore never expired. Emulate expiry
 *     here taking #1 into account.
 */
void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting)
{
	const struct k_clock *kc = timr->kclock;
	ktime_t now, remaining, iv;
	bool sig_none;

	sig_none = timr->it_sigev_notify == SIGEV_NONE;
	iv = timr->it_interval;

	/* interval timer ? */
	if (iv) {
		cur_setting->it_interval = ktime_to_timespec64(iv);
	} else if (!timr->it_active) {
		/*
		 * SIGEV_NONE oneshot timers are never queued and therefore
		 * timr->it_active is always false. The check below
		 * vs. remaining time will handle this case.
		 *
		 * For all other timers there is nothing to update here, so
		 * return.
		 */
		if (!sig_none)
			return;
	}

	/* Sample the clock once; forward and remaining both use this value */
	now = kc->clock_get_ktime(timr->it_clock);

	/*
	 * If this is an interval timer and either has requeue pending or
	 * is a SIGEV_NONE timer move the expiry time forward by intervals,
	 * so expiry is > now.
	 */
	if (iv && (timr->it_requeue_pending & REQUEUE_PENDING || sig_none))
		timr->it_overrun += kc->timer_forward(timr, now);

	remaining = kc->timer_remaining(timr, now);
	/*
	 * As @now is retrieved before a possible timer_forward() and
	 * cannot be reevaluated by the compiler @remaining is based on the
	 * same @now value. Therefore @remaining is consistent vs. @now.
	 *
	 * Consequently all interval timers, i.e. @iv > 0, cannot have a
	 * remaining time <= 0 because timer_forward() guarantees to move
	 * them forward so that the next timer expiry is > @now.
	 */
	if (remaining <= 0) {
		/*
		 * A single shot SIGEV_NONE timer must return 0, when it is
		 * expired! Timers which have a real signal delivery mode
		 * must return a remaining time greater than 0 because the
		 * signal has not yet been delivered.
		 */
		if (!sig_none)
			cur_setting->it_value.tv_nsec = 1;
	} else {
		cur_setting->it_value = ktime_to_timespec64(remaining);
	}
}
695 | |
696 | static int do_timer_gettime(timer_t timer_id, struct itimerspec64 *setting) |
697 | { |
698 | const struct k_clock *kc; |
699 | struct k_itimer *timr; |
700 | unsigned long flags; |
701 | int ret = 0; |
702 | |
703 | timr = lock_timer(timer_id, &flags); |
704 | if (!timr) |
705 | return -EINVAL; |
706 | |
707 | memset(setting, 0, sizeof(*setting)); |
708 | kc = timr->kclock; |
709 | if (WARN_ON_ONCE(!kc || !kc->timer_get)) |
710 | ret = -EINVAL; |
711 | else |
712 | kc->timer_get(timr, setting); |
713 | |
714 | unlock_timer(timr, flags); |
715 | return ret; |
716 | } |
717 | |
718 | /* Get the time remaining on a POSIX.1b interval timer. */ |
719 | SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id, |
720 | struct __kernel_itimerspec __user *, setting) |
721 | { |
722 | struct itimerspec64 cur_setting; |
723 | |
724 | int ret = do_timer_gettime(timer_id, setting: &cur_setting); |
725 | if (!ret) { |
726 | if (put_itimerspec64(it: &cur_setting, uit: setting)) |
727 | ret = -EFAULT; |
728 | } |
729 | return ret; |
730 | } |
731 | |
#ifdef CONFIG_COMPAT_32BIT_TIME

/*
 * Variant of timer_gettime() which copies the result out in the
 * old_itimerspec32 layout.
 *
 * Returns the error of do_timer_gettime(), or -EFAULT when the result
 * cannot be copied to @setting, or 0 on success.
 */
SYSCALL_DEFINE2(timer_gettime32, timer_t, timer_id,
		struct old_itimerspec32 __user *, setting)
{
	struct itimerspec64 cur_setting;

	int ret = do_timer_gettime(timer_id, &cur_setting);
	if (!ret) {
		if (put_old_itimerspec32(&cur_setting, setting))
			ret = -EFAULT;
	}
	return ret;
}

#endif
748 | |
749 | /** |
750 | * sys_timer_getoverrun - Get the number of overruns of a POSIX.1b interval timer |
751 | * @timer_id: The timer ID which identifies the timer |
752 | * |
753 | * The "overrun count" of a timer is one plus the number of expiration |
754 | * intervals which have elapsed between the first expiry, which queues the |
755 | * signal and the actual signal delivery. On signal delivery the "overrun |
756 | * count" is calculated and cached, so it can be returned directly here. |
757 | * |
758 | * As this is relative to the last queued signal the returned overrun count |
759 | * is meaningless outside of the signal delivery path and even there it |
760 | * does not accurately reflect the current state when user space evaluates |
761 | * it. |
762 | * |
763 | * Returns: |
764 | * -EINVAL @timer_id is invalid |
765 | * 1..INT_MAX The number of overruns related to the last delivered signal |
766 | */ |
767 | SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id) |
768 | { |
769 | struct k_itimer *timr; |
770 | unsigned long flags; |
771 | int overrun; |
772 | |
773 | timr = lock_timer(timer_id, &flags); |
774 | if (!timr) |
775 | return -EINVAL; |
776 | |
777 | overrun = timer_overrun_to_int(timr, baseval: 0); |
778 | unlock_timer(timr, flags); |
779 | |
780 | return overrun; |
781 | } |
782 | |
783 | static void common_hrtimer_arm(struct k_itimer *timr, ktime_t expires, |
784 | bool absolute, bool sigev_none) |
785 | { |
786 | struct hrtimer *timer = &timr->it.real.timer; |
787 | enum hrtimer_mode mode; |
788 | |
789 | mode = absolute ? HRTIMER_MODE_ABS : HRTIMER_MODE_REL; |
790 | /* |
791 | * Posix magic: Relative CLOCK_REALTIME timers are not affected by |
792 | * clock modifications, so they become CLOCK_MONOTONIC based under the |
793 | * hood. See hrtimer_init(). Update timr->kclock, so the generic |
794 | * functions which use timr->kclock->clock_get_*() work. |
795 | * |
796 | * Note: it_clock stays unmodified, because the next timer_set() might |
797 | * use ABSTIME, so it needs to switch back. |
798 | */ |
799 | if (timr->it_clock == CLOCK_REALTIME) |
800 | timr->kclock = absolute ? &clock_realtime : &clock_monotonic; |
801 | |
802 | hrtimer_init(timer: &timr->it.real.timer, which_clock: timr->it_clock, mode); |
803 | timr->it.real.timer.function = posix_timer_fn; |
804 | |
805 | if (!absolute) |
806 | expires = ktime_add_safe(lhs: expires, rhs: timer->base->get_time()); |
807 | hrtimer_set_expires(timer, time: expires); |
808 | |
809 | if (!sigev_none) |
810 | hrtimer_start_expires(timer, mode: HRTIMER_MODE_ABS); |
811 | } |
812 | |
813 | static int common_hrtimer_try_to_cancel(struct k_itimer *timr) |
814 | { |
815 | return hrtimer_try_to_cancel(timer: &timr->it.real.timer); |
816 | } |
817 | |
818 | static void common_timer_wait_running(struct k_itimer *timer) |
819 | { |
820 | hrtimer_cancel_wait_running(timer: &timer->it.real.timer); |
821 | } |
822 | |
823 | /* |
824 | * On PREEMPT_RT this prevents priority inversion and a potential livelock |
825 | * against the ksoftirqd thread in case that ksoftirqd gets preempted while |
826 | * executing a hrtimer callback. |
827 | * |
828 | * See the comments in hrtimer_cancel_wait_running(). For PREEMPT_RT=n this |
829 | * just results in a cpu_relax(). |
830 | * |
831 | * For POSIX CPU timers with CONFIG_POSIX_CPU_TIMERS_TASK_WORK=n this is |
832 | * just a cpu_relax(). With CONFIG_POSIX_CPU_TIMERS_TASK_WORK=y this |
833 | * prevents spinning on an eventually scheduled out task and a livelock |
834 | * when the task which tries to delete or disarm the timer has preempted |
835 | * the task which runs the expiry in task work context. |
836 | */ |
837 | static struct k_itimer *timer_wait_running(struct k_itimer *timer, |
838 | unsigned long *flags) |
839 | { |
840 | const struct k_clock *kc = READ_ONCE(timer->kclock); |
841 | timer_t timer_id = READ_ONCE(timer->it_id); |
842 | |
843 | /* Prevent kfree(timer) after dropping the lock */ |
844 | rcu_read_lock(); |
845 | unlock_timer(timr: timer, flags: *flags); |
846 | |
847 | /* |
848 | * kc->timer_wait_running() might drop RCU lock. So @timer |
849 | * cannot be touched anymore after the function returns! |
850 | */ |
851 | if (!WARN_ON_ONCE(!kc->timer_wait_running)) |
852 | kc->timer_wait_running(timer); |
853 | |
854 | rcu_read_unlock(); |
855 | /* Relock the timer. It might be not longer hashed. */ |
856 | return lock_timer(timer_id, flags); |
857 | } |
858 | |
859 | /* Set a POSIX.1b interval timer. */ |
860 | int common_timer_set(struct k_itimer *timr, int flags, |
861 | struct itimerspec64 *new_setting, |
862 | struct itimerspec64 *old_setting) |
863 | { |
864 | const struct k_clock *kc = timr->kclock; |
865 | bool sigev_none; |
866 | ktime_t expires; |
867 | |
868 | if (old_setting) |
869 | common_timer_get(timr, cur_setting: old_setting); |
870 | |
871 | /* Prevent rearming by clearing the interval */ |
872 | timr->it_interval = 0; |
873 | /* |
874 | * Careful here. On SMP systems the timer expiry function could be |
875 | * active and spinning on timr->it_lock. |
876 | */ |
877 | if (kc->timer_try_to_cancel(timr) < 0) |
878 | return TIMER_RETRY; |
879 | |
880 | timr->it_active = 0; |
881 | timr->it_requeue_pending = (timr->it_requeue_pending + 2) & |
882 | ~REQUEUE_PENDING; |
883 | timr->it_overrun_last = 0; |
884 | |
885 | /* Switch off the timer when it_value is zero */ |
886 | if (!new_setting->it_value.tv_sec && !new_setting->it_value.tv_nsec) |
887 | return 0; |
888 | |
889 | timr->it_interval = timespec64_to_ktime(ts: new_setting->it_interval); |
890 | expires = timespec64_to_ktime(ts: new_setting->it_value); |
891 | if (flags & TIMER_ABSTIME) |
892 | expires = timens_ktime_to_host(clockid: timr->it_clock, tim: expires); |
893 | sigev_none = timr->it_sigev_notify == SIGEV_NONE; |
894 | |
895 | kc->timer_arm(timr, expires, flags & TIMER_ABSTIME, sigev_none); |
896 | timr->it_active = !sigev_none; |
897 | return 0; |
898 | } |
899 | |
900 | static int do_timer_settime(timer_t timer_id, int tmr_flags, |
901 | struct itimerspec64 *new_spec64, |
902 | struct itimerspec64 *old_spec64) |
903 | { |
904 | const struct k_clock *kc; |
905 | struct k_itimer *timr; |
906 | unsigned long flags; |
907 | int error = 0; |
908 | |
909 | if (!timespec64_valid(ts: &new_spec64->it_interval) || |
910 | !timespec64_valid(ts: &new_spec64->it_value)) |
911 | return -EINVAL; |
912 | |
913 | if (old_spec64) |
914 | memset(old_spec64, 0, sizeof(*old_spec64)); |
915 | |
916 | timr = lock_timer(timer_id, &flags); |
917 | retry: |
918 | if (!timr) |
919 | return -EINVAL; |
920 | |
921 | kc = timr->kclock; |
922 | if (WARN_ON_ONCE(!kc || !kc->timer_set)) |
923 | error = -EINVAL; |
924 | else |
925 | error = kc->timer_set(timr, tmr_flags, new_spec64, old_spec64); |
926 | |
927 | if (error == TIMER_RETRY) { |
928 | // We already got the old time... |
929 | old_spec64 = NULL; |
930 | /* Unlocks and relocks the timer if it still exists */ |
931 | timr = timer_wait_running(timer: timr, flags: &flags); |
932 | goto retry; |
933 | } |
934 | unlock_timer(timr, flags); |
935 | |
936 | return error; |
937 | } |
938 | |
939 | /* Set a POSIX.1b interval timer */ |
940 | SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags, |
941 | const struct __kernel_itimerspec __user *, new_setting, |
942 | struct __kernel_itimerspec __user *, old_setting) |
943 | { |
944 | struct itimerspec64 new_spec, old_spec, *rtn; |
945 | int error = 0; |
946 | |
947 | if (!new_setting) |
948 | return -EINVAL; |
949 | |
950 | if (get_itimerspec64(it: &new_spec, uit: new_setting)) |
951 | return -EFAULT; |
952 | |
953 | rtn = old_setting ? &old_spec : NULL; |
954 | error = do_timer_settime(timer_id, tmr_flags: flags, new_spec64: &new_spec, old_spec64: rtn); |
955 | if (!error && old_setting) { |
956 | if (put_itimerspec64(it: &old_spec, uit: old_setting)) |
957 | error = -EFAULT; |
958 | } |
959 | return error; |
960 | } |
961 | |
962 | #ifdef CONFIG_COMPAT_32BIT_TIME |
963 | SYSCALL_DEFINE4(timer_settime32, timer_t, timer_id, int, flags, |
964 | struct old_itimerspec32 __user *, new, |
965 | struct old_itimerspec32 __user *, old) |
966 | { |
967 | struct itimerspec64 new_spec, old_spec; |
968 | struct itimerspec64 *rtn = old ? &old_spec : NULL; |
969 | int error = 0; |
970 | |
971 | if (!new) |
972 | return -EINVAL; |
973 | if (get_old_itimerspec32(its: &new_spec, uits: new)) |
974 | return -EFAULT; |
975 | |
976 | error = do_timer_settime(timer_id, tmr_flags: flags, new_spec64: &new_spec, old_spec64: rtn); |
977 | if (!error && old) { |
978 | if (put_old_itimerspec32(its: &old_spec, uits: old)) |
979 | error = -EFAULT; |
980 | } |
981 | return error; |
982 | } |
983 | #endif |
984 | |
985 | int common_timer_del(struct k_itimer *timer) |
986 | { |
987 | const struct k_clock *kc = timer->kclock; |
988 | |
989 | timer->it_interval = 0; |
990 | if (kc->timer_try_to_cancel(timer) < 0) |
991 | return TIMER_RETRY; |
992 | timer->it_active = 0; |
993 | return 0; |
994 | } |
995 | |
996 | static inline int timer_delete_hook(struct k_itimer *timer) |
997 | { |
998 | const struct k_clock *kc = timer->kclock; |
999 | |
1000 | if (WARN_ON_ONCE(!kc || !kc->timer_del)) |
1001 | return -EINVAL; |
1002 | return kc->timer_del(timer); |
1003 | } |
1004 | |
1005 | /* Delete a POSIX.1b interval timer. */ |
1006 | SYSCALL_DEFINE1(timer_delete, timer_t, timer_id) |
1007 | { |
1008 | struct k_itimer *timer; |
1009 | unsigned long flags; |
1010 | |
1011 | timer = lock_timer(timer_id, &flags); |
1012 | |
1013 | retry_delete: |
1014 | if (!timer) |
1015 | return -EINVAL; |
1016 | |
1017 | if (unlikely(timer_delete_hook(timer) == TIMER_RETRY)) { |
1018 | /* Unlocks and relocks the timer if it still exists */ |
1019 | timer = timer_wait_running(timer, flags: &flags); |
1020 | goto retry_delete; |
1021 | } |
1022 | |
1023 | spin_lock(lock: ¤t->sighand->siglock); |
1024 | list_del(entry: &timer->list); |
1025 | spin_unlock(lock: ¤t->sighand->siglock); |
1026 | /* |
1027 | * A concurrent lookup could check timer::it_signal lockless. It |
1028 | * will reevaluate with timer::it_lock held and observe the NULL. |
1029 | */ |
1030 | WRITE_ONCE(timer->it_signal, NULL); |
1031 | |
1032 | unlock_timer(timr: timer, flags); |
1033 | posix_timer_unhash_and_free(tmr: timer); |
1034 | return 0; |
1035 | } |
1036 | |
1037 | /* |
1038 | * Delete a timer if it is armed, remove it from the hash and schedule it |
1039 | * for RCU freeing. |
1040 | */ |
1041 | static void itimer_delete(struct k_itimer *timer) |
1042 | { |
1043 | unsigned long flags; |
1044 | |
1045 | /* |
1046 | * irqsave is required to make timer_wait_running() work. |
1047 | */ |
1048 | spin_lock_irqsave(&timer->it_lock, flags); |
1049 | |
1050 | retry_delete: |
1051 | /* |
1052 | * Even if the timer is not longer accessible from other tasks |
1053 | * it still might be armed and queued in the underlying timer |
1054 | * mechanism. Worse, that timer mechanism might run the expiry |
1055 | * function concurrently. |
1056 | */ |
1057 | if (timer_delete_hook(timer) == TIMER_RETRY) { |
1058 | /* |
1059 | * Timer is expired concurrently, prevent livelocks |
1060 | * and pointless spinning on RT. |
1061 | * |
1062 | * timer_wait_running() drops timer::it_lock, which opens |
1063 | * the possibility for another task to delete the timer. |
1064 | * |
1065 | * That's not possible here because this is invoked from |
1066 | * do_exit() only for the last thread of the thread group. |
1067 | * So no other task can access and delete that timer. |
1068 | */ |
1069 | if (WARN_ON_ONCE(timer_wait_running(timer, &flags) != timer)) |
1070 | return; |
1071 | |
1072 | goto retry_delete; |
1073 | } |
1074 | list_del(entry: &timer->list); |
1075 | |
1076 | /* |
1077 | * Setting timer::it_signal to NULL is technically not required |
1078 | * here as nothing can access the timer anymore legitimately via |
1079 | * the hash table. Set it to NULL nevertheless so that all deletion |
1080 | * paths are consistent. |
1081 | */ |
1082 | WRITE_ONCE(timer->it_signal, NULL); |
1083 | |
1084 | spin_unlock_irqrestore(lock: &timer->it_lock, flags); |
1085 | posix_timer_unhash_and_free(tmr: timer); |
1086 | } |
1087 | |
1088 | /* |
1089 | * Invoked from do_exit() when the last thread of a thread group exits. |
1090 | * At that point no other task can access the timers of the dying |
1091 | * task anymore. |
1092 | */ |
1093 | void exit_itimers(struct task_struct *tsk) |
1094 | { |
1095 | struct list_head timers; |
1096 | struct k_itimer *tmr; |
1097 | |
1098 | if (list_empty(head: &tsk->signal->posix_timers)) |
1099 | return; |
1100 | |
1101 | /* Protect against concurrent read via /proc/$PID/timers */ |
1102 | spin_lock_irq(lock: &tsk->sighand->siglock); |
1103 | list_replace_init(old: &tsk->signal->posix_timers, new: &timers); |
1104 | spin_unlock_irq(lock: &tsk->sighand->siglock); |
1105 | |
1106 | /* The timers are not longer accessible via tsk::signal */ |
1107 | while (!list_empty(head: &timers)) { |
1108 | tmr = list_first_entry(&timers, struct k_itimer, list); |
1109 | itimer_delete(timer: tmr); |
1110 | } |
1111 | } |
1112 | |
1113 | SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock, |
1114 | const struct __kernel_timespec __user *, tp) |
1115 | { |
1116 | const struct k_clock *kc = clockid_to_kclock(id: which_clock); |
1117 | struct timespec64 new_tp; |
1118 | |
1119 | if (!kc || !kc->clock_set) |
1120 | return -EINVAL; |
1121 | |
1122 | if (get_timespec64(ts: &new_tp, uts: tp)) |
1123 | return -EFAULT; |
1124 | |
1125 | /* |
1126 | * Permission checks have to be done inside the clock specific |
1127 | * setter callback. |
1128 | */ |
1129 | return kc->clock_set(which_clock, &new_tp); |
1130 | } |
1131 | |
1132 | SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock, |
1133 | struct __kernel_timespec __user *, tp) |
1134 | { |
1135 | const struct k_clock *kc = clockid_to_kclock(id: which_clock); |
1136 | struct timespec64 kernel_tp; |
1137 | int error; |
1138 | |
1139 | if (!kc) |
1140 | return -EINVAL; |
1141 | |
1142 | error = kc->clock_get_timespec(which_clock, &kernel_tp); |
1143 | |
1144 | if (!error && put_timespec64(ts: &kernel_tp, uts: tp)) |
1145 | error = -EFAULT; |
1146 | |
1147 | return error; |
1148 | } |
1149 | |
1150 | int do_clock_adjtime(const clockid_t which_clock, struct __kernel_timex * ktx) |
1151 | { |
1152 | const struct k_clock *kc = clockid_to_kclock(id: which_clock); |
1153 | |
1154 | if (!kc) |
1155 | return -EINVAL; |
1156 | if (!kc->clock_adj) |
1157 | return -EOPNOTSUPP; |
1158 | |
1159 | return kc->clock_adj(which_clock, ktx); |
1160 | } |
1161 | |
1162 | SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock, |
1163 | struct __kernel_timex __user *, utx) |
1164 | { |
1165 | struct __kernel_timex ktx; |
1166 | int err; |
1167 | |
1168 | if (copy_from_user(to: &ktx, from: utx, n: sizeof(ktx))) |
1169 | return -EFAULT; |
1170 | |
1171 | err = do_clock_adjtime(which_clock, ktx: &ktx); |
1172 | |
1173 | if (err >= 0 && copy_to_user(to: utx, from: &ktx, n: sizeof(ktx))) |
1174 | return -EFAULT; |
1175 | |
1176 | return err; |
1177 | } |
1178 | |
1179 | /** |
1180 | * sys_clock_getres - Get the resolution of a clock |
1181 | * @which_clock: The clock to get the resolution for |
 * @tp:		Pointer to a user space timespec64 for storage
1183 | * |
1184 | * POSIX defines: |
1185 | * |
1186 | * "The clock_getres() function shall return the resolution of any |
1187 | * clock. Clock resolutions are implementation-defined and cannot be set by |
1188 | * a process. If the argument res is not NULL, the resolution of the |
1189 | * specified clock shall be stored in the location pointed to by res. If |
1190 | * res is NULL, the clock resolution is not returned. If the time argument |
1191 | * of clock_settime() is not a multiple of res, then the value is truncated |
1192 | * to a multiple of res." |
1193 | * |
1194 | * Due to the various hardware constraints the real resolution can vary |
1195 | * wildly and even change during runtime when the underlying devices are |
1196 | * replaced. The kernel also can use hardware devices with different |
1197 | * resolutions for reading the time and for arming timers. |
1198 | * |
1199 | * The kernel therefore deviates from the POSIX spec in various aspects: |
1200 | * |
1201 | * 1) The resolution returned to user space |
1202 | * |
1203 | * For CLOCK_REALTIME, CLOCK_MONOTONIC, CLOCK_BOOTTIME, CLOCK_TAI, |
 *     CLOCK_REALTIME_ALARM, CLOCK_BOOTTIME_ALARM and CLOCK_MONOTONIC_RAW
1205 | * the kernel differentiates only two cases: |
1206 | * |
1207 | * I) Low resolution mode: |
1208 | * |
1209 | * When high resolution timers are disabled at compile or runtime |
1210 | * the resolution returned is nanoseconds per tick, which represents |
1211 | * the precision at which timers expire. |
1212 | * |
1213 | * II) High resolution mode: |
1214 | * |
1215 | * When high resolution timers are enabled the resolution returned |
1216 | * is always one nanosecond independent of the actual resolution of |
1217 | * the underlying hardware devices. |
1218 | * |
1219 | * For CLOCK_*_ALARM the actual resolution depends on system |
1220 | * state. When system is running the resolution is the same as the |
1221 | * resolution of the other clocks. During suspend the actual |
1222 | * resolution is the resolution of the underlying RTC device which |
1223 | * might be way less precise than the clockevent device used during |
1224 | * running state. |
1225 | * |
1226 | * For CLOCK_REALTIME_COARSE and CLOCK_MONOTONIC_COARSE the resolution |
1227 | * returned is always nanoseconds per tick. |
1228 | * |
1229 | * For CLOCK_PROCESS_CPUTIME and CLOCK_THREAD_CPUTIME the resolution |
1230 | * returned is always one nanosecond under the assumption that the |
1231 | * underlying scheduler clock has a better resolution than nanoseconds |
1232 | * per tick. |
1233 | * |
1234 | * For dynamic POSIX clocks (PTP devices) the resolution returned is |
1235 | * always one nanosecond. |
1236 | * |
 *  2) Effect on sys_clock_settime()
1238 | * |
1239 | * The kernel does not truncate the time which is handed in to |
1240 | * sys_clock_settime(). The kernel internal timekeeping is always using |
1241 | * nanoseconds precision independent of the clocksource device which is |
1242 | * used to read the time from. The resolution of that device only |
 *     affects the precision of the time returned by sys_clock_gettime().
1244 | * |
1245 | * Returns: |
1246 | * 0 Success. @tp contains the resolution |
1247 | * -EINVAL @which_clock is not a valid clock ID |
1248 | * -EFAULT Copying the resolution to @tp faulted |
1249 | * -ENODEV Dynamic POSIX clock is not backed by a device |
1250 | * -EOPNOTSUPP Dynamic POSIX clock does not support getres() |
1251 | */ |
1252 | SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock, |
1253 | struct __kernel_timespec __user *, tp) |
1254 | { |
1255 | const struct k_clock *kc = clockid_to_kclock(id: which_clock); |
1256 | struct timespec64 rtn_tp; |
1257 | int error; |
1258 | |
1259 | if (!kc) |
1260 | return -EINVAL; |
1261 | |
1262 | error = kc->clock_getres(which_clock, &rtn_tp); |
1263 | |
1264 | if (!error && tp && put_timespec64(ts: &rtn_tp, uts: tp)) |
1265 | error = -EFAULT; |
1266 | |
1267 | return error; |
1268 | } |
1269 | |
1270 | #ifdef CONFIG_COMPAT_32BIT_TIME |
1271 | |
1272 | SYSCALL_DEFINE2(clock_settime32, clockid_t, which_clock, |
1273 | struct old_timespec32 __user *, tp) |
1274 | { |
1275 | const struct k_clock *kc = clockid_to_kclock(id: which_clock); |
1276 | struct timespec64 ts; |
1277 | |
1278 | if (!kc || !kc->clock_set) |
1279 | return -EINVAL; |
1280 | |
1281 | if (get_old_timespec32(&ts, tp)) |
1282 | return -EFAULT; |
1283 | |
1284 | return kc->clock_set(which_clock, &ts); |
1285 | } |
1286 | |
1287 | SYSCALL_DEFINE2(clock_gettime32, clockid_t, which_clock, |
1288 | struct old_timespec32 __user *, tp) |
1289 | { |
1290 | const struct k_clock *kc = clockid_to_kclock(id: which_clock); |
1291 | struct timespec64 ts; |
1292 | int err; |
1293 | |
1294 | if (!kc) |
1295 | return -EINVAL; |
1296 | |
1297 | err = kc->clock_get_timespec(which_clock, &ts); |
1298 | |
1299 | if (!err && put_old_timespec32(&ts, tp)) |
1300 | err = -EFAULT; |
1301 | |
1302 | return err; |
1303 | } |
1304 | |
1305 | SYSCALL_DEFINE2(clock_adjtime32, clockid_t, which_clock, |
1306 | struct old_timex32 __user *, utp) |
1307 | { |
1308 | struct __kernel_timex ktx; |
1309 | int err; |
1310 | |
1311 | err = get_old_timex32(&ktx, utp); |
1312 | if (err) |
1313 | return err; |
1314 | |
1315 | err = do_clock_adjtime(which_clock, ktx: &ktx); |
1316 | |
1317 | if (err >= 0 && put_old_timex32(utp, &ktx)) |
1318 | return -EFAULT; |
1319 | |
1320 | return err; |
1321 | } |
1322 | |
1323 | SYSCALL_DEFINE2(clock_getres_time32, clockid_t, which_clock, |
1324 | struct old_timespec32 __user *, tp) |
1325 | { |
1326 | const struct k_clock *kc = clockid_to_kclock(id: which_clock); |
1327 | struct timespec64 ts; |
1328 | int err; |
1329 | |
1330 | if (!kc) |
1331 | return -EINVAL; |
1332 | |
1333 | err = kc->clock_getres(which_clock, &ts); |
1334 | if (!err && tp && put_old_timespec32(&ts, tp)) |
1335 | return -EFAULT; |
1336 | |
1337 | return err; |
1338 | } |
1339 | |
1340 | #endif |
1341 | |
1342 | /* |
1343 | * sys_clock_nanosleep() for CLOCK_REALTIME and CLOCK_TAI |
1344 | */ |
1345 | static int common_nsleep(const clockid_t which_clock, int flags, |
1346 | const struct timespec64 *rqtp) |
1347 | { |
1348 | ktime_t texp = timespec64_to_ktime(ts: *rqtp); |
1349 | |
1350 | return hrtimer_nanosleep(rqtp: texp, mode: flags & TIMER_ABSTIME ? |
1351 | HRTIMER_MODE_ABS : HRTIMER_MODE_REL, |
1352 | clockid: which_clock); |
1353 | } |
1354 | |
1355 | /* |
1356 | * sys_clock_nanosleep() for CLOCK_MONOTONIC and CLOCK_BOOTTIME |
1357 | * |
1358 | * Absolute nanosleeps for these clocks are time-namespace adjusted. |
1359 | */ |
1360 | static int common_nsleep_timens(const clockid_t which_clock, int flags, |
1361 | const struct timespec64 *rqtp) |
1362 | { |
1363 | ktime_t texp = timespec64_to_ktime(ts: *rqtp); |
1364 | |
1365 | if (flags & TIMER_ABSTIME) |
1366 | texp = timens_ktime_to_host(clockid: which_clock, tim: texp); |
1367 | |
1368 | return hrtimer_nanosleep(rqtp: texp, mode: flags & TIMER_ABSTIME ? |
1369 | HRTIMER_MODE_ABS : HRTIMER_MODE_REL, |
1370 | clockid: which_clock); |
1371 | } |
1372 | |
1373 | SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, |
1374 | const struct __kernel_timespec __user *, rqtp, |
1375 | struct __kernel_timespec __user *, rmtp) |
1376 | { |
1377 | const struct k_clock *kc = clockid_to_kclock(id: which_clock); |
1378 | struct timespec64 t; |
1379 | |
1380 | if (!kc) |
1381 | return -EINVAL; |
1382 | if (!kc->nsleep) |
1383 | return -EOPNOTSUPP; |
1384 | |
1385 | if (get_timespec64(ts: &t, uts: rqtp)) |
1386 | return -EFAULT; |
1387 | |
1388 | if (!timespec64_valid(ts: &t)) |
1389 | return -EINVAL; |
1390 | if (flags & TIMER_ABSTIME) |
1391 | rmtp = NULL; |
1392 | current->restart_block.fn = do_no_restart_syscall; |
1393 | current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE; |
1394 | current->restart_block.nanosleep.rmtp = rmtp; |
1395 | |
1396 | return kc->nsleep(which_clock, flags, &t); |
1397 | } |
1398 | |
1399 | #ifdef CONFIG_COMPAT_32BIT_TIME |
1400 | |
1401 | SYSCALL_DEFINE4(clock_nanosleep_time32, clockid_t, which_clock, int, flags, |
1402 | struct old_timespec32 __user *, rqtp, |
1403 | struct old_timespec32 __user *, rmtp) |
1404 | { |
1405 | const struct k_clock *kc = clockid_to_kclock(id: which_clock); |
1406 | struct timespec64 t; |
1407 | |
1408 | if (!kc) |
1409 | return -EINVAL; |
1410 | if (!kc->nsleep) |
1411 | return -EOPNOTSUPP; |
1412 | |
1413 | if (get_old_timespec32(&t, rqtp)) |
1414 | return -EFAULT; |
1415 | |
1416 | if (!timespec64_valid(ts: &t)) |
1417 | return -EINVAL; |
1418 | if (flags & TIMER_ABSTIME) |
1419 | rmtp = NULL; |
1420 | current->restart_block.fn = do_no_restart_syscall; |
1421 | current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE; |
1422 | current->restart_block.nanosleep.compat_rmtp = rmtp; |
1423 | |
1424 | return kc->nsleep(which_clock, flags, &t); |
1425 | } |
1426 | |
1427 | #endif |
1428 | |
1429 | static const struct k_clock clock_realtime = { |
1430 | .clock_getres = posix_get_hrtimer_res, |
1431 | .clock_get_timespec = posix_get_realtime_timespec, |
1432 | .clock_get_ktime = posix_get_realtime_ktime, |
1433 | .clock_set = posix_clock_realtime_set, |
1434 | .clock_adj = posix_clock_realtime_adj, |
1435 | .nsleep = common_nsleep, |
1436 | .timer_create = common_timer_create, |
1437 | .timer_set = common_timer_set, |
1438 | .timer_get = common_timer_get, |
1439 | .timer_del = common_timer_del, |
1440 | .timer_rearm = common_hrtimer_rearm, |
1441 | .timer_forward = common_hrtimer_forward, |
1442 | .timer_remaining = common_hrtimer_remaining, |
1443 | .timer_try_to_cancel = common_hrtimer_try_to_cancel, |
1444 | .timer_wait_running = common_timer_wait_running, |
1445 | .timer_arm = common_hrtimer_arm, |
1446 | }; |
1447 | |
1448 | static const struct k_clock clock_monotonic = { |
1449 | .clock_getres = posix_get_hrtimer_res, |
1450 | .clock_get_timespec = posix_get_monotonic_timespec, |
1451 | .clock_get_ktime = posix_get_monotonic_ktime, |
1452 | .nsleep = common_nsleep_timens, |
1453 | .timer_create = common_timer_create, |
1454 | .timer_set = common_timer_set, |
1455 | .timer_get = common_timer_get, |
1456 | .timer_del = common_timer_del, |
1457 | .timer_rearm = common_hrtimer_rearm, |
1458 | .timer_forward = common_hrtimer_forward, |
1459 | .timer_remaining = common_hrtimer_remaining, |
1460 | .timer_try_to_cancel = common_hrtimer_try_to_cancel, |
1461 | .timer_wait_running = common_timer_wait_running, |
1462 | .timer_arm = common_hrtimer_arm, |
1463 | }; |
1464 | |
1465 | static const struct k_clock clock_monotonic_raw = { |
1466 | .clock_getres = posix_get_hrtimer_res, |
1467 | .clock_get_timespec = posix_get_monotonic_raw, |
1468 | }; |
1469 | |
1470 | static const struct k_clock clock_realtime_coarse = { |
1471 | .clock_getres = posix_get_coarse_res, |
1472 | .clock_get_timespec = posix_get_realtime_coarse, |
1473 | }; |
1474 | |
1475 | static const struct k_clock clock_monotonic_coarse = { |
1476 | .clock_getres = posix_get_coarse_res, |
1477 | .clock_get_timespec = posix_get_monotonic_coarse, |
1478 | }; |
1479 | |
1480 | static const struct k_clock clock_tai = { |
1481 | .clock_getres = posix_get_hrtimer_res, |
1482 | .clock_get_ktime = posix_get_tai_ktime, |
1483 | .clock_get_timespec = posix_get_tai_timespec, |
1484 | .nsleep = common_nsleep, |
1485 | .timer_create = common_timer_create, |
1486 | .timer_set = common_timer_set, |
1487 | .timer_get = common_timer_get, |
1488 | .timer_del = common_timer_del, |
1489 | .timer_rearm = common_hrtimer_rearm, |
1490 | .timer_forward = common_hrtimer_forward, |
1491 | .timer_remaining = common_hrtimer_remaining, |
1492 | .timer_try_to_cancel = common_hrtimer_try_to_cancel, |
1493 | .timer_wait_running = common_timer_wait_running, |
1494 | .timer_arm = common_hrtimer_arm, |
1495 | }; |
1496 | |
1497 | static const struct k_clock clock_boottime = { |
1498 | .clock_getres = posix_get_hrtimer_res, |
1499 | .clock_get_ktime = posix_get_boottime_ktime, |
1500 | .clock_get_timespec = posix_get_boottime_timespec, |
1501 | .nsleep = common_nsleep_timens, |
1502 | .timer_create = common_timer_create, |
1503 | .timer_set = common_timer_set, |
1504 | .timer_get = common_timer_get, |
1505 | .timer_del = common_timer_del, |
1506 | .timer_rearm = common_hrtimer_rearm, |
1507 | .timer_forward = common_hrtimer_forward, |
1508 | .timer_remaining = common_hrtimer_remaining, |
1509 | .timer_try_to_cancel = common_hrtimer_try_to_cancel, |
1510 | .timer_wait_running = common_timer_wait_running, |
1511 | .timer_arm = common_hrtimer_arm, |
1512 | }; |
1513 | |
1514 | static const struct k_clock * const posix_clocks[] = { |
1515 | [CLOCK_REALTIME] = &clock_realtime, |
1516 | [CLOCK_MONOTONIC] = &clock_monotonic, |
1517 | [CLOCK_PROCESS_CPUTIME_ID] = &clock_process, |
1518 | [CLOCK_THREAD_CPUTIME_ID] = &clock_thread, |
1519 | [CLOCK_MONOTONIC_RAW] = &clock_monotonic_raw, |
1520 | [CLOCK_REALTIME_COARSE] = &clock_realtime_coarse, |
1521 | [CLOCK_MONOTONIC_COARSE] = &clock_monotonic_coarse, |
1522 | [CLOCK_BOOTTIME] = &clock_boottime, |
1523 | [CLOCK_REALTIME_ALARM] = &alarm_clock, |
1524 | [CLOCK_BOOTTIME_ALARM] = &alarm_clock, |
1525 | [CLOCK_TAI] = &clock_tai, |
1526 | }; |
1527 | |
1528 | static const struct k_clock *clockid_to_kclock(const clockid_t id) |
1529 | { |
1530 | clockid_t idx = id; |
1531 | |
1532 | if (id < 0) { |
1533 | return (id & CLOCKFD_MASK) == CLOCKFD ? |
1534 | &clock_posix_dynamic : &clock_posix_cpu; |
1535 | } |
1536 | |
1537 | if (id >= ARRAY_SIZE(posix_clocks)) |
1538 | return NULL; |
1539 | |
1540 | return posix_clocks[array_index_nospec(idx, ARRAY_SIZE(posix_clocks))]; |
1541 | } |
1542 | |