1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | |
3 | #include <linux/plist.h> |
4 | #include <linux/sched/task.h> |
5 | #include <linux/sched/signal.h> |
6 | #include <linux/freezer.h> |
7 | |
8 | #include "futex.h" |
9 | |
10 | /* |
11 | * READ this before attempting to hack on futexes! |
12 | * |
13 | * Basic futex operation and ordering guarantees |
14 | * ============================================= |
15 | * |
16 | * The waiter reads the futex value in user space and calls |
17 | * futex_wait(). This function computes the hash bucket and acquires |
18 | * the hash bucket lock. After that it reads the futex user space value |
19 | * again and verifies that the data has not changed. If it has not changed |
20 | * it enqueues itself into the hash bucket, releases the hash bucket lock |
21 | * and schedules. |
22 | * |
23 | * The waker side modifies the user space value of the futex and calls |
24 | * futex_wake(). This function computes the hash bucket and acquires the |
25 | * hash bucket lock. Then it looks for waiters on that futex in the hash |
26 | * bucket and wakes them. |
27 | * |
 * In futex wake up scenarios where no tasks are blocked on a futex, taking
 * the hb spinlock can be avoided and the waker can simply return. In order
 * for this optimization to work, ordering guarantees must exist so that the
 * waiter being added to the list is acknowledged when the list is
 * concurrently being checked by the waker, avoiding scenarios like the
 * following:
33 | * |
 *	CPU 0                               CPU 1
 *	val = *futex;
 *	sys_futex(WAIT, futex, val);
 *	  futex_wait(futex, val);
 *	  uval = *futex;
 *	                                    *futex = newval;
 *	                                    sys_futex(WAKE, futex);
 *	                                      futex_wake(futex);
 *	                                      if (queue_empty())
 *	                                        return;
 *	  if (uval == val)
 *	     lock(hash_bucket(futex));
 *	     queue();
 *	     unlock(hash_bucket(futex));
 *	     schedule();
49 | * |
50 | * This would cause the waiter on CPU 0 to wait forever because it |
51 | * missed the transition of the user space value from val to newval |
52 | * and the waker did not find the waiter in the hash bucket queue. |
53 | * |
54 | * The correct serialization ensures that a waiter either observes |
55 | * the changed user space value before blocking or is woken by a |
56 | * concurrent waker: |
57 | * |
 *	CPU 0                                 CPU 1
 *	val = *futex;
 *	sys_futex(WAIT, futex, val);
 *	  futex_wait(futex, val);
 *
 *	  waiters++; (a)
 *	  smp_mb(); (A) <-- paired with -.
 *	                                  |
 *	  lock(hash_bucket(futex));       |
 *	                                  |
 *	  uval = *futex;                  |
 *	                                  |        *futex = newval;
 *	                                  |        sys_futex(WAKE, futex);
 *	                                  |          futex_wake(futex);
 *	                                  |
 *	                                  `--------> smp_mb(); (B)
 *	  if (uval == val)
 *	    queue();
 *	    unlock(hash_bucket(futex));
 *	    schedule();                         if (waiters)
 *	                                          lock(hash_bucket(futex));
 *	  else                                    wake_waiters(futex);
 *	    waiters--; (b)                        unlock(hash_bucket(futex));
81 | * |
82 | * Where (A) orders the waiters increment and the futex value read through |
83 | * atomic operations (see futex_hb_waiters_inc) and where (B) orders the write |
84 | * to futex and the waiters read (see futex_hb_waiters_pending()). |
85 | * |
86 | * This yields the following case (where X:=waiters, Y:=futex): |
87 | * |
 *	X = Y = 0
 *
 *	w[X]=1		w[Y]=1
 *	MB		MB
 *	r[Y]=y		r[X]=x
93 | * |
94 | * Which guarantees that x==0 && y==0 is impossible; which translates back into |
95 | * the guarantee that we cannot both miss the futex variable change and the |
96 | * enqueue. |
97 | * |
98 | * Note that a new waiter is accounted for in (a) even when it is possible that |
99 | * the wait call can return error, in which case we backtrack from it in (b). |
100 | * Refer to the comment in futex_q_lock(). |
101 | * |
102 | * Similarly, in order to account for waiters being requeued on another |
103 | * address we always increment the waiters for the destination bucket before |
104 | * acquiring the lock. It then decrements them again after releasing it - |
105 | * the code that actually moves the futex(es) between hash buckets (requeue_futex) |
106 | * will do the additional required waiter count housekeeping. This is done for |
107 | * double_lock_hb() and double_unlock_hb(), respectively. |
108 | */ |
109 | |
110 | bool __futex_wake_mark(struct futex_q *q) |
111 | { |
112 | if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n" )) |
113 | return false; |
114 | |
115 | __futex_unqueue(q); |
116 | /* |
117 | * The waiting task can free the futex_q as soon as q->lock_ptr = NULL |
118 | * is written, without taking any locks. This is possible in the event |
119 | * of a spurious wakeup, for example. A memory barrier is required here |
120 | * to prevent the following store to lock_ptr from getting ahead of the |
121 | * plist_del in __futex_unqueue(). |
122 | */ |
123 | smp_store_release(&q->lock_ptr, NULL); |
124 | |
125 | return true; |
126 | } |
127 | |
128 | /* |
129 | * The hash bucket lock must be held when this is called. |
130 | * Afterwards, the futex_q must not be accessed. Callers |
131 | * must ensure to later call wake_up_q() for the actual |
132 | * wakeups to occur. |
133 | */ |
134 | void futex_wake_mark(struct wake_q_head *wake_q, struct futex_q *q) |
135 | { |
136 | struct task_struct *p = q->task; |
137 | |
138 | get_task_struct(t: p); |
139 | |
140 | if (!__futex_wake_mark(q)) { |
141 | put_task_struct(t: p); |
142 | return; |
143 | } |
144 | |
145 | /* |
146 | * Queue the task for later wakeup for after we've released |
147 | * the hb->lock. |
148 | */ |
149 | wake_q_add_safe(head: wake_q, task: p); |
150 | } |
151 | |
152 | /* |
153 | * Wake up waiters matching bitset queued on this futex (uaddr). |
154 | */ |
155 | int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset) |
156 | { |
157 | struct futex_hash_bucket *hb; |
158 | struct futex_q *this, *next; |
159 | union futex_key key = FUTEX_KEY_INIT; |
160 | DEFINE_WAKE_Q(wake_q); |
161 | int ret; |
162 | |
163 | if (!bitset) |
164 | return -EINVAL; |
165 | |
166 | ret = get_futex_key(uaddr, flags, key: &key, rw: FUTEX_READ); |
167 | if (unlikely(ret != 0)) |
168 | return ret; |
169 | |
170 | if ((flags & FLAGS_STRICT) && !nr_wake) |
171 | return 0; |
172 | |
173 | hb = futex_hash(key: &key); |
174 | |
175 | /* Make sure we really have tasks to wakeup */ |
176 | if (!futex_hb_waiters_pending(hb)) |
177 | return ret; |
178 | |
179 | spin_lock(lock: &hb->lock); |
180 | |
181 | plist_for_each_entry_safe(this, next, &hb->chain, list) { |
182 | if (futex_match (key1: &this->key, key2: &key)) { |
183 | if (this->pi_state || this->rt_waiter) { |
184 | ret = -EINVAL; |
185 | break; |
186 | } |
187 | |
188 | /* Check if one of the bits is set in both bitsets */ |
189 | if (!(this->bitset & bitset)) |
190 | continue; |
191 | |
192 | this->wake(&wake_q, this); |
193 | if (++ret >= nr_wake) |
194 | break; |
195 | } |
196 | } |
197 | |
198 | spin_unlock(lock: &hb->lock); |
199 | wake_up_q(head: &wake_q); |
200 | return ret; |
201 | } |
202 | |
203 | static int futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr) |
204 | { |
205 | unsigned int op = (encoded_op & 0x70000000) >> 28; |
206 | unsigned int cmp = (encoded_op & 0x0f000000) >> 24; |
207 | int oparg = sign_extend32(value: (encoded_op & 0x00fff000) >> 12, index: 11); |
208 | int cmparg = sign_extend32(value: encoded_op & 0x00000fff, index: 11); |
209 | int oldval, ret; |
210 | |
211 | if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) { |
212 | if (oparg < 0 || oparg > 31) { |
213 | char comm[sizeof(current->comm)]; |
214 | /* |
215 | * kill this print and return -EINVAL when userspace |
216 | * is sane again |
217 | */ |
218 | pr_info_ratelimited("futex_wake_op: %s tries to shift op by %d; fix this program\n" , |
219 | get_task_comm(comm, current), oparg); |
220 | oparg &= 31; |
221 | } |
222 | oparg = 1 << oparg; |
223 | } |
224 | |
225 | pagefault_disable(); |
226 | ret = arch_futex_atomic_op_inuser(op, oparg, oval: &oldval, uaddr); |
227 | pagefault_enable(); |
228 | if (ret) |
229 | return ret; |
230 | |
231 | switch (cmp) { |
232 | case FUTEX_OP_CMP_EQ: |
233 | return oldval == cmparg; |
234 | case FUTEX_OP_CMP_NE: |
235 | return oldval != cmparg; |
236 | case FUTEX_OP_CMP_LT: |
237 | return oldval < cmparg; |
238 | case FUTEX_OP_CMP_GE: |
239 | return oldval >= cmparg; |
240 | case FUTEX_OP_CMP_LE: |
241 | return oldval <= cmparg; |
242 | case FUTEX_OP_CMP_GT: |
243 | return oldval > cmparg; |
244 | default: |
245 | return -ENOSYS; |
246 | } |
247 | } |
248 | |
249 | /* |
250 | * Wake up all waiters hashed on the physical page that is mapped |
251 | * to this virtual address: |
252 | */ |
253 | int futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, |
254 | int nr_wake, int nr_wake2, int op) |
255 | { |
256 | union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; |
257 | struct futex_hash_bucket *hb1, *hb2; |
258 | struct futex_q *this, *next; |
259 | int ret, op_ret; |
260 | DEFINE_WAKE_Q(wake_q); |
261 | |
262 | retry: |
263 | ret = get_futex_key(uaddr: uaddr1, flags, key: &key1, rw: FUTEX_READ); |
264 | if (unlikely(ret != 0)) |
265 | return ret; |
266 | ret = get_futex_key(uaddr: uaddr2, flags, key: &key2, rw: FUTEX_WRITE); |
267 | if (unlikely(ret != 0)) |
268 | return ret; |
269 | |
270 | hb1 = futex_hash(key: &key1); |
271 | hb2 = futex_hash(key: &key2); |
272 | |
273 | retry_private: |
274 | double_lock_hb(hb1, hb2); |
275 | op_ret = futex_atomic_op_inuser(encoded_op: op, uaddr: uaddr2); |
276 | if (unlikely(op_ret < 0)) { |
277 | double_unlock_hb(hb1, hb2); |
278 | |
279 | if (!IS_ENABLED(CONFIG_MMU) || |
280 | unlikely(op_ret != -EFAULT && op_ret != -EAGAIN)) { |
281 | /* |
282 | * we don't get EFAULT from MMU faults if we don't have |
283 | * an MMU, but we might get them from range checking |
284 | */ |
285 | ret = op_ret; |
286 | return ret; |
287 | } |
288 | |
289 | if (op_ret == -EFAULT) { |
290 | ret = fault_in_user_writeable(uaddr: uaddr2); |
291 | if (ret) |
292 | return ret; |
293 | } |
294 | |
295 | cond_resched(); |
296 | if (!(flags & FLAGS_SHARED)) |
297 | goto retry_private; |
298 | goto retry; |
299 | } |
300 | |
301 | plist_for_each_entry_safe(this, next, &hb1->chain, list) { |
302 | if (futex_match (key1: &this->key, key2: &key1)) { |
303 | if (this->pi_state || this->rt_waiter) { |
304 | ret = -EINVAL; |
305 | goto out_unlock; |
306 | } |
307 | this->wake(&wake_q, this); |
308 | if (++ret >= nr_wake) |
309 | break; |
310 | } |
311 | } |
312 | |
313 | if (op_ret > 0) { |
314 | op_ret = 0; |
315 | plist_for_each_entry_safe(this, next, &hb2->chain, list) { |
316 | if (futex_match (key1: &this->key, key2: &key2)) { |
317 | if (this->pi_state || this->rt_waiter) { |
318 | ret = -EINVAL; |
319 | goto out_unlock; |
320 | } |
321 | this->wake(&wake_q, this); |
322 | if (++op_ret >= nr_wake2) |
323 | break; |
324 | } |
325 | } |
326 | ret += op_ret; |
327 | } |
328 | |
329 | out_unlock: |
330 | double_unlock_hb(hb1, hb2); |
331 | wake_up_q(head: &wake_q); |
332 | return ret; |
333 | } |
334 | |
335 | static long futex_wait_restart(struct restart_block *restart); |
336 | |
337 | /** |
338 | * futex_wait_queue() - futex_queue() and wait for wakeup, timeout, or signal |
339 | * @hb: the futex hash bucket, must be locked by the caller |
340 | * @q: the futex_q to queue up on |
341 | * @timeout: the prepared hrtimer_sleeper, or null for no timeout |
342 | */ |
343 | void futex_wait_queue(struct futex_hash_bucket *hb, struct futex_q *q, |
344 | struct hrtimer_sleeper *timeout) |
345 | { |
346 | /* |
347 | * The task state is guaranteed to be set before another task can |
348 | * wake it. set_current_state() is implemented using smp_store_mb() and |
349 | * futex_queue() calls spin_unlock() upon completion, both serializing |
350 | * access to the hash list and forcing another memory barrier. |
351 | */ |
352 | set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE); |
353 | futex_queue(q, hb); |
354 | |
355 | /* Arm the timer */ |
356 | if (timeout) |
357 | hrtimer_sleeper_start_expires(sl: timeout, mode: HRTIMER_MODE_ABS); |
358 | |
359 | /* |
360 | * If we have been removed from the hash list, then another task |
361 | * has tried to wake us, and we can skip the call to schedule(). |
362 | */ |
363 | if (likely(!plist_node_empty(&q->list))) { |
364 | /* |
365 | * If the timer has already expired, current will already be |
366 | * flagged for rescheduling. Only call schedule if there |
367 | * is no timeout, or if it has yet to expire. |
368 | */ |
369 | if (!timeout || timeout->task) |
370 | schedule(); |
371 | } |
372 | __set_current_state(TASK_RUNNING); |
373 | } |
374 | |
375 | /** |
376 | * futex_unqueue_multiple - Remove various futexes from their hash bucket |
377 | * @v: The list of futexes to unqueue |
378 | * @count: Number of futexes in the list |
379 | * |
380 | * Helper to unqueue a list of futexes. This can't fail. |
381 | * |
382 | * Return: |
383 | * - >=0 - Index of the last futex that was awoken; |
384 | * - -1 - No futex was awoken |
385 | */ |
386 | int futex_unqueue_multiple(struct futex_vector *v, int count) |
387 | { |
388 | int ret = -1, i; |
389 | |
390 | for (i = 0; i < count; i++) { |
391 | if (!futex_unqueue(q: &v[i].q)) |
392 | ret = i; |
393 | } |
394 | |
395 | return ret; |
396 | } |
397 | |
398 | /** |
399 | * futex_wait_multiple_setup - Prepare to wait and enqueue multiple futexes |
400 | * @vs: The futex list to wait on |
401 | * @count: The size of the list |
402 | * @woken: Index of the last woken futex, if any. Used to notify the |
403 | * caller that it can return this index to userspace (return parameter) |
404 | * |
405 | * Prepare multiple futexes in a single step and enqueue them. This may fail if |
406 | * the futex list is invalid or if any futex was already awoken. On success the |
407 | * task is ready to interruptible sleep. |
408 | * |
409 | * Return: |
410 | * - 1 - One of the futexes was woken by another thread |
411 | * - 0 - Success |
412 | * - <0 - -EFAULT, -EWOULDBLOCK or -EINVAL |
413 | */ |
414 | int futex_wait_multiple_setup(struct futex_vector *vs, int count, int *woken) |
415 | { |
416 | struct futex_hash_bucket *hb; |
417 | bool retry = false; |
418 | int ret, i; |
419 | u32 uval; |
420 | |
421 | /* |
422 | * Enqueuing multiple futexes is tricky, because we need to enqueue |
423 | * each futex on the list before dealing with the next one to avoid |
424 | * deadlocking on the hash bucket. But, before enqueuing, we need to |
425 | * make sure that current->state is TASK_INTERRUPTIBLE, so we don't |
426 | * lose any wake events, which cannot be done before the get_futex_key |
427 | * of the next key, because it calls get_user_pages, which can sleep. |
428 | * Thus, we fetch the list of futexes keys in two steps, by first |
429 | * pinning all the memory keys in the futex key, and only then we read |
430 | * each key and queue the corresponding futex. |
431 | * |
432 | * Private futexes doesn't need to recalculate hash in retry, so skip |
433 | * get_futex_key() when retrying. |
434 | */ |
435 | retry: |
436 | for (i = 0; i < count; i++) { |
437 | if (!(vs[i].w.flags & FLAGS_SHARED) && retry) |
438 | continue; |
439 | |
440 | ret = get_futex_key(u64_to_user_ptr(vs[i].w.uaddr), |
441 | flags: vs[i].w.flags, |
442 | key: &vs[i].q.key, rw: FUTEX_READ); |
443 | |
444 | if (unlikely(ret)) |
445 | return ret; |
446 | } |
447 | |
448 | set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE); |
449 | |
450 | for (i = 0; i < count; i++) { |
451 | u32 __user *uaddr = (u32 __user *)(unsigned long)vs[i].w.uaddr; |
452 | struct futex_q *q = &vs[i].q; |
453 | u32 val = vs[i].w.val; |
454 | |
455 | hb = futex_q_lock(q); |
456 | ret = futex_get_value_locked(dest: &uval, from: uaddr); |
457 | |
458 | if (!ret && uval == val) { |
459 | /* |
460 | * The bucket lock can't be held while dealing with the |
461 | * next futex. Queue each futex at this moment so hb can |
462 | * be unlocked. |
463 | */ |
464 | futex_queue(q, hb); |
465 | continue; |
466 | } |
467 | |
468 | futex_q_unlock(hb); |
469 | __set_current_state(TASK_RUNNING); |
470 | |
471 | /* |
472 | * Even if something went wrong, if we find out that a futex |
473 | * was woken, we don't return error and return this index to |
474 | * userspace |
475 | */ |
476 | *woken = futex_unqueue_multiple(v: vs, count: i); |
477 | if (*woken >= 0) |
478 | return 1; |
479 | |
480 | if (ret) { |
481 | /* |
482 | * If we need to handle a page fault, we need to do so |
483 | * without any lock and any enqueued futex (otherwise |
484 | * we could lose some wakeup). So we do it here, after |
485 | * undoing all the work done so far. In success, we |
486 | * retry all the work. |
487 | */ |
488 | if (get_user(uval, uaddr)) |
489 | return -EFAULT; |
490 | |
491 | retry = true; |
492 | goto retry; |
493 | } |
494 | |
495 | if (uval != val) |
496 | return -EWOULDBLOCK; |
497 | } |
498 | |
499 | return 0; |
500 | } |
501 | |
502 | /** |
503 | * futex_sleep_multiple - Check sleeping conditions and sleep |
504 | * @vs: List of futexes to wait for |
505 | * @count: Length of vs |
506 | * @to: Timeout |
507 | * |
508 | * Sleep if and only if the timeout hasn't expired and no futex on the list has |
509 | * been woken up. |
510 | */ |
511 | static void futex_sleep_multiple(struct futex_vector *vs, unsigned int count, |
512 | struct hrtimer_sleeper *to) |
513 | { |
514 | if (to && !to->task) |
515 | return; |
516 | |
517 | for (; count; count--, vs++) { |
518 | if (!READ_ONCE(vs->q.lock_ptr)) |
519 | return; |
520 | } |
521 | |
522 | schedule(); |
523 | } |
524 | |
525 | /** |
526 | * futex_wait_multiple - Prepare to wait on and enqueue several futexes |
527 | * @vs: The list of futexes to wait on |
528 | * @count: The number of objects |
529 | * @to: Timeout before giving up and returning to userspace |
530 | * |
531 | * Entry point for the FUTEX_WAIT_MULTIPLE futex operation, this function |
532 | * sleeps on a group of futexes and returns on the first futex that is |
533 | * wake, or after the timeout has elapsed. |
534 | * |
535 | * Return: |
536 | * - >=0 - Hint to the futex that was awoken |
537 | * - <0 - On error |
538 | */ |
539 | int futex_wait_multiple(struct futex_vector *vs, unsigned int count, |
540 | struct hrtimer_sleeper *to) |
541 | { |
542 | int ret, hint = 0; |
543 | |
544 | if (to) |
545 | hrtimer_sleeper_start_expires(sl: to, mode: HRTIMER_MODE_ABS); |
546 | |
547 | while (1) { |
548 | ret = futex_wait_multiple_setup(vs, count, woken: &hint); |
549 | if (ret) { |
550 | if (ret > 0) { |
551 | /* A futex was woken during setup */ |
552 | ret = hint; |
553 | } |
554 | return ret; |
555 | } |
556 | |
557 | futex_sleep_multiple(vs, count, to); |
558 | |
559 | __set_current_state(TASK_RUNNING); |
560 | |
561 | ret = futex_unqueue_multiple(v: vs, count); |
562 | if (ret >= 0) |
563 | return ret; |
564 | |
565 | if (to && !to->task) |
566 | return -ETIMEDOUT; |
567 | else if (signal_pending(current)) |
568 | return -ERESTARTSYS; |
569 | /* |
570 | * The final case is a spurious wakeup, for |
571 | * which just retry. |
572 | */ |
573 | } |
574 | } |
575 | |
576 | /** |
577 | * futex_wait_setup() - Prepare to wait on a futex |
578 | * @uaddr: the futex userspace address |
579 | * @val: the expected value |
580 | * @flags: futex flags (FLAGS_SHARED, etc.) |
581 | * @q: the associated futex_q |
582 | * @hb: storage for hash_bucket pointer to be returned to caller |
583 | * |
584 | * Setup the futex_q and locate the hash_bucket. Get the futex value and |
585 | * compare it with the expected value. Handle atomic faults internally. |
586 | * Return with the hb lock held on success, and unlocked on failure. |
587 | * |
588 | * Return: |
589 | * - 0 - uaddr contains val and hb has been locked; |
590 | * - <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked |
591 | */ |
592 | int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags, |
593 | struct futex_q *q, struct futex_hash_bucket **hb) |
594 | { |
595 | u32 uval; |
596 | int ret; |
597 | |
598 | /* |
599 | * Access the page AFTER the hash-bucket is locked. |
600 | * Order is important: |
601 | * |
602 | * Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val); |
603 | * Userspace waker: if (cond(var)) { var = new; futex_wake(&var); } |
604 | * |
605 | * The basic logical guarantee of a futex is that it blocks ONLY |
606 | * if cond(var) is known to be true at the time of blocking, for |
607 | * any cond. If we locked the hash-bucket after testing *uaddr, that |
608 | * would open a race condition where we could block indefinitely with |
609 | * cond(var) false, which would violate the guarantee. |
610 | * |
611 | * On the other hand, we insert q and release the hash-bucket only |
612 | * after testing *uaddr. This guarantees that futex_wait() will NOT |
613 | * absorb a wakeup if *uaddr does not match the desired values |
614 | * while the syscall executes. |
615 | */ |
616 | retry: |
617 | ret = get_futex_key(uaddr, flags, key: &q->key, rw: FUTEX_READ); |
618 | if (unlikely(ret != 0)) |
619 | return ret; |
620 | |
621 | retry_private: |
622 | *hb = futex_q_lock(q); |
623 | |
624 | ret = futex_get_value_locked(dest: &uval, from: uaddr); |
625 | |
626 | if (ret) { |
627 | futex_q_unlock(hb: *hb); |
628 | |
629 | ret = get_user(uval, uaddr); |
630 | if (ret) |
631 | return ret; |
632 | |
633 | if (!(flags & FLAGS_SHARED)) |
634 | goto retry_private; |
635 | |
636 | goto retry; |
637 | } |
638 | |
639 | if (uval != val) { |
640 | futex_q_unlock(hb: *hb); |
641 | ret = -EWOULDBLOCK; |
642 | } |
643 | |
644 | return ret; |
645 | } |
646 | |
647 | int __futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, |
648 | struct hrtimer_sleeper *to, u32 bitset) |
649 | { |
650 | struct futex_q q = futex_q_init; |
651 | struct futex_hash_bucket *hb; |
652 | int ret; |
653 | |
654 | if (!bitset) |
655 | return -EINVAL; |
656 | |
657 | q.bitset = bitset; |
658 | |
659 | retry: |
660 | /* |
661 | * Prepare to wait on uaddr. On success, it holds hb->lock and q |
662 | * is initialized. |
663 | */ |
664 | ret = futex_wait_setup(uaddr, val, flags, q: &q, hb: &hb); |
665 | if (ret) |
666 | return ret; |
667 | |
668 | /* futex_queue and wait for wakeup, timeout, or a signal. */ |
669 | futex_wait_queue(hb, q: &q, timeout: to); |
670 | |
671 | /* If we were woken (and unqueued), we succeeded, whatever. */ |
672 | if (!futex_unqueue(q: &q)) |
673 | return 0; |
674 | |
675 | if (to && !to->task) |
676 | return -ETIMEDOUT; |
677 | |
678 | /* |
679 | * We expect signal_pending(current), but we might be the |
680 | * victim of a spurious wakeup as well. |
681 | */ |
682 | if (!signal_pending(current)) |
683 | goto retry; |
684 | |
685 | return -ERESTARTSYS; |
686 | } |
687 | |
688 | int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset) |
689 | { |
690 | struct hrtimer_sleeper timeout, *to; |
691 | struct restart_block *restart; |
692 | int ret; |
693 | |
694 | to = futex_setup_timer(time: abs_time, timeout: &timeout, flags, |
695 | current->timer_slack_ns); |
696 | |
697 | ret = __futex_wait(uaddr, flags, val, to, bitset); |
698 | |
699 | /* No timeout, nothing to clean up. */ |
700 | if (!to) |
701 | return ret; |
702 | |
703 | hrtimer_cancel(timer: &to->timer); |
704 | destroy_hrtimer_on_stack(timer: &to->timer); |
705 | |
706 | if (ret == -ERESTARTSYS) { |
707 | restart = ¤t->restart_block; |
708 | restart->futex.uaddr = uaddr; |
709 | restart->futex.val = val; |
710 | restart->futex.time = *abs_time; |
711 | restart->futex.bitset = bitset; |
712 | restart->futex.flags = flags | FLAGS_HAS_TIMEOUT; |
713 | |
714 | return set_restart_fn(restart, fn: futex_wait_restart); |
715 | } |
716 | |
717 | return ret; |
718 | } |
719 | |
720 | static long futex_wait_restart(struct restart_block *restart) |
721 | { |
722 | u32 __user *uaddr = restart->futex.uaddr; |
723 | ktime_t t, *tp = NULL; |
724 | |
725 | if (restart->futex.flags & FLAGS_HAS_TIMEOUT) { |
726 | t = restart->futex.time; |
727 | tp = &t; |
728 | } |
729 | restart->fn = do_no_restart_syscall; |
730 | |
731 | return (long)futex_wait(uaddr, flags: restart->futex.flags, |
732 | val: restart->futex.val, abs_time: tp, bitset: restart->futex.bitset); |
733 | } |
734 | |
735 | |