pthread_cond_wait.c source code [glibc/nptl/pthread_cond_wait.c]

1	/* Copyright (C) 2003-2022 Free Software Foundation, Inc.
2	This file is part of the GNU C Library.
3
4	The GNU C Library is free software; you can redistribute it and/or
5	modify it under the terms of the GNU Lesser General Public
6	License as published by the Free Software Foundation; either
7	version 2.1 of the License, or (at your option) any later version.
8
9	The GNU C Library is distributed in the hope that it will be useful,
10	but WITHOUT ANY WARRANTY; without even the implied warranty of
11	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12	Lesser General Public License for more details.
13
14	You should have received a copy of the GNU Lesser General Public
15	License along with the GNU C Library; if not, see
16	<https://www.gnu.org/licenses/>.  */
17
18	#include <endian.h>
19	#include <errno.h>
20	#include <sysdep.h>
21	#include <futex-internal.h>
22	#include <pthread.h>
23	#include <pthreadP.h>
24	#include <sys/time.h>
25	#include <atomic.h>
26	#include <stdint.h>
27	#include <stdbool.h>
28
29	#include <shlib-compat.h>
30	#include <stap-probe.h>
31	#include <time.h>
32
33	#include "pthread_cond_common.c"
34
35
/* State handed to __condvar_cleanup_waiting so that a waiter that is
   cancelled while blocked can undo its registration.  */
struct _condvar_cleanup_buffer
{
  /* Raw waiter-sequence value the waiter obtained; the LSB is the group
     index and the remaining bits are the position.  */
  uint64_t wseq;
  /* Condvar the waiter is blocked on.  */
  pthread_cond_t *cond;
  /* Mutex that is re-acquired at the end of the clean-up.  */
  pthread_mutex_t *mutex;
  /* PRIVATE argument forwarded to the futex operations.  */
  int private;
};
43
44
/* Decrease the waiter reference count kept in COND's __wrefs.  PRIVATE is
   forwarded to futex_wake.  */
static void
__condvar_confirm_wakeup (pthread_cond_t *cond, int private)
{
  /* If destruction is pending (i.e., the wake-request flag is nonzero) and we
     are the last waiter (prior value of __wrefs was 1 << 3), then wake any
     threads waiting in pthread_cond_destroy.  Release MO to synchronize with
     these threads.  Don't bother clearing the wake-up request flag.
     One reference is worth 8 because the three low bits of __wrefs are
     flags; after shifting out the two lowest flag bits, a prior value of 3
     means "wake-request bit set and exactly one reference left".  */
  if ((atomic_fetch_add_release (&cond->__data.__wrefs, -8) >> 2) == 3)
    futex_wake (&cond->__data.__wrefs, INT_MAX, private);
}
56
57
58	/ Cancel waiting after having registered as a waiter previously. SEQ is our*
59	position and G is our group index.
60	The goal of cancellation is to make our group smaller if that is still
61	possible. If we are in a closed group, this is not possible anymore; in
62	this case, we need to send a replacement signal for the one we effectively
63	consumed because the signal should have gotten consumed by another waiter
64	instead; we must not both cancel waiting and consume a signal.
65
66	Must not be called while still holding a reference on the group.
67
68	Returns true iff we consumed a signal.
69
70	On some kind of timeouts, we may be able to pretend that a signal we
71	effectively consumed happened before the timeout (i.e., similarly to first
72	spinning on signals before actually checking whether the timeout has
73	passed already). Doing this would allow us to skip sending a replacement
74	signal, but this case might happen rarely because the end of the timeout
75	must race with someone else sending a signal. Therefore, we don't bother
76	trying to optimize this. /*
77	static void
78	__condvar_cancel_waiting (pthread_cond_t cond, uint64_t seq, unsigned* int g,
79	int private)
80	{
81	bool consumed_signal = false;
82
83	/ No deadlock with group switching is possible here because we do*
84	not hold a reference on the group. /*
85	__condvar_acquire_lock (cond, private);
86
87	uint64_t g1_start = __condvar_load_g1_start_relaxed (cond) >> `1`;
88	if (g1_start > seq)
89	{
90	/ Our group is closed, so someone provided enough signals for it.*
91	Thus, we effectively consumed a signal. /*
92	consumed_signal = true;
93	}
94	else
95	{
96	if (g1_start + __condvar_get_orig_size (cond) <= seq)
97	{
98	/ We are in the current G2 and thus cannot have consumed a signal.*
99	Reduce its effective size or handle overflow. Remember that in
100	G2, unsigned int size is zero or a negative value. /*
101	if (cond->__data.__g_size[g] + __PTHREAD_COND_MAX_GROUP_SIZE > `0`)
102	{
103	cond->__data.__g_size[g]--;
104	}
105	else
106	{
107	/ Cancellations would overflow the maximum group size. Just*
108	wake up everyone spuriously to create a clean state. This
109	also means we do not consume a signal someone else sent. /*
110	__condvar_release_lock (cond, private);
111	__pthread_cond_broadcast (cond);
112	return;
113	}
114	}
115	else
116	{
117	/ We are in current G1. If the group's size is zero, someone put*
118	a signal in the group that nobody else but us can consume. /*
119	if (cond->__data.__g_size[g] == `0`)
120	consumed_signal = true;
121	else
122	{
123	/ Otherwise, we decrease the size of the group. This is*
124	equivalent to atomically putting in a signal just for us and
125	consuming it right away. We do not consume a signal sent
126	by someone else. We also cannot have consumed a futex
127	wake-up because if we were cancelled or timed out in a futex
128	call, the futex will wake another waiter. /*
129	cond->__data.__g_size[g]--;
130	}
131	}
132	}
133
134	__condvar_release_lock (cond, private);
135
136	if (consumed_signal)
137	{
138	/ We effectively consumed a signal even though we didn't want to.*
139	Therefore, we need to send a replacement signal.
140	If we would want to optimize this, we could do what
141	pthread_cond_signal does right in the critical section above. /*
142	__pthread_cond_signal (cond);
143	}
144	}
145
/* Drop one reference on group G of COND and wake up any signalers that might
   be waiting for the group to become unreferenced.  PRIVATE is forwarded to
   futex_wake.  */
static void
__condvar_dec_grefs (pthread_cond_t *cond, unsigned int g, int private)
{
  /* Release MO to synchronize-with the acquire load in
     __condvar_quiesce_and_switch_g1.  A reference is worth 2 because the LSB
     of __g_refs is the wake-request flag, so a prior value of 3 means "last
     reference and a wake-up was requested".  */
  if (atomic_fetch_add_release (cond->__data.__g_refs + g, -2) == 3)
    {
      /* Clear the wake-up request flag before waking up.  We do not need more
         than relaxed MO and it doesn't matter if we apply this for an aliased
         group because we wake all futex waiters right after clearing the
         flag.  */
      atomic_fetch_and_relaxed (cond->__data.__g_refs + g, ~(unsigned int) 1);
      futex_wake (cond->__data.__g_refs + g, INT_MAX, private);
    }
}
162
163	/ Clean-up for cancellation of waiters waiting for normal signals. We cancel*
164	our registration as a waiter, confirm we have woken up, and re-acquire the
165	mutex. /*
166	static void
167	__condvar_cleanup_waiting (void *arg)
168	{
169	struct _condvar_cleanup_buffer *cbuffer =
170	(struct _condvar_cleanup_buffer *) arg;
171	pthread_cond_t *cond = cbuffer->cond;
172	unsigned g = cbuffer->wseq & `1`;
173
174	__condvar_dec_grefs (cond, g, private: cbuffer->private);
175
176	__condvar_cancel_waiting (cond, seq: cbuffer->wseq >> `1`, g, private: cbuffer->private);
177	/ FIXME With the current cancellation implementation, it is possible that*
178	a thread is cancelled after it has returned from a syscall. This could
179	result in a cancelled waiter consuming a futex wake-up that is then
180	causing another waiter in the same group to not wake up. To work around
181	this issue until we have fixed cancellation, just add a futex wake-up
182	conservatively. /*
183	futex_wake (futex_word: cond->__data.__g_signals + g, processes_to_wake: `1`, private: cbuffer->private);
184
185	__condvar_confirm_wakeup (cond, private: cbuffer->private);
186
187	/ XXX If locking the mutex fails, should we just stop execution? This*
188	might be better than silently ignoring the error. /*
189	__pthread_mutex_cond_lock (mutex: cbuffer->mutex);
190	}
191
192	/* This condvar implementation guarantees that all calls to signal and
193	broadcast and all of the three virtually atomic parts of each call to wait
194	(i.e., (1) releasing the mutex and blocking, (2) unblocking, and (3) re-
195	acquiring the mutex) happen in some total order that is consistent with the
196	happens-before relations in the calling program. However, this order does
197	not necessarily result in additional happens-before relations being
198	established (which aligns well with spurious wake-ups being allowed).
199
200	All waiters acquire a certain position in a 64b waiter sequence (__wseq).
201	This sequence determines which waiters are allowed to consume signals.
202	A broadcast is equal to sending as many signals as are unblocked waiters.
203	When a signal arrives, it samples the current value of __wseq with a
204	relaxed-MO load (i.e., the position the next waiter would get). (This is
205	sufficient because it is consistent with happens-before; the caller can
206	enforce stronger ordering constraints by calling signal while holding the
207	mutex.) Only waiters with a position less than the __wseq value observed
208	by the signal are eligible to consume this signal.
209
210	This would be straight-forward to implement if waiters would just spin but
211	we need to let them block using futexes. Futexes give no guarantee of
212	waking in FIFO order, so we cannot reliably wake eligible waiters if we
213	just use a single futex. Also, futex words are 32b in size, but we need
214	to distinguish more than 1<<32 states because we need to represent the
215	order of wake-up (and thus which waiters are eligible to consume signals);
216	blocking in a futex is not atomic with a waiter determining its position in
217	the waiter sequence, so we need the futex word to reliably notify waiters
218	that they should not attempt to block anymore because they have been
219	already signaled in the meantime. While an ABA issue on a 32b value will
220	be rare, ignoring it when we are aware of it is not the right thing to do
221	either.
222
223	Therefore, we use a 64b counter to represent the waiter sequence (on
224	architectures which only support 32b atomics, we use a few bits less).
225	To deal with the blocking using futexes, we maintain two groups of waiters:
226	* Group G1 consists of waiters that are all eligible to consume signals;
227	incoming signals will always signal waiters in this group until all
228	waiters in G1 have been signaled.
229	* Group G2 consists of waiters that arrive when a G1 is present and still
230	contains waiters that have not been signaled. When all waiters in G1
231	are signaled and a new signal arrives, the new signal will convert G2
232	into the new G1 and create a new G2 for future waiters.
233
234	We cannot allocate new memory because of process-shared condvars, so we
235	have just two slots of groups that change their role between G1 and G2.
236	Each has a separate futex word, a number of signals available for
237	consumption, a size (number of waiters in the group that have not been
238	signaled), and a reference count.
239
240	The group reference count is used to maintain the number of waiters that
241	are using the group's futex. Before a group can change its role, the
242	reference count must show that no waiters are using the futex anymore; this
243	prevents ABA issues on the futex word.
244
245	To represent which intervals in the waiter sequence the groups cover (and
246	thus also which group slot contains G1 or G2), we use a 64b counter to
247	designate the start position of G1 (inclusive), and a single bit in the
248	waiter sequence counter to represent which group slot currently contains
249	G2. This allows us to switch group roles atomically wrt. waiters obtaining
250	a position in the waiter sequence. The G1 start position allows waiters to
251	figure out whether they are in a group that has already been completely
252	signaled (i.e., if the current G1 starts at a later position than the
253	waiter's position). Waiters cannot determine whether they are currently
254	in G2 or G1 -- but they do not have to because all they are interested in
255	is whether there are available signals, and they always start in G2 (whose
256	group slot they know because of the bit in the waiter sequence). Signalers
257	will simply fill the right group until it is completely signaled and can
258	be closed (they do not switch group roles until they really have to, in
259	order to decrease the likelihood of having to wait for waiters still
260	holding a reference on the now-closed G1).
261
262	Signalers maintain the initial size of G1 to be able to determine where
263	G2 starts (G2 is always open-ended until it becomes G1). They track the
264	remaining size of a group; when waiters cancel waiting (due to PThreads
265	cancellation or timeouts), they will decrease this remaining size as well.
266
267	To implement condvar destruction requirements (i.e., that
268	pthread_cond_destroy can be called as soon as all waiters have been
269	signaled), waiters increment a reference count before starting to wait and
270	decrement it after they stopped waiting but right before they acquire the
271	mutex associated with the condvar.
272
273	pthread_cond_t thus consists of the following (bits that are used for
274	flags and are not part of the primary value of each field but necessary
275	to make some things atomic or because there was no space for them
276	elsewhere in the data structure):
277
278	__wseq: Waiter sequence counter
279	* LSB is index of current G2.
280	* Waiters fetch-add while having acquired the mutex associated with the
281	condvar. Signalers load it and fetch-xor it concurrently.
282	__g1_start: Starting position of G1 (inclusive)
283	* LSB is index of current G2.
284	* Modified by signalers while having acquired the condvar-internal lock
285	and observed concurrently by waiters.
286	__g1_orig_size: Initial size of G1
287	* The two least-significant bits represent the condvar-internal lock.
288	* Only accessed while having acquired the condvar-internal lock.
289	__wrefs: Waiter reference counter.
290	* Bit 2 is true if waiters should run futex_wake when they remove the
291	last reference. pthread_cond_destroy uses this as futex word.
292	* Bit 1 is the clock ID (0 == CLOCK_REALTIME, 1 == CLOCK_MONOTONIC).
293	* Bit 0 is true iff this is a process-shared condvar.
294	* Simple reference count used by both waiters and pthread_cond_destroy.
295	(If the format of __wrefs is changed, update nptl_lock_constants.pysym
296	and the pretty printers.)
297	For each of the two groups, we have:
298	__g_refs: Futex waiter reference count.
299	* LSB is true if waiters should run futex_wake when they remove the
300	last reference.
301	* Reference count used by waiters concurrently with signalers that have
302	acquired the condvar-internal lock.
303	__g_signals: The number of signals that can still be consumed.
304	* Used as a futex word by waiters. Used concurrently by waiters and
305	signalers.
306	* LSB is true iff this group has been completely signaled (i.e., it is
307	closed).
308	__g_size: Waiters remaining in this group (i.e., which have not been
309	signaled yet).
310	* Accessed by signalers and waiters that cancel waiting (both do so only
311	when having acquired the condvar-internal lock).
312	* The size of G2 is always zero because it cannot be determined until
313	the group becomes G1.
314	* Although this is of unsigned type, we rely on using unsigned overflow
315	rules to make this hold effectively negative values too (in
316	particular, when waiters in G2 cancel waiting).
317
318	A PTHREAD_COND_INITIALIZER condvar has all fields set to zero, which yields
319	a condvar that has G2 starting at position 0 and a G1 that is closed.
320
321	Because waiters do not claim ownership of a group right when obtaining a
322	position in __wseq but only reference count the group when using futexes
323	to block, it can happen that a group gets closed before a waiter can
324	increment the reference count. Therefore, waiters have to check whether
325	their group is already closed using __g1_start. They also have to perform
326	this check when spinning when trying to grab a signal from __g_signals.
327	Note that for these checks, using relaxed MO to load __g1_start is
328	sufficient because if a waiter can see a sufficiently large value, it could
329	have also consumed a signal in the waiter's group.
330
331	Waiters try to grab a signal from __g_signals without holding a reference
332	count, which can lead to stealing a signal from a more recent group after
333	their own group was already closed. They cannot always detect whether they
334	in fact did because they do not know when they stole, but they can
335	conservatively add a signal back to the group they stole from; if they
336	did so unnecessarily, all that happens is a spurious wake-up. To make this
337	even less likely, __g1_start contains the index of the current G2 too,
338	which allows waiters to check if there is aliasing on the group slots; if
339	there wasn't, they didn't steal from the current G1, which means that the
340	G1 they stole from must have been already closed and they do not need to
341	fix anything.
342
343	It is essential that the last field in pthread_cond_t is __g_signals[1]:
344	The previous condvar used a pointer-sized field in pthread_cond_t, so a
345	PTHREAD_COND_INITIALIZER from that condvar implementation might only
346	initialize 4 bytes to zero instead of the 8 bytes we need (i.e., 44 bytes
347	in total instead of the 48 we need). __g_signals[1] is not accessed before
348	the first group switch (G2 starts at index 0), which will set its value to
349	zero after a harmless fetch-or whose return value is ignored. This
350	effectively completes initialization.
351
352
353	Limitations:
354	* This condvar isn't designed to allow for more than
355	__PTHREAD_COND_MAX_GROUP_SIZE (1 << 31) calls to __pthread_cond_wait.
356	* More than __PTHREAD_COND_MAX_GROUP_SIZE concurrent waiters are not
357	supported.
358	* Beyond what is allowed as errors by POSIX or documented, we can also
359	return the following errors:
360	* EPERM if MUTEX is a recursive mutex and the caller doesn't own it.
361	* EOWNERDEAD or ENOTRECOVERABLE when using robust mutexes. Unlike
362	for other errors, this can happen when we re-acquire the mutex; this
363	isn't allowed by POSIX (which requires all errors to virtually happen
364	before we release the mutex or change the condvar state), but there's
365	nothing we can do really.
366	* When using PTHREAD_MUTEX_PP_* mutexes, we can also return all errors
367	returned by __pthread_tpp_change_priority. We will already have
368	released the mutex in such cases, so the caller cannot expect to own
369	MUTEX.
370
371	Other notes:
372	* Instead of the normal mutex unlock / lock functions, we use
373	__pthread_mutex_unlock_usercnt(m, 0) / __pthread_mutex_cond_lock(m)
374	because those will not change the mutex-internal users count, so that it
375	can be detected when a condvar is still associated with a particular
376	mutex because there is a waiter blocked on this condvar using this mutex.
377	*/
378	static __always_inline int
379	__pthread_cond_wait_common (pthread_cond_t cond, pthread_mutex_t mutex,
380	clockid_t clockid, const struct __timespec64 *abstime)
381	{
382	const int maxspin = `0`;
383	int err;
384	int result = `0`;
385
386	LIBC_PROBE (cond_wait, `2`, cond, mutex);
387
388	/ clockid will already have been checked by*
389	__pthread_cond_clockwait or pthread_condattr_setclock, or we
390	don't use it if abstime is NULL, so we don't need to check it
391	here. /*
392
393	/ Acquire a position (SEQ) in the waiter sequence (WSEQ). We use an*
394	atomic operation because signals and broadcasts may update the group
395	switch without acquiring the mutex. We do not need release MO here
396	because we do not need to establish any happens-before relation with
397	signalers (see __pthread_cond_signal); modification order alone
398	establishes a total order of waiters/signals. We do need acquire MO
399	to synchronize with group reinitialization in
400	__condvar_quiesce_and_switch_g1. /*
401	uint64_t wseq = __condvar_fetch_add_wseq_acquire (cond, val: `2`);
402	/ Find our group's index. We always go into what was G2 when we acquired*
403	our position. /*
404	unsigned int g = wseq & `1`;
405	uint64_t seq = wseq >> `1`;
406
407	/ Increase the waiter reference count. Relaxed MO is sufficient because*
408	we only need to synchronize when decrementing the reference count. /*
409	unsigned int flags = atomic_fetch_add_relaxed (&cond->__data.__wrefs, `8`);
410	int private = __condvar_get_private (flags);
411
412	/ Now that we are registered as a waiter, we can release the mutex.*
413	Waiting on the condvar must be atomic with releasing the mutex, so if
414	the mutex is used to establish a happens-before relation with any
415	signaler, the waiter must be visible to the latter; thus, we release the
416	mutex after registering as waiter.
417	If releasing the mutex fails, we just cancel our registration as a
418	waiter and confirm that we have woken up. /*
419	err = __pthread_mutex_unlock_usercnt (mutex, `0`);
420	if (__glibc_unlikely (err != `0`))
421	{
422	__condvar_cancel_waiting (cond, seq, g, private);
423	__condvar_confirm_wakeup (cond, private);
424	return err;
425	}
426
427	/ Now wait until a signal is available in our group or it is closed.*
428	Acquire MO so that if we observe a value of zero written after group
429	switching in __condvar_quiesce_and_switch_g1, we synchronize with that
430	store and will see the prior update of __g1_start done while switching
431	groups too. /*
432	unsigned int signals = atomic_load_acquire (cond->__data.__g_signals + g);
433
434	do
435	{
436	while (`1`)
437	{
438	/ Spin-wait first.*
439	Note that spinning first without checking whether a timeout
440	passed might lead to what looks like a spurious wake-up even
441	though we should return ETIMEDOUT (e.g., if the caller provides
442	an absolute timeout that is clearly in the past). However,
443	(1) spurious wake-ups are allowed, (2) it seems unlikely that a
444	user will (ab)use pthread_cond_wait as a check for whether a
445	point in time is in the past, and (3) spinning first without
446	having to compare against the current time seems to be the right
447	choice from a performance perspective for most use cases. /*
448	unsigned int spin = maxspin;
449	while (signals == `0` && spin > `0`)
450	{
451	/ Check that we are not spinning on a group that's already*
452	closed. /*
453	if (seq < (__condvar_load_g1_start_relaxed (cond) >> `1`))
454	goto done;
455
456	/ TODO Back off. /
457
458	/ Reload signals. See above for MO. /
459	signals = atomic_load_acquire (cond->__data.__g_signals + g);
460	spin--;
461	}
462
463	/ If our group will be closed as indicated by the flag on signals,*
464	don't bother grabbing a signal. /*
465	if (signals & `1`)
466	goto done;
467
468	/ If there is an available signal, don't block. /
469	if (signals != `0`)
470	break;
471
472	/ No signals available after spinning, so prepare to block.*
473	We first acquire a group reference and use acquire MO for that so
474	that we synchronize with the dummy read-modify-write in
475	__condvar_quiesce_and_switch_g1 if we read from that. In turn,
476	in this case this will make us see the closed flag on __g_signals
477	that designates a concurrent attempt to reuse the group's slot.
478	We use acquire MO for the __g_signals check to make the
479	__g1_start check work (see spinning above).
480	Note that the group reference acquisition will not mask the
481	release MO when decrementing the reference count because we use
482	an atomic read-modify-write operation and thus extend the release
483	sequence. /*
484	atomic_fetch_add_acquire (cond->__data.__g_refs + g, `2`);
485	if (((atomic_load_acquire (cond->__data.__g_signals + g) & `1`) != `0`)
486	\|\| (seq < (__condvar_load_g1_start_relaxed (cond) >> `1`)))
487	{
488	/ Our group is closed. Wake up any signalers that might be*
489	waiting. /*
490	__condvar_dec_grefs (cond, g, private);
491	goto done;
492	}
493
494	// Now block.
495	struct _pthread_cleanup_buffer buffer;
496	struct _condvar_cleanup_buffer cbuffer;
497	cbuffer.wseq = wseq;
498	cbuffer.cond = cond;
499	cbuffer.mutex = mutex;
500	cbuffer.private = private;
501	__pthread_cleanup_push (&buffer, __condvar_cleanup_waiting, &cbuffer);
502
503	err = __futex_abstimed_wait_cancelable64 (
504	cond->__data.__g_signals + g, `0`, clockid, abstime, private);
505
506	__pthread_cleanup_pop (&buffer, `0`);
507
508	if (__glibc_unlikely (err == ETIMEDOUT \|\| err == EOVERFLOW))
509	{
510	__condvar_dec_grefs (cond, g, private);
511	/ If we timed out, we effectively cancel waiting. Note that*
512	we have decremented __g_refs before cancellation, so that a
513	deadlock between waiting for quiescence of our group in
514	__condvar_quiesce_and_switch_g1 and us trying to acquire
515	the lock during cancellation is not possible. /*
516	__condvar_cancel_waiting (cond, seq, g, private);
517	result = err;
518	goto done;
519	}
520	else
521	__condvar_dec_grefs (cond, g, private);
522
523	/ Reload signals. See above for MO. /
524	signals = atomic_load_acquire (cond->__data.__g_signals + g);
525	}
526
527	}
528	/ Try to grab a signal. Use acquire MO so that we see an up-to-date value*
529	of __g1_start below (see spinning above for a similar case). In
530	particular, if we steal from a more recent group, we will also see a
531	more recent __g1_start below. /*
532	while (!atomic_compare_exchange_weak_acquire (cond->__data.__g_signals + g,
533	&signals, signals - `2`));
534
535	/ We consumed a signal but we could have consumed from a more recent group*
536	that aliased with ours due to being in the same group slot. If this
537	might be the case our group must be closed as visible through
538	__g1_start. /*
539	uint64_t g1_start = __condvar_load_g1_start_relaxed (cond);
540	if (seq < (g1_start >> `1`))
541	{
542	/ We potentially stole a signal from a more recent group but we do not*
543	know which group we really consumed from.
544	We do not care about groups older than current G1 because they are
545	closed; we could have stolen from these, but then we just add a
546	spurious wake-up for the current groups.
547	We will never steal a signal from current G2 that was really intended
548	for G2 because G2 never receives signals (until it becomes G1). We
549	could have stolen a signal from G2 that was conservatively added by a
550	previous waiter that also thought it stole a signal -- but given that
551	that signal was added unnecessarily, it's not a problem if we steal
552	it.
553	Thus, the remaining case is that we could have stolen from the current
554	G1, where "current" means the __g1_start value we observed. However,
555	if the current G1 does not have the same slot index as we do, we did
556	not steal from it and do not need to undo that. This is the reason
557	for putting a bit with G2's index into__g1_start as well. /*
558	if (((g1_start & `1`) ^ `1`) == g)
559	{
560	/ We have to conservatively undo our potential mistake of stealing*
561	a signal. We can stop trying to do that when the current G1
562	changes because other spinning waiters will notice this too and
563	__condvar_quiesce_and_switch_g1 has checked that there are no
564	futex waiters anymore before switching G1.
565	Relaxed MO is fine for the __g1_start load because we need to
566	merely be able to observe this fact and not have to observe
567	something else as well.
568	??? Would it help to spin for a little while to see whether the
569	current G1 gets closed? This might be worthwhile if the group is
570	small or close to being closed. /*
571	unsigned int s = atomic_load_relaxed (cond->__data.__g_signals + g);
572	while (__condvar_load_g1_start_relaxed (cond) == g1_start)
573	{
574	/ Try to add a signal. We don't need to acquire the lock*
575	because at worst we can cause a spurious wake-up. If the
576	group is in the process of being closed (LSB is true), this
577	has an effect similar to us adding a signal. /*
578	if (((s & `1`) != `0`)
579	\|\| atomic_compare_exchange_weak_relaxed
580	(cond->__data.__g_signals + g, &s, s + `2`))
581	{
582	/ If we added a signal, we also need to add a wake-up on*
583	the futex. We also need to do that if we skipped adding
584	a signal because the group is being closed because
585	while __condvar_quiesce_and_switch_g1 could have closed
586	the group, it might stil be waiting for futex waiters to
587	leave (and one of those waiters might be the one we stole
588	the signal from, which cause it to block using the
589	futex). /*
590	futex_wake (futex_word: cond->__data.__g_signals + g, processes_to_wake: `1`, private);
591	break;
592	}
593	/ TODO Back off. /
594	}
595	}
596	}
597
598	done:
599
600	/ Confirm that we have been woken. We do that before acquiring the mutex*
601	to allow for execution of pthread_cond_destroy while having acquired the
602	mutex. /*
603	__condvar_confirm_wakeup (cond, private);
604
605	/ Woken up; now re-acquire the mutex. If this doesn't fail, return RESULT,*
606	which is set to ETIMEDOUT if a timeout occured, or zero otherwise. /*
607	err = __pthread_mutex_cond_lock (mutex: mutex);
608	/ XXX Abort on errors that are disallowed by POSIX? /
609	return (err != `0`) ? err : result;
610	}
611
612
613	/ See __pthread_cond_wait_common. /
614	int
615	___pthread_cond_wait (pthread_cond_t cond, pthread_mutex_t mutex)
616	{
617	/ clockid is unused when abstime is NULL. /
618	return __pthread_cond_wait_common (cond, mutex, clockid: `0`, NULL);
619	}
620
621	versioned_symbol (libc, ___pthread_cond_wait, pthread_cond_wait,
622	GLIBC_2_3_2);
623	libc_hidden_ver (___pthread_cond_wait, __pthread_cond_wait)
624	#ifndef SHARED
625	strong_alias (___pthread_cond_wait, __pthread_cond_wait)
626	#endif
627
628	/ See __pthread_cond_wait_common. /
629	int
630	___pthread_cond_timedwait64 (pthread_cond_t cond, pthread_mutex_t mutex,
631	const struct __timespec64 *abstime)
632	{
633	/ Check parameter validity. This should also tell the compiler that*
634	it can assume that abstime is not NULL. /*
635	if (! valid_nanoseconds (ns: abstime->tv_nsec))
636	return EINVAL;
637
638	/ Relaxed MO is suffice because clock ID bit is only modified*
639	in condition creation. /*
640	unsigned int flags = atomic_load_relaxed (&cond->__data.__wrefs);
641	clockid_t clockid = (flags & __PTHREAD_COND_CLOCK_MONOTONIC_MASK)
642	? CLOCK_MONOTONIC : CLOCK_REALTIME;
643	return __pthread_cond_wait_common (cond, mutex, clockid, abstime);
644	}
645
646	#if __TIMESIZE == 64
647	strong_alias (___pthread_cond_timedwait64, ___pthread_cond_timedwait)
648	#else
649	strong_alias (___pthread_cond_timedwait64, __pthread_cond_timedwait64)
650	libc_hidden_def (__pthread_cond_timedwait64)
651
652	int
653	___pthread_cond_timedwait (pthread_cond_t cond, pthread_mutex_t mutex,
654	const struct timespec *abstime)
655	{
656	struct __timespec64 ts64 = valid_timespec_to_timespec64 (*abstime);
657
658	return __pthread_cond_timedwait64 (cond, mutex, &ts64);
659	}
660	#endif /* __TIMESIZE == 64 */
661	versioned_symbol (libc, ___pthread_cond_timedwait,
662	pthread_cond_timedwait, GLIBC_2_3_2);
663	libc_hidden_ver (___pthread_cond_timedwait, __pthread_cond_timedwait)
664	#ifndef SHARED
665	strong_alias (___pthread_cond_timedwait, __pthread_cond_timedwait)
666	#endif
667
668	/ See __pthread_cond_wait_common. /
669	int
670	___pthread_cond_clockwait64 (pthread_cond_t cond, pthread_mutex_t mutex,
671	clockid_t clockid,
672	const struct __timespec64 *abstime)
673	{
674	/ Check parameter validity. This should also tell the compiler that*
675	it can assume that abstime is not NULL. /*
676	if (! valid_nanoseconds (ns: abstime->tv_nsec))
677	return EINVAL;
678
679	if (!futex_abstimed_supported_clockid (clockid))
680	return EINVAL;
681
682	return __pthread_cond_wait_common (cond, mutex, clockid, abstime);
683	}
684
685	#if __TIMESIZE == 64
686	strong_alias (___pthread_cond_clockwait64, ___pthread_cond_clockwait)
687	#else
688	strong_alias (___pthread_cond_clockwait64, __pthread_cond_clockwait64);
689	libc_hidden_def (__pthread_cond_clockwait64)
690
691	int
692	___pthread_cond_clockwait (pthread_cond_t cond, pthread_mutex_t mutex,
693	clockid_t clockid,
694	const struct timespec *abstime)
695	{
696	struct __timespec64 ts64 = valid_timespec_to_timespec64 (*abstime);
697
698	return __pthread_cond_clockwait64 (cond, mutex, clockid, &ts64);
699	}
700	#endif /* __TIMESIZE == 64 */
701	libc_hidden_ver (___pthread_cond_clockwait, __pthread_cond_clockwait)
702	#ifndef SHARED
703	strong_alias (___pthread_cond_clockwait, __pthread_cond_clockwait)
704	#endif
705	versioned_symbol (libc, ___pthread_cond_clockwait,
706	pthread_cond_clockwait, GLIBC_2_34);
707	#if OTHER_SHLIB_COMPAT (libpthread, GLIBC_2_30, GLIBC_2_34)
708	compat_symbol (libpthread, ___pthread_cond_clockwait,
709	pthread_cond_clockwait, GLIBC_2_30);
710	#endif
711

source code of glibc/nptl/pthread_cond_wait.c