/* Thread-local storage handling in the ELF dynamic linker.  Generic version.
   Copyright (C) 2002-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <assert.h>
#include <errno.h>
#include <libintl.h>
#include <signal.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/param.h>
#include <atomic.h>

#include <tls.h>
#include <dl-tls.h>
#include <ldsodefs.h>

#if PTHREAD_IN_LIBC
# include <list.h>
#endif

#define TUNABLE_NAMESPACE rtld
#include <dl-tunables.h>

/* Surplus static TLS, GLRO(dl_tls_static_surplus), is used for

   - IE TLS in libc.so for all dlmopen namespaces except in the initial
     one where libc.so is not loaded dynamically but at startup time,
   - IE TLS in other libraries which may be dynamically loaded even in the
     initial namespace,
   - and optionally for optimizing dynamic TLS access.

   The maximum number of namespaces is DL_NNS, but to support that many
   namespaces correctly the static TLS allocation should be significantly
   increased, which may cause problems with small thread stacks due to the
   way static TLS is accounted (bug 11787).

   So there is a rtld.nns tunable limit on the number of supported namespaces
   that affects the size of the static TLS and by default it's small enough
   not to cause problems with existing applications.  The limit is not
   enforced or checked: it is the user's responsibility to increase rtld.nns
   if more dlmopen namespaces are used.

   Audit modules use their own namespaces; they are not included in rtld.nns,
   but come on top when computing the number of namespaces.  */

/* Size of initial-exec TLS in libc.so.  This should be the maximum of
   observed PT_TLS sizes across all architectures.  Some
   architectures have lower values due to differences in type sizes
   and link editor capabilities.  */
#define LIBC_IE_TLS 144

/* Size of initial-exec TLS in libraries other than libc.so.
   This should be large enough to cover runtime libraries of the
   compiler such as libgomp and libraries in libc other than libc.so.  */
#define OTHER_IE_TLS 144

/* Default number of namespaces.  */
#define DEFAULT_NNS 4

/* Default for dl_tls_static_optional.  */
#define OPTIONAL_TLS 512

/* Compute the static TLS surplus based on the namespace count and the
   TLS space that can be used for optimizations.  */
static inline int
tls_static_surplus (int nns, int opt_tls)
{
  return (nns - 1) * LIBC_IE_TLS + nns * OTHER_IE_TLS + opt_tls;
}

/* This value is chosen so that with default values for the tunables,
   the computation of dl_tls_static_surplus in
   _dl_tls_static_surplus_init yields the historic value 1664, for
   backwards compatibility.  */
#define LEGACY_TLS (1664 - tls_static_surplus (DEFAULT_NNS, OPTIONAL_TLS))
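
/* For reference, with the defaults above, tls_static_surplus (4, 512)
   == 3 * 144 + 4 * 144 + 512 == 1520, so LEGACY_TLS evaluates to
   1664 - 1520 == 144 bytes of additional slack.  */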

/* Calculate the size of the static TLS surplus, when the given
   number of audit modules are loaded.  Must be called after the
   number of audit modules is known and before static TLS allocation.  */
void
_dl_tls_static_surplus_init (size_t naudit)
{
  size_t nns, opt_tls;

  nns = TUNABLE_GET (nns, size_t, NULL);
  opt_tls = TUNABLE_GET (optional_static_tls, size_t, NULL);
  if (nns > DL_NNS)
    nns = DL_NNS;
  if (DL_NNS - nns < naudit)
    _dl_fatal_printf ("Failed loading %lu audit modules, %lu are supported.\n",
                      (unsigned long) naudit, (unsigned long) (DL_NNS - nns));
  nns += naudit;

  GL(dl_tls_static_optional) = opt_tls;
  assert (LEGACY_TLS >= 0);
  GLRO(dl_tls_static_surplus) = tls_static_surplus (nns, opt_tls) + LEGACY_TLS;
}

/* Out-of-memory handler.  */
static void
__attribute__ ((__noreturn__))
oom (void)
{
  _dl_fatal_printf ("cannot allocate memory for thread-local data: ABORT\n");
}


void
_dl_assign_tls_modid (struct link_map *l)
{
  size_t result;

  if (__builtin_expect (GL(dl_tls_dtv_gaps), false))
    {
      size_t disp = 0;
      struct dtv_slotinfo_list *runp = GL(dl_tls_dtv_slotinfo_list);

      /* Note that this branch will never be executed during program
         start since there are no gaps at that time.  Therefore it
         does not matter that the dl_tls_dtv_slotinfo_list is not
         allocated yet when the function is called for the first time.

         NB: the offset +1 is due to the fact that DTV[0] is used
         for something else.  */
      result = GL(dl_tls_static_nelem) + 1;
      if (result <= GL(dl_tls_max_dtv_idx))
        do
          {
            while (result - disp < runp->len)
              {
                if (runp->slotinfo[result - disp].map == NULL)
                  break;

                ++result;
                assert (result <= GL(dl_tls_max_dtv_idx) + 1);
              }

            if (result - disp < runp->len)
              {
                /* Mark the entry as used, so any dependency sees it.  */
                atomic_store_relaxed (&runp->slotinfo[result - disp].map, l);
                atomic_store_relaxed (&runp->slotinfo[result - disp].gen, 0);
                break;
              }

            disp += runp->len;
          }
        while ((runp = runp->next) != NULL);

      if (result > GL(dl_tls_max_dtv_idx))
        {
          /* The new index must indeed be exactly one higher than the
             previous high.  */
          assert (result == GL(dl_tls_max_dtv_idx) + 1);
          /* There is no gap anymore.  */
          GL(dl_tls_dtv_gaps) = false;

          goto nogaps;
        }
    }
  else
    {
      /* No gaps, allocate a new entry.  */
    nogaps:

      result = GL(dl_tls_max_dtv_idx) + 1;
      /* Can be read concurrently.  */
      atomic_store_relaxed (&GL(dl_tls_max_dtv_idx), result);
    }

  l->l_tls_modid = result;
}


size_t
_dl_count_modids (void)
{
  /* The count is the max unless dlclose or failed dlopen created gaps.  */
  if (__glibc_likely (!GL(dl_tls_dtv_gaps)))
    return GL(dl_tls_max_dtv_idx);

  /* We have gaps and are forced to count the non-NULL entries.  */
  size_t n = 0;
  struct dtv_slotinfo_list *runp = GL(dl_tls_dtv_slotinfo_list);
  while (runp != NULL)
    {
      for (size_t i = 0; i < runp->len; ++i)
        if (runp->slotinfo[i].map != NULL)
          ++n;

      runp = runp->next;
    }

  return n;
}


#ifdef SHARED
void
_dl_determine_tlsoffset (void)
{
  size_t max_align = TCB_ALIGNMENT;
  size_t freetop = 0;
  size_t freebottom = 0;

  /* The first element of the dtv slot info list is allocated.  */
  assert (GL(dl_tls_dtv_slotinfo_list) != NULL);
  /* There is at this point only one element in the
     dl_tls_dtv_slotinfo_list list.  */
  assert (GL(dl_tls_dtv_slotinfo_list)->next == NULL);

  struct dtv_slotinfo *slotinfo = GL(dl_tls_dtv_slotinfo_list)->slotinfo;

  /* Determining the offset of the various parts of the static TLS
     block has several dependencies.  In addition we have to work
     around bugs in some toolchains.

     Each TLS block from the objects available at link time has a size
     and an alignment requirement.  The GNU ld computes the alignment
     requirements for the data at the positions *in the file*, though.
     I.e., it is not simply possible to allocate a block with the size
     of the TLS program header entry.  The data is laid out assuming
     that the first byte of the TLS block fulfills

       p_vaddr mod p_align == &TLS_BLOCK mod p_align

     This means we have to add artificial padding at the beginning of
     the TLS block.  These bytes are never used for the TLS data in
     this module but the first byte allocated must be aligned
     according to mod p_align == 0 so that the first byte of the TLS
     block is aligned according to p_vaddr mod p_align.  This is ugly
     and the linker can help by computing the offsets in the TLS block
     assuming the first byte of the TLS block is aligned according to
     p_align.

     The extra space which might be allocated before the first byte of
     the TLS block need not go unused.  The code below tries to use
     that memory for the next TLS block.  This can work if the total
     memory requirement for the next TLS block is smaller than the
     gap.  */

#if TLS_TCB_AT_TP
  /* We simply start with zero.  */
  size_t offset = 0;

  for (size_t cnt = 0; slotinfo[cnt].map != NULL; ++cnt)
    {
      assert (cnt < GL(dl_tls_dtv_slotinfo_list)->len);

      size_t firstbyte = (-slotinfo[cnt].map->l_tls_firstbyte_offset
                          & (slotinfo[cnt].map->l_tls_align - 1));
      size_t off;
      max_align = MAX (max_align, slotinfo[cnt].map->l_tls_align);

      if (freebottom - freetop >= slotinfo[cnt].map->l_tls_blocksize)
        {
          off = roundup (freetop + slotinfo[cnt].map->l_tls_blocksize
                         - firstbyte, slotinfo[cnt].map->l_tls_align)
                + firstbyte;
          if (off <= freebottom)
            {
              freetop = off;

              /* XXX For some architectures we perhaps should store the
                 negative offset.  */
              slotinfo[cnt].map->l_tls_offset = off;
              continue;
            }
        }

      off = roundup (offset + slotinfo[cnt].map->l_tls_blocksize - firstbyte,
                     slotinfo[cnt].map->l_tls_align) + firstbyte;
      if (off > offset + slotinfo[cnt].map->l_tls_blocksize
                + (freebottom - freetop))
        {
          freetop = offset;
          freebottom = off - slotinfo[cnt].map->l_tls_blocksize;
        }
      offset = off;

      /* XXX For some architectures we perhaps should store the
         negative offset.  */
      slotinfo[cnt].map->l_tls_offset = off;
    }

  GL(dl_tls_static_used) = offset;
  GLRO (dl_tls_static_size) = (roundup (offset + GLRO(dl_tls_static_surplus),
                                        max_align)
                               + TLS_TCB_SIZE);
#elif TLS_DTV_AT_TP
  /* The TLS blocks start right after the TCB.  */
  size_t offset = TLS_TCB_SIZE;

  for (size_t cnt = 0; slotinfo[cnt].map != NULL; ++cnt)
    {
      assert (cnt < GL(dl_tls_dtv_slotinfo_list)->len);

      size_t firstbyte = (-slotinfo[cnt].map->l_tls_firstbyte_offset
                          & (slotinfo[cnt].map->l_tls_align - 1));
      size_t off;
      max_align = MAX (max_align, slotinfo[cnt].map->l_tls_align);

      if (slotinfo[cnt].map->l_tls_blocksize <= freetop - freebottom)
        {
          off = roundup (freebottom, slotinfo[cnt].map->l_tls_align);
          if (off - freebottom < firstbyte)
            off += slotinfo[cnt].map->l_tls_align;
          if (off + slotinfo[cnt].map->l_tls_blocksize - firstbyte <= freetop)
            {
              slotinfo[cnt].map->l_tls_offset = off - firstbyte;
              freebottom = (off + slotinfo[cnt].map->l_tls_blocksize
                            - firstbyte);
              continue;
            }
        }

      off = roundup (offset, slotinfo[cnt].map->l_tls_align);
      if (off - offset < firstbyte)
        off += slotinfo[cnt].map->l_tls_align;

      slotinfo[cnt].map->l_tls_offset = off - firstbyte;
      if (off - firstbyte - offset > freetop - freebottom)
        {
          freebottom = offset;
          freetop = off - firstbyte;
        }

      offset = off + slotinfo[cnt].map->l_tls_blocksize - firstbyte;
    }

  GL(dl_tls_static_used) = offset;
  GLRO (dl_tls_static_size) = roundup (offset + GLRO(dl_tls_static_surplus),
                                       TCB_ALIGNMENT);
#else
# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
#endif

  /* The alignment requirement for the static TLS block.  */
  GLRO (dl_tls_static_align) = max_align;
}
#endif /* SHARED */

static void *
allocate_dtv (void *result)
{
  dtv_t *dtv;
  size_t dtv_length;

  /* Relaxed MO, because the dtv size is later rechecked, not relied on.  */
  size_t max_modid = atomic_load_relaxed (&GL(dl_tls_max_dtv_idx));
  /* We allocate a few more elements in the dtv than are needed for the
     initial set of modules.  This should avoid in most cases expansions
     of the dtv.  */
  dtv_length = max_modid + DTV_SURPLUS;
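  /* The two extra elements account for the bookkeeping slots in front
     of the module entries: the slot holding the dtv length (read back
     elsewhere as dtv[-1].counter) and the slot holding the generation
     counter (dtv[0].counter), so module IDs index the entries starting
     at dtv[1].  */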
  dtv = calloc (dtv_length + 2, sizeof (dtv_t));
  if (dtv != NULL)
    {
      /* This is the initial length of the dtv.  */
      dtv[0].counter = dtv_length;

      /* The rest of the dtv (including the generation counter) is
         initialized with zero to indicate nothing there.  */

      /* Add the dtv to the thread data structures.  */
      INSTALL_DTV (result, dtv);
    }
  else
    result = NULL;

  return result;
}

/* Get size and alignment requirements of the static TLS block.  This
   function is no longer used by glibc itself, but the GCC sanitizers
   use it despite the GLIBC_PRIVATE status.  */
void
_dl_get_tls_static_info (size_t *sizep, size_t *alignp)
{
  *sizep = GLRO (dl_tls_static_size);
  *alignp = GLRO (dl_tls_static_align);
}

/* Derive the location of the pointer to the start of the original
   allocation (before alignment) from the pointer to the TCB.  */
static inline void **
tcb_to_pointer_to_free_location (void *tcb)
{
#if TLS_TCB_AT_TP
  /* The TCB follows the TLS blocks, and the pointer to the front
     follows the TCB.  */
  void **original_pointer_location = tcb + TLS_TCB_SIZE;
#elif TLS_DTV_AT_TP
  /* The TCB comes first, preceded by the pre-TCB, and the pointer is
     before that.  */
  void **original_pointer_location = tcb - TLS_PRE_TCB_SIZE - sizeof (void *);
#endif
  return original_pointer_location;
}

void *
_dl_allocate_tls_storage (void)
{
  void *result;
  size_t size = GLRO (dl_tls_static_size);

#if TLS_DTV_AT_TP
  /* Memory layout is:
     [ TLS_PRE_TCB_SIZE ] [ TLS_TCB_SIZE ] [ TLS blocks ]
                          ^ This should be returned.  */
  size += TLS_PRE_TCB_SIZE;
#endif

  /* Perform the allocation.  Reserve space for the required alignment
     and the pointer to the original allocation.  */
  size_t alignment = GLRO (dl_tls_static_align);
  void *allocated = malloc (size + alignment + sizeof (void *));
  if (__glibc_unlikely (allocated == NULL))
    return NULL;

  /* Perform alignment and allocate the DTV.  */
#if TLS_TCB_AT_TP
  /* The TCB follows the TLS blocks, which determine the alignment.
     (TCB alignment requirements have been taken into account when
     calculating GLRO (dl_tls_static_align).)  */
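  /* The resulting layout within the allocation is, roughly (a sketch;
     ALIGNED is the rounded-up start):
       [ padding ] [ TLS blocks ] [ TCB ] [ saved malloc pointer ]
     with RESULT pointing at the TCB, which occupies the last
     TLS_TCB_SIZE bytes of the SIZE-byte static TLS area.  */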
  void *aligned = (void *) roundup ((uintptr_t) allocated, alignment);
  result = aligned + size - TLS_TCB_SIZE;

  /* Clear the TCB data structure.  We can't ask the caller (i.e.
     libpthread) to do it, because we will initialize the DTV et al.  */
  memset (result, '\0', TLS_TCB_SIZE);
#elif TLS_DTV_AT_TP
  /* Pre-TCB and TCB come before the TLS blocks.  The layout computed
     in _dl_determine_tlsoffset assumes that the TCB is aligned to the
     TLS block alignment, and not just the TLS blocks after it.  This
     can leave an unused alignment gap between the TCB and the TLS
     blocks.  */
  result = (void *) roundup
    (sizeof (void *) + TLS_PRE_TCB_SIZE + (uintptr_t) allocated,
     alignment);

  /* Clear the TCB data structure and TLS_PRE_TCB_SIZE bytes before
     it.  We can't ask the caller (i.e. libpthread) to do it, because
     we will initialize the DTV et al.  */
  memset (result - TLS_PRE_TCB_SIZE, '\0', TLS_PRE_TCB_SIZE + TLS_TCB_SIZE);
#endif

  /* Record the value of the original pointer for later
     deallocation.  */
  *tcb_to_pointer_to_free_location (result) = allocated;

  result = allocate_dtv (result);
  if (result == NULL)
    free (allocated);
  return result;
}


#ifndef SHARED
extern dtv_t _dl_static_dtv[];
# define _dl_initial_dtv (&_dl_static_dtv[1])
#endif

static dtv_t *
_dl_resize_dtv (dtv_t *dtv, size_t max_modid)
{
  /* Resize the dtv.  */
  dtv_t *newp;
  size_t newsize = max_modid + DTV_SURPLUS;
  size_t oldsize = dtv[-1].counter;

  if (dtv == GL(dl_initial_dtv))
    {
      /* This is the initial dtv that was either statically allocated in
         __libc_setup_tls or allocated during rtld startup using the
         dl-minimal.c malloc instead of the real malloc.  We can't free
         it, we have to abandon the old storage.  */

      newp = malloc ((2 + newsize) * sizeof (dtv_t));
      if (newp == NULL)
        oom ();
      memcpy (newp, &dtv[-1], (2 + oldsize) * sizeof (dtv_t));
    }
  else
    {
      newp = realloc (&dtv[-1],
                      (2 + newsize) * sizeof (dtv_t));
      if (newp == NULL)
        oom ();
    }

  newp[0].counter = newsize;

  /* Clear the newly allocated part.  */
  memset (newp + 2 + oldsize, '\0',
          (newsize - oldsize) * sizeof (dtv_t));

  /* Return a pointer positioned at the generation counter slot; the
     module entries follow it and the length lives at index -1.  */
  return &newp[1];
}


/* Allocate initial TLS.  RESULT should be a non-NULL pointer to storage
   for the TLS space.  The DTV may be resized, and so this function may
   call malloc to allocate that space.  The loader's GL(dl_load_tls_lock)
   is taken when manipulating global TLS-related data in the loader.  */
void *
_dl_allocate_tls_init (void *result, bool init_tls)
{
  if (result == NULL)
    /* The memory allocation failed.  */
    return NULL;

  dtv_t *dtv = GET_DTV (result);
  struct dtv_slotinfo_list *listp;
  size_t total = 0;
  size_t maxgen = 0;

  /* Protects global dynamic TLS related state.  */
  __rtld_lock_lock_recursive (GL(dl_load_tls_lock));

  /* Check if the current dtv is big enough.  */
  if (dtv[-1].counter < GL(dl_tls_max_dtv_idx))
    {
      /* Resize the dtv.  */
      dtv = _dl_resize_dtv (dtv, GL(dl_tls_max_dtv_idx));

      /* Install this new dtv in the thread data structures.  */
      INSTALL_DTV (result, &dtv[-1]);
    }

  /* We have to prepare the dtv for all currently loaded modules using
     TLS.  For those which are dynamically loaded we add the values
     indicating deferred allocation.  */
  listp = GL(dl_tls_dtv_slotinfo_list);
  while (1)
    {
      size_t cnt;

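      /* Module IDs start at 1 (DTV slot 0 holds the generation
         counter), so slot 0 of the very first slotinfo list element is
         never used and is skipped on the first iteration.  */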
      for (cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt)
        {
          struct link_map *map;
          void *dest;

          /* Check for the total number of used slots.  */
          if (total + cnt > GL(dl_tls_max_dtv_idx))
            break;

          map = listp->slotinfo[cnt].map;
          if (map == NULL)
            /* Unused entry.  */
            continue;

          /* Keep track of the maximum generation number.  This might
             not be the generation counter.  */
          assert (listp->slotinfo[cnt].gen <= GL(dl_tls_generation));
          maxgen = MAX (maxgen, listp->slotinfo[cnt].gen);

          dtv[map->l_tls_modid].pointer.val = TLS_DTV_UNALLOCATED;
          dtv[map->l_tls_modid].pointer.to_free = NULL;

          if (map->l_tls_offset == NO_TLS_OFFSET
              || map->l_tls_offset == FORCED_DYNAMIC_TLS_OFFSET)
            continue;

          assert (map->l_tls_modid == total + cnt);
          assert (map->l_tls_blocksize >= map->l_tls_initimage_size);
#if TLS_TCB_AT_TP
          assert ((size_t) map->l_tls_offset >= map->l_tls_blocksize);
          dest = (char *) result - map->l_tls_offset;
#elif TLS_DTV_AT_TP
          dest = (char *) result + map->l_tls_offset;
#else
# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
#endif

          /* Set up the DTV entry.  The simplified __tls_get_addr that
             some platforms use in static programs requires it.  */
          dtv[map->l_tls_modid].pointer.val = dest;

          /* Copy the initialization image and clear the BSS part.  For
             audit modules or dependencies with initial-exec TLS, we cannot
             set the initial TLS image on default loader initialization
             because it would already be set by the audit setup.  However,
             subsequent thread creation would need to follow the default
             behaviour.  */
          if (map->l_ns != LM_ID_BASE && !init_tls)
            continue;
          memset (__mempcpy (dest, map->l_tls_initimage,
                             map->l_tls_initimage_size), '\0',
                  map->l_tls_blocksize - map->l_tls_initimage_size);
        }

      total += cnt;
      if (total > GL(dl_tls_max_dtv_idx))
        break;

      listp = listp->next;
      assert (listp != NULL);
    }
  __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));

  /* The DTV version is up-to-date now.  */
  dtv[0].counter = maxgen;

  return result;
}
rtld_hidden_def (_dl_allocate_tls_init)

void *
_dl_allocate_tls (void *mem)
{
  return _dl_allocate_tls_init (mem == NULL
                                ? _dl_allocate_tls_storage ()
                                : allocate_dtv (mem), true);
}
rtld_hidden_def (_dl_allocate_tls)


void
_dl_deallocate_tls (void *tcb, bool dealloc_tcb)
{
  dtv_t *dtv = GET_DTV (tcb);

  /* We need to free the memory allocated for non-static TLS.  */
  for (size_t cnt = 0; cnt < dtv[-1].counter; ++cnt)
    free (dtv[1 + cnt].pointer.to_free);

  /* The array starts with dtv[-1].  */
  if (dtv != GL(dl_initial_dtv))
    free (dtv - 1);

  if (dealloc_tcb)
    free (*tcb_to_pointer_to_free_location (tcb));
}
rtld_hidden_def (_dl_deallocate_tls)


#ifdef SHARED
/* The __tls_get_addr function has two basic forms which differ in the
   arguments.  The IA-64 form takes two parameters, the module ID and
   offset.  The form used, among others, on IA-32 takes a reference to
   a special structure which contains the same information.  The second
   form seems to be more often used (at the moment) so we default to
   it.  Users of the IA-64 form have to provide adequate definitions
   of the following macros.  */
# ifndef GET_ADDR_ARGS
#  define GET_ADDR_ARGS tls_index *ti
#  define GET_ADDR_PARAM ti
# endif
# ifndef GET_ADDR_MODULE
#  define GET_ADDR_MODULE ti->ti_module
# endif
# ifndef GET_ADDR_OFFSET
#  define GET_ADDR_OFFSET ti->ti_offset
# endif

/* Allocate one DTV entry.  */
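/* The returned dtv_pointer has VAL pointing at the usable, suitably
   aligned block and TO_FREE pointing at the start of the underlying
   allocation; the two differ when the block had to be aligned by hand
   because malloc's alignment was not sufficient.  */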
static struct dtv_pointer
allocate_dtv_entry (size_t alignment, size_t size)
{
  if (powerof2 (alignment) && alignment <= _Alignof (max_align_t))
    {
      /* The alignment is supported by malloc.  */
      void *ptr = malloc (size);
      return (struct dtv_pointer) { ptr, ptr };
    }

  /* Emulate memalign by manually aligning a pointer returned by
     malloc.  First compute the size with an overflow check.  */
  size_t alloc_size = size + alignment;
  if (alloc_size < size)
    return (struct dtv_pointer) {};

  /* Perform the allocation.  This is the pointer we need to free
     later.  */
  void *start = malloc (alloc_size);
  if (start == NULL)
    return (struct dtv_pointer) {};

  /* Find the aligned position within the larger allocation.  */
  void *aligned = (void *) roundup ((uintptr_t) start, alignment);

  return (struct dtv_pointer) { .val = aligned, .to_free = start };
}

static struct dtv_pointer
allocate_and_init (struct link_map *map)
{
  struct dtv_pointer result = allocate_dtv_entry
    (map->l_tls_align, map->l_tls_blocksize);
  if (result.val == NULL)
    oom ();

  /* Initialize the memory.  */
  memset (__mempcpy (result.val, map->l_tls_initimage,
                     map->l_tls_initimage_size),
          '\0', map->l_tls_blocksize - map->l_tls_initimage_size);

  return result;
}


struct link_map *
_dl_update_slotinfo (unsigned long int req_modid, size_t new_gen)
{
  struct link_map *the_map = NULL;
  dtv_t *dtv = THREAD_DTV ();

  /* CONCURRENCY NOTES:

     The global dl_tls_dtv_slotinfo_list array contains for each module
     index the generation counter current when that entry was updated.
     This array never shrinks so that all module indices which were
     valid at some time can be used to access it.  Concurrent loading
     and unloading of modules can update slotinfo entries or extend
     the array.  The updates happen under the GL(dl_load_tls_lock) and
     finish with the release store of the generation counter to
     GL(dl_tls_generation) which is synchronized with the load of
     new_gen in the caller.  So updates up to new_gen are synchronized
     but updates for later generations may not be.

     Here we update the thread dtv from old_gen (== dtv[0].counter) to
     new_gen generation.  For this, each dtv[i] entry is either set to
     an unallocated state (set), or left unmodified (nop).  Where (set)
     may resize the dtv first if modid i >= dtv[-1].counter.  The rules
     for the decision between (set) and (nop) are

     (1) If slotinfo entry i is concurrently updated then either (set)
         or (nop) is valid: TLS access cannot use dtv[i] unless it is
         synchronized with a generation > new_gen.

     Otherwise, if the generation of slotinfo entry i is gen and the
     loaded module for this entry is map then

     (2) If gen <= old_gen then do (nop).

     (3) If old_gen < gen <= new_gen then
         (3.1) if map != 0 then (set)
         (3.2) if map == 0 then either (set) or (nop).

     Note that (1) cannot be reliably detected, but since both actions
     are valid it does not have to be.  Only (2) and (3.1) cases need
     to be distinguished for which relaxed mo access of gen and map is
     enough: their value is synchronized when it matters.

     Note that a relaxed mo load may give an out-of-thin-air value since
     it is used in decisions that can affect concurrent stores.  But this
     should only happen if the OOTA value causes UB that justifies the
     concurrent store of the value.  This is not expected to be an issue
     in practice.  */
  struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);

  if (dtv[0].counter < new_gen)
    {
      size_t total = 0;
      size_t max_modid = atomic_load_relaxed (&GL(dl_tls_max_dtv_idx));
      assert (max_modid >= req_modid);

      /* We have to look through the entire dtv slotinfo list.  */
      listp = GL(dl_tls_dtv_slotinfo_list);
      do
        {
          for (size_t cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt)
            {
              size_t modid = total + cnt;

              /* Case (1) for all later modids.  */
              if (modid > max_modid)
                break;

              size_t gen = atomic_load_relaxed (&listp->slotinfo[cnt].gen);

              /* Case (1).  */
              if (gen > new_gen)
                continue;

              /* Case (2) or (1).  */
              if (gen <= dtv[0].counter)
                continue;

              /* Case (3) or (1).  */

              /* If there is no map this means the entry is empty.  */
              struct link_map *map
                = atomic_load_relaxed (&listp->slotinfo[cnt].map);
              /* Check whether the current dtv array is large enough.  */
              if (dtv[-1].counter < modid)
                {
                  /* Case (3.2) or (1).  */
                  if (map == NULL)
                    continue;

                  /* Resizing the dtv aborts on failure: bug 16134.  */
                  dtv = _dl_resize_dtv (dtv, max_modid);

                  assert (modid <= dtv[-1].counter);

                  /* Install this new dtv in the thread data
                     structures.  */
                  INSTALL_NEW_DTV (dtv);
                }

              /* If there is currently memory allocated for this
                 dtv entry, free it.  Note: this is not AS-safe.  */
              /* XXX Ideally we will at some point create a memory
                 pool.  */
              free (dtv[modid].pointer.to_free);
              dtv[modid].pointer.val = TLS_DTV_UNALLOCATED;
              dtv[modid].pointer.to_free = NULL;

              if (modid == req_modid)
                the_map = map;
            }

          total += listp->len;
          if (total > max_modid)
            break;

          /* Synchronize with _dl_add_to_slotinfo.  Ideally this would
             be consume MO since we only need to order the accesses to
             the next node after the read of the address and on most
             hardware (other than alpha) a normal load would do that
             because of the address dependency.  */
          listp = atomic_load_acquire (&listp->next);
        }
      while (listp != NULL);

      /* This will be the new maximum generation counter.  */
      dtv[0].counter = new_gen;
    }

  return the_map;
}


static void *
__attribute_noinline__
tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map)
{
  /* The allocation was deferred.  Do it now.  */
  if (the_map == NULL)
    {
      /* Find the link map for this module.  */
      size_t idx = GET_ADDR_MODULE;
      struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);

      while (idx >= listp->len)
        {
          idx -= listp->len;
          listp = listp->next;
        }

      the_map = listp->slotinfo[idx].map;
    }

  /* Make sure that, if a dlopen running in parallel forces the
     variable into static storage, we'll wait until the address in the
     static TLS block is set up, and use that.  If we're undecided
     yet, make sure we make the decision holding the lock as well.  */
  if (__glibc_unlikely (the_map->l_tls_offset
                        != FORCED_DYNAMIC_TLS_OFFSET))
    {
      __rtld_lock_lock_recursive (GL(dl_load_tls_lock));
      if (__glibc_likely (the_map->l_tls_offset == NO_TLS_OFFSET))
        {
          the_map->l_tls_offset = FORCED_DYNAMIC_TLS_OFFSET;
          __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
        }
      else if (__glibc_likely (the_map->l_tls_offset
                               != FORCED_DYNAMIC_TLS_OFFSET))
        {
#if TLS_TCB_AT_TP
          void *p = (char *) THREAD_SELF - the_map->l_tls_offset;
#elif TLS_DTV_AT_TP
          void *p = (char *) THREAD_SELF + the_map->l_tls_offset
                    + TLS_PRE_TCB_SIZE;
#else
# error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
#endif
          __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));

          dtv[GET_ADDR_MODULE].pointer.to_free = NULL;
          dtv[GET_ADDR_MODULE].pointer.val = p;

          return (char *) p + GET_ADDR_OFFSET;
        }
      else
        __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
    }
  struct dtv_pointer result = allocate_and_init (the_map);
  dtv[GET_ADDR_MODULE].pointer = result;
  assert (result.to_free != NULL);

  return (char *) result.val + GET_ADDR_OFFSET;
}


static struct link_map *
__attribute_noinline__
update_get_addr (GET_ADDR_ARGS, size_t gen)
{
  struct link_map *the_map = _dl_update_slotinfo (GET_ADDR_MODULE, gen);
  dtv_t *dtv = THREAD_DTV ();

  void *p = dtv[GET_ADDR_MODULE].pointer.val;

  if (__glibc_unlikely (p == TLS_DTV_UNALLOCATED))
    return tls_get_addr_tail (GET_ADDR_PARAM, dtv, the_map);

  return (void *) p + GET_ADDR_OFFSET;
}

/* For all machines that have a non-macro version of __tls_get_addr, we
   want to use rtld_hidden_proto/rtld_hidden_def in order to call the
   internal alias for __tls_get_addr from ld.so.  This avoids a PLT entry
   in ld.so for __tls_get_addr.  */

#ifndef __tls_get_addr
extern void * __tls_get_addr (GET_ADDR_ARGS);
rtld_hidden_proto (__tls_get_addr)
rtld_hidden_def (__tls_get_addr)
#endif

/* The generic dynamic and local dynamic model cannot be used in
   statically linked applications.  */
void *
__tls_get_addr (GET_ADDR_ARGS)
{
  dtv_t *dtv = THREAD_DTV ();

  /* Update is needed if dtv[0].counter < the generation of the accessed
     module, but the global generation counter is easier to check (which
     must be synchronized up to the generation of the accessed module by
     user code doing the TLS access so relaxed mo read is enough).  */
  size_t gen = atomic_load_relaxed (&GL(dl_tls_generation));
  if (__glibc_unlikely (dtv[0].counter != gen))
    {
      /* Update DTV up to the global generation, see CONCURRENCY NOTES
         in _dl_update_slotinfo.  */
      gen = atomic_load_acquire (&GL(dl_tls_generation));
      return update_get_addr (GET_ADDR_PARAM, gen);
    }

  void *p = dtv[GET_ADDR_MODULE].pointer.val;

  if (__glibc_unlikely (p == TLS_DTV_UNALLOCATED))
    return tls_get_addr_tail (GET_ADDR_PARAM, dtv, NULL);

  return (char *) p + GET_ADDR_OFFSET;
}
#endif


/* Look up the module's TLS block as for __tls_get_addr,
   but never touch anything.  Return null if it's not allocated yet.  */
void *
_dl_tls_get_addr_soft (struct link_map *l)
{
  if (__glibc_unlikely (l->l_tls_modid == 0))
    /* This module has no TLS segment.  */
    return NULL;

  dtv_t *dtv = THREAD_DTV ();
  /* This may be called without holding the GL(dl_load_tls_lock).  Reading
     arbitrary gen value is fine since this is best effort code.  */
  size_t gen = atomic_load_relaxed (&GL(dl_tls_generation));
  if (__glibc_unlikely (dtv[0].counter != gen))
    {
      /* This thread's DTV is not completely current,
         but it might already cover this module.  */

      if (l->l_tls_modid >= dtv[-1].counter)
        /* Nope.  */
        return NULL;

      size_t idx = l->l_tls_modid;
      struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);
      while (idx >= listp->len)
        {
          idx -= listp->len;
          listp = listp->next;
        }

      /* We've reached the slot for this module.
         If its generation counter is higher than the DTV's,
         this thread does not know about this module yet.  */
      if (dtv[0].counter < listp->slotinfo[idx].gen)
        return NULL;
    }

  void *data = dtv[l->l_tls_modid].pointer.val;
  if (__glibc_unlikely (data == TLS_DTV_UNALLOCATED))
    /* The DTV is current, but this thread has not yet needed
       to allocate this module's segment.  */
    data = NULL;

  return data;
}


void
_dl_add_to_slotinfo (struct link_map *l, bool do_add)
{
  /* Now that we know the object is loaded successfully add
     modules containing TLS data to the dtv info table.  We
     might have to increase its size.  */
  struct dtv_slotinfo_list *listp;
  struct dtv_slotinfo_list *prevp;
  size_t idx = l->l_tls_modid;

  /* Find the place in the dtv slotinfo list.  */
  listp = GL(dl_tls_dtv_slotinfo_list);
  prevp = NULL;           /* Needed to shut up gcc.  */
  do
    {
      /* Does it fit in the array of this list element?  */
      if (idx < listp->len)
        break;
      idx -= listp->len;
      prevp = listp;
      listp = listp->next;
    }
  while (listp != NULL);

  if (listp == NULL)
    {
      /* When we come here it means we have to add a new element
         to the slotinfo list.  And the new module must be in
         the first slot.  */
      assert (idx == 0);

      listp = (struct dtv_slotinfo_list *)
        malloc (sizeof (struct dtv_slotinfo_list)
                + TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo));
      if (listp == NULL)
        {
          /* We ran out of memory while resizing the dtv slotinfo list.  */
          _dl_signal_error (ENOMEM, "dlopen", NULL, N_("\
cannot create TLS data structures"));
        }

      listp->len = TLS_SLOTINFO_SURPLUS;
      listp->next = NULL;
      memset (listp->slotinfo, '\0',
              TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo));
      /* Synchronize with _dl_update_slotinfo.  */
      atomic_store_release (&prevp->next, listp);
    }

  /* Add the information into the slotinfo data structure.  */
  if (do_add)
    {
      /* Can be read concurrently.  See _dl_update_slotinfo.  */
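      /* The entry is stamped with the next generation; it only becomes
         visible to readers once the caller (e.g. dlopen) publishes
         that generation by bumping GL(dl_tls_generation) after all new
         modules have been added.  */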
      atomic_store_relaxed (&listp->slotinfo[idx].map, l);
      atomic_store_relaxed (&listp->slotinfo[idx].gen,
                            GL(dl_tls_generation) + 1);
    }
}

#if PTHREAD_IN_LIBC
static inline void __attribute__ ((always_inline))
init_one_static_tls (struct pthread *curp, struct link_map *map)
{
# if TLS_TCB_AT_TP
  void *dest = (char *) curp - map->l_tls_offset;
# elif TLS_DTV_AT_TP
  void *dest = (char *) curp + map->l_tls_offset + TLS_PRE_TCB_SIZE;
# else
#  error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
# endif

  /* Initialize the memory.  */
  memset (__mempcpy (dest, map->l_tls_initimage, map->l_tls_initimage_size),
          '\0', map->l_tls_blocksize - map->l_tls_initimage_size);
}

void
_dl_init_static_tls (struct link_map *map)
{
  lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE);

  /* Iterate over the list with system-allocated threads first.  */
  list_t *runp;
  list_for_each (runp, &GL (dl_stack_used))
    init_one_static_tls (list_entry (runp, struct pthread, list), map);

  /* Now the list with threads using user-allocated stacks.  */
  list_for_each (runp, &GL (dl_stack_user))
    init_one_static_tls (list_entry (runp, struct pthread, list), map);

  lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
}
#endif /* PTHREAD_IN_LIBC */
