/* PLT trampolines.  x86-64 version.
   Copyright (C) 2009-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

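/* This header is included by dl-trampoline.S, typically more than once,
   with _dl_runtime_resolve and/or _dl_runtime_profile defined to the name
   of the variant being built and with macros such as USE_FXSAVE, USE_XSAVE
   or USE_XSAVEC selecting the state-saving instruction.  */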
        .text
#ifdef _dl_runtime_resolve

# undef REGISTER_SAVE_AREA
# undef LOCAL_STORAGE_AREA
# undef BASE

# if (STATE_SAVE_ALIGNMENT % 16) != 0
#  error STATE_SAVE_ALIGNMENT must be a multiple of 16
# endif

# if (STATE_SAVE_OFFSET % STATE_SAVE_ALIGNMENT) != 0
#  error STATE_SAVE_OFFSET must be a multiple of STATE_SAVE_ALIGNMENT
# endif

# if DL_RUNTIME_RESOLVE_REALIGN_STACK
/* Local stack area before jumping to function address: RBX.  */
#  define LOCAL_STORAGE_AREA    8
#  define BASE                  rbx
#  ifdef USE_FXSAVE
/* Use fxsave to save XMM registers.  */
#   define REGISTER_SAVE_AREA   (512 + STATE_SAVE_OFFSET)
#   if (REGISTER_SAVE_AREA % 16) != 0
#    error REGISTER_SAVE_AREA must be a multiple of 16
#   endif
#  endif
# else
#  ifndef USE_FXSAVE
#   error USE_FXSAVE must be defined
#  endif
/* Use fxsave to save XMM registers.  */
#  define REGISTER_SAVE_AREA    (512 + STATE_SAVE_OFFSET + 8)
/* Local stack area before jumping to function address: All saved
   registers.  */
#  define LOCAL_STORAGE_AREA    REGISTER_SAVE_AREA
#  define BASE                  rsp
#  if (REGISTER_SAVE_AREA % 16) != 8
#   error REGISTER_SAVE_AREA must be an odd multiple of 8
#  endif
# endif
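/* Rationale for the checks above: on entry the stack holds the caller's
   return address plus the two values pushed by the PLT, so %rsp is
   8 mod 16.  With stack realignment the and instruction below fixes the
   alignment; without it, REGISTER_SAVE_AREA must itself be 8 mod 16 so
   that the save area at STATE_SAVE_OFFSET ends up 16-byte aligned.  */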

        .globl _dl_runtime_resolve
        .hidden _dl_runtime_resolve
        .type _dl_runtime_resolve, @function
        .align 16
        cfi_startproc
_dl_runtime_resolve:
        cfi_adjust_cfa_offset(16)       # Incorporate PLT
        _CET_ENDBR
# if DL_RUNTIME_RESOLVE_REALIGN_STACK
#  if LOCAL_STORAGE_AREA != 8
#   error LOCAL_STORAGE_AREA must be 8
#  endif
        pushq %rbx                      # push moves the stack pointer down by 8.
        cfi_adjust_cfa_offset(8)
        cfi_rel_offset(%rbx, 0)
        mov %RSP_LP, %RBX_LP
        cfi_def_cfa_register(%rbx)
        and $-STATE_SAVE_ALIGNMENT, %RSP_LP
# endif
# ifdef REGISTER_SAVE_AREA
        sub $REGISTER_SAVE_AREA, %RSP_LP
#  if !DL_RUNTIME_RESOLVE_REALIGN_STACK
        cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
#  endif
# else
        # Allocate stack space of the required size to save the state.
#  if IS_IN (rtld)
        sub _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
#  else
        sub _dl_x86_cpu_features+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
#  endif
# endif
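        /* The area just allocated holds the integer argument registers
           (at the REGISTER_SAVE_* offsets) and the fxsave/xsave state
           (at STATE_SAVE_OFFSET).  */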
        # Preserve registers otherwise clobbered.
        movq %rax, REGISTER_SAVE_RAX(%rsp)
        movq %rcx, REGISTER_SAVE_RCX(%rsp)
        movq %rdx, REGISTER_SAVE_RDX(%rsp)
        movq %rsi, REGISTER_SAVE_RSI(%rsp)
        movq %rdi, REGISTER_SAVE_RDI(%rsp)
        movq %r8, REGISTER_SAVE_R8(%rsp)
        movq %r9, REGISTER_SAVE_R9(%rsp)
# ifdef USE_FXSAVE
        fxsave STATE_SAVE_OFFSET(%rsp)
# else
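        # xsave/xsavec take the set of state components to save as a
        # bitmask in %edx:%eax; STATE_SAVE_MASK names the components
        # that need to be preserved across the resolver.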
        movl $STATE_SAVE_MASK, %eax
        xorl %edx, %edx
        # Clear the XSAVE Header.
#  ifdef USE_XSAVE
        movq %rdx, (STATE_SAVE_OFFSET + 512)(%rsp)
        movq %rdx, (STATE_SAVE_OFFSET + 512 + 8)(%rsp)
#  endif
        movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 2)(%rsp)
        movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 3)(%rsp)
        movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 4)(%rsp)
        movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 5)(%rsp)
        movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 6)(%rsp)
        movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 7)(%rsp)
#  ifdef USE_XSAVE
        xsave STATE_SAVE_OFFSET(%rsp)
#  else
        xsavec STATE_SAVE_OFFSET(%rsp)
#  endif
# endif
        # Copy args pushed by PLT in register.
        # %rdi: link_map, %rsi: reloc_index
        mov (LOCAL_STORAGE_AREA + 8)(%BASE), %RSI_LP
        mov LOCAL_STORAGE_AREA(%BASE), %RDI_LP
        call _dl_fixup                  # Call resolver.
        mov %RAX_LP, %R11_LP            # Save return value
        # Get register content back.
# ifdef USE_FXSAVE
        fxrstor STATE_SAVE_OFFSET(%rsp)
# else
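        # xrstor needs the same %edx:%eax component bitmask that was
        # used for xsave/xsavec above.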
        movl $STATE_SAVE_MASK, %eax
        xorl %edx, %edx
        xrstor STATE_SAVE_OFFSET(%rsp)
# endif
        movq REGISTER_SAVE_R9(%rsp), %r9
        movq REGISTER_SAVE_R8(%rsp), %r8
        movq REGISTER_SAVE_RDI(%rsp), %rdi
        movq REGISTER_SAVE_RSI(%rsp), %rsi
        movq REGISTER_SAVE_RDX(%rsp), %rdx
        movq REGISTER_SAVE_RCX(%rsp), %rcx
        movq REGISTER_SAVE_RAX(%rsp), %rax
# if DL_RUNTIME_RESOLVE_REALIGN_STACK
        mov %RBX_LP, %RSP_LP
        cfi_def_cfa_register(%rsp)
        movq (%rsp), %rbx
        cfi_restore(%rbx)
# endif
        # Adjust the stack (the PLT did 2 pushes).
        add $(LOCAL_STORAGE_AREA + 16), %RSP_LP
        cfi_adjust_cfa_offset(-(LOCAL_STORAGE_AREA + 16))
        jmp *%r11                       # Jump to function address.
        cfi_endproc
        .size _dl_runtime_resolve, .-_dl_runtime_resolve
#endif


#if !defined PROF && defined _dl_runtime_profile
# if (LR_VECTOR_OFFSET % VEC_SIZE) != 0
#  error LR_VECTOR_OFFSET must be a multiple of VEC_SIZE
# endif

        .globl _dl_runtime_profile
        .hidden _dl_runtime_profile
        .type _dl_runtime_profile, @function
        .align 16
_dl_runtime_profile:
        cfi_startproc
        cfi_adjust_cfa_offset(16)       # Incorporate PLT
        _CET_ENDBR
        /* The La_x86_64_regs data structure pointed to by the
           fourth parameter must be VEC_SIZE-byte aligned.  This must
           be explicitly enforced.  We have to set up a dynamically
           sized stack frame.  %rbx points to the top half which
           has a fixed size and preserves the original stack pointer.  */

        sub $32, %RSP_LP                # Allocate the local storage.
        cfi_adjust_cfa_offset(32)
        movq %rbx, (%rsp)
        cfi_rel_offset(%rbx, 0)

        /* On the stack:
                56(%rbx)        parameter #1
                48(%rbx)        return address

                40(%rbx)        reloc index
                32(%rbx)        link_map

                24(%rbx)        La_x86_64_regs pointer
                16(%rbx)        framesize
                 8(%rbx)        rax
                  (%rbx)        rbx
        */

        movq %rax, 8(%rsp)
        mov %RSP_LP, %RBX_LP
        cfi_def_cfa_register(%rbx)

        /* Actively align the La_x86_64_regs structure.  */
        and $-VEC_SIZE, %RSP_LP
        /* sizeof(La_x86_64_regs).  Need extra space for 8 SSE registers
           to detect if any xmm0-xmm7 registers are changed by an audit
           module.  */
        sub $(LR_SIZE + XMM_SIZE*8), %RSP_LP
        movq %rsp, 24(%rbx)

        /* Fill the La_x86_64_regs structure.  */
        movq %rdx, LR_RDX_OFFSET(%rsp)
        movq %r8, LR_R8_OFFSET(%rsp)
        movq %r9, LR_R9_OFFSET(%rsp)
        movq %rcx, LR_RCX_OFFSET(%rsp)
        movq %rsi, LR_RSI_OFFSET(%rsp)
        movq %rdi, LR_RDI_OFFSET(%rsp)
        movq %rbp, LR_RBP_OFFSET(%rsp)

        lea 48(%rbx), %RAX_LP
        movq %rax, LR_RSP_OFFSET(%rsp)

        /* We always store the XMM registers even if AVX is available.
           This is to provide backward binary compatibility for existing
           audit modules.  */
        movaps %xmm0, (LR_XMM_OFFSET)(%rsp)
        movaps %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp)
        movaps %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)
        movaps %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)
        movaps %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)
        movaps %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)
        movaps %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)
        movaps %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)

# ifdef RESTORE_AVX
        /* This is to support AVX audit modules.  */
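        /* VMOVA and the VEC(n) register names are provided by the
           including file; they are expected to expand to an aligned
           vector move and to the widest vector registers (e.g. %ymmN
           or %zmmN) for the variant being built.  */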
        VMOVA %VEC(0), (LR_VECTOR_OFFSET)(%rsp)
        VMOVA %VEC(1), (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
        VMOVA %VEC(2), (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
        VMOVA %VEC(3), (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
        VMOVA %VEC(4), (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
        VMOVA %VEC(5), (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
        VMOVA %VEC(6), (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
        VMOVA %VEC(7), (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)

        /* Save xmm0-xmm7 registers to detect if any of them are
           changed by an audit module.  */
        vmovdqa %xmm0, (LR_SIZE)(%rsp)
        vmovdqa %xmm1, (LR_SIZE + XMM_SIZE)(%rsp)
        vmovdqa %xmm2, (LR_SIZE + XMM_SIZE*2)(%rsp)
        vmovdqa %xmm3, (LR_SIZE + XMM_SIZE*3)(%rsp)
        vmovdqa %xmm4, (LR_SIZE + XMM_SIZE*4)(%rsp)
        vmovdqa %xmm5, (LR_SIZE + XMM_SIZE*5)(%rsp)
        vmovdqa %xmm6, (LR_SIZE + XMM_SIZE*6)(%rsp)
        vmovdqa %xmm7, (LR_SIZE + XMM_SIZE*7)(%rsp)
# endif

        mov %RSP_LP, %RCX_LP            # La_x86_64_regs pointer to %rcx.
        mov 48(%rbx), %RDX_LP           # Load return address if needed.
        mov 40(%rbx), %RSI_LP           # Copy args pushed by PLT in register.
        mov 32(%rbx), %RDI_LP           # %rdi: link_map, %rsi: reloc_index
        lea 16(%rbx), %R8_LP            # Address of framesize
        call _dl_profile_fixup          # Call resolver.

        mov %RAX_LP, %R11_LP            # Save return value.

        movq 8(%rbx), %rax              # Get back register content.
        movq LR_RDX_OFFSET(%rsp), %rdx
        movq LR_R8_OFFSET(%rsp), %r8
        movq LR_R9_OFFSET(%rsp), %r9

        movaps (LR_XMM_OFFSET)(%rsp), %xmm0
        movaps (LR_XMM_OFFSET + XMM_SIZE)(%rsp), %xmm1
        movaps (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp), %xmm2
        movaps (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp), %xmm3
        movaps (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp), %xmm4
        movaps (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp), %xmm5
        movaps (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp), %xmm6
        movaps (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp), %xmm7

# ifdef RESTORE_AVX
        /* Check if any xmm0-xmm7 registers are changed by an audit
           module.  */
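        /* For each register: if it still matches the scratch copy saved
           at LR_SIZE(%rsp), the audit module left it untouched and the
           full-width vector saved at LR_VECTOR_OFFSET is reloaded;
           otherwise the modified xmm value is written back so that it
           is the value used from here on.  */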
        vpcmpeqq (LR_SIZE)(%rsp), %xmm0, %xmm8
        vpmovmskb %xmm8, %esi
        cmpl $0xffff, %esi
        je 2f
        vmovdqa %xmm0, (LR_VECTOR_OFFSET)(%rsp)
        jmp 1f
2:      VMOVA (LR_VECTOR_OFFSET)(%rsp), %VEC(0)
        vmovdqa %xmm0, (LR_XMM_OFFSET)(%rsp)

1:      vpcmpeqq (LR_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm8
        vpmovmskb %xmm8, %esi
        cmpl $0xffff, %esi
        je 2f
        vmovdqa %xmm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
        jmp 1f
2:      VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %VEC(1)
        vmovdqa %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp)

1:      vpcmpeqq (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm2, %xmm8
        vpmovmskb %xmm8, %esi
        cmpl $0xffff, %esi
        je 2f
        vmovdqa %xmm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
        jmp 1f
2:      VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %VEC(2)
        vmovdqa %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)

1:      vpcmpeqq (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm3, %xmm8
        vpmovmskb %xmm8, %esi
        cmpl $0xffff, %esi
        je 2f
        vmovdqa %xmm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
        jmp 1f
2:      VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %VEC(3)
        vmovdqa %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)

1:      vpcmpeqq (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm4, %xmm8
        vpmovmskb %xmm8, %esi
        cmpl $0xffff, %esi
        je 2f
        vmovdqa %xmm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
        jmp 1f
2:      VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %VEC(4)
        vmovdqa %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)

1:      vpcmpeqq (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm5, %xmm8
        vpmovmskb %xmm8, %esi
        cmpl $0xffff, %esi
        je 2f
        vmovdqa %xmm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
        jmp 1f
2:      VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %VEC(5)
        vmovdqa %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)

1:      vpcmpeqq (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm6, %xmm8
        vpmovmskb %xmm8, %esi
        cmpl $0xffff, %esi
        je 2f
        vmovdqa %xmm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
        jmp 1f
2:      VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %VEC(6)
        vmovdqa %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)

1:      vpcmpeqq (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm7, %xmm8
        vpmovmskb %xmm8, %esi
        cmpl $0xffff, %esi
        je 2f
        vmovdqa %xmm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
        jmp 1f
2:      VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %VEC(7)
        vmovdqa %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)

1:
# endif

        mov 16(%rbx), %R10_LP           # Anything in framesize?
        test %R10_LP, %R10_LP
        jns 3f

        /* The framesize is negative, so there will be no call to
           _dl_audit_pltexit; go straight to the resolved function.  */

        /* Get back registers content.  */
        movq LR_RCX_OFFSET(%rsp), %rcx
        movq LR_RSI_OFFSET(%rsp), %rsi
        movq LR_RDI_OFFSET(%rsp), %rdi

        mov %RBX_LP, %RSP_LP
        movq (%rsp), %rbx
        cfi_restore(%rbx)
        cfi_def_cfa_register(%rsp)

        add $48, %RSP_LP                # Adjust the stack to the return value
                                        # (eats the reloc index and link_map).
        cfi_adjust_cfa_offset(-48)
        jmp *%r11                       # Jump to function address.

3:
        cfi_adjust_cfa_offset(48)
        cfi_rel_offset(%rbx, 0)
        cfi_def_cfa_register(%rbx)

        /* At this point we need to prepare a new stack for the function
           which has to be called.  We copy the original stack to a
           temporary buffer of the size specified by the 'framesize'
           returned from _dl_profile_fixup.  */

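        /* Copy the stack arguments into the newly allocated area:
           %r10 holds framesize adjusted to keep 16-byte alignment,
           %rcx receives that size in 8-byte words (shr $3), and
           rep movsq moves the words from the old frame (%rsi) to the
           new stack (%rdi).  */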
        lea LR_RSP_OFFSET(%rbx), %RSI_LP        # stack
        add $8, %R10_LP
        and $-16, %R10_LP
        mov %R10_LP, %RCX_LP
        sub %R10_LP, %RSP_LP
        mov %RSP_LP, %RDI_LP
        shr $3, %RCX_LP
        rep
        movsq

        movq 24(%rdi), %rcx             # Get back register content.
        movq 32(%rdi), %rsi
        movq 40(%rdi), %rdi

        call *%r11

        mov 24(%rbx), %RSP_LP           # Drop the copied stack content.

        /* Now we have to prepare the La_x86_64_retval structure for
           _dl_audit_pltexit.  The La_x86_64_regs is pointed to by %rsp now,
           so we just need to allocate sizeof(La_x86_64_retval) bytes on
           the stack, since the alignment has already been taken care of.  */
# ifdef RESTORE_AVX
        /* sizeof(La_x86_64_retval).  Need extra space for 2 SSE
           registers to detect if the xmm0/xmm1 registers are changed
           by an audit module.  Since %rsp is aligned to VEC_SIZE, we
           need to make sure that the address of La_x86_64_retval +
           LRV_VECTOR0_OFFSET is aligned to VEC_SIZE.  */
#  define LRV_SPACE (LRV_SIZE + XMM_SIZE*2)
#  define LRV_MISALIGNED ((LRV_SIZE + LRV_VECTOR0_OFFSET) & (VEC_SIZE - 1))
#  if LRV_MISALIGNED == 0
        sub $LRV_SPACE, %RSP_LP
#  else
        sub $(LRV_SPACE + VEC_SIZE - LRV_MISALIGNED), %RSP_LP
#  endif
# else
        sub $LRV_SIZE, %RSP_LP          # sizeof(La_x86_64_retval)
# endif
        mov %RSP_LP, %RCX_LP            # La_x86_64_retval argument to %rcx.

        /* Fill in the La_x86_64_retval structure.  */
        movq %rax, LRV_RAX_OFFSET(%rcx)
        movq %rdx, LRV_RDX_OFFSET(%rcx)

        movaps %xmm0, LRV_XMM0_OFFSET(%rcx)
        movaps %xmm1, LRV_XMM1_OFFSET(%rcx)

# ifdef RESTORE_AVX
        /* This is to support AVX audit modules.  */
        VMOVA %VEC(0), LRV_VECTOR0_OFFSET(%rcx)
        VMOVA %VEC(1), LRV_VECTOR1_OFFSET(%rcx)

        /* Save xmm0/xmm1 registers to detect if they are changed
           by an audit module.  */
        vmovdqa %xmm0, (LRV_SIZE)(%rcx)
        vmovdqa %xmm1, (LRV_SIZE + XMM_SIZE)(%rcx)
# endif

        fstpt LRV_ST0_OFFSET(%rcx)
        fstpt LRV_ST1_OFFSET(%rcx)
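        /* fstpt stores and pops, so the two instructions above record
           st(0) and st(1) for the audit hook; the values are pushed
           back with the matching fldt pair after the _dl_audit_pltexit
           call below.  */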

        movq 24(%rbx), %rdx             # La_x86_64_regs argument to %rdx.
        movq 40(%rbx), %rsi             # Copy args pushed by PLT in register.
        movq 32(%rbx), %rdi             # %rdi: link_map, %rsi: reloc_index
        call _dl_audit_pltexit

        /* Restore return registers.  */
        movq LRV_RAX_OFFSET(%rsp), %rax
        movq LRV_RDX_OFFSET(%rsp), %rdx

        movaps LRV_XMM0_OFFSET(%rsp), %xmm0
        movaps LRV_XMM1_OFFSET(%rsp), %xmm1

# ifdef RESTORE_AVX
        /* Check if xmm0/xmm1 registers are changed by an audit module.  */
        vpcmpeqq (LRV_SIZE)(%rsp), %xmm0, %xmm2
        vpmovmskb %xmm2, %esi
        cmpl $0xffff, %esi
        jne 1f
        VMOVA LRV_VECTOR0_OFFSET(%rsp), %VEC(0)

1:      vpcmpeqq (LRV_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2
        vpmovmskb %xmm2, %esi
        cmpl $0xffff, %esi
        jne 1f
        VMOVA LRV_VECTOR1_OFFSET(%rsp), %VEC(1)

1:
# endif

        fldt LRV_ST1_OFFSET(%rsp)
        fldt LRV_ST0_OFFSET(%rsp)

        mov %RBX_LP, %RSP_LP
        movq (%rsp), %rbx
        cfi_restore(%rbx)
        cfi_def_cfa_register(%rsp)

        add $48, %RSP_LP                # Adjust the stack to the return value
                                        # (eats the reloc index and link_map).
        cfi_adjust_cfa_offset(-48)
        retq

        cfi_endproc
        .size _dl_runtime_profile, .-_dl_runtime_profile
#endif

