1/* PLT trampolines. ia64 version.
2 Copyright (C) 2005-2022 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19#include <sysdep.h>
20#undef ret
21
22/*
23 This code is used in dl-runtime.c to call the `_dl_fixup' function
24 and then redirect to the address it returns. `_dl_fixup()' takes two
25 arguments, however _dl_profile_fixup() takes five.
26
27 The ABI specifies that we will never see more than 8 input
28 registers to a function call, thus it is safe to simply allocate
29 those, and simpler than playing stack games. */
30
31/* Stack frame size of _dl_runtime_resolve: room to save and restore the
   8 incoming fp registers f8-f15 with stf.spill/ldf.fill, 16 bytes
   each. */
32#define RESOLVE_FRAME_SIZE (16*8)
33
/* _dl_runtime_resolve: lazy-binding trampoline.

   Register use, as visible in the code below:
     r16       passed unchanged to _dl_fixup as its first argument
     r15       multiplied by 24 (the relocation record size) and passed
               as the second argument
     r8-r11    kept in loc2-loc5 across the call and restored afterwards
     f8-f15    spilled to the stack before the call and refilled after
     in0-in7   left untouched; the final alloc turns them into the
               output registers seen by the resolved function

   After _dl_fixup returns, ret0 is used as the branch target (b6) and
   ret1 as the new gp.  The original b0 and ar.pfs are restored, the
   frame is released, and control is transferred with a plain branch
   (not a call), so the resolved function returns directly to the
   original caller. */
34ENTRY(_dl_runtime_resolve)
35 { .mmi
36 .prologue
37 .save ar.pfs, r40
38 alloc loc0 = ar.pfs, 8, 6, 2, 0 /* 8 inputs, 6 locals (loc0-loc5), 2 outputs */
39 /* Use the 16 byte scratch area. r2 will start at f8 and
40 r3 will start at f9. */
41 adds r2 = -(RESOLVE_FRAME_SIZE - 16), r12
42 adds r3 = -(RESOLVE_FRAME_SIZE - 32), r12
43 }
44 { .mii
45 .fframe RESOLVE_FRAME_SIZE
46 adds r12 = -RESOLVE_FRAME_SIZE, r12 /* allocate the frame */
47 .save rp, loc1
48 mov loc1 = b0 /* keep the return address of the original caller */
49 .body
50 mov loc2 = r8 /* preserve struct value register */
51 ;;
52 }
53 { .mii
54 mov loc3 = r9 /* preserve language specific register */
55 mov loc4 = r10 /* preserve language specific register */
56 mov loc5 = r11 /* preserve language specific register */
57 }
 /* r2 and r3 walk interleaved 16-byte slots, each stepping by 32:
    r2 covers f8/f10/f12/f14 and r3 covers f9/f11/f13/f15. */
58 { .mmi
59 stf.spill [r2] = f8, 32
60 stf.spill [r3] = f9, 32
61 mov out0 = r16 /* first argument to _dl_fixup */
62 ;;
63 }
64 { .mmi
65 stf.spill [r2] = f10, 32
66 stf.spill [r3] = f11, 32
67 shl out1 = r15, 4 /* out1 = r15 * 16 */
68 ;;
69 }
70 { .mmi
71 stf.spill [r2] = f12, 32
72 stf.spill [r3] = f13, 32
73 /* Relocation record is 24 byte. */
74 shladd out1 = r15, 3, out1 /* out1 = r15 * 8 + r15 * 16 = r15 * 24 */
75 ;;
76 }
77 { .mmb
78 stf.spill [r2] = f14
79 stf.spill [r3] = f15 /* this slot is at the incoming sp, i.e. the scratch area */
80 br.call.sptk.many b0 = _dl_fixup
81 }
82 { .mii
83 /* Skip the 16byte scratch area. */
84 adds r2 = 16, r12
85 adds r3 = 32, r12
86 mov b6 = ret0 /* branch target returned by _dl_fixup */
87 ;;
88 }
89 { .mmi
90 ldf.fill f8 = [r2], 32
91 ldf.fill f9 = [r3], 32
92 mov b0 = loc1 /* restore the original return address */
93 ;;
94 }
95 { .mmi
96 ldf.fill f10 = [r2], 32
97 ldf.fill f11 = [r3], 32
98 mov gp = ret1 /* gp value returned together with the target */
99 ;;
100 }
101 { .mmi
102 ldf.fill f12 = [r2], 32
103 ldf.fill f13 = [r3], 32
104 mov ar.pfs = loc0
105 ;;
106 }
107 { .mmi
108 ldf.fill f14 = [r2], 32
109 ldf.fill f15 = [r3], 32
110 .restore sp /* pop the unwind frame state */
111 adds r12 = RESOLVE_FRAME_SIZE, r12 /* release the frame */
112 ;;
113 }
114 { .mii
115 mov r9 = loc3 /* restore language specific register */
116 mov r10 = loc4 /* restore language specific register */
117 mov r11 = loc5 /* restore language specific register */
118 }
119 { .mii
120 mov r8 = loc2 /* restore struct value register */
121 ;;
122 }
123 /* An alloc is needed for the break system call to work.
124 We don't care about the old value of the pfs register. */
125 { .mmb
126 .prologue
127 .body
128 alloc r2 = ar.pfs, 0, 0, 8, 0 /* 0 inputs, 0 locals, 8 outputs */
129 br.sptk.many b6 /* jump (not call) to the resolved function */
130 ;;
131 }
132END(_dl_runtime_resolve)
133
134
135/* The fourth argument to _dl_profile_fixup and the third one to
136 _dl_audit_pltexit are a pointer to La_ia64_regs:
137
138 8byte r8
139 8byte r9
140 8byte r10
141 8byte r11
142 8byte in0
143 8byte in1
144 8byte in2
145 8byte in3
146 8byte in4
147 8byte in5
148 8byte in6
149 8byte in7
150 16byte f8
151 16byte f9
152 16byte f10
153 16byte f11
154 16byte f12
155 16byte f13
156 16byte f14
157 16byte f15
158 8byte ar.unat
159 8byte sp
160
161 The fifth argument to _dl_profile_fixup is a pointer to long int.
162 The fourth argument to _dl_audit_pltexit is a pointer to
163 La_ia64_retval:
164
165 8byte r8
166 8byte r9
167 8byte r10
168 8byte r11
169 16byte f8
170 16byte f9
171 16byte f10
172 16byte f11
173 16byte f12
174 16byte f13
175 16byte f14
176 16byte f15
177
178 Since the stack has to be 16-byte aligned, stack space is allocated
179 in 16-byte increments. Before calling _dl_profile_fixup, the stack
180 will look like
181
182 psp new frame_size
183 +16 La_ia64_regs
184 sp scratch
185
186 */
187
/* Frame used for the call to _dl_profile_fixup.  The terms are, in
   order: 4 GRs (r8-r11), 8 incoming GRs (in0-in7), 8 fp registers
   (f8-f15, 16 bytes each), ar.unat plus sp, and the 16-byte scratch
   area.  Everything except the scratch area forms La_ia64_regs. */
188#define PLTENTER_FRAME_SIZE (4*8 + 8*8 + 8*16 + 2*8 + 16)
/* Frame used for the call to _dl_audit_pltexit: the frame above plus
   La_ia64_retval, i.e. 4 GRs (r8-r11) and 8 fp registers (f8-f15). */
189#define PLTEXIT_FRAME_SIZE (PLTENTER_FRAME_SIZE + 4*8 + 8*16)
190
/* _dl_runtime_profile: trampoline variant that calls _dl_profile_fixup
   and, when requested, _dl_audit_pltexit.  Only assembled when PROF is
   not defined.

   Outline:
     1. Build La_ia64_regs on the stack (r8-r11, in0-in7, f8-f15,
        ar.unat, incoming sp) and call _dl_profile_fixup with
          out0 = r16, out1 = r15 * 24, out2 = b0,
          out3 = pointer to La_ia64_regs,
          out4 = incoming sp, where the frame size is stored.
     2. Reload f8-f15 and r8-r11, then load the frame size from the
        incoming sp.
        - Frame size -1: restore b0, ar.lc, ar.pfs and sp, then branch
          to the resolved function (b6) without calling
          _dl_audit_pltexit.
        - Otherwise: if the frame size is nonzero, round it up to a
          multiple of 16 and copy that many bytes of the caller's
          stack (just above its scratch area) to a new area below the
          current frame.  Then call the resolved function, store its
          return registers into La_ia64_retval, call
          _dl_audit_pltexit, reload the return registers and return to
          the original caller.

   Local registers:
     loc0  ar.pfs             loc6   ar.lc
     loc1  b0                 loc7   pointer to La_ia64_regs
     loc2  r8                 loc8   r16
     loc3  r9                 loc9   r15 * 24
     loc4  r10                loc10  incoming sp
     loc5  r11                loc11  incoming gp */
191#ifndef PROF
192ENTRY(_dl_runtime_profile)
193 { .mii
194 .prologue
195 .save ar.pfs, r40
196 alloc loc0 = ar.pfs, 8, 12, 8, 0 /* 8 inputs, 12 locals (loc0-loc11), 8 outputs */
197 .vframe loc10
198 mov loc10 = r12 /* remember the incoming stack pointer */
199 .save rp, loc1
200 mov loc1 = b0
201 }
202 { .mii
203 .save ar.unat, r17
204 mov r17 = ar.unat /* value on entry; written back before the call below */
205 .save ar.lc, loc6
206 mov loc6 = ar.lc /* ar.lc is used by the stack copy loop below */
207 mov loc11 = gp /* needed again for the call to _dl_audit_pltexit */
208 }
209 { .mii
210 .body
211 /* There is a 16 byte scratch area. r2 will start at r8 and
212 r3 will start at r9 for La_ia64_regs. */
213 adds r2 = -(PLTENTER_FRAME_SIZE - 16), r12
214 adds r3 = -(PLTENTER_FRAME_SIZE - 24), r12
215 adds r12 = -PLTENTER_FRAME_SIZE, r12
216 ;;
217 }
218 { .mmi
219 st8 [r2] = r8, 16;
220 st8 [r3] = r9, 16;
221 mov out2 = b0 /* needed by _dl_profile_fixup */
222 ;;
223 }
224 { .mmi
225 st8 [r2] = r10, 16;
226 st8 [r3] = r11, 16;
227 adds out3 = 16, r12 /* pointer to La_ia64_regs */
228 ;;
229 }
230 { .mmi
231 .mem.offset 0, 0
232 st8.spill [r2] = in0, 16
233 .mem.offset 8, 0
234 st8.spill [r3] = in1, 16
235 mov out4 = loc10 /* pointer to new frame size */
236 ;;
237 }
238 { .mmi
239 .mem.offset 0, 0
240 st8.spill [r2] = in2, 16
241 .mem.offset 8, 0
242 st8.spill [r3] = in3, 16
243 mov loc2 = r8 /* preserve struct value register */
244 ;;
245 }
246 { .mmi
247 .mem.offset 0, 0
248 st8.spill [r2] = in4, 16
249 .mem.offset 8, 0
250 st8.spill [r3] = in5, 16
251 mov loc3 = r9 /* preserve language specific register */
252 ;;
253 }
254 { .mmi
255 .mem.offset 0, 0
256 st8 [r2] = in6, 16
257 .mem.offset 8, 0
258 st8 [r3] = in7, 24 /* adjust for f9 */
259 mov loc4 = r10 /* preserve language specific register */
260 ;;
261 }
262 { .mii
263 mov r18 = ar.unat /* save it in La_ia64_regs */
264 mov loc7 = out3 /* save it for _dl_audit_pltexit */
265 mov loc5 = r11 /* preserve language specific register */
266 }
 /* From here on r2 and r3 walk interleaved 16-byte fp slots, each
    stepping by 32. */
267 { .mmi
268 stf.spill [r2] = f8, 32
269 stf.spill [r3] = f9, 32
270 mov out0 = r16 /* needed by _dl_profile_fixup */
271 ;;
272 }
273 { .mii
274 mov ar.unat = r17 /* restore it for function call */
275 mov loc8 = r16 /* save it for _dl_audit_pltexit */
276 nop.i 0x0
277 }
278 { .mmi
279 stf.spill [r2] = f10, 32
280 stf.spill [r3] = f11, 32
281 shl out1 = r15, 4 /* out1 = r15 * 16 */
282 ;;
283 }
284 { .mmi
285 stf.spill [r2] = f12, 32
286 stf.spill [r3] = f13, 32
287 /* Relocation record is 24 byte. */
288 shladd out1 = r15, 3, out1 /* out1 = r15 * 8 + r15 * 16 = r15 * 24 */
289 ;;
290 }
291 { .mmi
292 stf.spill [r2] = f14, 32
293 stf.spill [r3] = f15, 24 /* 24 so that r3 ends up 8 bytes above r2 */
294 mov loc9 = out1 /* save it for _dl_audit_pltexit */
295 ;;
296 }
297 { .mmb
298 st8 [r2] = r18 /* store ar.unat */
299 st8 [r3] = loc10 /* store sp */
300 br.call.sptk.many b0 = _dl_profile_fixup
301 }
302 { .mii
303 /* Skip the 16byte scratch area, 4 language specific GRs and
304 8 incoming GRs to restore incoming fp registers. */
305 adds r2 = (4*8 + 8*8 + 16), r12
306 adds r3 = (4*8 + 8*8 + 32), r12
307 mov b6 = ret0 /* branch target returned by _dl_profile_fixup */
308 ;;
309 }
310 { .mmi
311 ldf.fill f8 = [r2], 32
312 ldf.fill f9 = [r3], 32
313 mov gp = ret1 /* gp value returned together with the target */
314 ;;
315 }
316 { .mmi
317 ldf.fill f10 = [r2], 32
318 ldf.fill f11 = [r3], 32
319 mov r8 = loc2 /* restore struct value register */
320 ;;
321 }
322 { .mmi
323 ldf.fill f12 = [r2], 32
324 ldf.fill f13 = [r3], 32
325 mov r9 = loc3 /* restore language specific register */
326 ;;
327 }
328 { .mmi
329 ldf.fill f14 = [r2], 32
330 ldf.fill f15 = [r3], 32
331 mov r10 = loc4 /* restore language specific register */
332 ;;
333 }
334 { .mii
335 ld8 r15 = [loc10] /* load the new frame size */
336 mov r11 = loc5 /* restore language specific register */
337 ;;
338 cmp.eq p6, p7 = -1, r15 /* p6: frame size is -1, p7: it is not */
339 ;;
340 }
341 { .mii
342(p7) cmp.eq p8, p9 = 0, r15 /* p8: frame size is 0, p9: it is nonzero */
343(p6) mov b0 = loc1
344(p6) mov ar.lc = loc6
345 }
346 { .mib
347 nop.m 0x0
348(p6) mov ar.pfs = loc0
349(p6) br.cond.dptk.many .Lresolved /* frame size -1: no _dl_audit_pltexit call */
350 ;;
351 }
352
353 /* At this point, the stack looks like
354
355 +psp free
356 +16 La_ia64_regs
357 sp scratch
358
359 We need to keep the current stack and call the resolved
360 function by copying r15 bytes from sp + PLTENTER_FRAME_SIZE
361 + 16 (scratch area) to sp + 16 (scratch area). Since the stack
362 has to be 16byte aligned, we round r15 up to 16byte. */
363
364 { .mbb
365(p9) adds r15 = 15, r15 /* first half of rounding up to a multiple of 16 */
366(p8) br.cond.dptk.many .Lno_new_frame /* nothing to copy */
367 nop.b 0x0
368 ;;
369 }
370 { .mmi
371 and r15 = -16, r15 /* second half of the rounding */
372 ;;
373 /* We don't copy the 16byte scratch area. Prepare r16/r17 as
374 destination. */
375 sub r16 = r12, r15
376 sub r17 = r12, r15
377 ;;
378 }
379 { .mii
380 adds r16 = 16, r16
381 adds r17 = 24, r17
382 sub r12 = r12, r15 /* Adjust stack */
383 ;;
384 }
385 { .mii
386 nop.m 0x0
387 shr r15 = r15, 4 /* number of 16-byte chunks to copy */
388 ;;
389 adds r15 = -1, r15 /* loop count minus one, for ar.lc */
390 ;;
391 }
392 { .mii
393 /* Skip the 16byte scratch area. Prepare r2/r3 as source. */
394 adds r2 = 16, loc10
395 adds r3 = 24, loc10
396 mov ar.lc = r15
397 ;;
398 }
 /* Each iteration copies 16 bytes as two 8-byte words. */
399.Lcopy:
400 { .mmi
401 ld8 r18 = [r2], 16
402 ld8 r19 = [r3], 16
403 nop.i 0x0
404 ;;
405 }
406 { .mmb
407 st8 [r16] = r18, 16
408 st8 [r17] = r19, 16
409 br.cloop.sptk.few .Lcopy
410 }
411.Lno_new_frame:
 /* Pass the original incoming arguments on to the resolved function. */
412 { .mii
413 mov out0 = in0
414 mov out1 = in1
415 mov out2 = in2
416 }
417 { .mii
418 mov out3 = in3
419 mov out4 = in4
420 mov out5 = in5
421 }
422 { .mib
423 mov out6 = in6
424 mov out7 = in7
425 /* Call the resolved function */
426 br.call.sptk.many b0 = b6
427 }
428 { .mii
429 /* Prepare stack for _dl_audit_pltexit. Loc10 has the original
430 stack pointer. */
431 adds r12 = -PLTEXIT_FRAME_SIZE, loc10
432 adds r2 = -(PLTEXIT_FRAME_SIZE - 16), loc10
433 adds r3 = -(PLTEXIT_FRAME_SIZE - 24), loc10
434 ;;
435 }
436 { .mmi
437 /* Store all possible return values into the La_ia64_retval buffer. */
438 st8 [r2] = r8, 16
439 st8 [r3] = r9, 16
440 mov out0 = loc8 /* r16 as it was on entry */
441 ;;
442 }
443 { .mmi
444 st8 [r2] = r10, 16
445 st8 [r3] = r11, 24 /* 24 so that r3 lands on the f9 slot */
446 mov out1 = loc9 /* r15 * 24, as passed to _dl_profile_fixup */
447 ;;
448 }
449 { .mmi
450 stf.spill [r2] = f8, 32
451 stf.spill [r3] = f9, 32
452 mov out2 = loc7 /* Pointer to La_ia64_regs */
453 ;;
454 }
455 { .mmi
456 stf.spill [r2] = f10, 32
457 stf.spill [r3] = f11, 32
458 adds out3 = 16, r12 /* Pointer to La_ia64_retval */
459 ;;
460 }
461 { .mmi
462 stf.spill [r2] = f12, 32
463 stf.spill [r3] = f13, 32
464 /* We need to restore gp for _dl_audit_pltexit. */
465 mov gp = loc11
466 ;;
467 }
468 { .mmb
469 stf.spill [r2] = f14
470 stf.spill [r3] = f15
471 br.call.sptk.many b0 = _dl_audit_pltexit
472 }
473 { .mmi
474 /* Load all the non-floating and floating return values. Skip
475 the 16byte scratch area. */
476 adds r2 = 16, r12
477 adds r3 = 24, r12
478 nop.i 0x0
479 ;;
480 }
481 { .mmi
482 ld8 r8 = [r2], 16
483 ld8 r9 = [r3], 16
484 nop.i 0x0
485 ;;
486 }
487 { .mmi
488 ld8 r10 = [r2], 16
489 ld8 r11 = [r3], 24
490 nop.i 0x0
491 ;;
492 }
493 { .mmi
494 ldf.fill f8 = [r2], 32
495 ldf.fill f9 = [r3], 32
496 mov ar.lc = loc6
497 ;;
498 }
499 { .mmi
500 ldf.fill f10 = [r2], 32
501 ldf.fill f11 = [r3], 32
502 mov ar.pfs = loc0
503 ;;
504 }
505 { .mmi
506 ldf.fill f12 = [r2], 32
507 ldf.fill f13 = [r3], 32
508 mov b0 = loc1
509 ;;
510 }
511 { .mmi
512 ldf.fill f14 = [r2]
513 ldf.fill f15 = [r3]
514 /* We know that the previous stack pointer, loc10, isn't 0.
515 We use it to set p7, so that the br.ret at .Lresolved is taken. */
516 cmp.ne p7, p0 = 0, loc10
517 ;;
518 }
 /* Reached with p7 set after _dl_audit_pltexit (return to the original
    caller), or with p7 clear from the frame-size -1 path (fall through
    and branch to the resolved function). */
519.Lresolved:
520 { .mmb
521 .restore sp
522 mov r12 = loc10
523(p7) br.ret.sptk.many b0
524 ;;
525 }
526 /* An alloc is needed for the break system call to work. We
527 don't care about the old value of the pfs register. After
528 this alloc, we can't use any rotating registers. Otherwise
529 assembler won't be happy. This has to be at the end. */
530 { .mmb
531 .prologue
532 .body
533 alloc r2 = ar.pfs, 0, 0, 8, 0 /* 0 inputs, 0 locals, 8 outputs */
534 br.sptk.many b6 /* jump (not call) to the resolved function */
535 ;;
536 }
537END(_dl_runtime_profile)
538#endif
539

source code of glibc/sysdeps/ia64/dl-trampoline.S