1 | /* PLT trampolines. ia64 version. |
2 | Copyright (C) 2005-2022 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #include <sysdep.h> |
20 | #undef ret |
21 | |
22 | /* |
23 | This code is used in dl-runtime.c to call the `_dl_fixup' function |
24 | and then redirect to the address it returns. `_dl_fixup()' takes two |
25 | arguments, however _dl_profile_fixup() takes five. |
26 | |
27 | The ABI specifies that we will never see more than 8 input |
28 | registers to a function call, thus it is safe to simply allocate |
29 | those, and simpler than playing stack games. */ |
30 | |
31 | /* Stack frame used to save and restore the 8 incoming fp registers
     f8-f15. f8-f14 take 7 * 16 bytes above the 16 byte scratch area at
     the new sp, and f15 goes into the 16 byte scratch area at the
     incoming sp. */ |
32 | #define RESOLVE_FRAME_SIZE (16*8) |
33 | |
/* _dl_runtime_resolve: PLT trampoline that resolves the target through
   _dl_fixup. It calls _dl_fixup (out0 = r16, out1 = r15 * 24), then
   branches to the address returned in ret0 with gp loaded from ret1.
   r8-r11 are kept in loc2-loc5 and f8-f15 are spilled to the stack
   around the call. ar.pfs and b0 are put back before the final branch.
   NOTE(review): r16 and r15 are presumably set up by the PLT stub (link
   map and relocation index) - confirm against the ia64 PLT layout. */
34 | ENTRY(_dl_runtime_resolve) |
35 | { .mmi |
36 | .prologue |
37 | .save ar.pfs, r40 /* r40 is loc0: r32-r39 are the 8 inputs */ |
38 | alloc loc0 = ar.pfs, 8, 6, 2, 0 /* 8 inputs, 6 locals, 2 outputs */ |
39 | /* Use the 16 byte scratch area at the incoming sp (the f15 slot |
40 | lands there). r2 will start at f8 and r3 will start at f9. */ |
41 | adds r2 = -(RESOLVE_FRAME_SIZE - 16), r12 |
42 | adds r3 = -(RESOLVE_FRAME_SIZE - 32), r12 |
43 | } |
44 | { .mii |
45 | .fframe RESOLVE_FRAME_SIZE |
46 | adds r12 = -RESOLVE_FRAME_SIZE, r12 |
47 | .save rp, loc1 |
48 | mov loc1 = b0 /* keep the return address across the call */ |
49 | .body |
50 | mov loc2 = r8 /* preserve struct value register */ |
51 | ;; |
52 | } |
53 | { .mii |
54 | mov loc3 = r9 /* preserve language specific register */ |
55 | mov loc4 = r10 /* preserve language specific register */ |
56 | mov loc5 = r11 /* preserve language specific register */ |
57 | } |
58 | { .mmi |
59 | stf.spill [r2] = f8, 32 |
60 | stf.spill [r3] = f9, 32 |
61 | mov out0 = r16 /* first argument of _dl_fixup */ |
62 | ;; |
63 | } |
64 | { .mmi |
65 | stf.spill [r2] = f10, 32 |
66 | stf.spill [r3] = f11, 32 |
67 | shl out1 = r15, 4 /* out1 = r15 * 16 */ |
68 | ;; |
69 | } |
70 | { .mmi |
71 | stf.spill [r2] = f12, 32 |
72 | stf.spill [r3] = f13, 32 |
73 | /* Relocation record is 24 bytes: out1 = r15 * 8 + r15 * 16. */ |
74 | shladd out1 = r15, 3, out1 |
75 | ;; |
76 | } |
77 | { .mmb |
78 | stf.spill [r2] = f14 |
79 | stf.spill [r3] = f15 |
80 | br.call.sptk.many b0 = _dl_fixup |
81 | } |
82 | { .mii |
83 | /* Skip the 16byte scratch area. */ |
84 | adds r2 = 16, r12 |
85 | adds r3 = 32, r12 |
86 | mov b6 = ret0 /* branch target returned by _dl_fixup */ |
87 | ;; |
88 | } |
89 | { .mmi |
90 | ldf.fill f8 = [r2], 32 |
91 | ldf.fill f9 = [r3], 32 |
92 | mov b0 = loc1 /* put back the return address */ |
93 | ;; |
94 | } |
95 | { .mmi |
96 | ldf.fill f10 = [r2], 32 |
97 | ldf.fill f11 = [r3], 32 |
98 | mov gp = ret1 /* gp value returned by _dl_fixup */ |
99 | ;; |
100 | } |
101 | { .mmi |
102 | ldf.fill f12 = [r2], 32 |
103 | ldf.fill f13 = [r3], 32 |
104 | mov ar.pfs = loc0 |
105 | ;; |
106 | } |
107 | { .mmi |
108 | ldf.fill f14 = [r2], 32 |
109 | ldf.fill f15 = [r3], 32 |
110 | .restore sp /* pop the unwind frame state */ |
111 | adds r12 = RESOLVE_FRAME_SIZE, r12 |
112 | ;; |
113 | } |
114 | { .mii |
115 | mov r9 = loc3 /* restore language specific register */ |
116 | mov r10 = loc4 /* restore language specific register */ |
117 | mov r11 = loc5 /* restore language specific register */ |
118 | } |
119 | { .mii |
120 | mov r8 = loc2 /* restore struct value register */ |
121 | ;; |
122 | } |
123 | /* An alloc is needed for the break system call to work. |
124 | We don't care about the old value of the pfs register, so it |
     is written to the scratch register r2. The branch below is a
     plain branch, not a call: b0 already holds the return address
     of the original caller. */ |
125 | { .mmb |
126 | .prologue |
127 | .body |
128 | alloc r2 = ar.pfs, 0, 0, 8, 0 /* frame of 8 outputs only */ |
129 | br.sptk.many b6 |
130 | ;; |
131 | } |
132 | END(_dl_runtime_resolve) |
133 | |
134 | |
135 | /* The fourth argument to _dl_profile_fixup and the third one to |
136 | _dl_audit_pltexit are a pointer to La_ia64_regs: |
137 | |
138 | 8byte r8 |
139 | 8byte r9 |
140 | 8byte r10 |
141 | 8byte r11 |
142 | 8byte in0 |
143 | 8byte in1 |
144 | 8byte in2 |
145 | 8byte in3 |
146 | 8byte in4 |
147 | 8byte in5 |
148 | 8byte in6 |
149 | 8byte in7 |
150 | 16byte f8 |
151 | 16byte f9 |
152 | 16byte f10 |
153 | 16byte f11 |
154 | 16byte f12 |
155 | 16byte f13 |
156 | 16byte f14 |
157 | 16byte f15 |
158 | 8byte ar.unat |
159 | 8byte sp |
160 | |
161 | The fifth argument to _dl_profile_fixup is a pointer to long int. |
162 | The fourth argument to _dl_audit_pltexit is a pointer to |
163 | La_ia64_retval: |
164 | |
165 | 8byte r8 |
166 | 8byte r9 |
167 | 8byte r10 |
168 | 8byte r11 |
169 | 16byte f8 |
170 | 16byte f9 |
171 | 16byte f10 |
172 | 16byte f11 |
173 | 16byte f12 |
174 | 16byte f13 |
175 | 16byte f14 |
176 | 16byte f15 |
177 | |
178 | Since the stack has to be 16-byte aligned, stack space is allocated |
179 | in 16-byte increments. Before calling _dl_profile_fixup, the stack |
180 | will look like |
181 | |
182 | psp new frame_size |
183 | +16 La_ia64_regs |
184 | sp scratch |
185 | |
186 | */ |
187 | |
/* PLTENTER_FRAME_SIZE: La_ia64_regs (4 + 8 eight-byte GR slots, 8
   sixteen-byte FR slots, ar.unat and sp) plus the 16 byte scratch area.
   PLTEXIT_FRAME_SIZE: additionally holds La_ia64_retval (4 eight-byte
   GR slots and 8 sixteen-byte FR slots). */
188 | #define PLTENTER_FRAME_SIZE (4*8 + 8*8 + 8*16 + 2*8 + 16) |
189 | #define PLTEXIT_FRAME_SIZE (PLTENTER_FRAME_SIZE + 4*8 + 8*16) |
190 | |
191 | #ifndef PROF |
/* _dl_runtime_profile: PLT trampoline that resolves the target through
   _dl_profile_fixup and reports the return through _dl_audit_pltexit.

   It fills a La_ia64_regs record at sp + 16 and calls _dl_profile_fixup
   with out0 = r16, out1 = r15 * 24, out2 = b0, out3 = pointer to
   La_ia64_regs and out4 = the incoming sp, where the new frame size is
   stored. After the call the frame size word is examined:
     -1         restore state and branch straight to the resolved
                function, without calling _dl_audit_pltexit
     0          call the resolved function without copying any stack
     otherwise  copy that many bytes, rounded up to a multiple of 16,
                from above the incoming scratch area to below the
                current frame, then call the resolved function
   After the resolved function returns, r8-r11 and f8-f15 are stored in
   a La_ia64_retval record, _dl_audit_pltexit is called with
   (loc8, loc9, pointer to La_ia64_regs, pointer to La_ia64_retval),
   the return values are reloaded and control returns to the caller.

   Local registers: loc0 ar.pfs, loc1 b0, loc2-loc5 r8-r11, loc6 ar.lc,
   loc7 pointer to La_ia64_regs, loc8 r16, loc9 r15 * 24, loc10 incoming
   sp, loc11 incoming gp. */
192 | ENTRY(_dl_runtime_profile) |
193 | { .mii |
194 | .prologue |
195 | .save ar.pfs, r40 |
196 | alloc loc0 = ar.pfs, 8, 12, 8, 0 /* 8 inputs, 12 locals, 8 outputs */ |
197 | .vframe loc10 |
198 | mov loc10 = r12 /* loc10 = incoming sp */ |
199 | .save rp, loc1 |
200 | mov loc1 = b0 |
201 | } |
202 | { .mii |
203 | .save ar.unat, r17 |
204 | mov r17 = ar.unat /* incoming ar.unat, put back after the spills below */ |
205 | .save ar.lc, loc6 |
206 | mov loc6 = ar.lc /* ar.lc is reused by the stack copy loop */ |
207 | mov loc11 = gp /* incoming gp, needed again for _dl_audit_pltexit */ |
208 | } |
209 | { .mii |
210 | .body |
211 | /* There is a 16 byte scratch area. r2 will start at r8 and |
212 | r3 will start at r9 for La_ia64_regs. */ |
213 | adds r2 = -(PLTENTER_FRAME_SIZE - 16), r12 |
214 | adds r3 = -(PLTENTER_FRAME_SIZE - 24), r12 |
215 | adds r12 = -PLTENTER_FRAME_SIZE, r12 |
216 | ;; |
217 | } |
218 | { .mmi |
219 | st8 [r2] = r8, 16; |
220 | st8 [r3] = r9, 16; |
221 | mov out2 = b0 /* needed by _dl_profile_fixup */ |
222 | ;; |
223 | } |
224 | { .mmi |
225 | st8 [r2] = r10, 16; |
226 | st8 [r3] = r11, 16; |
227 | adds out3 = 16, r12 /* pointer to La_ia64_regs */ |
228 | ;; |
229 | } |
230 | { .mmi |
231 | .mem.offset 0, 0 |
232 | st8.spill [r2] = in0, 16 |
233 | .mem.offset 8, 0 |
234 | st8.spill [r3] = in1, 16 |
235 | mov out4 = loc10 /* pointer to new frame size */ |
236 | ;; |
237 | } |
238 | { .mmi |
239 | .mem.offset 0, 0 |
240 | st8.spill [r2] = in2, 16 |
241 | .mem.offset 8, 0 |
242 | st8.spill [r3] = in3, 16 |
243 | mov loc2 = r8 /* preserve struct value register */ |
244 | ;; |
245 | } |
246 | { .mmi |
247 | .mem.offset 0, 0 |
248 | st8.spill [r2] = in4, 16 |
249 | .mem.offset 8, 0 |
250 | st8.spill [r3] = in5, 16 |
251 | mov loc3 = r9 /* preserve language specific register */ |
252 | ;; |
253 | } |
254 | { .mmi |
255 | .mem.offset 0, 0 |
256 | st8 [r2] = in6, 16 /* NOTE(review): plain st8 here while in0-in5 use st8.spill - confirm intended */ |
257 | .mem.offset 8, 0 |
258 | st8 [r3] = in7, 24 /* adjust for f9 */ |
259 | mov loc4 = r10 /* preserve language specific register */ |
260 | ;; |
261 | } |
262 | { .mii |
263 | mov r18 = ar.unat /* save it in La_ia64_regs */ |
264 | mov loc7 = out3 /* save it for _dl_audit_pltexit */ |
265 | mov loc5 = r11 /* preserve language specific register */ |
266 | } |
267 | { .mmi |
268 | stf.spill [r2] = f8, 32 |
269 | stf.spill [r3] = f9, 32 |
270 | mov out0 = r16 /* needed by _dl_profile_fixup */ |
271 | ;; |
272 | } |
273 | { .mii |
274 | mov ar.unat = r17 /* restore it for function call */ |
275 | mov loc8 = r16 /* save it for _dl_audit_pltexit */ |
276 | nop.i 0x0 |
277 | } |
278 | { .mmi |
279 | stf.spill [r2] = f10, 32 |
280 | stf.spill [r3] = f11, 32 |
281 | shl out1 = r15, 4 /* out1 = r15 * 16 */ |
282 | ;; |
283 | } |
284 | { .mmi |
285 | stf.spill [r2] = f12, 32 |
286 | stf.spill [r3] = f13, 32 |
287 | /* Relocation record is 24 bytes: out1 = r15 * 8 + r15 * 16. */ |
288 | shladd out1 = r15, 3, out1 |
289 | ;; |
290 | } |
291 | { .mmi |
292 | stf.spill [r2] = f14, 32 |
293 | stf.spill [r3] = f15, 24 |
294 | mov loc9 = out1 /* save it for _dl_audit_pltexit */ |
295 | ;; |
296 | } |
297 | { .mmb |
298 | st8 [r2] = r18 /* store ar.unat */ |
299 | st8 [r3] = loc10 /* store sp */ |
300 | br.call.sptk.many b0 = _dl_profile_fixup |
301 | } |
302 | { .mii |
303 | /* Skip the 16byte scratch area, 4 language specific GRs and |
304 | 8 incoming GRs to restore incoming fp registers. */ |
305 | adds r2 = (4*8 + 8*8 + 16), r12 |
306 | adds r3 = (4*8 + 8*8 + 32), r12 |
307 | mov b6 = ret0 /* branch target returned by _dl_profile_fixup */ |
308 | ;; |
309 | } |
310 | { .mmi |
311 | ldf.fill f8 = [r2], 32 |
312 | ldf.fill f9 = [r3], 32 |
313 | mov gp = ret1 /* gp value returned by _dl_profile_fixup */ |
314 | ;; |
315 | } |
316 | { .mmi |
317 | ldf.fill f10 = [r2], 32 |
318 | ldf.fill f11 = [r3], 32 |
319 | mov r8 = loc2 /* restore struct value register */ |
320 | ;; |
321 | } |
322 | { .mmi |
323 | ldf.fill f12 = [r2], 32 |
324 | ldf.fill f13 = [r3], 32 |
325 | mov r9 = loc3 /* restore language specific register */ |
326 | ;; |
327 | } |
328 | { .mmi |
329 | ldf.fill f14 = [r2], 32 |
330 | ldf.fill f15 = [r3], 32 |
331 | mov r10 = loc4 /* restore language specific register */ |
332 | ;; |
333 | } |
334 | { .mii |
335 | ld8 r15 = [loc10] /* load the new frame size */ |
336 | mov r11 = loc5 /* restore language specific register */ |
337 | ;; |
338 | cmp.eq p6, p7 = -1, r15 /* p6: frame size is -1, p7: anything else */ |
339 | ;; |
340 | } |
341 | { .mii |
342 | (p7) cmp.eq p8, p9 = 0, r15 /* p8: nothing to copy, p9: bytes to copy */ |
343 | (p6) mov b0 = loc1 |
344 | (p6) mov ar.lc = loc6 |
345 | } |
346 | { .mib |
347 | nop.m 0x0 |
348 | (p6) mov ar.pfs = loc0 |
349 | (p6) br.cond.dptk.many .Lresolved /* p7 is clear on this path */ |
350 | ;; |
351 | } |
352 | |
353 | /* At this point, the stack looks like |
354 | |
355 | +psp free |
356 | +16 La_ia64_regs |
357 | sp scratch |
358 | |
359 | We need to keep the current stack and call the resolved |
360 | function by copying r15 bytes from sp + PLTENTER_FRAME_SIZE |
361 | + 16 (scratch area) to sp + 16 (scratch area). Since the stack |
362 | has to be 16byte aligned, we round r15 up to a multiple of 16. */ |
363 | |
364 | { .mbb |
365 | (p9) adds r15 = 15, r15 |
366 | (p8) br.cond.dptk.many .Lno_new_frame |
367 | nop.b 0x0 |
368 | ;; |
369 | } |
370 | { .mmi |
371 | and r15 = -16, r15 |
372 | ;; |
373 | /* We don't copy the 16byte scratch area. Prepare r16/r17 as |
374 | destination. */ |
375 | sub r16 = r12, r15 |
376 | sub r17 = r12, r15 |
377 | ;; |
378 | } |
379 | { .mii |
380 | adds r16 = 16, r16 |
381 | adds r17 = 24, r17 |
382 | sub r12 = r12, r15 /* Adjust stack */ |
383 | ;; |
384 | } |
385 | { .mii |
386 | nop.m 0x0 |
387 | shr r15 = r15, 4 /* number of 16 byte chunks to copy */ |
388 | ;; |
389 | adds r15 = -1, r15 /* loop count minus one, for ar.lc */ |
390 | ;; |
391 | } |
392 | { .mii |
393 | /* Skip the 16byte scratch area. Prepare r2/r3 as source. */ |
394 | adds r2 = 16, loc10 |
395 | adds r3 = 24, loc10 |
396 | mov ar.lc = r15 |
397 | ;; |
398 | } |
399 | .Lcopy: |
400 | { .mmi |
401 | ld8 r18 = [r2], 16 /* each pass moves 16 bytes as two 8 byte words */ |
402 | ld8 r19 = [r3], 16 |
403 | nop.i 0x0 |
404 | ;; |
405 | } |
406 | { .mmb |
407 | st8 [r16] = r18, 16 |
408 | st8 [r17] = r19, 16 |
409 | br.cloop.sptk.few .Lcopy |
410 | } |
411 | .Lno_new_frame: |
412 | { .mii |
413 | mov out0 = in0 /* forward the 8 incoming arguments unchanged */ |
414 | mov out1 = in1 |
415 | mov out2 = in2 |
416 | } |
417 | { .mii |
418 | mov out3 = in3 |
419 | mov out4 = in4 |
420 | mov out5 = in5 |
421 | } |
422 | { .mib |
423 | mov out6 = in6 |
424 | mov out7 = in7 |
425 | /* Call the resolved function */ |
426 | br.call.sptk.many b0 = b6 |
427 | } |
428 | { .mii |
429 | /* Prepare stack for _dl_audit_pltexit. Loc10 has the original |
430 | stack pointer. r2 starts at the r8 slot and r3 at the r9 slot |
     of La_ia64_retval. */ |
431 | adds r12 = -PLTEXIT_FRAME_SIZE, loc10 |
432 | adds r2 = -(PLTEXIT_FRAME_SIZE - 16), loc10 |
433 | adds r3 = -(PLTEXIT_FRAME_SIZE - 24), loc10 |
434 | ;; |
435 | } |
436 | { .mmi |
437 | /* Store all possible return values into the La_ia64_retval buffer. */ |
438 | st8 [r2] = r8, 16 |
439 | st8 [r3] = r9, 16 |
440 | mov out0 = loc8 /* r16 as passed to _dl_profile_fixup */ |
441 | ;; |
442 | } |
443 | { .mmi |
444 | st8 [r2] = r10, 16 |
445 | st8 [r3] = r11, 24 /* adjust for f9 */ |
446 | mov out1 = loc9 /* r15 * 24 as passed to _dl_profile_fixup */ |
447 | ;; |
448 | } |
449 | { .mmi |
450 | stf.spill [r2] = f8, 32 |
451 | stf.spill [r3] = f9, 32 |
452 | mov out2 = loc7 /* Pointer to La_ia64_regs */ |
453 | ;; |
454 | } |
455 | { .mmi |
456 | stf.spill [r2] = f10, 32 |
457 | stf.spill [r3] = f11, 32 |
458 | adds out3 = 16, r12 /* Pointer to La_ia64_retval */ |
459 | ;; |
460 | } |
461 | { .mmi |
462 | stf.spill [r2] = f12, 32 |
463 | stf.spill [r3] = f13, 32 |
464 | /* We need to restore gp for _dl_audit_pltexit. */ |
465 | mov gp = loc11 |
466 | ;; |
467 | } |
468 | { .mmb |
469 | stf.spill [r2] = f14 |
470 | stf.spill [r3] = f15 |
471 | br.call.sptk.many b0 = _dl_audit_pltexit |
472 | } |
473 | { .mmi |
474 | /* Load all the non-floating and floating return values. Skip |
475 | the 16byte scratch area. */ |
476 | adds r2 = 16, r12 |
477 | adds r3 = 24, r12 |
478 | nop.i 0x0 |
479 | ;; |
480 | } |
481 | { .mmi |
482 | ld8 r8 = [r2], 16 |
483 | ld8 r9 = [r3], 16 |
484 | nop.i 0x0 |
485 | ;; |
486 | } |
487 | { .mmi |
488 | ld8 r10 = [r2], 16 |
489 | ld8 r11 = [r3], 24 /* adjust for f9 */ |
490 | nop.i 0x0 |
491 | ;; |
492 | } |
493 | { .mmi |
494 | ldf.fill f8 = [r2], 32 |
495 | ldf.fill f9 = [r3], 32 |
496 | mov ar.lc = loc6 |
497 | ;; |
498 | } |
499 | { .mmi |
500 | ldf.fill f10 = [r2], 32 |
501 | ldf.fill f11 = [r3], 32 |
502 | mov ar.pfs = loc0 |
503 | ;; |
504 | } |
505 | { .mmi |
506 | ldf.fill f12 = [r2], 32 |
507 | ldf.fill f13 = [r3], 32 |
508 | mov b0 = loc1 |
509 | ;; |
510 | } |
511 | { .mmi |
512 | ldf.fill f14 = [r2] |
513 | ldf.fill f15 = [r3] |
514 | /* We know that the previous stack pointer, loc10, isn't 0. |
515 | We use it to reload p7, which selects the return below. */ |
516 | cmp.ne p7, p0 = 0, loc10 |
517 | ;; |
518 | } |
519 | .Lresolved: |
520 | { .mmb |
521 | .restore sp |
522 | mov r12 = loc10 |
523 | (p7) br.ret.sptk.many b0 /* p7 set: return to the caller. p7 clear: fall through */ |
524 | ;; |
525 | } |
526 | /* An alloc is needed for the break system call to work. We |
527 | don't care about the old value of the pfs register. After |
528 | this alloc, we can't use any rotating registers. Otherwise |
529 | assembler won't be happy. This has to be at the end. */ |
530 | { .mmb |
531 | .prologue |
532 | .body |
533 | alloc r2 = ar.pfs, 0, 0, 8, 0 /* frame of 8 outputs only */ |
534 | br.sptk.many b6 |
535 | ;; |
536 | } |
537 | END(_dl_runtime_profile) |
538 | #endif |
539 | |