//===----------------------Hexagon builtin routine ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Functions that implement common sequences in function prologues and
// epilogues. Sharing these sequences saves code size.

// FUNCTION_BEGIN name
//   Opens the definition of a global function called \name:
//     - switches to the .text section,
//     - exports the symbol and marks it as a function,
//     - applies .falign before the entry point,
//     - defines the label \name.
//   Pair it with FUNCTION_END (or FALLTHROUGH_TAIL_CALL) so that the symbol
//   also gets a .size.
        .macro FUNCTION_BEGIN name
        .text
        .globl  \name
        .type   \name, @function
        .falign
\name:
        .endm

// FUNCTION_END name
//   Closes the function \name by recording its size: the distance from the
//   label \name to the current location.
        .macro FUNCTION_END name
        .size   \name, . - \name
        .endm

// FALLTHROUGH_TAIL_CALL name0 name1
//   Ends function \name0 and immediately begins global function \name1 at the
//   current location, so execution of \name0 falls through into \name1.
//     - \name0 gets its .size here, so the shared tail is attributed to
//       \name1 only.
//     - \name1 is exported, typed as a function, and .falign-ed.
//   No section switch is emitted; \name1 stays in the section of \name0.
        .macro FALLTHROUGH_TAIL_CALL name0 name1
        .size   \name0, . - \name0
        .globl  \name1
        .type   \name1, @function
        .falign
\name1:
        .endm




// __save_r24_through_r27 / __save_r24_through_r25
//
// Save r25:24 at fp+#-8 and r27:26 at fp+#-16.
//
// Entry points:
//   __save_r24_through_r27  stores r27:26, then falls through into
//   __save_r24_through_r25  which stores r25:24 and returns via lr.
//
// The compiler knows that the __save_* functions clobber LR. No other
// registers should be used without informing the compiler.
//
// Since we can only issue one store per packet, we don't hurt performance by
// simply jumping to the right point in this sequence of stores.

FUNCTION_BEGIN __save_r24_through_r27
                memd(fp+#-16) = r27:26
FALLTHROUGH_TAIL_CALL __save_r24_through_r27 __save_r24_through_r25
        {
                memd(fp+#-8) = r25:24
                jumpr lr                // return happens in the same packet as the last store
        }
FUNCTION_END __save_r24_through_r25




// __restore_r24_through_r27_and_deallocframe_before_tailcall /
// __restore_r24_through_r25_and_deallocframe_before_tailcall
//
// Reload r27:26 from fp+#-16 (first entry point only) and r25:24 from fp+#-8,
// tear down the frame with deallocframe, and return to the caller.
//
// For each of the *_before_tailcall functions, jumpr lr is executed in parallel
// with deallocframe. That way, the return gets the old value of lr, which is
// where these functions need to return, and at the same time, lr gets the value
// it needs going into the tail call.

FUNCTION_BEGIN __restore_r24_through_r27_and_deallocframe_before_tailcall
                r27:26 = memd(fp+#-16)
FALLTHROUGH_TAIL_CALL __restore_r24_through_r27_and_deallocframe_before_tailcall __restore_r24_through_r25_and_deallocframe_before_tailcall
        {
                r25:24 = memd(fp+#-8)
                deallocframe
                jumpr lr                // uses the lr value from before deallocframe
        }
FUNCTION_END __restore_r24_through_r25_and_deallocframe_before_tailcall




// __restore_r24_through_r27_and_deallocframe
//
// Reload r27:26 from fp+#-16 and r25:24 from fp+#-8, tear down the frame with
// deallocframe, and return.
//
// Here we use the extra load bandwidth to restore LR early, allowing the return
// to occur in parallel with the deallocframe.

FUNCTION_BEGIN __restore_r24_through_r27_and_deallocframe
        {
                lr = memw(fp+#4)        // restore LR early, ahead of deallocframe
                r27:26 = memd(fp+#-16)
        }
        {
                r25:24 = memd(fp+#-8)
                deallocframe
                jumpr lr                // lr was already reloaded by the previous packet
        }
FUNCTION_END __restore_r24_through_r27_and_deallocframe




// __restore_r24_through_r25_and_deallocframe
//
// Reload r25:24 from fp+#-8, tear down the frame with deallocframe, and
// return.
//
// Here the load bandwidth is maximized: the register reload and deallocframe
// share one packet, and the return is issued afterwards.

FUNCTION_BEGIN __restore_r24_through_r25_and_deallocframe
        {
                r25:24 = memd(fp+#-8)
                deallocframe
        }
                jumpr lr                // executes after deallocframe, in its own packet
FUNCTION_END __restore_r24_through_r25_and_deallocframe


// Source: compiler-rt/lib/builtins/hexagon/common_entry_exit_abi1.S