1//===----------------------Hexagon builtin routine ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9
10// Functions that implement common sequences in function prologues and epilogues
11// used to save code size
12
// FUNCTION_BEGIN name
// Opens the definition of a global function symbol: switches to the .text
// section, declares \name as a global symbol of type @function, emits the
// .falign alignment directive, and finally places the \name label.
// The symbol's size is not set here; it is set later by FUNCTION_END or
// FALLTHROUGH_TAIL_CALL.
13 .macro FUNCTION_BEGIN name
14 .text
15 .globl \name
16 .type \name, @function
17 .falign
18\name:
19 .endm
20
// FUNCTION_END name
// Closes a function by recording the size of \name as the distance from the
// \name label to the current location.
21 .macro FUNCTION_END name
22 .size \name, . - \name
23 .endm
24
// FALLTHROUGH_TAIL_CALL name0 name1
// Ends \name0 and starts \name1 at the same location, so that code running in
// \name0 continues straight into \name1 without a branch.
// Records the size of \name0 (from its label to the current location), then
// declares \name1 as a global @function symbol, emits .falign, and places the
// \name1 label.  Unlike FUNCTION_BEGIN it does not emit .text; the current
// section is kept.
25 .macro FALLTHROUGH_TAIL_CALL name0 name1
26 .size \name0, . - \name0
27 .globl \name1
28 .type \name1, @function
29 .falign
30\name1:
31 .endm
32
33
34
35
36// Save r27:26 at fp+#-8, r25:24 at fp+#-16, r23:22 at fp+#-24, r21:20 at
37// fp+#-32, r19:18 at fp+#-40, and r17:16 at fp+#-48.
38
39
40
41
42// The compiler knows that the __save_* functions clobber LR. No other
43// registers should be used without informing the compiler.
44
45// Since we can only issue one store per packet, we don't hurt performance by
46// simply jumping to the right point in this sequence of stores.
47
// Register-save chain with five global entry points.  Each entry stores one
// register pair and then falls through into the next entry:
//   __save_r27_through_r16  stores r17:16, continues as __save_r27_through_r18
//   __save_r27_through_r18  stores r19:18, continues as __save_r27_through_r20
//   __save_r27_through_r20  stores r21:20, continues as __save_r27_through_r22
//   __save_r27_through_r22  stores r23:22, continues as __save_r27_through_r24
//   __save_r27_through_r24  stores r25:24 and r27:26, then returns
// All stores are addressed relative to fp and the routine returns with
// jumpr lr.  No registers other than the saved pairs, fp and lr are
// referenced.
// NOTE(review): presumably the caller has already set up its frame so that fp
// is valid on entry -- that setup is not visible in this file.
48FUNCTION_BEGIN __save_r27_through_r16
49 memd(fp+#-48) = r17:16
// Entry point for callers that do not need r17:16 saved.
50FALLTHROUGH_TAIL_CALL __save_r27_through_r16 __save_r27_through_r18
51 memd(fp+#-40) = r19:18
// Entry point for callers that start saving at r21:20.
52FALLTHROUGH_TAIL_CALL __save_r27_through_r18 __save_r27_through_r20
53 memd(fp+#-32) = r21:20
// Entry point for callers that start saving at r23:22.
54FALLTHROUGH_TAIL_CALL __save_r27_through_r20 __save_r27_through_r22
55 memd(fp+#-24) = r23:22
// Last entry point: saves r25:24 and r27:26.
56FALLTHROUGH_TAIL_CALL __save_r27_through_r22 __save_r27_through_r24
57 memd(fp+#-16) = r25:24
// The final store shares a packet with the return.
58 {
59 memd(fp+#-8) = r27:26
60 jumpr lr
61 }
62FUNCTION_END __save_r27_through_r24
63
64
65
66
67// For each of the *_before_sibcall functions, jumpr lr is executed in parallel
68// with deallocframe. That way, the return gets the old value of lr, which is
69// where these functions need to return, and at the same time, lr gets the value
70// it needs going into the sibcall.
71
// Restore chain (before-sibcall variant) with two global entry points:
//   __restore_r27_through_r20_and_deallocframe_before_sibcall
//       loads r21:20 and r23:22 in one packet, then falls through
//   __restore_r27_through_r24_and_deallocframe_before_sibcall
//       loads r25:24 and, in the same packet, jumps to
//       __restore_r27_through_r26_and_deallocframe_before_sibcall
// This block does not itself deallocate the frame or return; the jump target
// (defined later in this file) performs the final r27:26 load, the
// deallocframe, and the jumpr lr.
// All loads are addressed relative to fp.
72FUNCTION_BEGIN __restore_r27_through_r20_and_deallocframe_before_sibcall
// Two loads issued together in a single packet.
73 {
74 r21:20 = memd(fp+#-32)
75 r23:22 = memd(fp+#-24)
76 }
77FALLTHROUGH_TAIL_CALL __restore_r27_through_r20_and_deallocframe_before_sibcall __restore_r27_through_r24_and_deallocframe_before_sibcall
// Restore r25:24 and branch to the shared tail in the same packet.
78 {
79 r25:24 = memd(fp+#-16)
80 jump __restore_r27_through_r26_and_deallocframe_before_sibcall
81 }
82FUNCTION_END __restore_r27_through_r24_and_deallocframe_before_sibcall
83
84
85
86
// Restore chain (before-sibcall variant) with four global entry points, each
// falling through into the next:
//   __restore_r27_through_r16_and_deallocframe_before_sibcall
//       loads r17:16
//   __restore_r27_through_r18_and_deallocframe_before_sibcall
//       loads r19:18 and r21:20
//   __restore_r27_through_r22_and_deallocframe_before_sibcall
//       loads r23:22 and r25:24
//   __restore_r27_through_r26_and_deallocframe_before_sibcall
//       loads r27:26, executes deallocframe, and returns with jumpr lr
// The last entry is also the jump target used by
// __restore_r27_through_r24_and_deallocframe_before_sibcall.
// All loads are addressed relative to fp.
87FUNCTION_BEGIN __restore_r27_through_r16_and_deallocframe_before_sibcall
// Single load on its own; the paired loads start at the next entry point.
88 r17:16 = memd(fp+#-48)
89FALLTHROUGH_TAIL_CALL __restore_r27_through_r16_and_deallocframe_before_sibcall __restore_r27_through_r18_and_deallocframe_before_sibcall
90 {
91 r19:18 = memd(fp+#-40)
92 r21:20 = memd(fp+#-32)
93 }
94FALLTHROUGH_TAIL_CALL __restore_r27_through_r18_and_deallocframe_before_sibcall __restore_r27_through_r22_and_deallocframe_before_sibcall
95 {
96 r23:22 = memd(fp+#-24)
97 r25:24 = memd(fp+#-16)
98 }
99FALLTHROUGH_TAIL_CALL __restore_r27_through_r22_and_deallocframe_before_sibcall __restore_r27_through_r26_and_deallocframe_before_sibcall
// deallocframe and jumpr lr share one packet on purpose: as the file comment
// for the *_before_sibcall functions explains, the return uses the old value
// of lr (the address this helper must return to) while lr simultaneously
// receives the value needed going into the sibcall.
100 {
101 r27:26 = memd(fp+#-8)
102 deallocframe
103 jumpr lr
104 }
105FUNCTION_END __restore_r27_through_r26_and_deallocframe_before_sibcall
106
107
108
109
110// Here we use the extra load bandwidth to restore LR early, allowing the return
111// to occur in parallel with the deallocframe.
112
// Restore-and-return chain with three global entry points, each falling
// through into the next:
//   __restore_r27_through_r16_and_deallocframe  loads r17:16 and r19:18
//   __restore_r27_through_r20_and_deallocframe  loads r21:20 and r23:22
//   __restore_r27_through_r24_and_deallocframe  loads lr and r25:24, then
//       loads r27:26, executes deallocframe, and returns with jumpr lr
// All loads are addressed relative to fp.
113FUNCTION_BEGIN __restore_r27_through_r16_and_deallocframe
114 {
115 r17:16 = memd(fp+#-48)
116 r19:18 = memd(fp+#-40)
117 }
118FALLTHROUGH_TAIL_CALL __restore_r27_through_r16_and_deallocframe __restore_r27_through_r20_and_deallocframe
119 {
120 r21:20 = memd(fp+#-32)
121 r23:22 = memd(fp+#-24)
122 }
123FALLTHROUGH_TAIL_CALL __restore_r27_through_r20_and_deallocframe __restore_r27_through_r24_and_deallocframe
// lr is loaded from fp+#4 one packet ahead of the return packet, so that the
// jumpr lr below can be issued together with deallocframe.
// NOTE(review): fp+#4 is presumably the saved-LR slot of the frame -- confirm
// against the Hexagon frame layout.
124 {
125 lr = memw(fp+#4)
126 r25:24 = memd(fp+#-16)
127 }
// Final load, frame deallocation, and return all in one packet; the return
// uses the lr value loaded by the previous packet.
128 {
129 r27:26 = memd(fp+#-8)
130 deallocframe
131 jumpr lr
132 }
133FUNCTION_END __restore_r27_through_r24_and_deallocframe
134
135
136
137
138// Here the load bandwidth is maximized for all three functions.
139
// Restore-and-return chain with three global entry points, each falling
// through into the next:
//   __restore_r27_through_r18_and_deallocframe  loads r19:18 and r21:20
//   __restore_r27_through_r22_and_deallocframe  loads r23:22 and r25:24
//   __restore_r27_through_r26_and_deallocframe  loads r27:26 and executes
//       deallocframe, then returns with jumpr lr
// Every entry point begins with a packet containing two memory operations.
// All loads are addressed relative to fp.
140FUNCTION_BEGIN __restore_r27_through_r18_and_deallocframe
141 {
142 r19:18 = memd(fp+#-40)
143 r21:20 = memd(fp+#-32)
144 }
145FALLTHROUGH_TAIL_CALL __restore_r27_through_r18_and_deallocframe __restore_r27_through_r22_and_deallocframe
146 {
147 r23:22 = memd(fp+#-24)
148 r25:24 = memd(fp+#-16)
149 }
150FALLTHROUGH_TAIL_CALL __restore_r27_through_r22_and_deallocframe __restore_r27_through_r26_and_deallocframe
151 {
152 r27:26 = memd(fp+#-8)
153 deallocframe
154 }
// The return is issued after the deallocframe packet rather than inside it, so
// it uses the lr value left by deallocframe (contrast with the
// *_before_sibcall variants, where the two instructions share a packet).
155 jumpr lr
156FUNCTION_END __restore_r27_through_r26_and_deallocframe
157

source code of compiler-rt/lib/builtins/hexagon/common_entry_exit_legacy.S