1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | .file "wm_shrx.S" |
3 | /*---------------------------------------------------------------------------+ |
4 | | wm_shrx.S | |
5 | | | |
6 | | 64 bit right shift functions | |
7 | | | |
8 | | Copyright (C) 1992,1995 | |
9 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | |
10 | | Australia. E-mail billm@jacobi.maths.monash.edu.au | |
11 | | | |
12 | | Call from C as: | |
13 | | unsigned FPU_shrx(void *arg1, unsigned arg2) | |
14 | | and | |
15 | | unsigned FPU_shrxs(void *arg1, unsigned arg2) | |
16 | | | |
17 | +---------------------------------------------------------------------------*/ |
18 | |
19 | #include "fpu_emu.h" |
20 | |
21 | .text |
/*---------------------------------------------------------------------------+
 | unsigned FPU_shrx(void *arg1, unsigned arg2)                              |
 |                                                                           |
 | Extended right shift; counts below 32 stay on the fall-through path.      |
 | arg1 points at a 64 bit value held as two 32 bit words, low word first.   |
 | arg2 is the number of bit positions to shift right.                       |
 | The value is treated as the top of a 96 bit quantity whose low 32 bits    |
 | (eax, the extension) start out as zero:                                   |
 |      [  64 bit arg  ][ eax ]                                              |
 |          shift right --------->                                           |
 | The shifted value is written back through arg1 and the extension is       |
 | returned in eax.  Bits pushed out below the extension are discarded.      |
 | Counts of 96 or more clear the value and return 0.                        |
 | ecx and edx are used as scratch; esi and ebx are preserved.               |
 +---------------------------------------------------------------------------*/

SYM_FUNC_START(FPU_shrx)
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	movl	PARAM1,%esi		/* esi -> 64 bit operand */
	movl	PARAM2,%ecx		/* ecx = shift count */
	cmpl	$32,%ecx		/* shrd only accepts counts 0..31 */
	jae	.Lshrx_32_or_more

	/* count is 0..31: a three word cascade, ebx is needed as well */
	pushl	%ebx
	movl	(%esi),%ebx		/* low word */
	movl	4(%esi),%edx		/* high word */
	xorl	%eax,%eax		/* extension starts empty */
	shrdl	%cl,%ebx,%eax		/* low word spills into extension */
	shrdl	%cl,%edx,%ebx		/* high word spills into low word */
	shrl	%cl,%edx
	movl	%ebx,(%esi)
	movl	%edx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	RET

.Lshrx_32_or_more:
	cmpl	$64,%ecx
	jae	.Lshrx_64_or_more

	/* count is 32..63: move down one whole word, then by (count - 32) */
	subb	$32,%cl
	movl	(%esi),%eax		/* low word becomes the extension */
	movl	4(%esi),%edx		/* high word becomes the low word */
	shrdl	%cl,%edx,%eax
	shrl	%cl,%edx
	movl	%edx,(%esi)
	movl	$0,4(%esi)		/* nothing is left in the high word */
	popl	%esi
	leave
	RET

.Lshrx_64_or_more:
	cmpl	$96,%ecx
	jae	.Lshrx_96_or_more

	/* count is 64..95: only the high word can still reach the extension */
	subb	$64,%cl
	movl	4(%esi),%eax		/* high word becomes the extension */
	shrl	%cl,%eax
	xorl	%edx,%edx
	movl	%edx,(%esi)		/* both words of the value are now 0 */
	movl	%edx,4(%esi)
	popl	%esi
	leave
	RET

.Lshrx_96_or_more:
	/* count is 96 or more: value and extension are both zero */
	xorl	%eax,%eax
	movl	%eax,(%esi)
	movl	%eax,4(%esi)
	popl	%esi
	leave
	RET
SYM_FUNC_END(FPU_shrx)
97 | |
98 | |
/*---------------------------------------------------------------------------+
 | unsigned FPU_shrxs(void *arg1, unsigned arg2)                             |
 |                                                                           |
 | Extended right shift that also reports a sticky flag; the code falls      |
 | straight through for counts 32..63.                                       |
 | arg1 points at a 64 bit value held as two 32 bit words, low word first.   |
 | arg2 is the number of bit positions to shift right.                       |
 | The value is treated as the top of a 96 bit quantity whose low 32 bits    |
 | (eax, the extension) start out as zero:                                   |
 |      [  64 bit arg  ][ eax ]                                              |
 |          shift right --------->                                           |
 | The shifted value is written back through arg1.  eax returns the          |
 | extension with its low 8 bits replaced by a flag: 0x01 if any bit         |
 | shifted out of the arg, other than the first (top) one, was set,          |
 | 0x00 otherwise.                                                           |
 | Counts of 96 or more clear the value; eax is then 1 if the value was      |
 | non-zero and 0 if it was zero.                                            |
 | ecx and edx are used as scratch; esi and ebx are preserved.               |
 +---------------------------------------------------------------------------*/
SYM_FUNC_START(FPU_shrxs)
	pushl	%ebp
	movl	%esp,%ebp
	pushl	%esi
	pushl	%ebx
	movl	PARAM1,%esi		/* esi -> 64 bit operand */
	movl	PARAM2,%ecx		/* ecx = shift count */
	cmpl	$64,%ecx		/* counts of 64 and up are handled apart */
	jae	.Lshrxs_64_or_more

	cmpl	$32,%ecx		/* shrd only accepts counts 0..31 */
	jb	.Lshrxs_below_32

	/*
	 * count is 32..63.  This range is reached without a taken branch,
	 * on the assumption that it is the most common request (small
	 * integers).
	 */
	subb	$32,%cl
	movl	(%esi),%eax		/* low word heads for the extension */
	movl	4(%esi),%edx		/* high word heads for the low word */
	xorl	%ebx,%ebx		/* ebx collects what drops below eax */
	shrdl	%cl,%eax,%ebx
	shrdl	%cl,%edx,%eax
	shrl	%cl,%edx
	testl	%ebx,%ebx		/* anything below the extension ? */
	setne	%bl
	testl	$0x7fffffff,%eax	/* extension, top bit excluded */
	setne	%bh
	testw	%bx,%bx			/* either of the two checks hit ? */
	setne	%al			/* flag replaces low byte of eax */
	movl	%edx,(%esi)
	movl	$0,4(%esi)		/* nothing is left in the high word */
	popl	%ebx
	popl	%esi
	leave
	RET

.Lshrxs_below_32:
	/* count is 0..31: every shifted-out bit is still inside eax */
	movl	(%esi),%ebx		/* low word */
	movl	4(%esi),%edx		/* high word */
	xorl	%eax,%eax		/* extension starts empty */
	shrdl	%cl,%ebx,%eax
	shrdl	%cl,%edx,%ebx
	shrl	%cl,%edx
	testl	$0x7fffffff,%eax	/* extension, top bit excluded */
	setne	%al			/* flag replaces low byte of eax */
	movl	%ebx,(%esi)
	movl	%edx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	RET

.Lshrxs_64_or_more:
	cmpl	$96,%ecx
	jae	.Lshrxs_96_or_more

	/* count is 64..95: the whole value ends up at or below the extension */
	subb	$64,%cl
	movl	(%esi),%ebx		/* low word */
	movl	4(%esi),%eax		/* high word heads for the extension */
	xorl	%edx,%edx		/* edx collects what drops below ebx */
	shrdl	%cl,%ebx,%edx
	shrdl	%cl,%eax,%ebx
	shrl	%cl,%eax
	orl	%ebx,%edx		/* anything below the extension ? */
	setne	%bl
	testl	$0x7fffffff,%eax	/* extension, top bit excluded */
	setne	%bh
	testw	%bx,%bx			/* either of the two checks hit ? */
	setne	%al			/* flag replaces low byte of eax */
	xorl	%edx,%edx
	movl	%edx,(%esi)		/* both words of the value are now 0 */
	movl	%edx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	RET

.Lshrxs_96_or_more:
	/* count is 96 or more: eax is just "was the value non-zero ?" */
	xorl	%eax,%eax
	movl	(%esi),%ebx
	orl	4(%esi),%ebx		/* ZF clear if either word had a bit set */
	setne	%al
	xorl	%ebx,%ebx
	movl	%ebx,(%esi)		/* both words of the value are now 0 */
	movl	%ebx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	RET
SYM_FUNC_END(FPU_shrxs)
208 | |