1 | /* |
2 | * Copyright 2012-15 Advanced Micro Devices, Inc. |
3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
5 | * copy of this software and associated documentation files (the "Software"), |
6 | * to deal in the Software without restriction, including without limitation |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
8 | * and/or sell copies of the Software, and to permit persons to whom the |
9 | * Software is furnished to do so, subject to the following conditions: |
10 | * |
11 | * The above copyright notice and this permission notice shall be included in |
12 | * all copies or substantial portions of the Software. |
13 | * |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
20 | * OTHER DEALINGS IN THE SOFTWARE. |
21 | * |
22 | * Authors: AMD |
23 | * |
24 | */ |
25 | |
26 | #include "dm_services.h" |
27 | #include "include/fixed31_32.h" |
28 | |
29 | static const struct fixed31_32 dc_fixpt_two_pi = { 26986075409LL }; |
30 | static const struct fixed31_32 dc_fixpt_ln2 = { 2977044471LL }; |
31 | static const struct fixed31_32 dc_fixpt_ln2_div_2 = { 1488522236LL }; |
32 | |
33 | static inline unsigned long long abs_i64( |
34 | long long arg) |
35 | { |
36 | if (arg > 0) |
37 | return (unsigned long long)arg; |
38 | else |
39 | return (unsigned long long)(-arg); |
40 | } |
41 | |
42 | /* |
43 | * @brief |
44 | * result = dividend / divisor |
45 | * *remainder = dividend % divisor |
46 | */ |
47 | static inline unsigned long long complete_integer_division_u64( |
48 | unsigned long long dividend, |
49 | unsigned long long divisor, |
50 | unsigned long long *remainder) |
51 | { |
52 | unsigned long long result; |
53 | |
54 | ASSERT(divisor); |
55 | |
56 | result = div64_u64_rem(dividend, divisor, remainder); |
57 | |
58 | return result; |
59 | } |
60 | |
61 | |
62 | #define FRACTIONAL_PART_MASK \ |
63 | ((1ULL << FIXED31_32_BITS_PER_FRACTIONAL_PART) - 1) |
64 | |
65 | #define GET_INTEGER_PART(x) \ |
66 | ((x) >> FIXED31_32_BITS_PER_FRACTIONAL_PART) |
67 | |
68 | #define GET_FRACTIONAL_PART(x) \ |
69 | (FRACTIONAL_PART_MASK & (x)) |
70 | |
71 | struct fixed31_32 dc_fixpt_from_fraction(long long numerator, long long denominator) |
72 | { |
73 | struct fixed31_32 res; |
74 | |
75 | bool arg1_negative = numerator < 0; |
76 | bool arg2_negative = denominator < 0; |
77 | |
78 | unsigned long long arg1_value = arg1_negative ? -numerator : numerator; |
79 | unsigned long long arg2_value = arg2_negative ? -denominator : denominator; |
80 | |
81 | unsigned long long remainder; |
82 | |
83 | /* determine integer part */ |
84 | |
85 | unsigned long long res_value = complete_integer_division_u64( |
86 | dividend: arg1_value, divisor: arg2_value, remainder: &remainder); |
87 | |
88 | ASSERT(res_value <= LONG_MAX); |
89 | |
90 | /* determine fractional part */ |
91 | { |
92 | unsigned int i = FIXED31_32_BITS_PER_FRACTIONAL_PART; |
93 | |
94 | do { |
95 | remainder <<= 1; |
96 | |
97 | res_value <<= 1; |
98 | |
99 | if (remainder >= arg2_value) { |
100 | res_value |= 1; |
101 | remainder -= arg2_value; |
102 | } |
103 | } while (--i != 0); |
104 | } |
105 | |
106 | /* round up LSB */ |
107 | { |
108 | unsigned long long summand = (remainder << 1) >= arg2_value; |
109 | |
110 | ASSERT(res_value <= LLONG_MAX - summand); |
111 | |
112 | res_value += summand; |
113 | } |
114 | |
115 | res.value = (long long)res_value; |
116 | |
117 | if (arg1_negative ^ arg2_negative) |
118 | res.value = -res.value; |
119 | |
120 | return res; |
121 | } |
122 | |
123 | struct fixed31_32 dc_fixpt_mul(struct fixed31_32 arg1, struct fixed31_32 arg2) |
124 | { |
125 | struct fixed31_32 res; |
126 | |
127 | bool arg1_negative = arg1.value < 0; |
128 | bool arg2_negative = arg2.value < 0; |
129 | |
130 | unsigned long long arg1_value = arg1_negative ? -arg1.value : arg1.value; |
131 | unsigned long long arg2_value = arg2_negative ? -arg2.value : arg2.value; |
132 | |
133 | unsigned long long arg1_int = GET_INTEGER_PART(arg1_value); |
134 | unsigned long long arg2_int = GET_INTEGER_PART(arg2_value); |
135 | |
136 | unsigned long long arg1_fra = GET_FRACTIONAL_PART(arg1_value); |
137 | unsigned long long arg2_fra = GET_FRACTIONAL_PART(arg2_value); |
138 | |
139 | unsigned long long tmp; |
140 | |
141 | res.value = arg1_int * arg2_int; |
142 | |
143 | ASSERT(res.value <= LONG_MAX); |
144 | |
145 | res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART; |
146 | |
147 | tmp = arg1_int * arg2_fra; |
148 | |
149 | ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); |
150 | |
151 | res.value += tmp; |
152 | |
153 | tmp = arg2_int * arg1_fra; |
154 | |
155 | ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); |
156 | |
157 | res.value += tmp; |
158 | |
159 | tmp = arg1_fra * arg2_fra; |
160 | |
161 | tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) + |
162 | (tmp >= (unsigned long long)dc_fixpt_half.value); |
163 | |
164 | ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); |
165 | |
166 | res.value += tmp; |
167 | |
168 | if (arg1_negative ^ arg2_negative) |
169 | res.value = -res.value; |
170 | |
171 | return res; |
172 | } |
173 | |
174 | struct fixed31_32 dc_fixpt_sqr(struct fixed31_32 arg) |
175 | { |
176 | struct fixed31_32 res; |
177 | |
178 | unsigned long long arg_value = abs_i64(arg: arg.value); |
179 | |
180 | unsigned long long arg_int = GET_INTEGER_PART(arg_value); |
181 | |
182 | unsigned long long arg_fra = GET_FRACTIONAL_PART(arg_value); |
183 | |
184 | unsigned long long tmp; |
185 | |
186 | res.value = arg_int * arg_int; |
187 | |
188 | ASSERT(res.value <= LONG_MAX); |
189 | |
190 | res.value <<= FIXED31_32_BITS_PER_FRACTIONAL_PART; |
191 | |
192 | tmp = arg_int * arg_fra; |
193 | |
194 | ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); |
195 | |
196 | res.value += tmp; |
197 | |
198 | ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); |
199 | |
200 | res.value += tmp; |
201 | |
202 | tmp = arg_fra * arg_fra; |
203 | |
204 | tmp = (tmp >> FIXED31_32_BITS_PER_FRACTIONAL_PART) + |
205 | (tmp >= (unsigned long long)dc_fixpt_half.value); |
206 | |
207 | ASSERT(tmp <= (unsigned long long)(LLONG_MAX - res.value)); |
208 | |
209 | res.value += tmp; |
210 | |
211 | return res; |
212 | } |
213 | |
214 | struct fixed31_32 dc_fixpt_recip(struct fixed31_32 arg) |
215 | { |
216 | /* |
217 | * @note |
218 | * Good idea to use Newton's method |
219 | */ |
220 | |
221 | ASSERT(arg.value); |
222 | |
223 | return dc_fixpt_from_fraction( |
224 | numerator: dc_fixpt_one.value, |
225 | denominator: arg.value); |
226 | } |
227 | |
228 | struct fixed31_32 dc_fixpt_sinc(struct fixed31_32 arg) |
229 | { |
230 | struct fixed31_32 square; |
231 | |
232 | struct fixed31_32 res = dc_fixpt_one; |
233 | |
234 | int n = 27; |
235 | |
236 | struct fixed31_32 arg_norm = arg; |
237 | |
238 | if (dc_fixpt_le( |
239 | arg1: dc_fixpt_two_pi, |
240 | arg2: dc_fixpt_abs(arg))) { |
241 | arg_norm = dc_fixpt_sub( |
242 | arg1: arg_norm, |
243 | arg2: dc_fixpt_mul_int( |
244 | arg1: dc_fixpt_two_pi, |
245 | arg2: (int)div64_s64( |
246 | dividend: arg_norm.value, |
247 | divisor: dc_fixpt_two_pi.value))); |
248 | } |
249 | |
250 | square = dc_fixpt_sqr(arg: arg_norm); |
251 | |
252 | do { |
253 | res = dc_fixpt_sub( |
254 | arg1: dc_fixpt_one, |
255 | arg2: dc_fixpt_div_int( |
256 | arg1: dc_fixpt_mul( |
257 | arg1: square, |
258 | arg2: res), |
259 | arg2: n * (n - 1))); |
260 | |
261 | n -= 2; |
262 | } while (n > 2); |
263 | |
264 | if (arg.value != arg_norm.value) |
265 | res = dc_fixpt_div( |
266 | arg1: dc_fixpt_mul(arg1: res, arg2: arg_norm), |
267 | arg2: arg); |
268 | |
269 | return res; |
270 | } |
271 | |
272 | struct fixed31_32 dc_fixpt_sin(struct fixed31_32 arg) |
273 | { |
274 | return dc_fixpt_mul( |
275 | arg1: arg, |
276 | arg2: dc_fixpt_sinc(arg)); |
277 | } |
278 | |
279 | struct fixed31_32 dc_fixpt_cos(struct fixed31_32 arg) |
280 | { |
281 | /* TODO implement argument normalization */ |
282 | |
283 | const struct fixed31_32 square = dc_fixpt_sqr(arg); |
284 | |
285 | struct fixed31_32 res = dc_fixpt_one; |
286 | |
287 | int n = 26; |
288 | |
289 | do { |
290 | res = dc_fixpt_sub( |
291 | arg1: dc_fixpt_one, |
292 | arg2: dc_fixpt_div_int( |
293 | arg1: dc_fixpt_mul( |
294 | arg1: square, |
295 | arg2: res), |
296 | arg2: n * (n - 1))); |
297 | |
298 | n -= 2; |
299 | } while (n != 0); |
300 | |
301 | return res; |
302 | } |
303 | |
304 | /* |
305 | * @brief |
306 | * result = exp(arg), |
307 | * where abs(arg) < 1 |
308 | * |
309 | * Calculated as Taylor series. |
310 | */ |
311 | static struct fixed31_32 fixed31_32_exp_from_taylor_series(struct fixed31_32 arg) |
312 | { |
313 | unsigned int n = 9; |
314 | |
315 | struct fixed31_32 res = dc_fixpt_from_fraction( |
316 | numerator: n + 2, |
317 | denominator: n + 1); |
318 | /* TODO find correct res */ |
319 | |
320 | ASSERT(dc_fixpt_lt(arg, dc_fixpt_one)); |
321 | |
322 | do |
323 | res = dc_fixpt_add( |
324 | arg1: dc_fixpt_one, |
325 | arg2: dc_fixpt_div_int( |
326 | arg1: dc_fixpt_mul( |
327 | arg1: arg, |
328 | arg2: res), |
329 | arg2: n)); |
330 | while (--n != 1); |
331 | |
332 | return dc_fixpt_add( |
333 | arg1: dc_fixpt_one, |
334 | arg2: dc_fixpt_mul( |
335 | arg1: arg, |
336 | arg2: res)); |
337 | } |
338 | |
339 | struct fixed31_32 dc_fixpt_exp(struct fixed31_32 arg) |
340 | { |
341 | /* |
342 | * @brief |
343 | * Main equation is: |
344 | * exp(x) = exp(r + m * ln(2)) = (1 << m) * exp(r), |
345 | * where m = round(x / ln(2)), r = x - m * ln(2) |
346 | */ |
347 | |
348 | if (dc_fixpt_le( |
349 | arg1: dc_fixpt_ln2_div_2, |
350 | arg2: dc_fixpt_abs(arg))) { |
351 | int m = dc_fixpt_round( |
352 | arg: dc_fixpt_div( |
353 | arg1: arg, |
354 | arg2: dc_fixpt_ln2)); |
355 | |
356 | struct fixed31_32 r = dc_fixpt_sub( |
357 | arg1: arg, |
358 | arg2: dc_fixpt_mul_int( |
359 | arg1: dc_fixpt_ln2, |
360 | arg2: m)); |
361 | |
362 | ASSERT(m != 0); |
363 | |
364 | ASSERT(dc_fixpt_lt( |
365 | dc_fixpt_abs(r), |
366 | dc_fixpt_one)); |
367 | |
368 | if (m > 0) |
369 | return dc_fixpt_shl( |
370 | arg: fixed31_32_exp_from_taylor_series(arg: r), |
371 | shift: (unsigned char)m); |
372 | else |
373 | return dc_fixpt_div_int( |
374 | arg1: fixed31_32_exp_from_taylor_series(arg: r), |
375 | arg2: 1LL << -m); |
376 | } else if (arg.value != 0) |
377 | return fixed31_32_exp_from_taylor_series(arg); |
378 | else |
379 | return dc_fixpt_one; |
380 | } |
381 | |
382 | struct fixed31_32 dc_fixpt_log(struct fixed31_32 arg) |
383 | { |
384 | struct fixed31_32 res = dc_fixpt_neg(arg: dc_fixpt_one); |
385 | /* TODO improve 1st estimation */ |
386 | |
387 | struct fixed31_32 error; |
388 | |
389 | ASSERT(arg.value > 0); |
390 | /* TODO if arg is negative, return NaN */ |
391 | /* TODO if arg is zero, return -INF */ |
392 | |
393 | do { |
394 | struct fixed31_32 res1 = dc_fixpt_add( |
395 | arg1: dc_fixpt_sub( |
396 | arg1: res, |
397 | arg2: dc_fixpt_one), |
398 | arg2: dc_fixpt_div( |
399 | arg1: arg, |
400 | arg2: dc_fixpt_exp(arg: res))); |
401 | |
402 | error = dc_fixpt_sub( |
403 | arg1: res, |
404 | arg2: res1); |
405 | |
406 | res = res1; |
407 | /* TODO determine max_allowed_error based on quality of exp() */ |
408 | } while (abs_i64(arg: error.value) > 100ULL); |
409 | |
410 | return res; |
411 | } |
412 | |
413 | |
414 | /* this function is a generic helper to translate fixed point value to |
415 | * specified integer format that will consist of integer_bits integer part and |
416 | * fractional_bits fractional part. For example it is used in |
417 | * dc_fixpt_u2d19 to receive 2 bits integer part and 19 bits fractional |
418 | * part in 32 bits. It is used in hw programming (scaler) |
419 | */ |
420 | |
421 | static inline unsigned int ux_dy( |
422 | long long value, |
423 | unsigned int integer_bits, |
424 | unsigned int fractional_bits) |
425 | { |
426 | /* 1. create mask of integer part */ |
427 | unsigned int result = (1 << integer_bits) - 1; |
428 | /* 2. mask out fractional part */ |
429 | unsigned int fractional_part = FRACTIONAL_PART_MASK & value; |
430 | /* 3. shrink fixed point integer part to be of integer_bits width*/ |
431 | result &= GET_INTEGER_PART(value); |
432 | /* 4. make space for fractional part to be filled in after integer */ |
433 | result <<= fractional_bits; |
434 | /* 5. shrink fixed point fractional part to of fractional_bits width*/ |
435 | fractional_part >>= FIXED31_32_BITS_PER_FRACTIONAL_PART - fractional_bits; |
436 | /* 6. merge the result */ |
437 | return result | fractional_part; |
438 | } |
439 | |
440 | static inline unsigned int clamp_ux_dy( |
441 | long long value, |
442 | unsigned int integer_bits, |
443 | unsigned int fractional_bits, |
444 | unsigned int min_clamp) |
445 | { |
446 | unsigned int truncated_val = ux_dy(value, integer_bits, fractional_bits); |
447 | |
448 | if (value >= (1LL << (integer_bits + FIXED31_32_BITS_PER_FRACTIONAL_PART))) |
449 | return (1 << (integer_bits + fractional_bits)) - 1; |
450 | else if (truncated_val > min_clamp) |
451 | return truncated_val; |
452 | else |
453 | return min_clamp; |
454 | } |
455 | |
456 | unsigned int dc_fixpt_u4d19(struct fixed31_32 arg) |
457 | { |
458 | return ux_dy(value: arg.value, integer_bits: 4, fractional_bits: 19); |
459 | } |
460 | |
461 | unsigned int dc_fixpt_u3d19(struct fixed31_32 arg) |
462 | { |
463 | return ux_dy(value: arg.value, integer_bits: 3, fractional_bits: 19); |
464 | } |
465 | |
466 | unsigned int dc_fixpt_u2d19(struct fixed31_32 arg) |
467 | { |
468 | return ux_dy(value: arg.value, integer_bits: 2, fractional_bits: 19); |
469 | } |
470 | |
471 | unsigned int dc_fixpt_u0d19(struct fixed31_32 arg) |
472 | { |
473 | return ux_dy(value: arg.value, integer_bits: 0, fractional_bits: 19); |
474 | } |
475 | |
476 | unsigned int dc_fixpt_clamp_u0d14(struct fixed31_32 arg) |
477 | { |
478 | return clamp_ux_dy(value: arg.value, integer_bits: 0, fractional_bits: 14, min_clamp: 1); |
479 | } |
480 | |
481 | unsigned int dc_fixpt_clamp_u0d10(struct fixed31_32 arg) |
482 | { |
483 | return clamp_ux_dy(value: arg.value, integer_bits: 0, fractional_bits: 10, min_clamp: 1); |
484 | } |
485 | |
486 | int dc_fixpt_s4d19(struct fixed31_32 arg) |
487 | { |
488 | if (arg.value < 0) |
489 | return -(int)ux_dy(value: dc_fixpt_abs(arg).value, integer_bits: 4, fractional_bits: 19); |
490 | else |
491 | return ux_dy(value: arg.value, integer_bits: 4, fractional_bits: 19); |
492 | } |
493 | |