1 | /* |
---|---|

2 | * ==================================================== |

3 | * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. |

4 | * |

5 | * Developed at SunPro, a Sun Microsystems, Inc. business. |

6 | * Permission to use, copy, modify, and distribute this |

7 | * software is freely granted, provided that this notice |

8 | * is preserved. |

9 | * ==================================================== |

10 | */ |

11 | |

12 | /* Expansions and modifications for 128-bit long double are |

13 | Copyright (C) 2001 Stephen L. Moshier <moshier@na-net.ornl.gov> |

14 | and are incorporated herein by permission of the author. The author |

15 | reserves the right to distribute this material elsewhere under different |

16 | copying permissions. These modifications are distributed here under |

17 | the following terms: |

18 | |

19 | This library is free software; you can redistribute it and/or |

20 | modify it under the terms of the GNU Lesser General Public |

21 | License as published by the Free Software Foundation; either |

22 | version 2.1 of the License, or (at your option) any later version. |

23 | |

24 | This library is distributed in the hope that it will be useful, |

25 | but WITHOUT ANY WARRANTY; without even the implied warranty of |

26 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |

27 | Lesser General Public License for more details. |

28 | |

29 | You should have received a copy of the GNU Lesser General Public |

30 | License along with this library; if not, write to the Free Software |

31 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ |

32 | |

33 | /* powq(x,y) return x**y |

34 | * |

35 | * n |

36 | * Method: Let x = 2 * (1+f) |

37 | * 1. Compute and return log2(x) in two pieces: |

38 | * log2(x) = w1 + w2, |

39 | * where w1 has 113-53 = 60 bit trailing zeros. |

40 | * 2. Perform y*log2(x) = n+y' by simulating muti-precision |

41 | * arithmetic, where |y'|<=0.5. |

42 | * 3. Return x**y = 2**n*exp(y'*log2) |

43 | * |

44 | * Special cases: |

45 | * 1. (anything) ** 0 is 1 |

46 | * 2. (anything) ** 1 is itself |

47 | * 3. (anything) ** NAN is NAN |

48 | * 4. NAN ** (anything except 0) is NAN |

49 | * 5. +-(|x| > 1) ** +INF is +INF |

50 | * 6. +-(|x| > 1) ** -INF is +0 |

51 | * 7. +-(|x| < 1) ** +INF is +0 |

52 | * 8. +-(|x| < 1) ** -INF is +INF |

53 | * 9. +-1 ** +-INF is NAN |

54 | * 10. +0 ** (+anything except 0, NAN) is +0 |

55 | * 11. -0 ** (+anything except 0, NAN, odd integer) is +0 |

56 | * 12. +0 ** (-anything except 0, NAN) is +INF |

57 | * 13. -0 ** (-anything except 0, NAN, odd integer) is +INF |

58 | * 14. -0 ** (odd integer) = -( +0 ** (odd integer) ) |

59 | * 15. +INF ** (+anything except 0,NAN) is +INF |

60 | * 16. +INF ** (-anything except 0,NAN) is +0 |

61 | * 17. -INF ** (anything) = -0 ** (-anything) |

62 | * 18. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer) |

63 | * 19. (-anything except 0 and inf) ** (non-integer) is NAN |

64 | * |

65 | */ |

66 | |

67 | #include "quadmath-imp.h" |

68 | |

69 | static const __float128 bp[] = { |

70 | 1.0Q, |

71 | 1.5Q, |

72 | }; |

73 | |

74 | /* log_2(1.5) */ |

75 | static const __float128 dp_h[] = { |

76 | 0.0, |

77 | 5.8496250072115607565592654282227158546448E-1Q |

78 | }; |

79 | |

80 | /* Low part of log_2(1.5) */ |

81 | static const __float128 dp_l[] = { |

82 | 0.0, |

83 | 1.0579781240112554492329533686862998106046E-16Q |

84 | }; |

85 | |

86 | static const __float128 zero = 0.0Q, |

87 | one = 1.0Q, |

88 | two = 2.0Q, |

89 | two113 = 1.0384593717069655257060992658440192E34Q, |

90 | huge = 1.0e3000Q, |

91 | tiny = 1.0e-3000Q; |

92 | |

93 | /* 3/2 log x = 3 z + z^3 + z^3 (z^2 R(z^2)) |

94 | z = (x-1)/(x+1) |

95 | 1 <= x <= 1.25 |

96 | Peak relative error 2.3e-37 */ |

97 | static const __float128 LN[] = |

98 | { |

99 | -3.0779177200290054398792536829702930623200E1Q, |

100 | 6.5135778082209159921251824580292116201640E1Q, |

101 | -4.6312921812152436921591152809994014413540E1Q, |

102 | 1.2510208195629420304615674658258363295208E1Q, |

103 | -9.9266909031921425609179910128531667336670E-1Q |

104 | }; |

105 | static const __float128 LD[] = |

106 | { |

107 | -5.129862866715009066465422805058933131960E1Q, |

108 | 1.452015077564081884387441590064272782044E2Q, |

109 | -1.524043275549860505277434040464085593165E2Q, |

110 | 7.236063513651544224319663428634139768808E1Q, |

111 | -1.494198912340228235853027849917095580053E1Q |

112 | /* 1.0E0 */ |

113 | }; |

114 | |

115 | /* exp(x) = 1 + x - x / (1 - 2 / (x - x^2 R(x^2))) |

116 | 0 <= x <= 0.5 |

117 | Peak relative error 5.7e-38 */ |

118 | static const __float128 PN[] = |

119 | { |

120 | 5.081801691915377692446852383385968225675E8Q, |

121 | 9.360895299872484512023336636427675327355E6Q, |

122 | 4.213701282274196030811629773097579432957E4Q, |

123 | 5.201006511142748908655720086041570288182E1Q, |

124 | 9.088368420359444263703202925095675982530E-3Q, |

125 | }; |

126 | static const __float128 PD[] = |

127 | { |

128 | 3.049081015149226615468111430031590411682E9Q, |

129 | 1.069833887183886839966085436512368982758E8Q, |

130 | 8.259257717868875207333991924545445705394E5Q, |

131 | 1.872583833284143212651746812884298360922E3Q, |

132 | /* 1.0E0 */ |

133 | }; |

134 | |

135 | static const __float128 |

136 | /* ln 2 */ |

137 | lg2 = 6.9314718055994530941723212145817656807550E-1Q, |

138 | lg2_h = 6.9314718055994528622676398299518041312695E-1Q, |

139 | lg2_l = 2.3190468138462996154948554638754786504121E-17Q, |

140 | ovt = 8.0085662595372944372e-0017Q, |

141 | /* 2/(3*log(2)) */ |

142 | cp = 9.6179669392597560490661645400126142495110E-1Q, |

143 | cp_h = 9.6179669392597555432899980587535537779331E-1Q, |

144 | cp_l = 5.0577616648125906047157785230014751039424E-17Q; |

145 | |

146 | __float128 |

147 | powq (__float128 x, __float128 y) |

148 | { |

149 | __float128 z, ax, z_h, z_l, p_h, p_l; |

150 | __float128 y1, t1, t2, r, s, sgn, t, u, v, w; |

151 | __float128 s2, s_h, s_l, t_h, t_l, ay; |

152 | int32_t i, j, k, yisint, n; |

153 | uint32_t ix, iy; |

154 | int32_t hx, hy; |

155 | ieee854_float128 o, p, q; |

156 | |

157 | p.value = x; |

158 | hx = p.words32.w0; |

159 | ix = hx & 0x7fffffff; |

160 | |

161 | q.value = y; |

162 | hy = q.words32.w0; |

163 | iy = hy & 0x7fffffff; |

164 | |

165 | |

166 | /* y==zero: x**0 = 1 */ |

167 | if ((iy | q.words32.w1 | q.words32.w2 | q.words32.w3) == 0) |

168 | return one; |

169 | |

170 | /* 1.0**y = 1; -1.0**+-Inf = 1 */ |

171 | if (x == one) |

172 | return one; |

173 | if (x == -1.0Q && iy == 0x7fff0000 |

174 | && (q.words32.w1 | q.words32.w2 | q.words32.w3) == 0) |

175 | return one; |

176 | |

177 | /* +-NaN return x+y */ |

178 | if ((ix > 0x7fff0000) |

179 | || ((ix == 0x7fff0000) |

180 | && ((p.words32.w1 | p.words32.w2 | p.words32.w3) != 0)) |

181 | || (iy > 0x7fff0000) |

182 | || ((iy == 0x7fff0000) |

183 | && ((q.words32.w1 | q.words32.w2 | q.words32.w3) != 0))) |

184 | return x + y; |

185 | |

186 | /* determine if y is an odd int when x < 0 |

187 | * yisint = 0 ... y is not an integer |

188 | * yisint = 1 ... y is an odd int |

189 | * yisint = 2 ... y is an even int |

190 | */ |

191 | yisint = 0; |

192 | if (hx < 0) |

193 | { |

194 | if (iy >= 0x40700000) /* 2^113 */ |

195 | yisint = 2; /* even integer y */ |

196 | else if (iy >= 0x3fff0000) /* 1.0 */ |

197 | { |

198 | if (floorq (y) == y) |

199 | { |

200 | z = 0.5 * y; |

201 | if (floorq (z) == z) |

202 | yisint = 2; |

203 | else |

204 | yisint = 1; |

205 | } |

206 | } |

207 | } |

208 | |

209 | /* special value of y */ |

210 | if ((q.words32.w1 | q.words32.w2 | q.words32.w3) == 0) |

211 | { |

212 | if (iy == 0x7fff0000) /* y is +-inf */ |

213 | { |

214 | if (((ix - 0x3fff0000) | p.words32.w1 | p.words32.w2 | p.words32.w3) |

215 | == 0) |

216 | return y - y; /* +-1**inf is NaN */ |

217 | else if (ix >= 0x3fff0000) /* (|x|>1)**+-inf = inf,0 */ |

218 | return (hy >= 0) ? y : zero; |

219 | else /* (|x|<1)**-,+inf = inf,0 */ |

220 | return (hy < 0) ? -y : zero; |

221 | } |

222 | if (iy == 0x3fff0000) |

223 | { /* y is +-1 */ |

224 | if (hy < 0) |

225 | return one / x; |

226 | else |

227 | return x; |

228 | } |

229 | if (hy == 0x40000000) |

230 | return x * x; /* y is 2 */ |

231 | if (hy == 0x3ffe0000) |

232 | { /* y is 0.5 */ |

233 | if (hx >= 0) /* x >= +0 */ |

234 | return sqrtq (x); |

235 | } |

236 | } |

237 | |

238 | ax = fabsq (x); |

239 | /* special value of x */ |

240 | if ((p.words32.w1 | p.words32.w2 | p.words32.w3) == 0) |

241 | { |

242 | if (ix == 0x7fff0000 || ix == 0 || ix == 0x3fff0000) |

243 | { |

244 | z = ax; /*x is +-0,+-inf,+-1 */ |

245 | if (hy < 0) |

246 | z = one / z; /* z = (1/|x|) */ |

247 | if (hx < 0) |

248 | { |

249 | if (((ix - 0x3fff0000) | yisint) == 0) |

250 | { |

251 | z = (z - z) / (z - z); /* (-1)**non-int is NaN */ |

252 | } |

253 | else if (yisint == 1) |

254 | z = -z; /* (x<0)**odd = -(|x|**odd) */ |

255 | } |

256 | return z; |

257 | } |

258 | } |

259 | |

260 | /* (x<0)**(non-int) is NaN */ |

261 | if (((((uint32_t) hx >> 31) - 1) | yisint) == 0) |

262 | return (x - x) / (x - x); |

263 | |

264 | /* sgn (sign of result -ve**odd) = -1 else = 1 */ |

265 | sgn = one; |

266 | if (((((uint32_t) hx >> 31) - 1) | (yisint - 1)) == 0) |

267 | sgn = -one; /* (-ve)**(odd int) */ |

268 | |

269 | /* |y| is huge. |

270 | 2^-16495 = 1/2 of smallest representable value. |

271 | If (1 - 1/131072)^y underflows, y > 1.4986e9 */ |

272 | if (iy > 0x401d654b) |

273 | { |

274 | /* if (1 - 2^-113)^y underflows, y > 1.1873e38 */ |

275 | if (iy > 0x407d654b) |

276 | { |

277 | if (ix <= 0x3ffeffff) |

278 | return (hy < 0) ? huge * huge : tiny * tiny; |

279 | if (ix >= 0x3fff0000) |

280 | return (hy > 0) ? huge * huge : tiny * tiny; |

281 | } |

282 | /* over/underflow if x is not close to one */ |

283 | if (ix < 0x3ffeffff) |

284 | return (hy < 0) ? sgn * huge * huge : sgn * tiny * tiny; |

285 | if (ix > 0x3fff0000) |

286 | return (hy > 0) ? sgn * huge * huge : sgn * tiny * tiny; |

287 | } |

288 | |

289 | ay = y > 0 ? y : -y; |

290 | if (ay < 0x1p-128) |

291 | y = y < 0 ? -0x1p-128 : 0x1p-128; |

292 | |

293 | n = 0; |

294 | /* take care subnormal number */ |

295 | if (ix < 0x00010000) |

296 | { |

297 | ax *= two113; |

298 | n -= 113; |

299 | o.value = ax; |

300 | ix = o.words32.w0; |

301 | } |

302 | n += ((ix) >> 16) - 0x3fff; |

303 | j = ix & 0x0000ffff; |

304 | /* determine interval */ |

305 | ix = j | 0x3fff0000; /* normalize ix */ |

306 | if (j <= 0x3988) |

307 | k = 0; /* |x|<sqrt(3/2) */ |

308 | else if (j < 0xbb67) |

309 | k = 1; /* |x|<sqrt(3) */ |

310 | else |

311 | { |

312 | k = 0; |

313 | n += 1; |

314 | ix -= 0x00010000; |

315 | } |

316 | |

317 | o.value = ax; |

318 | o.words32.w0 = ix; |

319 | ax = o.value; |

320 | |

321 | /* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */ |

322 | u = ax - bp[k]; /* bp[0]=1.0, bp[1]=1.5 */ |

323 | v = one / (ax + bp[k]); |

324 | s = u * v; |

325 | s_h = s; |

326 | |

327 | o.value = s_h; |

328 | o.words32.w3 = 0; |

329 | o.words32.w2 &= 0xf8000000; |

330 | s_h = o.value; |

331 | /* t_h=ax+bp[k] High */ |

332 | t_h = ax + bp[k]; |

333 | o.value = t_h; |

334 | o.words32.w3 = 0; |

335 | o.words32.w2 &= 0xf8000000; |

336 | t_h = o.value; |

337 | t_l = ax - (t_h - bp[k]); |

338 | s_l = v * ((u - s_h * t_h) - s_h * t_l); |

339 | /* compute log(ax) */ |

340 | s2 = s * s; |

341 | u = LN[0] + s2 * (LN[1] + s2 * (LN[2] + s2 * (LN[3] + s2 * LN[4]))); |

342 | v = LD[0] + s2 * (LD[1] + s2 * (LD[2] + s2 * (LD[3] + s2 * (LD[4] + s2)))); |

343 | r = s2 * s2 * u / v; |

344 | r += s_l * (s_h + s); |

345 | s2 = s_h * s_h; |

346 | t_h = 3.0 + s2 + r; |

347 | o.value = t_h; |

348 | o.words32.w3 = 0; |

349 | o.words32.w2 &= 0xf8000000; |

350 | t_h = o.value; |

351 | t_l = r - ((t_h - 3.0) - s2); |

352 | /* u+v = s*(1+...) */ |

353 | u = s_h * t_h; |

354 | v = s_l * t_h + t_l * s; |

355 | /* 2/(3log2)*(s+...) */ |

356 | p_h = u + v; |

357 | o.value = p_h; |

358 | o.words32.w3 = 0; |

359 | o.words32.w2 &= 0xf8000000; |

360 | p_h = o.value; |

361 | p_l = v - (p_h - u); |

362 | z_h = cp_h * p_h; /* cp_h+cp_l = 2/(3*log2) */ |

363 | z_l = cp_l * p_h + p_l * cp + dp_l[k]; |

364 | /* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */ |

365 | t = (__float128) n; |

366 | t1 = (((z_h + z_l) + dp_h[k]) + t); |

367 | o.value = t1; |

368 | o.words32.w3 = 0; |

369 | o.words32.w2 &= 0xf8000000; |

370 | t1 = o.value; |

371 | t2 = z_l - (((t1 - t) - dp_h[k]) - z_h); |

372 | |

373 | /* split up y into y1+y2 and compute (y1+y2)*(t1+t2) */ |

374 | y1 = y; |

375 | o.value = y1; |

376 | o.words32.w3 = 0; |

377 | o.words32.w2 &= 0xf8000000; |

378 | y1 = o.value; |

379 | p_l = (y - y1) * t1 + y * t2; |

380 | p_h = y1 * t1; |

381 | z = p_l + p_h; |

382 | o.value = z; |

383 | j = o.words32.w0; |

384 | if (j >= 0x400d0000) /* z >= 16384 */ |

385 | { |

386 | /* if z > 16384 */ |

387 | if (((j - 0x400d0000) | o.words32.w1 | o.words32.w2 | o.words32.w3) != 0) |

388 | return sgn * huge * huge; /* overflow */ |

389 | else |

390 | { |

391 | if (p_l + ovt > z - p_h) |

392 | return sgn * huge * huge; /* overflow */ |

393 | } |

394 | } |

395 | else if ((j & 0x7fffffff) >= 0x400d01b9) /* z <= -16495 */ |

396 | { |

397 | /* z < -16495 */ |

398 | if (((j - 0xc00d01bc) | o.words32.w1 | o.words32.w2 | o.words32.w3) |

399 | != 0) |

400 | return sgn * tiny * tiny; /* underflow */ |

401 | else |

402 | { |

403 | if (p_l <= z - p_h) |

404 | return sgn * tiny * tiny; /* underflow */ |

405 | } |

406 | } |

407 | /* compute 2**(p_h+p_l) */ |

408 | i = j & 0x7fffffff; |

409 | k = (i >> 16) - 0x3fff; |

410 | n = 0; |

411 | if (i > 0x3ffe0000) |

412 | { /* if |z| > 0.5, set n = [z+0.5] */ |

413 | n = floorq (z + 0.5Q); |

414 | t = n; |

415 | p_h -= t; |

416 | } |

417 | t = p_l + p_h; |

418 | o.value = t; |

419 | o.words32.w3 = 0; |

420 | o.words32.w2 &= 0xf8000000; |

421 | t = o.value; |

422 | u = t * lg2_h; |

423 | v = (p_l - (t - p_h)) * lg2 + t * lg2_l; |

424 | z = u + v; |

425 | w = v - (z - u); |

426 | /* exp(z) */ |

427 | t = z * z; |

428 | u = PN[0] + t * (PN[1] + t * (PN[2] + t * (PN[3] + t * PN[4]))); |

429 | v = PD[0] + t * (PD[1] + t * (PD[2] + t * (PD[3] + t))); |

430 | t1 = z - t * u / v; |

431 | r = (z * t1) / (t1 - two) - (w + z * w); |

432 | z = one - (r - z); |

433 | o.value = z; |

434 | j = o.words32.w0; |

435 | j += (n << 16); |

436 | if ((j >> 16) <= 0) |

437 | { |

438 | z = scalbnq (z, n); /* subnormal output */ |

439 | __float128 force_underflow = z * z; |

440 | math_force_eval (force_underflow); |

441 | } |

442 | else |

443 | { |

444 | o.words32.w0 = j; |

445 | z = o.value; |

446 | } |

447 | return sgn * z; |

448 | } |

449 |