1 | //===-- llvm/Support/MathExtras.h - Useful math functions -------*- C++ -*-===// |
---|---|

2 | // |

3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |

4 | // See https://llvm.org/LICENSE.txt for license information. |

5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |

6 | // |

7 | //===----------------------------------------------------------------------===// |

8 | // |

9 | // This file contains some functions that are useful for math stuff. |

10 | // |

11 | //===----------------------------------------------------------------------===// |

12 | |

13 | #ifndef LLVM_SUPPORT_MATHEXTRAS_H |

14 | #define LLVM_SUPPORT_MATHEXTRAS_H |

15 | |

16 | #include "llvm/Support/Compiler.h" |

17 | #include <cassert> |

18 | #include <climits> |

19 | #include <cmath> |

20 | #include <cstdint> |

21 | #include <cstring> |

22 | #include <limits> |

23 | #include <type_traits> |

24 | |

25 | #ifdef __ANDROID_NDK__ |

26 | #include <android/api-level.h> |

27 | #endif |

28 | |

29 | #ifdef _MSC_VER |

30 | // Declare these intrinsics manually rather including intrin.h. It's very |

31 | // expensive, and MathExtras.h is popular. |

32 | // #include <intrin.h> |

33 | extern "C"{ |

34 | unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask); |

35 | unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask); |

36 | unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask); |

37 | unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask); |

38 | } |

39 | #endif |

40 | |

41 | namespace llvm { |

42 | |

43 | /// The behavior an operation has on an input of 0. |

44 | enum ZeroBehavior { |

45 | /// The returned value is undefined. |

46 | ZB_Undefined, |

47 | /// The returned value is numeric_limits<T>::max() |

48 | ZB_Max, |

49 | /// The returned value is numeric_limits<T>::digits |

50 | ZB_Width |

51 | }; |

52 | |

53 | /// Mathematical constants. |

54 | namespace numbers { |

55 | // TODO: Track C++20 std::numbers. |

56 | // TODO: Favor using the hexadecimal FP constants (requires C++17). |

57 | constexpr double e = 2.7182818284590452354, // (0x1.5bf0a8b145749P+1) https://oeis.org/A001113 |

58 | egamma = .57721566490153286061, // (0x1.2788cfc6fb619P-1) https://oeis.org/A001620 |

59 | ln2 = .69314718055994530942, // (0x1.62e42fefa39efP-1) https://oeis.org/A002162 |

60 | ln10 = 2.3025850929940456840, // (0x1.24bb1bbb55516P+1) https://oeis.org/A002392 |

61 | log2e = 1.4426950408889634074, // (0x1.71547652b82feP+0) |

62 | log10e = .43429448190325182765, // (0x1.bcb7b1526e50eP-2) |

63 | pi = 3.1415926535897932385, // (0x1.921fb54442d18P+1) https://oeis.org/A000796 |

64 | inv_pi = .31830988618379067154, // (0x1.45f306bc9c883P-2) https://oeis.org/A049541 |

65 | sqrtpi = 1.7724538509055160273, // (0x1.c5bf891b4ef6bP+0) https://oeis.org/A002161 |

66 | inv_sqrtpi = .56418958354775628695, // (0x1.20dd750429b6dP-1) https://oeis.org/A087197 |

67 | sqrt2 = 1.4142135623730950488, // (0x1.6a09e667f3bcdP+0) https://oeis.org/A00219 |

68 | inv_sqrt2 = .70710678118654752440, // (0x1.6a09e667f3bcdP-1) |

69 | sqrt3 = 1.7320508075688772935, // (0x1.bb67ae8584caaP+0) https://oeis.org/A002194 |

70 | inv_sqrt3 = .57735026918962576451, // (0x1.279a74590331cP-1) |

71 | phi = 1.6180339887498948482; // (0x1.9e3779b97f4a8P+0) https://oeis.org/A001622 |

72 | constexpr float ef = 2.71828183F, // (0x1.5bf0a8P+1) https://oeis.org/A001113 |

73 | egammaf = .577215665F, // (0x1.2788d0P-1) https://oeis.org/A001620 |

74 | ln2f = .693147181F, // (0x1.62e430P-1) https://oeis.org/A002162 |

75 | ln10f = 2.30258509F, // (0x1.26bb1cP+1) https://oeis.org/A002392 |

76 | log2ef = 1.44269504F, // (0x1.715476P+0) |

77 | log10ef = .434294482F, // (0x1.bcb7b2P-2) |

78 | pif = 3.14159265F, // (0x1.921fb6P+1) https://oeis.org/A000796 |

79 | inv_pif = .318309886F, // (0x1.45f306P-2) https://oeis.org/A049541 |

80 | sqrtpif = 1.77245385F, // (0x1.c5bf8aP+0) https://oeis.org/A002161 |

81 | inv_sqrtpif = .564189584F, // (0x1.20dd76P-1) https://oeis.org/A087197 |

82 | sqrt2f = 1.41421356F, // (0x1.6a09e6P+0) https://oeis.org/A002193 |

83 | inv_sqrt2f = .707106781F, // (0x1.6a09e6P-1) |

84 | sqrt3f = 1.73205081F, // (0x1.bb67aeP+0) https://oeis.org/A002194 |

85 | inv_sqrt3f = .577350269F, // (0x1.279a74P-1) |

86 | phif = 1.61803399F; // (0x1.9e377aP+0) https://oeis.org/A001622 |

87 | } // namespace numbers |

88 | |

89 | namespace detail { |

90 | template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter { |

91 | static unsigned count(T Val, ZeroBehavior) { |

92 | if (!Val) |

93 | return std::numeric_limits<T>::digits; |

94 | if (Val & 0x1) |

95 | return 0; |

96 | |

97 | // Bisection method. |

98 | unsigned ZeroBits = 0; |

99 | T Shift = std::numeric_limits<T>::digits >> 1; |

100 | T Mask = std::numeric_limits<T>::max() >> Shift; |

101 | while (Shift) { |

102 | if ((Val & Mask) == 0) { |

103 | Val >>= Shift; |

104 | ZeroBits |= Shift; |

105 | } |

106 | Shift >>= 1; |

107 | Mask >>= Shift; |

108 | } |

109 | return ZeroBits; |

110 | } |

111 | }; |

112 | |

113 | #if defined(__GNUC__) || defined(_MSC_VER) |

114 | template <typename T> struct TrailingZerosCounter<T, 4> { |

115 | static unsigned count(T Val, ZeroBehavior ZB) { |

116 | if (ZB != ZB_Undefined && Val == 0) |

117 | return 32; |

118 | |

119 | #if __has_builtin(__builtin_ctz) || defined(__GNUC__) |

120 | return __builtin_ctz(Val); |

121 | #elif defined(_MSC_VER) |

122 | unsigned long Index; |

123 | _BitScanForward(&Index, Val); |

124 | return Index; |

125 | #endif |

126 | } |

127 | }; |

128 | |

129 | #if !defined(_MSC_VER) || defined(_M_X64) |

130 | template <typename T> struct TrailingZerosCounter<T, 8> { |

131 | static unsigned count(T Val, ZeroBehavior ZB) { |

132 | if (ZB != ZB_Undefined && Val == 0) |

133 | return 64; |

134 | |

135 | #if __has_builtin(__builtin_ctzll) || defined(__GNUC__) |

136 | return __builtin_ctzll(Val); |

137 | #elif defined(_MSC_VER) |

138 | unsigned long Index; |

139 | _BitScanForward64(&Index, Val); |

140 | return Index; |

141 | #endif |

142 | } |

143 | }; |

144 | #endif |

145 | #endif |

146 | } // namespace detail |

147 | |

148 | /// Count number of 0's from the least significant bit to the most |

149 | /// stopping at the first 1. |

150 | /// |

151 | /// Only unsigned integral types are allowed. |

152 | /// |

153 | /// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are |

154 | /// valid arguments. |

155 | template <typename T> |

156 | unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) { |

157 | static_assert(std::numeric_limits<T>::is_integer && |

158 | !std::numeric_limits<T>::is_signed, |

159 | "Only unsigned integral types are allowed."); |

160 | return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val, ZB); |

161 | } |

162 | |

163 | namespace detail { |

164 | template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter { |

165 | static unsigned count(T Val, ZeroBehavior) { |

166 | if (!Val) |

167 | return std::numeric_limits<T>::digits; |

168 | |

169 | // Bisection method. |

170 | unsigned ZeroBits = 0; |

171 | for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) { |

172 | T Tmp = Val >> Shift; |

173 | if (Tmp) |

174 | Val = Tmp; |

175 | else |

176 | ZeroBits |= Shift; |

177 | } |

178 | return ZeroBits; |

179 | } |

180 | }; |

181 | |

182 | #if defined(__GNUC__) || defined(_MSC_VER) |

183 | template <typename T> struct LeadingZerosCounter<T, 4> { |

184 | static unsigned count(T Val, ZeroBehavior ZB) { |

185 | if (ZB != ZB_Undefined && Val == 0) |

186 | return 32; |

187 | |

188 | #if __has_builtin(__builtin_clz) || defined(__GNUC__) |

189 | return __builtin_clz(Val); |

190 | #elif defined(_MSC_VER) |

191 | unsigned long Index; |

192 | _BitScanReverse(&Index, Val); |

193 | return Index ^ 31; |

194 | #endif |

195 | } |

196 | }; |

197 | |

198 | #if !defined(_MSC_VER) || defined(_M_X64) |

199 | template <typename T> struct LeadingZerosCounter<T, 8> { |

200 | static unsigned count(T Val, ZeroBehavior ZB) { |

201 | if (ZB != ZB_Undefined && Val == 0) |

202 | return 64; |

203 | |

204 | #if __has_builtin(__builtin_clzll) || defined(__GNUC__) |

205 | return __builtin_clzll(Val); |

206 | #elif defined(_MSC_VER) |

207 | unsigned long Index; |

208 | _BitScanReverse64(&Index, Val); |

209 | return Index ^ 63; |

210 | #endif |

211 | } |

212 | }; |

213 | #endif |

214 | #endif |

215 | } // namespace detail |

216 | |

217 | /// Count number of 0's from the most significant bit to the least |

218 | /// stopping at the first 1. |

219 | /// |

220 | /// Only unsigned integral types are allowed. |

221 | /// |

222 | /// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are |

223 | /// valid arguments. |

224 | template <typename T> |

225 | unsigned countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) { |

226 | static_assert(std::numeric_limits<T>::is_integer && |

227 | !std::numeric_limits<T>::is_signed, |

228 | "Only unsigned integral types are allowed."); |

229 | return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val, ZB); |

230 | } |

231 | |

232 | /// Get the index of the first set bit starting from the least |

233 | /// significant bit. |

234 | /// |

235 | /// Only unsigned integral types are allowed. |

236 | /// |

237 | /// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are |

238 | /// valid arguments. |

239 | template <typename T> T findFirstSet(T Val, ZeroBehavior ZB = ZB_Max) { |

240 | if (ZB == ZB_Max && Val == 0) |

241 | return std::numeric_limits<T>::max(); |

242 | |

243 | return countTrailingZeros(Val, ZB_Undefined); |

244 | } |

245 | |

246 | /// Create a bitmask with the N right-most bits set to 1, and all other |

247 | /// bits set to 0. Only unsigned types are allowed. |

248 | template <typename T> T maskTrailingOnes(unsigned N) { |

249 | static_assert(std::is_unsigned<T>::value, "Invalid type!"); |

250 | const unsigned Bits = CHAR_BIT * sizeof(T); |

251 | assert(N <= Bits && "Invalid bit index"); |

252 | return N == 0 ? 0 : (T(-1) >> (Bits - N)); |

253 | } |

254 | |

255 | /// Create a bitmask with the N left-most bits set to 1, and all other |

256 | /// bits set to 0. Only unsigned types are allowed. |

257 | template <typename T> T maskLeadingOnes(unsigned N) { |

258 | return ~maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N); |

259 | } |

260 | |

261 | /// Create a bitmask with the N right-most bits set to 0, and all other |

262 | /// bits set to 1. Only unsigned types are allowed. |

263 | template <typename T> T maskTrailingZeros(unsigned N) { |

264 | return maskLeadingOnes<T>(CHAR_BIT * sizeof(T) - N); |

265 | } |

266 | |

267 | /// Create a bitmask with the N left-most bits set to 0, and all other |

268 | /// bits set to 1. Only unsigned types are allowed. |

269 | template <typename T> T maskLeadingZeros(unsigned N) { |

270 | return maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N); |

271 | } |

272 | |

273 | /// Get the index of the last set bit starting from the least |

274 | /// significant bit. |

275 | /// |

276 | /// Only unsigned integral types are allowed. |

277 | /// |

278 | /// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are |

279 | /// valid arguments. |

280 | template <typename T> T findLastSet(T Val, ZeroBehavior ZB = ZB_Max) { |

281 | if (ZB == ZB_Max && Val == 0) |

282 | return std::numeric_limits<T>::max(); |

283 | |

284 | // Use ^ instead of - because both gcc and llvm can remove the associated ^ |

285 | // in the __builtin_clz intrinsic on x86. |

286 | return countLeadingZeros(Val, ZB_Undefined) ^ |

287 | (std::numeric_limits<T>::digits - 1); |

288 | } |

289 | |

290 | /// Macro compressed bit reversal table for 256 bits. |

291 | /// |

292 | /// http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable |

293 | static const unsigned char BitReverseTable256[256] = { |

294 | #define R2(n) n, n + 2 * 64, n + 1 * 64, n + 3 * 64 |

295 | #define R4(n) R2(n), R2(n + 2 * 16), R2(n + 1 * 16), R2(n + 3 * 16) |

296 | #define R6(n) R4(n), R4(n + 2 * 4), R4(n + 1 * 4), R4(n + 3 * 4) |

297 | R6(0), R6(2), R6(1), R6(3) |

298 | #undef R2 |

299 | #undef R4 |

300 | #undef R6 |

301 | }; |

302 | |

303 | /// Reverse the bits in \p Val. |

304 | template <typename T> |

305 | T reverseBits(T Val) { |

306 | unsigned char in[sizeof(Val)]; |

307 | unsigned char out[sizeof(Val)]; |

308 | std::memcpy(in, &Val, sizeof(Val)); |

309 | for (unsigned i = 0; i < sizeof(Val); ++i) |

310 | out[(sizeof(Val) - i) - 1] = BitReverseTable256[in[i]]; |

311 | std::memcpy(&Val, out, sizeof(Val)); |

312 | return Val; |

313 | } |

314 | |

315 | #if __has_builtin(__builtin_bitreverse8) |

316 | template<> |

317 | inline uint8_t reverseBits<uint8_t>(uint8_t Val) { |

318 | return __builtin_bitreverse8(Val); |

319 | } |

320 | #endif |

321 | |

322 | #if __has_builtin(__builtin_bitreverse16) |

323 | template<> |

324 | inline uint16_t reverseBits<uint16_t>(uint16_t Val) { |

325 | return __builtin_bitreverse16(Val); |

326 | } |

327 | #endif |

328 | |

329 | #if __has_builtin(__builtin_bitreverse32) |

330 | template<> |

331 | inline uint32_t reverseBits<uint32_t>(uint32_t Val) { |

332 | return __builtin_bitreverse32(Val); |

333 | } |

334 | #endif |

335 | |

336 | #if __has_builtin(__builtin_bitreverse64) |

337 | template<> |

338 | inline uint64_t reverseBits<uint64_t>(uint64_t Val) { |

339 | return __builtin_bitreverse64(Val); |

340 | } |

341 | #endif |

342 | |

343 | // NOTE: The following support functions use the _32/_64 extensions instead of |

344 | // type overloading so that signed and unsigned integers can be used without |

345 | // ambiguity. |

346 | |

347 | /// Return the high 32 bits of a 64 bit value. |

348 | constexpr inline uint32_t Hi_32(uint64_t Value) { |

349 | return static_cast<uint32_t>(Value >> 32); |

350 | } |

351 | |

352 | /// Return the low 32 bits of a 64 bit value. |

353 | constexpr inline uint32_t Lo_32(uint64_t Value) { |

354 | return static_cast<uint32_t>(Value); |

355 | } |

356 | |

357 | /// Make a 64-bit integer from a high / low pair of 32-bit integers. |

358 | constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) { |

359 | return ((uint64_t)High << 32) | (uint64_t)Low; |

360 | } |

361 | |

362 | /// Checks if an integer fits into the given bit width. |

363 | template <unsigned N> constexpr inline bool isInt(int64_t x) { |

364 | return N >= 64 || (-(INT64_C(1)<<(N-1)) <= x && x < (INT64_C(1)<<(N-1))); |

365 | } |

366 | // Template specializations to get better code for common cases. |

367 | template <> constexpr inline bool isInt<8>(int64_t x) { |

368 | return static_cast<int8_t>(x) == x; |

369 | } |

370 | template <> constexpr inline bool isInt<16>(int64_t x) { |

371 | return static_cast<int16_t>(x) == x; |

372 | } |

373 | template <> constexpr inline bool isInt<32>(int64_t x) { |

374 | return static_cast<int32_t>(x) == x; |

375 | } |

376 | |

377 | /// Checks if a signed integer is an N bit number shifted left by S. |

378 | template <unsigned N, unsigned S> |

379 | constexpr inline bool isShiftedInt(int64_t x) { |

380 | static_assert( |

381 | N > 0, "isShiftedInt<0> doesn't make sense (refers to a 0-bit number."); |

382 | static_assert(N + S <= 64, "isShiftedInt<N, S> with N + S > 64 is too wide."); |

383 | return isInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0); |

384 | } |

385 | |

386 | /// Checks if an unsigned integer fits into the given bit width. |

387 | /// |

388 | /// This is written as two functions rather than as simply |

389 | /// |

390 | /// return N >= 64 || X < (UINT64_C(1) << N); |

391 | /// |

392 | /// to keep MSVC from (incorrectly) warning on isUInt<64> that we're shifting |

393 | /// left too many places. |

394 | template <unsigned N> |

395 | constexpr inline std::enable_if_t<(N < 64), bool> isUInt(uint64_t X) { |

396 | static_assert(N > 0, "isUInt<0> doesn't make sense"); |

397 | return X < (UINT64_C(1) << (N)); |

398 | } |

399 | template <unsigned N> |

400 | constexpr inline std::enable_if_t<N >= 64, bool> isUInt(uint64_t) { |

401 | return true; |

402 | } |

403 | |

404 | // Template specializations to get better code for common cases. |

405 | template <> constexpr inline bool isUInt<8>(uint64_t x) { |

406 | return static_cast<uint8_t>(x) == x; |

407 | } |

408 | template <> constexpr inline bool isUInt<16>(uint64_t x) { |

409 | return static_cast<uint16_t>(x) == x; |

410 | } |

411 | template <> constexpr inline bool isUInt<32>(uint64_t x) { |

412 | return static_cast<uint32_t>(x) == x; |

413 | } |

414 | |

415 | /// Checks if a unsigned integer is an N bit number shifted left by S. |

416 | template <unsigned N, unsigned S> |

417 | constexpr inline bool isShiftedUInt(uint64_t x) { |

418 | static_assert( |

419 | N > 0, "isShiftedUInt<0> doesn't make sense (refers to a 0-bit number)"); |

420 | static_assert(N + S <= 64, |

421 | "isShiftedUInt<N, S> with N + S > 64 is too wide."); |

422 | // Per the two static_asserts above, S must be strictly less than 64. So |

423 | // 1 << S is not undefined behavior. |

424 | return isUInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0); |

425 | } |

426 | |

427 | /// Gets the maximum value for a N-bit unsigned integer. |

428 | inline uint64_t maxUIntN(uint64_t N) { |

429 | assert(N > 0 && N <= 64 && "integer width out of range"); |

430 | |

431 | // uint64_t(1) << 64 is undefined behavior, so we can't do |

432 | // (uint64_t(1) << N) - 1 |

433 | // without checking first that N != 64. But this works and doesn't have a |

434 | // branch. |

435 | return UINT64_MAX >> (64 - N); |

436 | } |

437 | |

438 | /// Gets the minimum value for a N-bit signed integer. |

439 | inline int64_t minIntN(int64_t N) { |

440 | assert(N > 0 && N <= 64 && "integer width out of range"); |

441 | |

442 | return UINT64_C(1) + ~(UINT64_C(1) << (N - 1)); |

443 | } |

444 | |

445 | /// Gets the maximum value for a N-bit signed integer. |

446 | inline int64_t maxIntN(int64_t N) { |

447 | assert(N > 0 && N <= 64 && "integer width out of range"); |

448 | |

449 | // This relies on two's complement wraparound when N == 64, so we convert to |

450 | // int64_t only at the very end to avoid UB. |

451 | return (UINT64_C(1) << (N - 1)) - 1; |

452 | } |

453 | |

454 | /// Checks if an unsigned integer fits into the given (dynamic) bit width. |

455 | inline bool isUIntN(unsigned N, uint64_t x) { |

456 | return N >= 64 || x <= maxUIntN(N); |

457 | } |

458 | |

459 | /// Checks if an signed integer fits into the given (dynamic) bit width. |

460 | inline bool isIntN(unsigned N, int64_t x) { |

461 | return N >= 64 || (minIntN(N) <= x && x <= maxIntN(N)); |

462 | } |

463 | |

464 | /// Return true if the argument is a non-empty sequence of ones starting at the |

465 | /// least significant bit with the remainder zero (32 bit version). |

466 | /// Ex. isMask_32(0x0000FFFFU) == true. |

467 | constexpr inline bool isMask_32(uint32_t Value) { |

468 | return Value && ((Value + 1) & Value) == 0; |

469 | } |

470 | |

471 | /// Return true if the argument is a non-empty sequence of ones starting at the |

472 | /// least significant bit with the remainder zero (64 bit version). |

473 | constexpr inline bool isMask_64(uint64_t Value) { |

474 | return Value && ((Value + 1) & Value) == 0; |

475 | } |

476 | |

477 | /// Return true if the argument contains a non-empty sequence of ones with the |

478 | /// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true. |

479 | constexpr inline bool isShiftedMask_32(uint32_t Value) { |

480 | return Value && isMask_32((Value - 1) | Value); |

481 | } |

482 | |

483 | /// Return true if the argument contains a non-empty sequence of ones with the |

484 | /// remainder zero (64 bit version.) |

485 | constexpr inline bool isShiftedMask_64(uint64_t Value) { |

486 | return Value && isMask_64((Value - 1) | Value); |

487 | } |

488 | |

489 | /// Return true if the argument is a power of two > 0. |

490 | /// Ex. isPowerOf2_32(0x00100000U) == true (32 bit edition.) |

491 | constexpr inline bool isPowerOf2_32(uint32_t Value) { |

492 | return Value && !(Value & (Value - 1)); |

493 | } |

494 | |

495 | /// Return true if the argument is a power of two > 0 (64 bit edition.) |

496 | constexpr inline bool isPowerOf2_64(uint64_t Value) { |

497 | return Value && !(Value & (Value - 1)); |

498 | } |

499 | |

500 | /// Count the number of ones from the most significant bit to the first |

501 | /// zero bit. |

502 | /// |

503 | /// Ex. countLeadingOnes(0xFF0FFF00) == 8. |

504 | /// Only unsigned integral types are allowed. |

505 | /// |

506 | /// \param ZB the behavior on an input of all ones. Only ZB_Width and |

507 | /// ZB_Undefined are valid arguments. |

508 | template <typename T> |

509 | unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) { |

510 | static_assert(std::numeric_limits<T>::is_integer && |

511 | !std::numeric_limits<T>::is_signed, |

512 | "Only unsigned integral types are allowed."); |

513 | return countLeadingZeros<T>(~Value, ZB); |

514 | } |

515 | |

516 | /// Count the number of ones from the least significant bit to the first |

517 | /// zero bit. |

518 | /// |

519 | /// Ex. countTrailingOnes(0x00FF00FF) == 8. |

520 | /// Only unsigned integral types are allowed. |

521 | /// |

522 | /// \param ZB the behavior on an input of all ones. Only ZB_Width and |

523 | /// ZB_Undefined are valid arguments. |

524 | template <typename T> |

525 | unsigned countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) { |

526 | static_assert(std::numeric_limits<T>::is_integer && |

527 | !std::numeric_limits<T>::is_signed, |

528 | "Only unsigned integral types are allowed."); |

529 | return countTrailingZeros<T>(~Value, ZB); |

530 | } |

531 | |

532 | namespace detail { |

533 | template <typename T, std::size_t SizeOfT> struct PopulationCounter { |

534 | static unsigned count(T Value) { |

535 | // Generic version, forward to 32 bits. |

536 | static_assert(SizeOfT <= 4, "Not implemented!"); |

537 | #if defined(__GNUC__) |

538 | return __builtin_popcount(Value); |

539 | #else |

540 | uint32_t v = Value; |

541 | v = v - ((v >> 1) & 0x55555555); |

542 | v = (v & 0x33333333) + ((v >> 2) & 0x33333333); |

543 | return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24; |

544 | #endif |

545 | } |

546 | }; |

547 | |

548 | template <typename T> struct PopulationCounter<T, 8> { |

549 | static unsigned count(T Value) { |

550 | #if defined(__GNUC__) |

551 | return __builtin_popcountll(Value); |

552 | #else |

553 | uint64_t v = Value; |

554 | v = v - ((v >> 1) & 0x5555555555555555ULL); |

555 | v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL); |

556 | v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL; |

557 | return unsigned((uint64_t)(v * 0x0101010101010101ULL) >> 56); |

558 | #endif |

559 | } |

560 | }; |

561 | } // namespace detail |

562 | |

563 | /// Count the number of set bits in a value. |

564 | /// Ex. countPopulation(0xF000F000) = 8 |

565 | /// Returns 0 if the word is zero. |

566 | template <typename T> |

567 | inline unsigned countPopulation(T Value) { |

568 | static_assert(std::numeric_limits<T>::is_integer && |

569 | !std::numeric_limits<T>::is_signed, |

570 | "Only unsigned integral types are allowed."); |

571 | return detail::PopulationCounter<T, sizeof(T)>::count(Value); |

572 | } |

573 | |

574 | /// Compile time Log2. |

575 | /// Valid only for positive powers of two. |

576 | template <size_t kValue> constexpr inline size_t CTLog2() { |

577 | static_assert(kValue > 0 && llvm::isPowerOf2_64(kValue), |

578 | "Value is not a valid power of 2"); |

579 | return 1 + CTLog2<kValue / 2>(); |

580 | } |

581 | |

582 | template <> constexpr inline size_t CTLog2<1>() { return 0; } |

583 | |

584 | /// Return the log base 2 of the specified value. |

585 | inline double Log2(double Value) { |

586 | #if defined(__ANDROID_API__) && __ANDROID_API__ < 18 |

587 | return __builtin_log(Value) / __builtin_log(2.0); |

588 | #else |

589 | return log2(Value); |

590 | #endif |

591 | } |

592 | |

593 | /// Return the floor log base 2 of the specified value, -1 if the value is zero. |

594 | /// (32 bit edition.) |

595 | /// Ex. Log2_32(32) == 5, Log2_32(1) == 0, Log2_32(0) == -1, Log2_32(6) == 2 |

596 | inline unsigned Log2_32(uint32_t Value) { |

597 | return 31 - countLeadingZeros(Value); |

598 | } |

599 | |

600 | /// Return the floor log base 2 of the specified value, -1 if the value is zero. |

601 | /// (64 bit edition.) |

602 | inline unsigned Log2_64(uint64_t Value) { |

603 | return 63 - countLeadingZeros(Value); |

604 | } |

605 | |

606 | /// Return the ceil log base 2 of the specified value, 32 if the value is zero. |

607 | /// (32 bit edition). |

608 | /// Ex. Log2_32_Ceil(32) == 5, Log2_32_Ceil(1) == 0, Log2_32_Ceil(6) == 3 |

609 | inline unsigned Log2_32_Ceil(uint32_t Value) { |

610 | return 32 - countLeadingZeros(Value - 1); |

611 | } |

612 | |

613 | /// Return the ceil log base 2 of the specified value, 64 if the value is zero. |

614 | /// (64 bit edition.) |

615 | inline unsigned Log2_64_Ceil(uint64_t Value) { |

616 | return 64 - countLeadingZeros(Value - 1); |

617 | } |

618 | |

619 | /// Return the greatest common divisor of the values using Euclid's algorithm. |

620 | template <typename T> |

621 | inline T greatestCommonDivisor(T A, T B) { |

622 | while (B) { |

623 | T Tmp = B; |

624 | B = A % B; |

625 | A = Tmp; |

626 | } |

627 | return A; |

628 | } |

629 | |

630 | inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) { |

631 | return greatestCommonDivisor<uint64_t>(A, B); |

632 | } |

633 | |

634 | /// This function takes a 64-bit integer and returns the bit equivalent double. |

635 | inline double BitsToDouble(uint64_t Bits) { |

636 | double D; |

637 | static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes"); |

638 | memcpy(&D, &Bits, sizeof(Bits)); |

639 | return D; |

640 | } |

641 | |

642 | /// This function takes a 32-bit integer and returns the bit equivalent float. |

643 | inline float BitsToFloat(uint32_t Bits) { |

644 | float F; |

645 | static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes"); |

646 | memcpy(&F, &Bits, sizeof(Bits)); |

647 | return F; |

648 | } |

649 | |

650 | /// This function takes a double and returns the bit equivalent 64-bit integer. |

651 | /// Note that copying doubles around changes the bits of NaNs on some hosts, |

652 | /// notably x86, so this routine cannot be used if these bits are needed. |

653 | inline uint64_t DoubleToBits(double Double) { |

654 | uint64_t Bits; |

655 | static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes"); |

656 | memcpy(&Bits, &Double, sizeof(Double)); |

657 | return Bits; |

658 | } |

659 | |

660 | /// This function takes a float and returns the bit equivalent 32-bit integer. |

661 | /// Note that copying floats around changes the bits of NaNs on some hosts, |

662 | /// notably x86, so this routine cannot be used if these bits are needed. |

663 | inline uint32_t FloatToBits(float Float) { |

664 | uint32_t Bits; |

665 | static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes"); |

666 | memcpy(&Bits, &Float, sizeof(Float)); |

667 | return Bits; |

668 | } |

669 | |

670 | /// A and B are either alignments or offsets. Return the minimum alignment that |

671 | /// may be assumed after adding the two together. |

672 | constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) { |

673 | // The largest power of 2 that divides both A and B. |

674 | // |

675 | // Replace "-Value" by "1+~Value" in the following commented code to avoid |

676 | // MSVC warning C4146 |

677 | // return (A | B) & -(A | B); |

678 | return (A | B) & (1 + ~(A | B)); |

679 | } |

680 | |

681 | /// Returns the next power of two (in 64-bits) that is strictly greater than A. |

682 | /// Returns zero on overflow. |

683 | inline uint64_t NextPowerOf2(uint64_t A) { |

684 | A |= (A >> 1); |

685 | A |= (A >> 2); |

686 | A |= (A >> 4); |

687 | A |= (A >> 8); |

688 | A |= (A >> 16); |

689 | A |= (A >> 32); |

690 | return A + 1; |

691 | } |

692 | |

693 | /// Returns the power of two which is less than or equal to the given value. |

694 | /// Essentially, it is a floor operation across the domain of powers of two. |

695 | inline uint64_t PowerOf2Floor(uint64_t A) { |

696 | if (!A) return 0; |

697 | return 1ull << (63 - countLeadingZeros(A, ZB_Undefined)); |

698 | } |

699 | |

700 | /// Returns the power of two which is greater than or equal to the given value. |

701 | /// Essentially, it is a ceil operation across the domain of powers of two. |

702 | inline uint64_t PowerOf2Ceil(uint64_t A) { |

703 | if (!A) |

704 | return 0; |

705 | return NextPowerOf2(A - 1); |

706 | } |

707 | |

708 | /// Returns the next integer (mod 2**64) that is greater than or equal to |

709 | /// \p Value and is a multiple of \p Align. \p Align must be non-zero. |

710 | /// |

711 | /// If non-zero \p Skew is specified, the return value will be a minimal |

712 | /// integer that is greater than or equal to \p Value and equal to |

713 | /// \p Align * N + \p Skew for some integer N. If \p Skew is larger than |

714 | /// \p Align, its value is adjusted to '\p Skew mod \p Align'. |

715 | /// |

716 | /// Examples: |

717 | /// \code |

718 | /// alignTo(5, 8) = 8 |

719 | /// alignTo(17, 8) = 24 |

720 | /// alignTo(~0LL, 8) = 0 |

721 | /// alignTo(321, 255) = 510 |

722 | /// |

723 | /// alignTo(5, 8, 7) = 7 |

724 | /// alignTo(17, 8, 1) = 17 |

725 | /// alignTo(~0LL, 8, 3) = 3 |

726 | /// alignTo(321, 255, 42) = 552 |

727 | /// \endcode |

728 | inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew = 0) { |

729 | assert(Align != 0u && "Align can't be 0."); |

730 | Skew %= Align; |

731 | return (Value + Align - 1 - Skew) / Align * Align + Skew; |

732 | } |

733 | |

734 | /// Returns the next integer (mod 2**64) that is greater than or equal to |

735 | /// \p Value and is a multiple of \c Align. \c Align must be non-zero. |

736 | template <uint64_t Align> constexpr inline uint64_t alignTo(uint64_t Value) { |

737 | static_assert(Align != 0u, "Align must be non-zero"); |

738 | return (Value + Align - 1) / Align * Align; |

739 | } |

740 | |

741 | /// Returns the integer ceil(Numerator / Denominator). |

742 | inline uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) { |

743 | return alignTo(Numerator, Denominator) / Denominator; |

744 | } |

745 | |

746 | /// Returns the integer nearest(Numerator / Denominator). |

747 | inline uint64_t divideNearest(uint64_t Numerator, uint64_t Denominator) { |

748 | return (Numerator + (Denominator / 2)) / Denominator; |

749 | } |

750 | |

751 | /// Returns the largest uint64_t less than or equal to \p Value and is |

752 | /// \p Skew mod \p Align. \p Align must be non-zero |

753 | inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) { |

754 | assert(Align != 0u && "Align can't be 0."); |

755 | Skew %= Align; |

756 | return (Value - Skew) / Align * Align + Skew; |

757 | } |

758 | |

759 | /// Sign-extend the number in the bottom B bits of X to a 32-bit integer. |

760 | /// Requires 0 < B <= 32. |

761 | template <unsigned B> constexpr inline int32_t SignExtend32(uint32_t X) { |

762 | static_assert(B > 0, "Bit width can't be 0."); |

763 | static_assert(B <= 32, "Bit width out of range."); |

764 | return int32_t(X << (32 - B)) >> (32 - B); |

765 | } |

766 | |

767 | /// Sign-extend the number in the bottom B bits of X to a 32-bit integer. |

768 | /// Requires 0 < B <= 32. |

769 | inline int32_t SignExtend32(uint32_t X, unsigned B) { |

770 | assert(B > 0 && "Bit width can't be 0."); |

771 | assert(B <= 32 && "Bit width out of range."); |

772 | return int32_t(X << (32 - B)) >> (32 - B); |

773 | } |

774 | |

775 | /// Sign-extend the number in the bottom B bits of X to a 64-bit integer. |

776 | /// Requires 0 < B <= 64. |

777 | template <unsigned B> constexpr inline int64_t SignExtend64(uint64_t x) { |

778 | static_assert(B > 0, "Bit width can't be 0."); |

779 | static_assert(B <= 64, "Bit width out of range."); |

780 | return int64_t(x << (64 - B)) >> (64 - B); |

781 | } |

782 | |

783 | /// Sign-extend the number in the bottom B bits of X to a 64-bit integer. |

784 | /// Requires 0 < B <= 64. |

785 | inline int64_t SignExtend64(uint64_t X, unsigned B) { |

786 | assert(B > 0 && "Bit width can't be 0."); |

787 | assert(B <= 64 && "Bit width out of range."); |

788 | return int64_t(X << (64 - B)) >> (64 - B); |

789 | } |

790 | |

791 | /// Subtract two unsigned integers, X and Y, of type T and return the absolute |

792 | /// value of the result. |

793 | template <typename T> |

794 | std::enable_if_t<std::is_unsigned<T>::value, T> AbsoluteDifference(T X, T Y) { |

795 | return X > Y ? (X - Y) : (Y - X); |

796 | } |

797 | |

798 | /// Add two unsigned integers, X and Y, of type T. Clamp the result to the |

799 | /// maximum representable value of T on overflow. ResultOverflowed indicates if |

800 | /// the result is larger than the maximum representable value of type T. |

801 | template <typename T> |

802 | std::enable_if_t<std::is_unsigned<T>::value, T> |

803 | SaturatingAdd(T X, T Y, bool *ResultOverflowed = nullptr) { |

804 | bool Dummy; |

805 | bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy; |

806 | // Hacker's Delight, p. 29 |

807 | T Z = X + Y; |

808 | Overflowed = (Z < X || Z < Y); |

809 | if (Overflowed) |

810 | return std::numeric_limits<T>::max(); |

811 | else |

812 | return Z; |

813 | } |

814 | |

815 | /// Multiply two unsigned integers, X and Y, of type T. Clamp the result to the |

816 | /// maximum representable value of T on overflow. ResultOverflowed indicates if |

817 | /// the result is larger than the maximum representable value of type T. |

818 | template <typename T> |

819 | std::enable_if_t<std::is_unsigned<T>::value, T> |

820 | SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) { |

821 | bool Dummy; |

822 | bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy; |

823 | |

824 | // Hacker's Delight, p. 30 has a different algorithm, but we don't use that |

825 | // because it fails for uint16_t (where multiplication can have undefined |

826 | // behavior due to promotion to int), and requires a division in addition |

827 | // to the multiplication. |

828 | |

829 | Overflowed = false; |

830 | |

831 | // Log2(Z) would be either Log2Z or Log2Z + 1. |

832 | // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Z |

833 | // will necessarily be less than Log2Max as desired. |

834 | int Log2Z = Log2_64(X) + Log2_64(Y); |

835 | const T Max = std::numeric_limits<T>::max(); |

836 | int Log2Max = Log2_64(Max); |

837 | if (Log2Z < Log2Max) { |

838 | return X * Y; |

839 | } |

840 | if (Log2Z > Log2Max) { |

841 | Overflowed = true; |

842 | return Max; |

843 | } |

844 | |

845 | // We're going to use the top bit, and maybe overflow one |

846 | // bit past it. Multiply all but the bottom bit then add |

847 | // that on at the end. |

848 | T Z = (X >> 1) * Y; |

849 | if (Z & ~(Max >> 1)) { |

850 | Overflowed = true; |

851 | return Max; |

852 | } |

853 | Z <<= 1; |

854 | if (X & 1) |

855 | return SaturatingAdd(Z, Y, ResultOverflowed); |

856 | |

857 | return Z; |

858 | } |

859 | |

860 | /// Multiply two unsigned integers, X and Y, and add the unsigned integer, A to |

861 | /// the product. Clamp the result to the maximum representable value of T on |

862 | /// overflow. ResultOverflowed indicates if the result is larger than the |

863 | /// maximum representable value of type T. |

864 | template <typename T> |

865 | std::enable_if_t<std::is_unsigned<T>::value, T> |

866 | SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed = nullptr) { |

867 | bool Dummy; |

868 | bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy; |

869 | |

870 | T Product = SaturatingMultiply(X, Y, &Overflowed); |

871 | if (Overflowed) |

872 | return Product; |

873 | |

874 | return SaturatingAdd(A, Product, &Overflowed); |

875 | } |

876 | |

877 | /// Use this rather than HUGE_VALF; the latter causes warnings on MSVC. |

878 | extern const float huge_valf; |

879 | |

880 | |

881 | /// Add two signed integers, computing the two's complement truncated result, |

882 | /// returning true if overflow occured. |

883 | template <typename T> |

884 | std::enable_if_t<std::is_signed<T>::value, T> AddOverflow(T X, T Y, T &Result) { |

885 | #if __has_builtin(__builtin_add_overflow) |

886 | return __builtin_add_overflow(X, Y, &Result); |

887 | #else |

888 | // Perform the unsigned addition. |

889 | using U = std::make_unsigned_t<T>; |

890 | const U UX = static_cast<U>(X); |

891 | const U UY = static_cast<U>(Y); |

892 | const U UResult = UX + UY; |

893 | |

894 | // Convert to signed. |

895 | Result = static_cast<T>(UResult); |

896 | |

897 | // Adding two positive numbers should result in a positive number. |

898 | if (X > 0 && Y > 0) |

899 | return Result <= 0; |

900 | // Adding two negatives should result in a negative number. |

901 | if (X < 0 && Y < 0) |

902 | return Result >= 0; |

903 | return false; |

904 | #endif |

905 | } |

906 | |

907 | /// Subtract two signed integers, computing the two's complement truncated |

908 | /// result, returning true if an overflow ocurred. |

909 | template <typename T> |

910 | std::enable_if_t<std::is_signed<T>::value, T> SubOverflow(T X, T Y, T &Result) { |

911 | #if __has_builtin(__builtin_sub_overflow) |

912 | return __builtin_sub_overflow(X, Y, &Result); |

913 | #else |

914 | // Perform the unsigned addition. |

915 | using U = std::make_unsigned_t<T>; |

916 | const U UX = static_cast<U>(X); |

917 | const U UY = static_cast<U>(Y); |

918 | const U UResult = UX - UY; |

919 | |

920 | // Convert to signed. |

921 | Result = static_cast<T>(UResult); |

922 | |

923 | // Subtracting a positive number from a negative results in a negative number. |

924 | if (X <= 0 && Y > 0) |

925 | return Result >= 0; |

926 | // Subtracting a negative number from a positive results in a positive number. |

927 | if (X >= 0 && Y < 0) |

928 | return Result <= 0; |

929 | return false; |

930 | #endif |

931 | } |

932 | |

933 | /// Multiply two signed integers, computing the two's complement truncated |

934 | /// result, returning true if an overflow ocurred. |

935 | template <typename T> |

936 | std::enable_if_t<std::is_signed<T>::value, T> MulOverflow(T X, T Y, T &Result) { |

937 | // Perform the unsigned multiplication on absolute values. |

938 | using U = std::make_unsigned_t<T>; |

939 | const U UX = X < 0 ? (0 - static_cast<U>(X)) : static_cast<U>(X); |

940 | const U UY = Y < 0 ? (0 - static_cast<U>(Y)) : static_cast<U>(Y); |

941 | const U UResult = UX * UY; |

942 | |

943 | // Convert to signed. |

944 | const bool IsNegative = (X < 0) ^ (Y < 0); |

945 | Result = IsNegative ? (0 - UResult) : UResult; |

946 | |

947 | // If any of the args was 0, result is 0 and no overflow occurs. |

948 | if (UX == 0 || UY == 0) |

949 | return false; |

950 | |

951 | // UX and UY are in [1, 2^n], where n is the number of digits. |

952 | // Check how the max allowed absolute value (2^n for negative, 2^(n-1) for |

953 | // positive) divided by an argument compares to the other. |

954 | if (IsNegative) |

955 | return UX > (static_cast<U>(std::numeric_limits<T>::max()) + U(1)) / UY; |

956 | else |

957 | return UX > (static_cast<U>(std::numeric_limits<T>::max())) / UY; |

958 | } |

959 | |

960 | } // End llvm namespace |

961 | |

962 | #endif |

963 |