1//===-- lib/Decimal/decimal-to-binary.cpp ---------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "big-radix-floating-point.h"
10#include "flang/Common/bit-population-count.h"
11#include "flang/Common/leading-zero-bit-count.h"
12#include "flang/Decimal/binary-floating-point.h"
13#include "flang/Decimal/decimal.h"
14#include "flang/Runtime/freestanding-tools.h"
15#include <cinttypes>
16#include <cstring>
17#include <utility>
18
19// Some environments, viz. glibc 2.17 and *BSD, allow the macro HUGE
20// to leak out of <math.h>.
21#undef HUGE
22
23namespace Fortran::decimal {
24
25template <int PREC, int LOG10RADIX>
26bool BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ParseNumber(
27 const char *&p, bool &inexact, const char *end) {
28 SetToZero();
29 if (end && p >= end) {
30 return false;
31 }
32 // Skip leading spaces
33 for (; p != end && *p == ' '; ++p) {
34 }
35 if (p == end) {
36 return false;
37 }
38 const char *q{p};
39 isNegative_ = *q == '-';
40 if (*q == '-' || *q == '+') {
41 ++q;
42 }
43 const char *start{q};
44 for (; q != end && *q == '0'; ++q) {
45 }
46 const char *firstDigit{q};
47 for (; q != end && *q >= '0' && *q <= '9'; ++q) {
48 }
49 const char *point{nullptr};
50 if (q != end && *q == '.') {
51 point = q;
52 for (++q; q != end && *q >= '0' && *q <= '9'; ++q) {
53 }
54 }
55 if (q == start || (q == start + 1 && start == point)) {
56 return false; // require at least one digit
57 }
58 // There's a valid number here; set the reference argument to point to
59 // the first character afterward, which might be an exponent part.
60 p = q;
61 // Strip off trailing zeroes
62 if (point) {
63 while (q[-1] == '0') {
64 --q;
65 }
66 if (q[-1] == '.') {
67 point = nullptr;
68 --q;
69 }
70 }
71 if (!point) {
72 while (q > firstDigit && q[-1] == '0') {
73 --q;
74 ++exponent_;
75 }
76 }
77 // Trim any excess digits
78 const char *limit{firstDigit + maxDigits * log10Radix + (point != nullptr)};
79 if (q > limit) {
80 inexact = true;
81 if (point >= limit) {
82 q = point;
83 point = nullptr;
84 }
85 if (!point) {
86 exponent_ += q - limit;
87 }
88 q = limit;
89 }
90 if (point) {
91 exponent_ -= static_cast<int>(q - point - 1);
92 }
93 if (q == firstDigit) {
94 exponent_ = 0; // all zeros
95 }
96 // Rack the decimal digits up into big Digits.
97 for (auto times{radix}; q-- > firstDigit;) {
98 if (*q != '.') {
99 if (times == radix) {
100 digit_[digits_++] = *q - '0';
101 times = 10;
102 } else {
103 digit_[digits_ - 1] += times * (*q - '0');
104 times *= 10;
105 }
106 }
107 }
108 // Look for an optional exponent field.
109 if (p == end) {
110 return true;
111 }
112 q = p;
113 switch (*q) {
114 case 'e':
115 case 'E':
116 case 'd':
117 case 'D':
118 case 'q':
119 case 'Q': {
120 if (++q == end) {
121 break;
122 }
123 bool negExpo{*q == '-'};
124 if (*q == '-' || *q == '+') {
125 ++q;
126 }
127 if (q != end && *q >= '0' && *q <= '9') {
128 int expo{0};
129 for (; q != end && *q == '0'; ++q) {
130 }
131 const char *expDig{q};
132 for (; q != end && *q >= '0' && *q <= '9'; ++q) {
133 expo = 10 * expo + *q - '0';
134 }
135 if (q >= expDig + 8) {
136 // There's a ridiculous number of nonzero exponent digits.
137 // The decimal->binary conversion routine will cope with
138 // returning 0 or Inf, but we must ensure that "expo" didn't
139 // overflow back around to something legal.
140 expo = 10 * Real::decimalRange;
141 exponent_ = 0;
142 }
143 p = q; // exponent is valid; advance the termination pointer
144 if (negExpo) {
145 exponent_ -= expo;
146 } else {
147 exponent_ += expo;
148 }
149 }
150 } break;
151 default:
152 break;
153 }
154 return true;
155}
156
157template <int PREC, int LOG10RADIX>
158void BigRadixFloatingPointNumber<PREC,
159 LOG10RADIX>::LoseLeastSignificantDigit() {
160 Digit LSD{digit_[0]};
161 for (int j{0}; j < digits_ - 1; ++j) {
162 digit_[j] = digit_[j + 1];
163 }
164 digit_[digits_ - 1] = 0;
165 bool incr{false};
166 switch (rounding_) {
167 case RoundNearest:
168 incr = LSD > radix / 2 || (LSD == radix / 2 && digit_[0] % 2 != 0);
169 break;
170 case RoundUp:
171 incr = LSD > 0 && !isNegative_;
172 break;
173 case RoundDown:
174 incr = LSD > 0 && isNegative_;
175 break;
176 case RoundToZero:
177 break;
178 case RoundCompatible:
179 incr = LSD >= radix / 2;
180 break;
181 }
182 for (int j{0}; (digit_[j] += incr) == radix; ++j) {
183 digit_[j] = 0;
184 }
185}
186
187// This local utility class represents an unrounded nonnegative
188// binary floating-point value with an unbiased (i.e., signed)
189// binary exponent, an integer value (not a fraction) with an implied
190// binary point to its *right*, and some guard bits for rounding.
191template <int PREC> class IntermediateFloat {
192public:
193 static constexpr int precision{PREC};
194 using IntType = common::HostUnsignedIntType<precision>;
195 static constexpr IntType topBit{IntType{1} << (precision - 1)};
196 static constexpr IntType mask{topBit + (topBit - 1)};
197
198 RT_API_ATTRS IntermediateFloat() {}
199 IntermediateFloat(const IntermediateFloat &) = default;
200
201 // Assumes that exponent_ is valid on entry, and may increment it.
202 // Returns the number of guard_ bits that have been determined.
203 template <typename UINT> RT_API_ATTRS bool SetTo(UINT n) {
204 static constexpr int nBits{CHAR_BIT * sizeof n};
205 if constexpr (precision >= nBits) {
206 value_ = n;
207 guard_ = 0;
208 return 0;
209 } else {
210 int shift{common::BitsNeededFor(n) - precision};
211 if (shift <= 0) {
212 value_ = n;
213 guard_ = 0;
214 return 0;
215 } else {
216 value_ = n >> shift;
217 exponent_ += shift;
218 n <<= nBits - shift;
219 guard_ = (n >> (nBits - guardBits)) | ((n << guardBits) != 0);
220 return shift;
221 }
222 }
223 }
224
225 RT_API_ATTRS void ShiftIn(int bit = 0) { value_ = value_ + value_ + bit; }
226 RT_API_ATTRS bool IsFull() const { return value_ >= topBit; }
227 RT_API_ATTRS void AdjustExponent(int by) { exponent_ += by; }
228 RT_API_ATTRS void SetGuard(int g) {
229 guard_ |= (static_cast<GuardType>(g & 6) << (guardBits - 3)) | (g & 1);
230 }
231
232 RT_API_ATTRS ConversionToBinaryResult<PREC> ToBinary(
233 bool isNegative, FortranRounding) const;
234
235private:
236 static constexpr int guardBits{3}; // guard, round, sticky
237 using GuardType = int;
238 static constexpr GuardType oneHalf{GuardType{1} << (guardBits - 1)};
239
240 IntType value_{0};
241 GuardType guard_{0};
242 int exponent_{0};
243};
244
245// The standard says that these overflow cases round to "representable"
246// numbers, and some popular compilers interpret that to mean +/-HUGE()
247// rather than +/-Inf.
248static inline RT_API_ATTRS constexpr bool RoundOverflowToHuge(
249 enum FortranRounding rounding, bool isNegative) {
250 return rounding == RoundToZero || (!isNegative && rounding == RoundDown) ||
251 (isNegative && rounding == RoundUp);
252}
253
254template <int PREC>
255ConversionToBinaryResult<PREC> IntermediateFloat<PREC>::ToBinary(
256 bool isNegative, FortranRounding rounding) const {
257 using Binary = BinaryFloatingPointNumber<PREC>;
258 // Create a fraction with a binary point to the left of the integer
259 // value_, and bias the exponent.
260 IntType fraction{value_};
261 GuardType guard{guard_};
262 int expo{exponent_ + Binary::exponentBias + (precision - 1)};
263 while (expo < 1 && (fraction > 0 || guard > oneHalf)) {
264 guard = (guard & 1) | (guard >> 1) |
265 ((static_cast<GuardType>(fraction) & 1) << (guardBits - 1));
266 fraction >>= 1;
267 ++expo;
268 }
269 int flags{Exact};
270 if (guard != 0) {
271 flags |= Inexact;
272 }
273 if (fraction == 0) {
274 if (guard <= oneHalf) {
275 if ((!isNegative && rounding == RoundUp) ||
276 (isNegative && rounding == RoundDown)) {
277 // round to least nonzero value
278 expo = 0;
279 } else { // round to zero
280 if (guard != 0) {
281 flags |= Underflow;
282 }
283 Binary zero;
284 if (isNegative) {
285 zero.Negate();
286 }
287 return {
288 std::move(zero), static_cast<enum ConversionResultFlags>(flags)};
289 }
290 }
291 } else {
292 // The value is nonzero; normalize it.
293 while (fraction < topBit && expo > 1) {
294 --expo;
295 fraction = fraction * 2 + (guard >> (guardBits - 2));
296 guard =
297 (((guard >> (guardBits - 2)) & 1) << (guardBits - 1)) | (guard & 1);
298 }
299 }
300 // Apply rounding
301 bool incr{false};
302 switch (rounding) {
303 case RoundNearest:
304 incr = guard > oneHalf || (guard == oneHalf && (fraction & 1));
305 break;
306 case RoundUp:
307 incr = guard != 0 && !isNegative;
308 break;
309 case RoundDown:
310 incr = guard != 0 && isNegative;
311 break;
312 case RoundToZero:
313 break;
314 case RoundCompatible:
315 incr = guard >= oneHalf;
316 break;
317 }
318 if (incr) {
319 if (fraction == mask) {
320 // rounding causes a carry
321 ++expo;
322 fraction = topBit;
323 } else {
324 ++fraction;
325 }
326 }
327 if (expo == 1 && fraction < topBit) {
328 expo = 0; // subnormal
329 flags |= Underflow;
330 } else if (expo == 0) {
331 flags |= Underflow;
332 } else if (expo >= Binary::maxExponent) {
333 if (RoundOverflowToHuge(rounding, isNegative)) {
334 expo = Binary::maxExponent - 1;
335 fraction = mask;
336 } else { // Inf
337 expo = Binary::maxExponent;
338 flags |= Overflow;
339 if constexpr (Binary::bits == 80) { // x87
340 fraction = IntType{1} << 63;
341 } else {
342 fraction = 0;
343 }
344 }
345 }
346 using Raw = typename Binary::RawType;
347 Raw raw = static_cast<Raw>(isNegative) << (Binary::bits - 1);
348 raw |= static_cast<Raw>(expo) << Binary::significandBits;
349 if constexpr (Binary::isImplicitMSB) {
350 fraction &= ~topBit;
351 }
352 raw |= fraction;
353 return {Binary(raw), static_cast<enum ConversionResultFlags>(flags)};
354}
355
356template <int PREC, int LOG10RADIX>
357ConversionToBinaryResult<PREC>
358BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary() {
359 // On entry, *this holds a multi-precision integer value in a radix of a
360 // large power of ten. Its radix point is defined to be to the right of its
361 // digits, and "exponent_" is the power of ten by which it is to be scaled.
362 Normalize();
363 if (digits_ == 0) { // zero value
364 return {Real{SignBit()}};
365 }
366 // The value is not zero: x = D. * 10.**E
367 // Shift our perspective on the radix (& decimal) point so that
368 // it sits to the *left* of the digits: i.e., x = .D * 10.**E
369 exponent_ += digits_ * log10Radix;
370 // Sanity checks for ridiculous exponents
371 static constexpr int crazy{2 * Real::decimalRange + log10Radix};
372 if (exponent_ < -crazy) {
373 enum ConversionResultFlags flags {
374 static_cast<enum ConversionResultFlags>(Inexact | Underflow)
375 };
376 if ((!isNegative_ && rounding_ == RoundUp) ||
377 (isNegative_ && rounding_ == RoundDown)) {
378 // return least nonzero value
379 return {Real{Raw{1} | SignBit()}, flags};
380 } else { // underflow to +/-0.
381 return {Real{SignBit()}, flags};
382 }
383 } else if (exponent_ > crazy) { // overflow to +/-HUGE() or +/-Inf
384 if (RoundOverflowToHuge(rounding_, isNegative_)) {
385 return {Real{HUGE()}};
386 } else {
387 return {Real{Infinity()}, Overflow};
388 }
389 }
390 // Apply any negative decimal exponent by multiplication
391 // by a power of two, adjusting the binary exponent to compensate.
392 IntermediateFloat<PREC> f;
393 while (exponent_ < log10Radix) {
394 // x = 0.D * 10.**E * 2.**(f.ex) -> 512 * 0.D * 10.**E * 2.**(f.ex-9)
395 f.AdjustExponent(-9);
396 digitLimit_ = digits_;
397 if (int carry{MultiplyWithoutNormalization<512>()}) {
398 // x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex)
399 PushCarry(carry);
400 exponent_ += log10Radix;
401 }
402 }
403 // Apply any positive decimal exponent greater than
404 // is needed to treat the topmost digit as an integer
405 // part by multiplying by 10 or 10000 repeatedly.
406 while (exponent_ > log10Radix) {
407 digitLimit_ = digits_;
408 int carry;
409 if (exponent_ >= log10Radix + 4) {
410 // x = 0.D * 10.**E * 2.**(f.ex) -> 625 * .D * 10.**(E-4) * 2.**(f.ex+4)
411 exponent_ -= 4;
412 carry = MultiplyWithoutNormalization<(5 * 5 * 5 * 5)>();
413 f.AdjustExponent(4);
414 } else {
415 // x = 0.D * 10.**E * 2.**(f.ex) -> 5 * .D * 10.**(E-1) * 2.**(f.ex+1)
416 --exponent_;
417 carry = MultiplyWithoutNormalization<5>();
418 f.AdjustExponent(1);
419 }
420 if (carry != 0) {
421 // x = c.D * 10.**E * 2.**(f.ex) -> .cD * 10.**(E+16) * 2.**(f.ex)
422 PushCarry(carry);
423 exponent_ += log10Radix;
424 }
425 }
426 // So exponent_ is now log10Radix, meaning that the
427 // MSD can be taken as an integer part and transferred
428 // to the binary result.
429 // x = .jD * 10.**16 * 2.**(f.ex) -> .D * j * 2.**(f.ex)
430 int guardShift{f.SetTo(digit_[--digits_])};
431 // Transfer additional bits until the result is normal.
432 digitLimit_ = digits_;
433 while (!f.IsFull()) {
434 // x = ((b.D)/2) * j * 2.**(f.ex) -> .D * (2j + b) * 2.**(f.ex-1)
435 f.AdjustExponent(-1);
436 std::uint32_t carry = MultiplyWithoutNormalization<2>();
437 f.ShiftIn(carry);
438 }
439 // Get the next few bits for rounding. Allow for some guard bits
440 // that may have already been set in f.SetTo() above.
441 int guard{0};
442 if (guardShift == 0) {
443 guard = MultiplyWithoutNormalization<4>();
444 } else if (guardShift == 1) {
445 guard = MultiplyWithoutNormalization<2>();
446 }
447 guard = guard + guard + !IsZero();
448 f.SetGuard(guard);
449 return f.ToBinary(isNegative_, rounding_);
450}
451
452template <int PREC, int LOG10RADIX>
453ConversionToBinaryResult<PREC>
454BigRadixFloatingPointNumber<PREC, LOG10RADIX>::ConvertToBinary(
455 const char *&p, const char *limit) {
456 bool inexact{false};
457 if (ParseNumber(p, inexact, limit)) {
458 auto result{ConvertToBinary()};
459 if (inexact) {
460 result.flags =
461 static_cast<enum ConversionResultFlags>(result.flags | Inexact);
462 }
463 return result;
464 } else {
465 // Could not parse a decimal floating-point number. p has been
466 // advanced over any leading spaces. Most Fortran compilers set
467 // the sign bit for -NaN.
468 const char *q{p};
469 if (!limit || q < limit) {
470 isNegative_ = *q == '-';
471 if (isNegative_ || *q == '+') {
472 ++q;
473 }
474 }
475 if ((!limit || limit >= q + 3) && runtime::toupper(q[0]) == 'N' &&
476 runtime::toupper(q[1]) == 'A' && runtime::toupper(q[2]) == 'N') {
477 // NaN
478 p = q + 3;
479 bool isQuiet{true};
480 if ((!limit || p < limit) && *p == '(') {
481 int depth{1};
482 do {
483 ++p;
484 if (limit && p >= limit) {
485 // Invalid input
486 return {Real{NaN(false)}, Invalid};
487 } else if (*p == '(') {
488 ++depth;
489 } else if (*p == ')') {
490 --depth;
491 } else if (*p != ' ') {
492 // Implementation dependent, but other compilers
493 // all return quiet NaNs.
494 }
495 } while (depth > 0);
496 ++p;
497 }
498 return {Real{NaN(isQuiet)}};
499 } else { // Inf?
500 if ((!limit || limit >= q + 3) && runtime::toupper(q[0]) == 'I' &&
501 runtime::toupper(q[1]) == 'N' && runtime::toupper(q[2]) == 'F') {
502 if ((!limit || limit >= q + 8) && runtime::toupper(q[3]) == 'I' &&
503 runtime::toupper(q[4]) == 'N' && runtime::toupper(q[5]) == 'I' &&
504 runtime::toupper(q[6]) == 'T' && runtime::toupper(q[7]) == 'Y') {
505 p = q + 8;
506 } else {
507 p = q + 3;
508 }
509 return {Real{Infinity()}};
510 } else {
511 // Invalid input
512 return {Real{NaN()}, Invalid};
513 }
514 }
515 }
516}
517
518template <int PREC>
519ConversionToBinaryResult<PREC> ConvertToBinary(
520 const char *&p, enum FortranRounding rounding, const char *end) {
521 return BigRadixFloatingPointNumber<PREC>{rounding}.ConvertToBinary(p, end);
522}
523
524template ConversionToBinaryResult<8> ConvertToBinary<8>(
525 const char *&, enum FortranRounding, const char *end);
526template ConversionToBinaryResult<11> ConvertToBinary<11>(
527 const char *&, enum FortranRounding, const char *end);
528template ConversionToBinaryResult<24> ConvertToBinary<24>(
529 const char *&, enum FortranRounding, const char *end);
530template ConversionToBinaryResult<53> ConvertToBinary<53>(
531 const char *&, enum FortranRounding, const char *end);
532template ConversionToBinaryResult<64> ConvertToBinary<64>(
533 const char *&, enum FortranRounding, const char *end);
534template ConversionToBinaryResult<113> ConvertToBinary<113>(
535 const char *&, enum FortranRounding, const char *end);
536
537extern "C" {
538RT_EXT_API_GROUP_BEGIN
539
540enum ConversionResultFlags ConvertDecimalToFloat(
541 const char **p, float *f, enum FortranRounding rounding) {
542 auto result{Fortran::decimal::ConvertToBinary<24>(*p, rounding)};
543 std::memcpy(dest: reinterpret_cast<void *>(f),
544 src: reinterpret_cast<const void *>(&result.binary), n: sizeof *f);
545 return result.flags;
546}
547enum ConversionResultFlags ConvertDecimalToDouble(
548 const char **p, double *d, enum FortranRounding rounding) {
549 auto result{Fortran::decimal::ConvertToBinary<53>(*p, rounding)};
550 std::memcpy(dest: reinterpret_cast<void *>(d),
551 src: reinterpret_cast<const void *>(&result.binary), n: sizeof *d);
552 return result.flags;
553}
554enum ConversionResultFlags ConvertDecimalToLongDouble(
555 const char **p, long double *ld, enum FortranRounding rounding) {
556 auto result{Fortran::decimal::ConvertToBinary<64>(*p, rounding)};
557 std::memcpy(dest: reinterpret_cast<void *>(ld),
558 src: reinterpret_cast<const void *>(&result.binary), n: sizeof *ld);
559 return result.flags;
560}
561
562RT_EXT_API_GROUP_END
563} // extern "C"
564} // namespace Fortran::decimal
565

source code of flang/lib/Decimal/decimal-to-binary.cpp