1//===-- runtime/edit-input.cpp --------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "edit-input.h"
10#include "namelist.h"
11#include "utf.h"
12#include "flang/Common/optional.h"
13#include "flang/Common/real.h"
14#include "flang/Common/uint128.h"
15#include "flang/Runtime/freestanding-tools.h"
16#include <algorithm>
17#include <cfenv>
18
19namespace Fortran::runtime::io {
20RT_OFFLOAD_API_GROUP_BEGIN
21
22// Checks that a list-directed input value has been entirely consumed and
23// doesn't contain unparsed characters before the next value separator.
24static inline RT_API_ATTRS bool IsCharValueSeparator(
25 const DataEdit &edit, char32_t ch) {
26 char32_t comma{
27 edit.modes.editingFlags & decimalComma ? char32_t{';'} : char32_t{','}};
28 return ch == ' ' || ch == '\t' || ch == comma || ch == '/' ||
29 (edit.IsNamelist() && (ch == '&' || ch == '$'));
30}
31
32static RT_API_ATTRS bool CheckCompleteListDirectedField(
33 IoStatementState &io, const DataEdit &edit) {
34 if (edit.IsListDirected()) {
35 std::size_t byteCount;
36 if (auto ch{io.GetCurrentChar(byteCount)}) {
37 if (IsCharValueSeparator(edit, *ch)) {
38 return true;
39 } else {
40 const auto &connection{io.GetConnectionState()};
41 io.GetIoErrorHandler().SignalError(IostatBadListDirectedInputSeparator,
42 "invalid character (0x%x) after list-directed input value, "
43 "at column %d in record %d",
44 static_cast<unsigned>(*ch),
45 static_cast<int>(connection.positionInRecord + 1),
46 static_cast<int>(connection.currentRecordNumber));
47 return false;
48 }
49 } else {
50 return true; // end of record: ok
51 }
52 } else {
53 return true;
54 }
55}
56
57template <int LOG2_BASE>
58static RT_API_ATTRS bool EditBOZInput(
59 IoStatementState &io, const DataEdit &edit, void *n, std::size_t bytes) {
60 // Skip leading white space & zeroes
61 Fortran::common::optional<int> remaining{io.CueUpInput(edit)};
62 auto start{io.GetConnectionState().positionInRecord};
63 Fortran::common::optional<char32_t> next{io.NextInField(remaining, edit)};
64 if (next.value_or('?') == '0') {
65 do {
66 start = io.GetConnectionState().positionInRecord;
67 next = io.NextInField(remaining, edit);
68 } while (next && *next == '0');
69 }
70 // Count significant digits after any leading white space & zeroes
71 int digits{0};
72 int significantBits{0};
73 for (; next; next = io.NextInField(remaining, edit)) {
74 char32_t ch{*next};
75 if (ch == ' ' || ch == '\t') {
76 if (edit.modes.editingFlags & blankZero) {
77 ch = '0'; // BZ mode - treat blank as if it were zero
78 } else {
79 continue;
80 }
81 }
82 if (ch >= '0' && ch <= '1') {
83 } else if (LOG2_BASE >= 3 && ch >= '2' && ch <= '7') {
84 } else if (LOG2_BASE >= 4 && ch >= '8' && ch <= '9') {
85 } else if (LOG2_BASE >= 4 && ch >= 'A' && ch <= 'F') {
86 } else if (LOG2_BASE >= 4 && ch >= 'a' && ch <= 'f') {
87 } else if (ch == ',') {
88 break; // end non-list-directed field early
89 } else {
90 io.GetIoErrorHandler().SignalError(
91 "Bad character '%lc' in B/O/Z input field", ch);
92 return false;
93 }
94 if (digits++ == 0) {
95 significantBits = 4;
96 if (ch >= '0' && ch <= '1') {
97 significantBits = 1;
98 } else if (ch >= '2' && ch <= '3') {
99 significantBits = 2;
100 } else if (ch >= '4' && ch <= '7') {
101 significantBits = 3;
102 } else {
103 significantBits = 4;
104 }
105 } else {
106 significantBits += LOG2_BASE;
107 }
108 }
109 auto significantBytes{static_cast<std::size_t>(significantBits + 7) / 8};
110 if (significantBytes > bytes) {
111 io.GetIoErrorHandler().SignalError(IostatBOZInputOverflow,
112 "B/O/Z input of %d digits overflows %zd-byte variable", digits, bytes);
113 return false;
114 }
115 // Reset to start of significant digits
116 io.HandleAbsolutePosition(start);
117 remaining.reset();
118 // Make a second pass now that the digit count is known
119 std::memset(n, 0, bytes);
120 int increment{isHostLittleEndian ? -1 : 1};
121 auto *data{reinterpret_cast<unsigned char *>(n) +
122 (isHostLittleEndian ? significantBytes - 1 : 0)};
123 int shift{((digits - 1) * LOG2_BASE) & 7};
124 while (digits > 0) {
125 char32_t ch{*io.NextInField(remaining, edit)};
126 int digit{0};
127 if (ch == ' ' || ch == '\t') {
128 if (edit.modes.editingFlags & blankZero) {
129 ch = '0'; // BZ mode - treat blank as if it were zero
130 } else {
131 continue;
132 }
133 }
134 --digits;
135 if (ch >= '0' && ch <= '9') {
136 digit = ch - '0';
137 } else if (ch >= 'A' && ch <= 'F') {
138 digit = ch + 10 - 'A';
139 } else if (ch >= 'a' && ch <= 'f') {
140 digit = ch + 10 - 'a';
141 } else {
142 continue;
143 }
144 if (shift < 0) {
145 if (shift + LOG2_BASE > 0) { // misaligned octal
146 *data |= digit >> -shift;
147 }
148 shift += 8;
149 data += increment;
150 }
151 *data |= digit << shift;
152 shift -= LOG2_BASE;
153 }
154 return CheckCompleteListDirectedField(io, edit);
155}
156
157static inline RT_API_ATTRS char32_t GetRadixPointChar(const DataEdit &edit) {
158 return edit.modes.editingFlags & decimalComma ? char32_t{','} : char32_t{'.'};
159}
160
161// Prepares input from a field, and returns the sign, if any, else '\0'.
162static RT_API_ATTRS char ScanNumericPrefix(IoStatementState &io,
163 const DataEdit &edit, Fortran::common::optional<char32_t> &next,
164 Fortran::common::optional<int> &remaining) {
165 remaining = io.CueUpInput(edit);
166 next = io.NextInField(remaining, edit);
167 char sign{'\0'};
168 if (next) {
169 if (*next == '-' || *next == '+') {
170 sign = *next;
171 if (!edit.IsListDirected()) {
172 io.SkipSpaces(remaining);
173 }
174 next = io.NextInField(remaining, edit);
175 }
176 }
177 return sign;
178}
179
180RT_API_ATTRS bool EditIntegerInput(
181 IoStatementState &io, const DataEdit &edit, void *n, int kind) {
182 RUNTIME_CHECK(io.GetIoErrorHandler(), kind >= 1 && !(kind & (kind - 1)));
183 switch (edit.descriptor) {
184 case DataEdit::ListDirected:
185 if (IsNamelistNameOrSlash(io)) {
186 return false;
187 }
188 break;
189 case 'G':
190 case 'I':
191 break;
192 case 'B':
193 return EditBOZInput<1>(io, edit, n, kind);
194 case 'O':
195 return EditBOZInput<3>(io, edit, n, kind);
196 case 'Z':
197 return EditBOZInput<4>(io, edit, n, kind);
198 case 'A': // legacy extension
199 return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), kind);
200 default:
201 io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
202 "Data edit descriptor '%c' may not be used with an INTEGER data item",
203 edit.descriptor);
204 return false;
205 }
206 Fortran::common::optional<int> remaining;
207 Fortran::common::optional<char32_t> next;
208 char sign{ScanNumericPrefix(io, edit, next, remaining)};
209 common::UnsignedInt128 value{0};
210 bool any{!!sign};
211 bool overflow{false};
212 for (; next; next = io.NextInField(remaining, edit)) {
213 char32_t ch{*next};
214 if (ch == ' ' || ch == '\t') {
215 if (edit.modes.editingFlags & blankZero) {
216 ch = '0'; // BZ mode - treat blank as if it were zero
217 } else {
218 continue;
219 }
220 }
221 int digit{0};
222 if (ch >= '0' && ch <= '9') {
223 digit = ch - '0';
224 } else if (ch == ',') {
225 break; // end non-list-directed field early
226 } else {
227 io.GetIoErrorHandler().SignalError(
228 "Bad character '%lc' in INTEGER input field", ch);
229 return false;
230 }
231 static constexpr auto maxu128{~common::UnsignedInt128{0}};
232 static constexpr auto maxu128OverTen{maxu128 / 10};
233 static constexpr int maxLastDigit{
234 static_cast<int>(maxu128 - (maxu128OverTen * 10))};
235 overflow |= value >= maxu128OverTen &&
236 (value > maxu128OverTen || digit > maxLastDigit);
237 value *= 10;
238 value += digit;
239 any = true;
240 }
241 if (!any && !remaining) {
242 io.GetIoErrorHandler().SignalError(
243 "Integer value absent from NAMELIST or list-directed input");
244 return false;
245 }
246 auto maxForKind{common::UnsignedInt128{1} << ((8 * kind) - 1)};
247 overflow |= value >= maxForKind && (value > maxForKind || sign != '-');
248 if (overflow) {
249 io.GetIoErrorHandler().SignalError(IostatIntegerInputOverflow,
250 "Decimal input overflows INTEGER(%d) variable", kind);
251 return false;
252 }
253 if (sign == '-') {
254 value = -value;
255 }
256 if (any || !io.GetIoErrorHandler().InError()) {
257 // The value is stored in the lower order bits on big endian platform.
258 // When memcpy, shift the value to the higher order bit.
259 auto shft{static_cast<int>(sizeof(value.low())) - kind};
260 // For kind==8 (i.e. shft==0), the value is stored in low_ in big endian.
261 if (!isHostLittleEndian && shft >= 0) {
262 auto l{value.low() << (8 * shft)};
263 std::memcpy(n, &l, kind);
264 } else {
265 std::memcpy(n, &value, kind); // a blank field means zero
266 }
267 return true;
268 } else {
269 return false;
270 }
271}
272
273// Parses a REAL input number from the input source as a normalized
274// fraction into a supplied buffer -- there's an optional '-', a
275// decimal point when the input is not hexadecimal, and at least one
276// digit. Replaces blanks with zeroes where appropriate.
277struct ScannedRealInput {
278 // Number of characters that (should) have been written to the
279 // buffer -- this can be larger than the buffer size, which
280 // indicates buffer overflow. Zero indicates an error.
281 int got{0};
282 int exponent{0}; // adjusted as necessary; binary if isHexadecimal
283 bool isHexadecimal{false}; // 0X...
284};
285static RT_API_ATTRS ScannedRealInput ScanRealInput(
286 char *buffer, int bufferSize, IoStatementState &io, const DataEdit &edit) {
287 Fortran::common::optional<int> remaining;
288 Fortran::common::optional<char32_t> next;
289 int got{0};
290 Fortran::common::optional<int> radixPointOffset;
291 // The following lambda definition violates the conding style,
292 // but cuda-11.8 nvcc hits an internal error with the brace initialization.
293 auto Put = [&](char ch) -> void {
294 if (got < bufferSize) {
295 buffer[got] = ch;
296 }
297 ++got;
298 };
299 char sign{ScanNumericPrefix(io, edit, next, remaining)};
300 if (sign == '-') {
301 Put('-');
302 }
303 bool bzMode{(edit.modes.editingFlags & blankZero) != 0};
304 int exponent{0};
305 if (!next || (!bzMode && *next == ' ') ||
306 (!(edit.modes.editingFlags & decimalComma) && *next == ',')) {
307 if (!edit.IsListDirected() && !io.GetConnectionState().IsAtEOF()) {
308 // An empty/blank field means zero when not list-directed.
309 // A fixed-width field containing only a sign is also zero;
310 // this behavior isn't standard-conforming in F'2023 but it is
311 // required to pass FCVS.
312 Put('0');
313 }
314 return {got, exponent, false};
315 }
316 char32_t radixPointChar{GetRadixPointChar(edit)};
317 char32_t first{*next >= 'a' && *next <= 'z' ? *next + 'A' - 'a' : *next};
318 bool isHexadecimal{false};
319 if (first == 'N' || first == 'I') {
320 // NaN or infinity - convert to upper case
321 // Subtle: a blank field of digits could be followed by 'E' or 'D',
322 for (; next &&
323 ((*next >= 'a' && *next <= 'z') || (*next >= 'A' && *next <= 'Z'));
324 next = io.NextInField(remaining, edit)) {
325 if (*next >= 'a' && *next <= 'z') {
326 Put(*next - 'a' + 'A');
327 } else {
328 Put(*next);
329 }
330 }
331 if (next && *next == '(') { // NaN(...)
332 Put('(');
333 int depth{1};
334 while (true) {
335 next = io.NextInField(remaining, edit);
336 if (depth == 0) {
337 break;
338 } else if (!next) {
339 return {}; // error
340 } else if (*next == '(') {
341 ++depth;
342 } else if (*next == ')') {
343 --depth;
344 }
345 Put(*next);
346 }
347 }
348 } else if (first == radixPointChar || (first >= '0' && first <= '9') ||
349 (bzMode && (first == ' ' || first == '\t')) || first == 'E' ||
350 first == 'D' || first == 'Q') {
351 if (first == '0') {
352 next = io.NextInField(remaining, edit);
353 if (next && (*next == 'x' || *next == 'X')) { // 0X...
354 isHexadecimal = true;
355 next = io.NextInField(remaining, edit);
356 } else {
357 Put('0');
358 }
359 }
360 // input field is normalized to a fraction
361 if (!isHexadecimal) {
362 Put('.');
363 }
364 auto start{got};
365 for (; next; next = io.NextInField(remaining, edit)) {
366 char32_t ch{*next};
367 if (ch == ' ' || ch == '\t') {
368 if (isHexadecimal) {
369 return {}; // error
370 } else if (bzMode) {
371 ch = '0'; // BZ mode - treat blank as if it were zero
372 } else {
373 continue; // ignore blank in fixed field
374 }
375 }
376 if (ch == '0' && got == start && !radixPointOffset) {
377 // omit leading zeroes before the radix point
378 } else if (ch >= '0' && ch <= '9') {
379 Put(ch);
380 } else if (ch == radixPointChar && !radixPointOffset) {
381 // The radix point character is *not* copied to the buffer.
382 radixPointOffset = got - start; // # of digits before the radix point
383 } else if (isHexadecimal && ch >= 'A' && ch <= 'F') {
384 Put(ch);
385 } else if (isHexadecimal && ch >= 'a' && ch <= 'f') {
386 Put(ch - 'a' + 'A'); // normalize to capitals
387 } else {
388 break;
389 }
390 }
391 if (got == start) {
392 // Nothing but zeroes and maybe a radix point. F'2018 requires
393 // at least one digit, but F'77 did not, and a bare "." shows up in
394 // the FCVS suite.
395 Put('0'); // emit at least one digit
396 }
397 // In list-directed input, a bad exponent is not consumed.
398 auto nextBeforeExponent{next};
399 auto startExponent{io.GetConnectionState().positionInRecord};
400 bool hasGoodExponent{false};
401 if (next) {
402 if (isHexadecimal) {
403 if (*next == 'p' || *next == 'P') {
404 next = io.NextInField(remaining, edit);
405 } else {
406 // The binary exponent is not optional in the standard.
407 return {}; // error
408 }
409 } else if (*next == 'e' || *next == 'E' || *next == 'd' || *next == 'D' ||
410 *next == 'q' || *next == 'Q') {
411 // Optional exponent letter. Blanks are allowed between the
412 // optional exponent letter and the exponent value.
413 io.SkipSpaces(remaining);
414 next = io.NextInField(remaining, edit);
415 }
416 }
417 if (next &&
418 (*next == '-' || *next == '+' || (*next >= '0' && *next <= '9') ||
419 *next == ' ' || *next == '\t')) {
420 bool negExpo{*next == '-'};
421 if (negExpo || *next == '+') {
422 next = io.NextInField(remaining, edit);
423 }
424 for (; next; next = io.NextInField(remaining, edit)) {
425 if (*next >= '0' && *next <= '9') {
426 hasGoodExponent = true;
427 if (exponent < 10000) {
428 exponent = 10 * exponent + *next - '0';
429 }
430 } else if (*next == ' ' || *next == '\t') {
431 if (isHexadecimal) {
432 break;
433 } else if (bzMode) {
434 hasGoodExponent = true;
435 exponent = 10 * exponent;
436 }
437 } else {
438 break;
439 }
440 }
441 if (negExpo) {
442 exponent = -exponent;
443 }
444 }
445 if (!hasGoodExponent) {
446 if (isHexadecimal) {
447 return {}; // error
448 }
449 // There isn't a good exponent; do not consume it.
450 next = nextBeforeExponent;
451 io.HandleAbsolutePosition(startExponent);
452 // The default exponent is -kP, but the scale factor doesn't affect
453 // an explicit exponent.
454 exponent = -edit.modes.scale;
455 }
456 // Adjust exponent by number of digits before the radix point.
457 if (isHexadecimal) {
458 // Exponents for hexadecimal input are binary.
459 exponent += radixPointOffset.value_or(got - start) * 4;
460 } else if (radixPointOffset) {
461 exponent += *radixPointOffset;
462 } else {
463 // When no redix point (or comma) appears in the value, the 'd'
464 // part of the edit descriptor must be interpreted as the number of
465 // digits in the value to be interpreted as being to the *right* of
466 // the assumed radix point (13.7.2.3.2)
467 exponent += got - start - edit.digits.value_or(0);
468 }
469 }
470 // Consume the trailing ')' of a list-directed or NAMELIST complex
471 // input value.
472 if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
473 if (next && (*next == ' ' || *next == '\t')) {
474 io.SkipSpaces(remaining);
475 next = io.NextInField(remaining, edit);
476 }
477 if (!next) { // NextInField fails on separators like ')'
478 std::size_t byteCount{0};
479 next = io.GetCurrentChar(byteCount);
480 if (next && *next == ')') {
481 io.HandleRelativePosition(byteCount);
482 }
483 }
484 } else if (remaining) {
485 while (next && (*next == ' ' || *next == '\t')) {
486 next = io.NextInField(remaining, edit);
487 }
488 if (next && (*next != ',' || (edit.modes.editingFlags & decimalComma))) {
489 return {}; // error: unused nonblank character in fixed-width field
490 }
491 }
492 return {got, exponent, isHexadecimal};
493}
494
495static RT_API_ATTRS void RaiseFPExceptions(
496 decimal::ConversionResultFlags flags) {
497#undef RAISE
498#if defined(RT_DEVICE_COMPILATION)
499 Terminator terminator(__FILE__, __LINE__);
500#define RAISE(e) \
501 terminator.Crash( \
502 "not implemented yet: raising FP exception in device code: %s", #e);
503#else // !defined(RT_DEVICE_COMPILATION)
504#ifdef feraisexcept // a macro in some environments; omit std::
505#define RAISE feraiseexcept
506#else
507#define RAISE std::feraiseexcept
508#endif
509#endif // !defined(RT_DEVICE_COMPILATION)
510 if (flags & decimal::ConversionResultFlags::Overflow) {
511 RAISE(FE_OVERFLOW);
512 }
513 if (flags & decimal::ConversionResultFlags::Underflow) {
514 RAISE(FE_UNDERFLOW);
515 }
516 if (flags & decimal::ConversionResultFlags::Inexact) {
517 RAISE(FE_INEXACT);
518 }
519 if (flags & decimal::ConversionResultFlags::Invalid) {
520 RAISE(FE_INVALID);
521 }
522#undef RAISE
523}
524
525// If no special modes are in effect and the form of the input value
526// that's present in the input stream is acceptable to the decimal->binary
527// converter without modification, this fast path for real input
528// saves time by avoiding memory copies and reformatting of the exponent.
529template <int PRECISION>
530static RT_API_ATTRS bool TryFastPathRealDecimalInput(
531 IoStatementState &io, const DataEdit &edit, void *n) {
532 if (edit.modes.editingFlags & (blankZero | decimalComma)) {
533 return false;
534 }
535 if (edit.modes.scale != 0) {
536 return false;
537 }
538 const ConnectionState &connection{io.GetConnectionState()};
539 if (connection.internalIoCharKind > 1) {
540 return false; // reading non-default character
541 }
542 const char *str{nullptr};
543 std::size_t got{io.GetNextInputBytes(str)};
544 if (got == 0 || str == nullptr || !connection.recordLength.has_value()) {
545 return false; // could not access reliably-terminated input stream
546 }
547 const char *p{str};
548 std::int64_t maxConsume{
549 std::min<std::int64_t>(got, edit.width.value_or(got))};
550 const char *limit{str + maxConsume};
551 decimal::ConversionToBinaryResult<PRECISION> converted{
552 decimal::ConvertToBinary<PRECISION>(p, edit.modes.round, limit)};
553 if (converted.flags & (decimal::Invalid | decimal::Overflow)) {
554 return false;
555 }
556 if (edit.digits.value_or(0) != 0) {
557 // Edit descriptor is Fw.d (or other) with d != 0, which
558 // implies scaling
559 const char *q{str};
560 for (; q < limit; ++q) {
561 if (*q == '.' || *q == 'n' || *q == 'N') {
562 break;
563 }
564 }
565 if (q == limit) {
566 // No explicit decimal point, and not NaN/Inf.
567 return false;
568 }
569 }
570 if (edit.descriptor == DataEdit::ListDirectedImaginaryPart) {
571 // Need to consume a trailing ')', possibly with leading spaces
572 for (; p < limit && (*p == ' ' || *p == '\t'); ++p) {
573 }
574 if (p < limit && *p == ')') {
575 ++p;
576 } else {
577 return false;
578 }
579 } else if (edit.IsListDirected()) {
580 if (p < limit && !IsCharValueSeparator(edit, ch: *p)) {
581 return false;
582 }
583 } else {
584 for (; p < limit && (*p == ' ' || *p == '\t'); ++p) {
585 }
586 if (edit.width && p < str + *edit.width) {
587 return false; // unconverted characters remain in fixed width field
588 }
589 }
590 // Success on the fast path!
591 *reinterpret_cast<decimal::BinaryFloatingPointNumber<PRECISION> *>(n) =
592 converted.binary;
593 io.HandleRelativePosition(p - str);
594 // Set FP exception flags
595 if (converted.flags != decimal::ConversionResultFlags::Exact) {
596 RaiseFPExceptions(converted.flags);
597 }
598 return true;
599}
600
601template <int binaryPrecision>
602RT_API_ATTRS decimal::ConversionToBinaryResult<binaryPrecision>
603ConvertHexadecimal(
604 const char *&p, enum decimal::FortranRounding rounding, int expo) {
605 using RealType = decimal::BinaryFloatingPointNumber<binaryPrecision>;
606 using RawType = typename RealType::RawType;
607 bool isNegative{*p == '-'};
608 constexpr RawType one{1};
609 RawType signBit{0};
610 if (isNegative) {
611 ++p;
612 signBit = one << (RealType::bits - 1);
613 }
614 RawType fraction{0};
615 // Adjust the incoming binary P+/- exponent to shift the radix point
616 // to below the LSB and add in the bias.
617 expo += binaryPrecision - 1 + RealType::exponentBias;
618 // Input the fraction.
619 int roundingBit{0};
620 int guardBit{0};
621 for (; *p; ++p) {
622 fraction <<= 4;
623 expo -= 4;
624 if (*p >= '0' && *p <= '9') {
625 fraction |= *p - '0';
626 } else if (*p >= 'A' && *p <= 'F') {
627 fraction |= *p - 'A' + 10; // data were normalized to capitals
628 } else {
629 break;
630 }
631 if (fraction >> binaryPrecision) {
632 while (fraction >> binaryPrecision) {
633 guardBit |= roundingBit;
634 roundingBit = (int)fraction & 1;
635 fraction >>= 1;
636 ++expo;
637 }
638 // Consume excess digits
639 while (*++p) {
640 if (*p == '0') {
641 } else if ((*p >= '1' && *p <= '9') || (*p >= 'A' && *p <= 'F')) {
642 guardBit = 1;
643 } else {
644 break;
645 }
646 }
647 break;
648 }
649 }
650 if (fraction) {
651 // Boost biased expo if too small
652 while (expo < 1) {
653 guardBit |= roundingBit;
654 roundingBit = (int)fraction & 1;
655 fraction >>= 1;
656 ++expo;
657 }
658 // Normalize
659 while (expo > 1 && !(fraction >> (binaryPrecision - 1))) {
660 fraction <<= 1;
661 --expo;
662 guardBit = roundingBit = 0;
663 }
664 }
665 // Rounding
666 bool increase{false};
667 switch (rounding) {
668 case decimal::RoundNearest: // RN & RP
669 increase = roundingBit && (guardBit | ((int)fraction & 1));
670 break;
671 case decimal::RoundUp: // RU
672 increase = !isNegative && (roundingBit | guardBit);
673 break;
674 case decimal::RoundDown: // RD
675 increase = isNegative && (roundingBit | guardBit);
676 break;
677 case decimal::RoundToZero: // RZ
678 break;
679 case decimal::RoundCompatible: // RC
680 increase = roundingBit != 0;
681 break;
682 }
683 if (increase) {
684 ++fraction;
685 if (fraction >> binaryPrecision) {
686 fraction >>= 1;
687 ++expo;
688 }
689 }
690 // Package & return result
691 constexpr RawType significandMask{(one << RealType::significandBits) - 1};
692 int flags{(roundingBit | guardBit) ? decimal::Inexact : decimal::Exact};
693 if (!fraction) {
694 expo = 0;
695 } else if (expo == 1 && !(fraction >> (binaryPrecision - 1))) {
696 expo = 0; // subnormal
697 flags |= decimal::Underflow;
698 } else if (expo >= RealType::maxExponent) {
699 if (rounding == decimal::RoundToZero ||
700 (rounding == decimal::RoundDown && !isNegative) ||
701 (rounding == decimal::RoundUp && isNegative)) {
702 expo = RealType::maxExponent - 1; // +/-HUGE()
703 fraction = significandMask;
704 } else {
705 expo = RealType::maxExponent; // +/-Inf
706 fraction = 0;
707 flags |= decimal::Overflow;
708 }
709 } else {
710 fraction &= significandMask; // remove explicit normalization unless x87
711 }
712 return decimal::ConversionToBinaryResult<binaryPrecision>{
713 RealType{static_cast<RawType>(signBit |
714 static_cast<RawType>(expo) << RealType::significandBits | fraction)},
715 static_cast<decimal::ConversionResultFlags>(flags)};
716}
717
718template <int KIND>
719RT_API_ATTRS bool EditCommonRealInput(
720 IoStatementState &io, const DataEdit &edit, void *n) {
721 constexpr int binaryPrecision{common::PrecisionOfRealKind(KIND)};
722 if (TryFastPathRealDecimalInput<binaryPrecision>(io, edit, n)) {
723 return CheckCompleteListDirectedField(io, edit);
724 }
725 // Fast path wasn't available or didn't work; go the more general route
726 static constexpr int maxDigits{
727 common::MaxDecimalConversionDigits(binaryPrecision)};
728 static constexpr int bufferSize{maxDigits + 18};
729 char buffer[bufferSize];
730 auto scanned{ScanRealInput(buffer, maxDigits + 2, io, edit)};
731 int got{scanned.got};
732 if (got >= maxDigits + 2) {
733 io.GetIoErrorHandler().Crash("EditCommonRealInput: buffer was too small");
734 return false;
735 }
736 if (got == 0) {
737 const auto &connection{io.GetConnectionState()};
738 io.GetIoErrorHandler().SignalError(IostatBadRealInput,
739 "Bad real input data at column %d of record %d",
740 static_cast<int>(connection.positionInRecord + 1),
741 static_cast<int>(connection.currentRecordNumber));
742 return false;
743 }
744 decimal::ConversionToBinaryResult<binaryPrecision> converted;
745 const char *p{buffer};
746 if (scanned.isHexadecimal) {
747 buffer[got] = '\0';
748 converted = ConvertHexadecimal<binaryPrecision>(
749 p, edit.modes.round, scanned.exponent);
750 } else {
751 bool hadExtra{got > maxDigits};
752 int exponent{scanned.exponent};
753 if (exponent != 0) {
754 buffer[got++] = 'e';
755 if (exponent < 0) {
756 buffer[got++] = '-';
757 exponent = -exponent;
758 }
759 if (exponent > 9999) {
760 exponent = 9999; // will convert to +/-Inf
761 }
762 if (exponent > 999) {
763 int dig{exponent / 1000};
764 buffer[got++] = '0' + dig;
765 int rest{exponent - 1000 * dig};
766 dig = rest / 100;
767 buffer[got++] = '0' + dig;
768 rest -= 100 * dig;
769 dig = rest / 10;
770 buffer[got++] = '0' + dig;
771 buffer[got++] = '0' + (rest - 10 * dig);
772 } else if (exponent > 99) {
773 int dig{exponent / 100};
774 buffer[got++] = '0' + dig;
775 int rest{exponent - 100 * dig};
776 dig = rest / 10;
777 buffer[got++] = '0' + dig;
778 buffer[got++] = '0' + (rest - 10 * dig);
779 } else if (exponent > 9) {
780 int dig{exponent / 10};
781 buffer[got++] = '0' + dig;
782 buffer[got++] = '0' + (exponent - 10 * dig);
783 } else {
784 buffer[got++] = '0' + exponent;
785 }
786 }
787 buffer[got] = '\0';
788 converted = decimal::ConvertToBinary<binaryPrecision>(p, edit.modes.round);
789 if (hadExtra) {
790 converted.flags = static_cast<enum decimal::ConversionResultFlags>(
791 converted.flags | decimal::Inexact);
792 }
793 }
794 if (*p) { // unprocessed junk after value
795 const auto &connection{io.GetConnectionState()};
796 io.GetIoErrorHandler().SignalError(IostatBadRealInput,
797 "Trailing characters after real input data at column %d of record %d",
798 static_cast<int>(connection.positionInRecord + 1),
799 static_cast<int>(connection.currentRecordNumber));
800 return false;
801 }
802 *reinterpret_cast<decimal::BinaryFloatingPointNumber<binaryPrecision> *>(n) =
803 converted.binary;
804 // Set FP exception flags
805 if (converted.flags != decimal::ConversionResultFlags::Exact) {
806 if (converted.flags & decimal::ConversionResultFlags::Overflow) {
807 io.GetIoErrorHandler().SignalError(IostatRealInputOverflow);
808 return false;
809 }
810 RaiseFPExceptions(converted.flags);
811 }
812 return CheckCompleteListDirectedField(io, edit);
813}
814
815template <int KIND>
816RT_API_ATTRS bool EditRealInput(
817 IoStatementState &io, const DataEdit &edit, void *n) {
818 switch (edit.descriptor) {
819 case DataEdit::ListDirected:
820 if (IsNamelistNameOrSlash(io)) {
821 return false;
822 }
823 return EditCommonRealInput<KIND>(io, edit, n);
824 case DataEdit::ListDirectedRealPart:
825 case DataEdit::ListDirectedImaginaryPart:
826 case 'F':
827 case 'E': // incl. EN, ES, & EX
828 case 'D':
829 case 'G':
830 return EditCommonRealInput<KIND>(io, edit, n);
831 case 'B':
832 return EditBOZInput<1>(io, edit, n,
833 common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
834 case 'O':
835 return EditBOZInput<3>(io, edit, n,
836 common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
837 case 'Z':
838 return EditBOZInput<4>(io, edit, n,
839 common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3);
840 case 'A': // legacy extension
841 return EditCharacterInput(io, edit, reinterpret_cast<char *>(n), KIND);
842 default:
843 io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
844 "Data edit descriptor '%c' may not be used for REAL input",
845 edit.descriptor);
846 return false;
847 }
848}
849
850// 13.7.3 in Fortran 2018
851RT_API_ATTRS bool EditLogicalInput(
852 IoStatementState &io, const DataEdit &edit, bool &x) {
853 switch (edit.descriptor) {
854 case DataEdit::ListDirected:
855 if (IsNamelistNameOrSlash(io)) {
856 return false;
857 }
858 break;
859 case 'L':
860 case 'G':
861 break;
862 default:
863 io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
864 "Data edit descriptor '%c' may not be used for LOGICAL input",
865 edit.descriptor);
866 return false;
867 }
868 Fortran::common::optional<int> remaining{io.CueUpInput(edit)};
869 Fortran::common::optional<char32_t> next{io.NextInField(remaining, edit)};
870 if (next && *next == '.') { // skip optional period
871 next = io.NextInField(remaining, edit);
872 }
873 if (!next) {
874 io.GetIoErrorHandler().SignalError("Empty LOGICAL input field");
875 return false;
876 }
877 switch (*next) {
878 case 'T':
879 case 't':
880 x = true;
881 break;
882 case 'F':
883 case 'f':
884 x = false;
885 break;
886 default:
887 io.GetIoErrorHandler().SignalError(
888 "Bad character '%lc' in LOGICAL input field", *next);
889 return false;
890 }
891 if (remaining) { // ignore the rest of a fixed-width field
892 io.HandleRelativePosition(*remaining);
893 } else if (edit.descriptor == DataEdit::ListDirected) {
894 while (io.NextInField(remaining, edit)) { // discard rest of field
895 }
896 }
897 return CheckCompleteListDirectedField(io, edit);
898}
899
900// See 13.10.3.1 paragraphs 7-9 in Fortran 2018
901template <typename CHAR>
902static RT_API_ATTRS bool EditDelimitedCharacterInput(
903 IoStatementState &io, CHAR *x, std::size_t length, char32_t delimiter) {
904 bool result{true};
905 while (true) {
906 std::size_t byteCount{0};
907 auto ch{io.GetCurrentChar(byteCount)};
908 if (!ch) {
909 if (io.AdvanceRecord()) {
910 continue;
911 } else {
912 result = false; // EOF in character value
913 break;
914 }
915 }
916 io.HandleRelativePosition(byteCount);
917 if (*ch == delimiter) {
918 auto next{io.GetCurrentChar(byteCount)};
919 if (next && *next == delimiter) {
920 // Repeated delimiter: use as character value
921 io.HandleRelativePosition(byteCount);
922 } else {
923 break; // closing delimiter
924 }
925 }
926 if (length > 0) {
927 *x++ = *ch;
928 --length;
929 }
930 }
931 Fortran::runtime::fill_n(x, length, ' ');
932 return result;
933}
934
935template <typename CHAR>
936static RT_API_ATTRS bool EditListDirectedCharacterInput(
937 IoStatementState &io, CHAR *x, std::size_t length, const DataEdit &edit) {
938 std::size_t byteCount{0};
939 auto ch{io.GetCurrentChar(byteCount)};
940 if (ch && (*ch == '\'' || *ch == '"')) {
941 io.HandleRelativePosition(byteCount);
942 return EditDelimitedCharacterInput(io, x, length, *ch);
943 }
944 if (IsNamelistNameOrSlash(io) || io.GetConnectionState().IsAtEOF()) {
945 return false;
946 }
947 // Undelimited list-directed character input: stop at a value separator
948 // or the end of the current record. Subtlety: the "remaining" count
949 // here is a dummy that's used to avoid the interpretation of separators
950 // in NextInField.
951 Fortran::common::optional<int> remaining{length > 0 ? maxUTF8Bytes : 0};
952 while (Fortran::common::optional<char32_t> next{
953 io.NextInField(remaining, edit)}) {
954 bool isSep{false};
955 switch (*next) {
956 case ' ':
957 case '\t':
958 case '/':
959 isSep = true;
960 break;
961 case '&':
962 case '$':
963 isSep = edit.IsNamelist();
964 break;
965 case ',':
966 isSep = !(edit.modes.editingFlags & decimalComma);
967 break;
968 case ';':
969 isSep = !!(edit.modes.editingFlags & decimalComma);
970 break;
971 default:
972 break;
973 }
974 if (isSep) {
975 remaining = 0;
976 } else {
977 *x++ = *next;
978 remaining = --length > 0 ? maxUTF8Bytes : 0;
979 }
980 }
981 Fortran::runtime::fill_n(x, length, ' ');
982 return true;
983}
984
985template <typename CHAR>
986RT_API_ATTRS bool EditCharacterInput(IoStatementState &io, const DataEdit &edit,
987 CHAR *x, std::size_t lengthChars) {
988 switch (edit.descriptor) {
989 case DataEdit::ListDirected:
990 return EditListDirectedCharacterInput(io, x, lengthChars, edit);
991 case 'A':
992 case 'G':
993 break;
994 case 'B':
995 return EditBOZInput<1>(io, edit, x, lengthChars * sizeof *x);
996 case 'O':
997 return EditBOZInput<3>(io, edit, x, lengthChars * sizeof *x);
998 case 'Z':
999 return EditBOZInput<4>(io, edit, x, lengthChars * sizeof *x);
1000 default:
1001 io.GetIoErrorHandler().SignalError(IostatErrorInFormat,
1002 "Data edit descriptor '%c' may not be used with a CHARACTER data item",
1003 edit.descriptor);
1004 return false;
1005 }
1006 const ConnectionState &connection{io.GetConnectionState()};
1007 std::size_t remainingChars{lengthChars};
1008 // Skip leading characters.
1009 // Their bytes don't count towards INQUIRE(IOLENGTH=).
1010 std::size_t skipChars{0};
1011 if (edit.width && *edit.width > 0) {
1012 remainingChars = *edit.width;
1013 if (remainingChars > lengthChars) {
1014 skipChars = remainingChars - lengthChars;
1015 }
1016 }
1017 // When the field is wider than the variable, we drop the leading
1018 // characters. When the variable is wider than the field, there can be
1019 // trailing padding or an EOR condition.
1020 const char *input{nullptr};
1021 std::size_t readyBytes{0};
1022 // Transfer payload bytes; these do count.
1023 while (remainingChars > 0) {
1024 if (readyBytes == 0) {
1025 readyBytes = io.GetNextInputBytes(input);
1026 if (readyBytes == 0 ||
1027 (readyBytes < remainingChars && edit.modes.nonAdvancing)) {
1028 if (io.CheckForEndOfRecord(readyBytes)) {
1029 if (readyBytes == 0) {
1030 // PAD='YES' and no more data
1031 Fortran::runtime::fill_n(x, lengthChars, ' ');
1032 return !io.GetIoErrorHandler().InError();
1033 } else {
1034 // Do partial read(s) then pad on last iteration
1035 }
1036 } else {
1037 return !io.GetIoErrorHandler().InError();
1038 }
1039 }
1040 }
1041 std::size_t chunkBytes;
1042 std::size_t chunkChars{1};
1043 bool skipping{skipChars > 0};
1044 if (connection.isUTF8) {
1045 chunkBytes = MeasureUTF8Bytes(first: *input);
1046 if (skipping) {
1047 --skipChars;
1048 } else if (auto ucs{DecodeUTF8(input)}) {
1049 if ((sizeof *x == 1 && *ucs > 0xff) ||
1050 (sizeof *x == 2 && *ucs > 0xffff)) {
1051 *x++ = '?';
1052 } else {
1053 *x++ = *ucs;
1054 }
1055 --lengthChars;
1056 } else if (chunkBytes == 0) {
1057 // error recovery: skip bad encoding
1058 chunkBytes = 1;
1059 }
1060 } else if (connection.internalIoCharKind > 1) {
1061 // Reading from non-default character internal unit
1062 chunkBytes = connection.internalIoCharKind;
1063 if (skipping) {
1064 --skipChars;
1065 } else {
1066 char32_t buffer{0};
1067 std::memcpy(&buffer, input, chunkBytes);
1068 if ((sizeof *x == 1 && buffer > 0xff) ||
1069 (sizeof *x == 2 && buffer > 0xffff)) {
1070 *x++ = '?';
1071 } else {
1072 *x++ = buffer;
1073 }
1074 --lengthChars;
1075 }
1076 } else if constexpr (sizeof *x > 1) {
1077 // Read single byte with expansion into multi-byte CHARACTER
1078 chunkBytes = 1;
1079 if (skipping) {
1080 --skipChars;
1081 } else {
1082 *x++ = static_cast<unsigned char>(*input);
1083 --lengthChars;
1084 }
1085 } else { // single bytes -> default CHARACTER
1086 if (skipping) {
1087 chunkBytes = std::min<std::size_t>(a: skipChars, b: readyBytes);
1088 chunkChars = chunkBytes;
1089 skipChars -= chunkChars;
1090 } else {
1091 chunkBytes = std::min<std::size_t>(a: remainingChars, b: readyBytes);
1092 chunkBytes = std::min<std::size_t>(a: lengthChars, b: chunkBytes);
1093 chunkChars = chunkBytes;
1094 std::memcpy(x, input, chunkBytes);
1095 x += chunkBytes;
1096 lengthChars -= chunkChars;
1097 }
1098 }
1099 input += chunkBytes;
1100 remainingChars -= chunkChars;
1101 if (!skipping) {
1102 io.GotChar(chunkBytes);
1103 }
1104 io.HandleRelativePosition(chunkBytes);
1105 readyBytes -= chunkBytes;
1106 }
1107 // Pad the remainder of the input variable, if any.
1108 Fortran::runtime::fill_n(x, lengthChars, ' ');
1109 return CheckCompleteListDirectedField(io, edit);
1110}
1111
1112template RT_API_ATTRS bool EditRealInput<2>(
1113 IoStatementState &, const DataEdit &, void *);
1114template RT_API_ATTRS bool EditRealInput<3>(
1115 IoStatementState &, const DataEdit &, void *);
1116template RT_API_ATTRS bool EditRealInput<4>(
1117 IoStatementState &, const DataEdit &, void *);
1118template RT_API_ATTRS bool EditRealInput<8>(
1119 IoStatementState &, const DataEdit &, void *);
1120template RT_API_ATTRS bool EditRealInput<10>(
1121 IoStatementState &, const DataEdit &, void *);
1122// TODO: double/double
1123template RT_API_ATTRS bool EditRealInput<16>(
1124 IoStatementState &, const DataEdit &, void *);
1125
1126template RT_API_ATTRS bool EditCharacterInput(
1127 IoStatementState &, const DataEdit &, char *, std::size_t);
1128template RT_API_ATTRS bool EditCharacterInput(
1129 IoStatementState &, const DataEdit &, char16_t *, std::size_t);
1130template RT_API_ATTRS bool EditCharacterInput(
1131 IoStatementState &, const DataEdit &, char32_t *, std::size_t);
1132
1133RT_OFFLOAD_API_GROUP_END
1134} // namespace Fortran::runtime::io
1135

source code of flang/runtime/edit-input.cpp