1//===-- runtime/format-implementation.h -------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9// Implements out-of-line member functions of template class FormatControl
10
11#ifndef FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_
12#define FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_
13
14#include "emit-encoded.h"
15#include "format.h"
16#include "io-stmt.h"
17#include "memory.h"
18#include "flang/Common/format.h"
19#include "flang/Decimal/decimal.h"
20#include "flang/Runtime/main.h"
21#include <algorithm>
22#include <cstring>
23#include <limits>
24
25namespace Fortran::runtime::io {
26
27template <typename CONTEXT>
28RT_API_ATTRS FormatControl<CONTEXT>::FormatControl(const Terminator &terminator,
29 const CharType *format, std::size_t formatLength,
30 const Descriptor *formatDescriptor, int maxHeight)
31 : maxHeight_{static_cast<std::uint8_t>(maxHeight)}, format_{format},
32 formatLength_{static_cast<int>(formatLength)} {
33 RUNTIME_CHECK(terminator, maxHeight == maxHeight_);
34 if (!format && formatDescriptor) {
35 // The format is a character array passed via a descriptor.
36 std::size_t elements{formatDescriptor->Elements()};
37 std::size_t elementBytes{formatDescriptor->ElementBytes()};
38 formatLength = elements * elementBytes / sizeof(CharType);
39 formatLength_ = static_cast<int>(formatLength);
40 if (formatDescriptor->IsContiguous()) {
41 // Treat the contiguous array as a single character value.
42 format_ = const_cast<const CharType *>(
43 reinterpret_cast<CharType *>(formatDescriptor->raw().base_addr));
44 } else {
45 // Concatenate its elements into a temporary array.
46 char *p{reinterpret_cast<char *>(
47 AllocateMemoryOrCrash(terminator, formatLength * sizeof(CharType)))};
48 format_ = p;
49 SubscriptValue at[maxRank];
50 formatDescriptor->GetLowerBounds(at);
51 for (std::size_t j{0}; j < elements; ++j) {
52 std::memcpy(p, formatDescriptor->Element<char>(at), elementBytes);
53 p += elementBytes;
54 formatDescriptor->IncrementSubscripts(at);
55 }
56 freeFormat_ = true;
57 }
58 }
59 RUNTIME_CHECK(
60 terminator, formatLength == static_cast<std::size_t>(formatLength_));
61 stack_[0].start = offset_;
62 stack_[0].remaining = Iteration::unlimited; // 13.4(8)
63}
64
65template <typename CONTEXT>
66RT_API_ATTRS int FormatControl<CONTEXT>::GetIntField(
67 IoErrorHandler &handler, CharType firstCh, bool *hadError) {
68 CharType ch{firstCh ? firstCh : PeekNext()};
69 bool negate{ch == '-'};
70 if (negate || ch == '+') {
71 if (firstCh) {
72 firstCh = '\0';
73 } else {
74 ++offset_;
75 }
76 ch = PeekNext();
77 }
78 if (ch < '0' || ch > '9') {
79 handler.SignalError(IostatErrorInFormat,
80 "Invalid FORMAT: integer expected at '%c'", static_cast<char>(ch));
81 if (hadError) {
82 *hadError = true;
83 }
84 return 0;
85 }
86 int result{0};
87 while (ch >= '0' && ch <= '9') {
88 constexpr int tenth{std::numeric_limits<int>::max() / 10};
89 if (result > tenth ||
90 ch - '0' > std::numeric_limits<int>::max() - 10 * result) {
91 handler.SignalError(
92 IostatErrorInFormat, "FORMAT integer field out of range");
93 if (hadError) {
94 *hadError = true;
95 }
96 return result;
97 }
98 result = 10 * result + ch - '0';
99 if (firstCh) {
100 firstCh = '\0';
101 } else {
102 ++offset_;
103 }
104 ch = PeekNext();
105 }
106 if (negate && (result *= -1) > 0) {
107 handler.SignalError(
108 IostatErrorInFormat, "FORMAT integer field out of range");
109 if (hadError) {
110 *hadError = true;
111 }
112 }
113 return result;
114}
115
116template <typename CONTEXT>
117static RT_API_ATTRS void HandleControl(
118 CONTEXT &context, char ch, char next, int n) {
119 MutableModes &modes{context.mutableModes()};
120 switch (ch) {
121 case 'B':
122 if (next == 'Z') {
123 modes.editingFlags |= blankZero;
124 return;
125 }
126 if (next == 'N') {
127 modes.editingFlags &= ~blankZero;
128 return;
129 }
130 break;
131 case 'D':
132 if (next == 'C') {
133 modes.editingFlags |= decimalComma;
134 return;
135 }
136 if (next == 'P') {
137 modes.editingFlags &= ~decimalComma;
138 return;
139 }
140 break;
141 case 'P':
142 if (!next) {
143 modes.scale = n; // kP - decimal scaling by 10**k
144 return;
145 }
146 break;
147 case 'R':
148 switch (next) {
149 case 'N':
150 modes.round = decimal::RoundNearest;
151 return;
152 case 'Z':
153 modes.round = decimal::RoundToZero;
154 return;
155 case 'U':
156 modes.round = decimal::RoundUp;
157 return;
158 case 'D':
159 modes.round = decimal::RoundDown;
160 return;
161 case 'C':
162 modes.round = decimal::RoundCompatible;
163 return;
164 case 'P':
165 modes.round = executionEnvironment.defaultOutputRoundingMode;
166 return;
167 default:
168 break;
169 }
170 break;
171 case 'X':
172 if (!next) {
173 ConnectionState &connection{context.GetConnectionState()};
174 if (connection.internalIoCharKind > 1) {
175 n *= connection.internalIoCharKind;
176 }
177 context.HandleRelativePosition(n);
178 return;
179 }
180 break;
181 case 'S':
182 if (next == 'P') {
183 modes.editingFlags |= signPlus;
184 return;
185 }
186 if (!next || next == 'S') {
187 modes.editingFlags &= ~signPlus;
188 return;
189 }
190 break;
191 case 'T': {
192 if (!next) { // Tn
193 --n; // convert 1-based to 0-based
194 }
195 ConnectionState &connection{context.GetConnectionState()};
196 if (connection.internalIoCharKind > 1) {
197 n *= connection.internalIoCharKind;
198 }
199 if (!next) { // Tn
200 context.HandleAbsolutePosition(n);
201 return;
202 }
203 if (next == 'L' || next == 'R') { // TLn & TRn
204 context.HandleRelativePosition(next == 'L' ? -n : n);
205 return;
206 }
207 } break;
208 default:
209 break;
210 }
211 if (next) {
212 context.SignalError(IostatErrorInFormat,
213 "Unknown '%c%c' edit descriptor in FORMAT", ch, next);
214 } else {
215 context.SignalError(
216 IostatErrorInFormat, "Unknown '%c' edit descriptor in FORMAT", ch);
217 }
218}
219
220// Locates the next data edit descriptor in the format.
221// Handles all repetition counts and control edit descriptors.
222// Generally assumes that the format string has survived the common
223// format validator gauntlet.
224template <typename CONTEXT>
225RT_API_ATTRS int FormatControl<CONTEXT>::CueUpNextDataEdit(
226 Context &context, bool stop) {
227 bool hitUnlimitedLoopEnd{false};
228 // Do repetitions remain on an unparenthesized data edit?
229 while (height_ > 1 && format_[stack_[height_ - 1].start] != '(') {
230 offset_ = stack_[height_ - 1].start;
231 int repeat{stack_[height_ - 1].remaining};
232 --height_;
233 if (repeat > 0) {
234 return repeat;
235 }
236 }
237 while (true) {
238 Fortran::common::optional<int> repeat;
239 bool unlimited{false};
240 auto maybeReversionPoint{offset_};
241 CharType ch{GetNextChar(context)};
242 while (ch == ',' || ch == ':') {
243 // Skip commas, and don't complain if they're missing; the format
244 // validator does that.
245 if (stop && ch == ':') {
246 return 0;
247 }
248 ch = GetNextChar(context);
249 }
250 if (ch == '-' || ch == '+' || (ch >= '0' && ch <= '9')) {
251 bool hadSign{ch == '-' || ch == '+'};
252 repeat = GetIntField(context, ch);
253 ch = GetNextChar(context);
254 if (hadSign && ch != 'p' && ch != 'P') {
255 ReportBadFormat(context,
256 "Invalid FORMAT: signed integer may appear only before 'P",
257 maybeReversionPoint);
258 return 0;
259 }
260 } else if (ch == '*') {
261 unlimited = true;
262 ch = GetNextChar(context);
263 if (ch != '(') {
264 ReportBadFormat(context,
265 "Invalid FORMAT: '*' may appear only before '('",
266 maybeReversionPoint);
267 return 0;
268 }
269 if (height_ != 1) {
270 ReportBadFormat(context,
271 "Invalid FORMAT: '*' must be nested in exactly one set of "
272 "parentheses",
273 maybeReversionPoint);
274 return 0;
275 }
276 }
277 ch = Capitalize(ch);
278 if (ch == '(') {
279 if (height_ >= maxHeight_) {
280 ReportBadFormat(context,
281 "FORMAT stack overflow: too many nested parentheses",
282 maybeReversionPoint);
283 return 0;
284 }
285 stack_[height_].start = offset_ - 1; // the '('
286 RUNTIME_CHECK(context, format_[stack_[height_].start] == '(');
287 if (unlimited || height_ == 0) {
288 stack_[height_].remaining = Iteration::unlimited;
289 } else if (repeat) {
290 if (*repeat <= 0) {
291 *repeat = 1; // error recovery
292 }
293 stack_[height_].remaining = *repeat - 1;
294 } else {
295 stack_[height_].remaining = 0;
296 }
297 if (height_ == 1 && !hitEnd_) {
298 // Subtle point (F'2018 13.4 para 9): the last parenthesized group
299 // at height 1 becomes the restart point after control reaches the
300 // end of the format, including its repeat count.
301 stack_[0].start = maybeReversionPoint;
302 }
303 ++height_;
304 } else if (height_ == 0) {
305 ReportBadFormat(context, "FORMAT lacks initial '('", maybeReversionPoint);
306 return 0;
307 } else if (ch == ')') {
308 if (height_ == 1) {
309 hitEnd_ = true;
310 if (stop) {
311 return 0; // end of FORMAT and no data items remain
312 }
313 context.AdvanceRecord(); // implied / before rightmost )
314 }
315 auto restart{stack_[height_ - 1].start};
316 if (format_[restart] == '(') {
317 ++restart;
318 }
319 if (stack_[height_ - 1].remaining == Iteration::unlimited) {
320 if (height_ > 1 && GetNextChar(context) != ')') {
321 ReportBadFormat(context,
322 "Unlimited repetition in FORMAT may not be followed by more "
323 "items",
324 restart);
325 return 0;
326 }
327 if (hitUnlimitedLoopEnd) {
328 ReportBadFormat(context,
329 "Unlimited repetition in FORMAT lacks data edit descriptors",
330 restart);
331 return 0;
332 }
333 hitUnlimitedLoopEnd = true;
334 offset_ = restart;
335 } else if (stack_[height_ - 1].remaining-- > 0) {
336 offset_ = restart;
337 } else {
338 --height_;
339 }
340 } else if (ch == '\'' || ch == '"') {
341 // Quoted 'character literal'
342 CharType quote{ch};
343 auto start{offset_};
344 while (offset_ < formatLength_ && format_[offset_] != quote) {
345 ++offset_;
346 }
347 if (offset_ >= formatLength_) {
348 ReportBadFormat(context,
349 "FORMAT missing closing quote on character literal",
350 maybeReversionPoint);
351 return 0;
352 }
353 ++offset_;
354 std::size_t chars{
355 static_cast<std::size_t>(&format_[offset_] - &format_[start])};
356 if (offset_ < formatLength_ && format_[offset_] == quote) {
357 // subtle: handle doubled quote character in a literal by including
358 // the first in the output, then treating the second as the start
359 // of another character literal.
360 } else {
361 --chars;
362 }
363 EmitAscii(context, format_ + start, chars);
364 } else if (ch == 'H') {
365 // 9HHOLLERITH
366 if (!repeat || *repeat < 1 || offset_ + *repeat > formatLength_) {
367 ReportBadFormat(context, "Invalid width on Hollerith in FORMAT",
368 maybeReversionPoint);
369 return 0;
370 }
371 EmitAscii(context, format_ + offset_, static_cast<std::size_t>(*repeat));
372 offset_ += *repeat;
373 } else if (ch >= 'A' && ch <= 'Z') {
374 int start{offset_ - 1};
375 CharType next{'\0'};
376 if (ch != 'P') { // 1PE5.2 - comma not required (C1302)
377 CharType peek{Capitalize(PeekNext())};
378 if (peek >= 'A' && peek <= 'Z') {
379 if (ch == 'A' /* anticipate F'202X AT editing */ || ch == 'B' ||
380 ch == 'D' || ch == 'E' || ch == 'R' || ch == 'S' || ch == 'T') {
381 // Assume a two-letter edit descriptor
382 next = peek;
383 ++offset_;
384 } else {
385 // extension: assume a comma between 'ch' and 'peek'
386 }
387 }
388 }
389 if ((!next &&
390 (ch == 'A' || ch == 'I' || ch == 'B' || ch == 'E' || ch == 'D' ||
391 ch == 'O' || ch == 'Z' || ch == 'F' || ch == 'G' ||
392 ch == 'L')) ||
393 (ch == 'E' && (next == 'N' || next == 'S' || next == 'X')) ||
394 (ch == 'D' && next == 'T')) {
395 // Data edit descriptor found
396 offset_ = start;
397 return repeat && *repeat > 0 ? *repeat : 1;
398 } else {
399 // Control edit descriptor
400 if (ch == 'T') { // Tn, TLn, TRn
401 repeat = GetIntField(context);
402 }
403 HandleControl(context, static_cast<char>(ch), static_cast<char>(next),
404 repeat ? *repeat : 1);
405 }
406 } else if (ch == '/') {
407 context.AdvanceRecord(repeat && *repeat > 0 ? *repeat : 1);
408 } else if (ch == '$' || ch == '\\') {
409 context.mutableModes().nonAdvancing = true;
410 } else if (ch == '\t' || ch == '\v') {
411 // Tabs (extension)
412 // TODO: any other raw characters?
413 EmitAscii(context, format_ + offset_ - 1, 1);
414 } else {
415 ReportBadFormat(
416 context, "Invalid character in FORMAT", maybeReversionPoint);
417 return 0;
418 }
419 }
420}
421
422// Returns the next data edit descriptor
423template <typename CONTEXT>
424RT_API_ATTRS Fortran::common::optional<DataEdit>
425FormatControl<CONTEXT>::GetNextDataEdit(Context &context, int maxRepeat) {
426 int repeat{CueUpNextDataEdit(context)};
427 auto start{offset_};
428 DataEdit edit;
429 edit.modes = context.mutableModes();
430 // Handle repeated nonparenthesized edit descriptors
431 edit.repeat = std::min(repeat, maxRepeat); // 0 if maxRepeat==0
432 if (repeat > maxRepeat) {
433 stack_[height_].start = start; // after repeat count
434 stack_[height_].remaining = repeat - edit.repeat;
435 ++height_;
436 }
437 edit.descriptor = static_cast<char>(Capitalize(GetNextChar(context)));
438 if (edit.descriptor == 'D' && Capitalize(PeekNext()) == 'T') {
439 // DT['iotype'][(v_list)] defined I/O
440 edit.descriptor = DataEdit::DefinedDerivedType;
441 ++offset_;
442 if (auto quote{static_cast<char>(PeekNext())};
443 quote == '\'' || quote == '"') {
444 // Capture the quoted 'iotype'
445 bool ok{false};
446 for (++offset_; offset_ < formatLength_;) {
447 auto ch{static_cast<char>(format_[offset_++])};
448 if (ch == quote &&
449 (offset_ == formatLength_ ||
450 static_cast<char>(format_[offset_]) != quote)) {
451 ok = true;
452 break; // that was terminating quote
453 }
454 if (edit.ioTypeChars >= edit.maxIoTypeChars) {
455 ReportBadFormat(context, "Excessive DT'iotype' in FORMAT", start);
456 return Fortran::common::nullopt;
457 }
458 edit.ioType[edit.ioTypeChars++] = ch;
459 if (ch == quote) {
460 ++offset_;
461 }
462 }
463 if (!ok) {
464 ReportBadFormat(context, "Unclosed DT'iotype' in FORMAT", start);
465 return Fortran::common::nullopt;
466 }
467 }
468 if (PeekNext() == '(') {
469 // Capture the v_list arguments
470 bool ok{false};
471 for (++offset_; offset_ < formatLength_;) {
472 bool hadError{false};
473 int n{GetIntField(context, '\0', &hadError)};
474 if (hadError) {
475 ok = false;
476 break;
477 }
478 if (edit.vListEntries >= edit.maxVListEntries) {
479 ReportBadFormat(context, "Excessive DT(v_list) in FORMAT", start);
480 return Fortran::common::nullopt;
481 }
482 edit.vList[edit.vListEntries++] = n;
483 auto ch{static_cast<char>(GetNextChar(context))};
484 if (ch != ',') {
485 ok = ch == ')';
486 break;
487 }
488 }
489 if (!ok) {
490 ReportBadFormat(context, "Unclosed DT(v_list) in FORMAT", start);
491 return Fortran::common::nullopt;
492 }
493 }
494 } else { // not DT'iotype'
495 if (edit.descriptor == 'E') {
496 if (auto next{static_cast<char>(Capitalize(PeekNext()))};
497 next == 'N' || next == 'S' || next == 'X') {
498 edit.variation = next;
499 ++offset_;
500 }
501 }
502 // Width is optional for A[w] in the standard and optional
503 // for Lw in most compilers.
504 // Intel & (presumably, from bug report) Fujitsu allow
505 // a missing 'w' & 'd'/'m' for other edit descriptors -- but not
506 // 'd'/'m' with a missing 'w' -- and so interpret "(E)" as "(E0)".
507 if (CharType ch{PeekNext()}; (ch >= '0' && ch <= '9') || ch == '.') {
508 edit.width = GetIntField(context);
509 if constexpr (std::is_base_of_v<InputStatementState, CONTEXT>) {
510 if (edit.width.value_or(-1) == 0) {
511 ReportBadFormat(context, "Input field width is zero", start);
512 }
513 }
514 if (PeekNext() == '.') {
515 ++offset_;
516 edit.digits = GetIntField(context);
517 if (CharType ch{PeekNext()};
518 ch == 'e' || ch == 'E' || ch == 'd' || ch == 'D') {
519 ++offset_;
520 edit.expoDigits = GetIntField(context);
521 }
522 }
523 }
524 }
525 return edit;
526}
527
528template <typename CONTEXT>
529RT_API_ATTRS void FormatControl<CONTEXT>::Finish(Context &context) {
530 CueUpNextDataEdit(context, true /* stop at colon or end of FORMAT */);
531 if (freeFormat_) {
532 FreeMemory(const_cast<CharType *>(format_));
533 }
534}
535} // namespace Fortran::runtime::io
536#endif // FORTRAN_RUNTIME_FORMAT_IMPLEMENTATION_H_
537

source code of flang/runtime/format-implementation.h