1//===-- lib/Parser/prescan.h ------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef FORTRAN_PARSER_PRESCAN_H_
10#define FORTRAN_PARSER_PRESCAN_H_
11
12// Defines a fast Fortran source prescanning phase that implements some
13// character-level features of the language that can be inefficient to
14// support directly in a backtracking parser. This phase handles Fortran
15// line continuation, comment removal, card image margins, padding out
16// fixed form character literals on truncated card images, file
17// inclusion, and driving the Fortran source preprocessor.
18
19#include "flang/Common/Fortran-features.h"
20#include "flang/Parser/characters.h"
21#include "flang/Parser/message.h"
22#include "flang/Parser/provenance.h"
23#include "flang/Parser/token-sequence.h"
24#include <bitset>
25#include <optional>
26#include <string>
27#include <unordered_set>
28
29namespace Fortran::parser {
30
31class Messages;
32class Preprocessor;
33
34class Prescanner {
35public:
36 Prescanner(Messages &, CookedSource &, Preprocessor &,
37 common::LanguageFeatureControl);
38 Prescanner(const Prescanner &);
39
40 const AllSources &allSources() const { return allSources_; }
41 AllSources &allSources() { return allSources_; }
42 const Messages &messages() const { return messages_; }
43 Messages &messages() { return messages_; }
44 const Preprocessor &preprocessor() const { return preprocessor_; }
45 Preprocessor &preprocessor() { return preprocessor_; }
46
47 Prescanner &set_fixedForm(bool yes) {
48 inFixedForm_ = yes;
49 return *this;
50 }
51 Prescanner &set_encoding(Encoding code) {
52 encoding_ = code;
53 return *this;
54 }
55 Prescanner &set_fixedFormColumnLimit(int limit) {
56 fixedFormColumnLimit_ = limit;
57 return *this;
58 }
59
60 Prescanner &AddCompilerDirectiveSentinel(const std::string &);
61
62 void Prescan(ProvenanceRange);
63 void Statement();
64 void NextLine();
65
66 // Callbacks for use by Preprocessor.
67 bool IsAtEnd() const { return nextLine_ >= limit_; }
68 bool IsNextLinePreprocessorDirective() const;
69 TokenSequence TokenizePreprocessorDirective();
70 Provenance GetCurrentProvenance() const { return GetProvenance(at_); }
71
72 const char *IsCompilerDirectiveSentinel(const char *, std::size_t) const;
73 const char *IsCompilerDirectiveSentinel(CharBlock) const;
74
75 template <typename... A> Message &Say(A &&...a) {
76 return messages_.Say(std::forward<A>(a)...);
77 }
78
79private:
80 struct LineClassification {
81 enum class Kind {
82 Comment,
83 ConditionalCompilationDirective,
84 IncludeDirective, // #include
85 DefinitionDirective, // #define & #undef
86 PreprocessorDirective,
87 IncludeLine, // Fortran INCLUDE
88 CompilerDirective,
89 Source
90 };
91 LineClassification(Kind k, std::size_t po = 0, const char *s = nullptr)
92 : kind{k}, payloadOffset{po}, sentinel{s} {}
93 LineClassification(LineClassification &&) = default;
94 Kind kind;
95 std::size_t payloadOffset; // byte offset of content
96 const char *sentinel; // if it's a compiler directive
97 };
98
99 void BeginSourceLine(const char *at) {
100 at_ = at;
101 column_ = 1;
102 tabInCurrentLine_ = false;
103 }
104
105 void BeginSourceLineAndAdvance() {
106 BeginSourceLine(at: nextLine_);
107 NextLine();
108 }
109
110 void BeginStatementAndAdvance() {
111 BeginSourceLineAndAdvance();
112 slashInCurrentStatement_ = false;
113 preventHollerith_ = false;
114 parenthesisNesting_ = 0;
115 continuationLines_ = 0;
116 isPossibleMacroCall_ = false;
117 }
118
119 Provenance GetProvenance(const char *sourceChar) const {
120 return startProvenance_ + (sourceChar - start_);
121 }
122
123 ProvenanceRange GetProvenanceRange(
124 const char *first, const char *afterLast) const {
125 std::size_t bytes = afterLast - first;
126 return {startProvenance_ + (first - start_), bytes};
127 }
128
129 void EmitChar(TokenSequence &tokens, char ch) {
130 tokens.PutNextTokenChar(ch, GetCurrentProvenance());
131 }
132
133 void EmitInsertedChar(TokenSequence &tokens, char ch) {
134 Provenance provenance{allSources_.CompilerInsertionProvenance(ch)};
135 tokens.PutNextTokenChar(ch, provenance);
136 }
137
138 char EmitCharAndAdvance(TokenSequence &tokens, char ch) {
139 EmitChar(tokens, ch);
140 NextChar();
141 return *at_;
142 }
143
144 bool InCompilerDirective() const { return directiveSentinel_ != nullptr; }
145 bool InFixedFormSource() const {
146 return inFixedForm_ && !inPreprocessorDirective_ && !InCompilerDirective();
147 }
148
149 bool IsCComment(const char *p) const {
150 return p[0] == '/' && p[1] == '*' &&
151 (inPreprocessorDirective_ ||
152 (!inCharLiteral_ &&
153 features_.IsEnabled(
154 common::LanguageFeature::ClassicCComments)));
155 }
156
157 void LabelField(TokenSequence &);
158 void EnforceStupidEndStatementRules(const TokenSequence &);
159 void SkipToEndOfLine();
160 bool MustSkipToEndOfLine() const;
161 void NextChar();
162 // True when input flowed to a continuation line
163 bool SkipToNextSignificantCharacter();
164 void SkipCComments();
165 void SkipSpaces();
166 static const char *SkipWhiteSpace(const char *);
167 const char *SkipWhiteSpaceAndCComments(const char *) const;
168 const char *SkipCComment(const char *) const;
169 bool NextToken(TokenSequence &);
170 bool ExponentAndKind(TokenSequence &);
171 void QuotedCharacterLiteral(TokenSequence &, const char *start);
172 void Hollerith(TokenSequence &, int count, const char *start);
173 bool PadOutCharacterLiteral(TokenSequence &);
174 bool SkipCommentLine(bool afterAmpersand);
175 bool IsFixedFormCommentLine(const char *) const;
176 const char *IsFreeFormComment(const char *) const;
177 std::optional<std::size_t> IsIncludeLine(const char *) const;
178 void FortranInclude(const char *quote);
179 const char *IsPreprocessorDirectiveLine(const char *) const;
180 const char *FixedFormContinuationLine(bool mightNeedSpace);
181 const char *FreeFormContinuationLine(bool ampersand);
182 bool IsImplicitContinuation() const;
183 bool FixedFormContinuation(bool mightNeedSpace);
184 bool FreeFormContinuation();
185 bool Continuation(bool mightNeedFixedFormSpace);
186 std::optional<LineClassification> IsFixedFormCompilerDirectiveLine(
187 const char *) const;
188 std::optional<LineClassification> IsFreeFormCompilerDirectiveLine(
189 const char *) const;
190 LineClassification ClassifyLine(const char *) const;
191 void SourceFormChange(std::string &&);
192 bool CompilerDirectiveContinuation(TokenSequence &, const char *sentinel);
193 bool SourceLineContinuation(TokenSequence &);
194
195 Messages &messages_;
196 CookedSource &cooked_;
197 Preprocessor &preprocessor_;
198 AllSources &allSources_;
199 common::LanguageFeatureControl features_;
200 bool inFixedForm_{false};
201 int fixedFormColumnLimit_{72};
202 Encoding encoding_{Encoding::UTF_8};
203 int parenthesisNesting_{0};
204 int prescannerNesting_{0};
205 int continuationLines_{0};
206 bool isPossibleMacroCall_{false};
207
208 Provenance startProvenance_;
209 const char *start_{nullptr}; // beginning of current source file content
210 const char *limit_{nullptr}; // first address after end of current source
211 const char *nextLine_{nullptr}; // next line to process; <= limit_
212 const char *directiveSentinel_{nullptr}; // current compiler directive
213
214 // These data members are state for processing the source line containing
215 // "at_", which goes to up to the newline character before "nextLine_".
216 const char *at_{nullptr}; // next character to process; < nextLine_
217 int column_{1}; // card image column position of next character
218 bool tabInCurrentLine_{false};
219 bool slashInCurrentStatement_{false};
220 bool preventHollerith_{false}; // CHARACTER*4HIMOM not Hollerith
221 bool inCharLiteral_{false};
222 bool continuationInCharLiteral_{false};
223 bool inPreprocessorDirective_{false};
224
225 // In some edge cases of compiler directive continuation lines, it
226 // is necessary to treat the line break as a space character by
227 // setting this flag, which is cleared by EmitChar().
228 bool insertASpace_{false};
229
230 // When a free form continuation marker (&) appears at the end of a line
231 // before a INCLUDE or #include, we delete it and omit the newline, so
232 // that the first line of the included file is truly a continuation of
233 // the line before. Also used when the & appears at the end of the last
234 // line in an include file.
235 bool omitNewline_{false};
236 bool skipLeadingAmpersand_{false};
237
238 const Provenance spaceProvenance_{
239 allSources_.CompilerInsertionProvenance(' ')};
240 const Provenance backslashProvenance_{
241 allSources_.CompilerInsertionProvenance('\\')};
242
243 // To avoid probing the set of active compiler directive sentinel strings
244 // on every comment line, they're checked first with a cheap Bloom filter.
245 static const int prime1{1019}, prime2{1021};
246 std::bitset<prime2> compilerDirectiveBloomFilter_; // 128 bytes
247 std::unordered_set<std::string> compilerDirectiveSentinels_;
248};
249} // namespace Fortran::parser
250#endif // FORTRAN_PARSER_PRESCAN_H_
251

// Source: flang/lib/Parser/prescan.h