1 | //===--- LiteralSupport.h ---------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the NumericLiteralParser, CharLiteralParser, and |
10 | // StringLiteralParser interfaces. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_CLANG_LEX_LITERALSUPPORT_H |
15 | #define LLVM_CLANG_LEX_LITERALSUPPORT_H |
16 | |
17 | #include "clang/Basic/CharInfo.h" |
18 | #include "clang/Basic/LLVM.h" |
19 | #include "clang/Basic/TokenKinds.h" |
20 | #include "llvm/ADT/APFloat.h" |
21 | #include "llvm/ADT/ArrayRef.h" |
22 | #include "llvm/ADT/SmallString.h" |
23 | #include "llvm/ADT/StringRef.h" |
24 | #include "llvm/Support/DataTypes.h" |
25 | |
26 | namespace clang { |
27 | |
28 | class DiagnosticsEngine; |
29 | class Preprocessor; |
30 | class Token; |
31 | class SourceLocation; |
32 | class TargetInfo; |
33 | class SourceManager; |
34 | class LangOptions; |
35 | |
36 | /// Copy characters from Input to Buf, expanding any UCNs. |
37 | void expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input); |
38 | |
39 | /// NumericLiteralParser - This performs strict semantic analysis of the content |
40 | /// of a ppnumber, classifying it as either integer, floating, or erroneous, |
41 | /// determines the radix of the value and can convert it to a useful value. |
42 | class NumericLiteralParser { |
43 | const SourceManager &SM; |
44 | const LangOptions &LangOpts; |
45 | DiagnosticsEngine &Diags; |
46 | |
47 | const char *const ThisTokBegin; |
48 | const char *const ThisTokEnd; |
49 | const char *DigitsBegin, *SuffixBegin; // markers |
50 | const char *s; // cursor |
51 | |
52 | unsigned radix; |
53 | |
54 | bool saw_exponent, saw_period, saw_ud_suffix, saw_fixed_point_suffix; |
55 | |
56 | SmallString<32> UDSuffixBuf; |
57 | |
58 | public: |
59 | NumericLiteralParser(StringRef TokSpelling, SourceLocation TokLoc, |
60 | const SourceManager &SM, const LangOptions &LangOpts, |
61 | const TargetInfo &Target, DiagnosticsEngine &Diags); |
62 | bool hadError : 1; |
63 | bool isUnsigned : 1; |
64 | bool isLong : 1; // This is *not* set for long long. |
65 | bool isLongLong : 1; |
66 | bool isSizeT : 1; // 1z, 1uz (C++2b) |
67 | bool isHalf : 1; // 1.0h |
68 | bool isFloat : 1; // 1.0f |
69 | bool isImaginary : 1; // 1.0i |
70 | bool isFloat16 : 1; // 1.0f16 |
71 | bool isFloat128 : 1; // 1.0q |
72 | uint8_t MicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64. |
73 | |
74 | bool isFract : 1; // 1.0hr/r/lr/uhr/ur/ulr |
75 | bool isAccum : 1; // 1.0hk/k/lk/uhk/uk/ulk |
76 | |
77 | bool isFixedPointLiteral() const { |
78 | return (saw_period || saw_exponent) && saw_fixed_point_suffix; |
79 | } |
80 | |
81 | bool isIntegerLiteral() const { |
82 | return !saw_period && !saw_exponent && !isFixedPointLiteral(); |
83 | } |
84 | bool isFloatingLiteral() const { |
85 | return (saw_period || saw_exponent) && !isFixedPointLiteral(); |
86 | } |
87 | |
88 | bool hasUDSuffix() const { |
89 | return saw_ud_suffix; |
90 | } |
91 | StringRef getUDSuffix() const { |
92 | assert(saw_ud_suffix); |
93 | return UDSuffixBuf; |
94 | } |
95 | unsigned getUDSuffixOffset() const { |
96 | assert(saw_ud_suffix); |
97 | return SuffixBegin - ThisTokBegin; |
98 | } |
99 | |
100 | static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix); |
101 | |
102 | unsigned getRadix() const { return radix; } |
103 | |
104 | /// GetIntegerValue - Convert this numeric literal value to an APInt that |
105 | /// matches Val's input width. If there is an overflow (i.e., if the unsigned |
106 | /// value read is larger than the APInt's bits will hold), set Val to the low |
107 | /// bits of the result and return true. Otherwise, return false. |
108 | bool GetIntegerValue(llvm::APInt &Val); |
109 | |
110 | /// GetFloatValue - Convert this numeric literal to a floating value, using |
111 | /// the specified APFloat fltSemantics (specifying float, double, etc). |
112 | /// The optional bool isExact (passed-by-reference) has its value |
113 | /// set to true if the returned APFloat can represent the number in the |
114 | /// literal exactly, and false otherwise. |
115 | llvm::APFloat::opStatus GetFloatValue(llvm::APFloat &Result); |
116 | |
117 | /// GetFixedPointValue - Convert this numeric literal value into a |
118 | /// scaled integer that represents this value. Returns true if an overflow |
119 | /// occurred when calculating the integral part of the scaled integer or |
120 | /// calculating the digit sequence of the exponent. |
121 | bool GetFixedPointValue(llvm::APInt &StoreVal, unsigned Scale); |
122 | |
123 | private: |
124 | |
125 | void ParseNumberStartingWithZero(SourceLocation TokLoc); |
126 | void ParseDecimalOrOctalCommon(SourceLocation TokLoc); |
127 | |
128 | static bool isDigitSeparator(char C) { return C == '\''; } |
129 | |
130 | /// Determine whether the sequence of characters [Start, End) contains |
131 | /// any real digits (not digit separators). |
132 | bool containsDigits(const char *Start, const char *End) { |
133 | return Start != End && (Start + 1 != End || !isDigitSeparator(Start[0])); |
134 | } |
135 | |
136 | enum CheckSeparatorKind { CSK_BeforeDigits, CSK_AfterDigits }; |
137 | |
138 | /// Ensure that we don't have a digit separator here. |
139 | void checkSeparator(SourceLocation TokLoc, const char *Pos, |
140 | CheckSeparatorKind IsAfterDigits); |
141 | |
142 | /// SkipHexDigits - Read and skip over any hex digits, up to End. |
143 | /// Return a pointer to the first non-hex digit or End. |
144 | const char *SkipHexDigits(const char *ptr) { |
145 | while (ptr != ThisTokEnd && (isHexDigit(*ptr) || isDigitSeparator(*ptr))) |
146 | ptr++; |
147 | return ptr; |
148 | } |
149 | |
150 | /// SkipOctalDigits - Read and skip over any octal digits, up to End. |
151 | /// Return a pointer to the first non-hex digit or End. |
152 | const char *SkipOctalDigits(const char *ptr) { |
153 | while (ptr != ThisTokEnd && |
154 | ((*ptr >= '0' && *ptr <= '7') || isDigitSeparator(*ptr))) |
155 | ptr++; |
156 | return ptr; |
157 | } |
158 | |
159 | /// SkipDigits - Read and skip over any digits, up to End. |
160 | /// Return a pointer to the first non-hex digit or End. |
161 | const char *SkipDigits(const char *ptr) { |
162 | while (ptr != ThisTokEnd && (isDigit(*ptr) || isDigitSeparator(*ptr))) |
163 | ptr++; |
164 | return ptr; |
165 | } |
166 | |
167 | /// SkipBinaryDigits - Read and skip over any binary digits, up to End. |
168 | /// Return a pointer to the first non-binary digit or End. |
169 | const char *SkipBinaryDigits(const char *ptr) { |
170 | while (ptr != ThisTokEnd && |
171 | (*ptr == '0' || *ptr == '1' || isDigitSeparator(*ptr))) |
172 | ptr++; |
173 | return ptr; |
174 | } |
175 | |
176 | }; |
177 | |
178 | /// CharLiteralParser - Perform interpretation and semantic analysis of a |
179 | /// character literal. |
180 | class CharLiteralParser { |
181 | uint64_t Value; |
182 | tok::TokenKind Kind; |
183 | bool IsMultiChar; |
184 | bool HadError; |
185 | SmallString<32> UDSuffixBuf; |
186 | unsigned UDSuffixOffset; |
187 | public: |
188 | CharLiteralParser(const char *begin, const char *end, |
189 | SourceLocation Loc, Preprocessor &PP, |
190 | tok::TokenKind kind); |
191 | |
192 | bool hadError() const { return HadError; } |
193 | bool isAscii() const { return Kind == tok::char_constant; } |
194 | bool isWide() const { return Kind == tok::wide_char_constant; } |
195 | bool isUTF8() const { return Kind == tok::utf8_char_constant; } |
196 | bool isUTF16() const { return Kind == tok::utf16_char_constant; } |
197 | bool isUTF32() const { return Kind == tok::utf32_char_constant; } |
198 | bool isMultiChar() const { return IsMultiChar; } |
199 | uint64_t getValue() const { return Value; } |
200 | StringRef getUDSuffix() const { return UDSuffixBuf; } |
201 | unsigned getUDSuffixOffset() const { |
202 | assert(!UDSuffixBuf.empty() && "no ud-suffix" ); |
203 | return UDSuffixOffset; |
204 | } |
205 | }; |
206 | |
207 | /// StringLiteralParser - This decodes string escape characters and performs |
208 | /// wide string analysis and Translation Phase #6 (concatenation of string |
209 | /// literals) (C99 5.1.1.2p1). |
210 | class StringLiteralParser { |
211 | const SourceManager &SM; |
212 | const LangOptions &Features; |
213 | const TargetInfo &Target; |
214 | DiagnosticsEngine *Diags; |
215 | |
216 | unsigned MaxTokenLength; |
217 | unsigned SizeBound; |
218 | unsigned CharByteWidth; |
219 | tok::TokenKind Kind; |
220 | SmallString<512> ResultBuf; |
221 | char *ResultPtr; // cursor |
222 | SmallString<32> UDSuffixBuf; |
223 | unsigned UDSuffixToken; |
224 | unsigned UDSuffixOffset; |
225 | public: |
226 | StringLiteralParser(ArrayRef<Token> StringToks, |
227 | Preprocessor &PP, bool Complain = true); |
228 | StringLiteralParser(ArrayRef<Token> StringToks, |
229 | const SourceManager &sm, const LangOptions &features, |
230 | const TargetInfo &target, |
231 | DiagnosticsEngine *diags = nullptr) |
232 | : SM(sm), Features(features), Target(target), Diags(diags), |
233 | MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown), |
234 | ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) { |
235 | init(StringToks); |
236 | } |
237 | |
238 | |
239 | bool hadError; |
240 | bool Pascal; |
241 | |
242 | StringRef GetString() const { |
243 | return StringRef(ResultBuf.data(), GetStringLength()); |
244 | } |
245 | unsigned GetStringLength() const { return ResultPtr-ResultBuf.data(); } |
246 | |
247 | unsigned GetNumStringChars() const { |
248 | return GetStringLength() / CharByteWidth; |
249 | } |
250 | /// getOffsetOfStringByte - This function returns the offset of the |
251 | /// specified byte of the string data represented by Token. This handles |
252 | /// advancing over escape sequences in the string. |
253 | /// |
254 | /// If the Diagnostics pointer is non-null, then this will do semantic |
255 | /// checking of the string literal and emit errors and warnings. |
256 | unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const; |
257 | |
258 | bool isAscii() const { return Kind == tok::string_literal; } |
259 | bool isWide() const { return Kind == tok::wide_string_literal; } |
260 | bool isUTF8() const { return Kind == tok::utf8_string_literal; } |
261 | bool isUTF16() const { return Kind == tok::utf16_string_literal; } |
262 | bool isUTF32() const { return Kind == tok::utf32_string_literal; } |
263 | bool isPascal() const { return Pascal; } |
264 | |
265 | StringRef getUDSuffix() const { return UDSuffixBuf; } |
266 | |
267 | /// Get the index of a token containing a ud-suffix. |
268 | unsigned getUDSuffixToken() const { |
269 | assert(!UDSuffixBuf.empty() && "no ud-suffix" ); |
270 | return UDSuffixToken; |
271 | } |
272 | /// Get the spelling offset of the first byte of the ud-suffix. |
273 | unsigned getUDSuffixOffset() const { |
274 | assert(!UDSuffixBuf.empty() && "no ud-suffix" ); |
275 | return UDSuffixOffset; |
276 | } |
277 | |
278 | static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix); |
279 | |
280 | private: |
281 | void init(ArrayRef<Token> StringToks); |
282 | bool CopyStringFragment(const Token &Tok, const char *TokBegin, |
283 | StringRef Fragment); |
284 | void DiagnoseLexingError(SourceLocation Loc); |
285 | }; |
286 | |
287 | } // end namespace clang |
288 | |
289 | #endif |
290 | |