Warning: That file was not part of the compilation database. It may have many parsing errors.
1 | //===--- Token.h - Token interface ------------------------------*- C++ -*-===// |
---|---|
2 | // |
3 | // The LLVM Compiler Infrastructure |
4 | // |
5 | // This file is distributed under the University of Illinois Open Source |
6 | // License. See LICENSE.TXT for details. |
7 | // |
8 | //===----------------------------------------------------------------------===// |
9 | // |
10 | // This file defines the Token interface. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_CLANG_LEX_TOKEN_H |
15 | #define LLVM_CLANG_LEX_TOKEN_H |
16 | |
17 | #include "clang/Basic/SourceLocation.h" |
18 | #include "clang/Basic/TokenKinds.h" |
19 | #include "llvm/ADT/StringRef.h" |
20 | #include <cassert> |
21 | |
22 | namespace clang { |
23 | |
24 | class IdentifierInfo; |
25 | |
26 | /// Token - This structure provides full information about a lexed token. |
27 | /// It is not intended to be space efficient, it is intended to return as much |
28 | /// information as possible about each returned token. This is expected to be |
29 | /// compressed into a smaller form if memory footprint is important. |
30 | /// |
31 | /// The parser can create a special "annotation token" representing a stream of |
32 | /// tokens that were parsed and semantically resolved, e.g.: "foo::MyClass<int>" |
33 | /// can be represented by a single typename annotation token that carries |
34 | /// information about the SourceRange of the tokens and the type object. |
35 | class Token { |
36 | /// The location of the token. This is actually a SourceLocation. |
37 | unsigned Loc; |
38 | |
39 | // Conceptually these next two fields could be in a union. However, this |
40 | // causes gcc 4.2 to pessimize LexTokenInternal, a very performance critical |
41 | // routine. Keeping as separate members with casts until a more beautiful fix |
42 | // presents itself. |
43 | |
44 | /// UintData - This holds either the length of the token text, when |
45 | /// a normal token, or the end of the SourceRange when an annotation |
46 | /// token. |
47 | unsigned UintData; |
48 | |
49 | /// PtrData - This is a union of four different pointer types, which depends |
50 | /// on what type of token this is: |
51 | /// Identifiers, keywords, etc: |
52 | /// This is an IdentifierInfo*, which contains the uniqued identifier |
53 | /// spelling. |
54 | /// Literals: isLiteral() returns true. |
55 | /// This is a pointer to the start of the token in a text buffer, which |
56 | /// may be dirty (have trigraphs / escaped newlines). |
57 | /// Annotations (resolved type names, C++ scopes, etc): isAnnotation(). |
58 | /// This is a pointer to sema-specific data for the annotation token. |
59 | /// Eof: |
60 | // This is a pointer to a Decl. |
61 | /// Other: |
62 | /// This is null. |
63 | void *PtrData; |
64 | |
65 | /// Kind - The actual flavor of token this is. |
66 | tok::TokenKind Kind; |
67 | |
68 | /// Flags - Bits we track about this token, members of the TokenFlags enum. |
69 | unsigned short Flags; |
70 | |
71 | public: |
72 | // Various flags set per token: |
73 | enum TokenFlags { |
74 | StartOfLine = 0x01, // At start of line or only after whitespace |
75 | // (considering the line after macro expansion). |
76 | LeadingSpace = 0x02, // Whitespace exists before this token (considering |
77 | // whitespace after macro expansion). |
78 | DisableExpand = 0x04, // This identifier may never be macro expanded. |
79 | NeedsCleaning = 0x08, // Contained an escaped newline or trigraph. |
80 | LeadingEmptyMacro = 0x10, // Empty macro exists before this token. |
81 | HasUDSuffix = 0x20, // This string or character literal has a ud-suffix. |
82 | HasUCN = 0x40, // This identifier contains a UCN. |
83 | IgnoredComma = 0x80, // This comma is not a macro argument separator (MS). |
84 | StringifiedInMacro = 0x100, // This string or character literal is formed by |
85 | // macro stringizing or charizing operator. |
86 | CommaAfterElided = 0x200, // The comma following this token was elided (MS). |
87 | IsEditorPlaceholder = 0x400, // This identifier is a placeholder. |
88 | }; |
89 | |
90 | tok::TokenKind getKind() const { return Kind; } |
91 | void setKind(tok::TokenKind K) { Kind = K; } |
92 | |
93 | /// is/isNot - Predicates to check if this token is a specific kind, as in |
94 | /// "if (Tok.is(tok::l_brace)) {...}". |
95 | bool is(tok::TokenKind K) const { return Kind == K; } |
96 | bool isNot(tok::TokenKind K) const { return Kind != K; } |
97 | bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const { |
98 | return is(K1) || is(K2); |
99 | } |
100 | template <typename... Ts> |
101 | bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, Ts... Ks) const { |
102 | return is(K1) || isOneOf(K2, Ks...); |
103 | } |
104 | |
105 | /// Return true if this is a raw identifier (when lexing |
106 | /// in raw mode) or a non-keyword identifier (when lexing in non-raw mode). |
107 | bool isAnyIdentifier() const { |
108 | return tok::isAnyIdentifier(getKind()); |
109 | } |
110 | |
111 | /// Return true if this is a "literal", like a numeric |
112 | /// constant, string, etc. |
113 | bool isLiteral() const { |
114 | return tok::isLiteral(getKind()); |
115 | } |
116 | |
117 | /// Return true if this is any of tok::annot_* kind tokens. |
118 | bool isAnnotation() const { |
119 | return tok::isAnnotation(getKind()); |
120 | } |
121 | |
122 | /// Return a source location identifier for the specified |
123 | /// offset in the current file. |
124 | SourceLocation getLocation() const { |
125 | return SourceLocation::getFromRawEncoding(Loc); |
126 | } |
127 | unsigned getLength() const { |
128 | assert(!isAnnotation() && "Annotation tokens have no length field"); |
129 | return UintData; |
130 | } |
131 | |
132 | void setLocation(SourceLocation L) { Loc = L.getRawEncoding(); } |
133 | void setLength(unsigned Len) { |
134 | assert(!isAnnotation() && "Annotation tokens have no length field"); |
135 | UintData = Len; |
136 | } |
137 | |
138 | SourceLocation getAnnotationEndLoc() const { |
139 | assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token"); |
140 | return SourceLocation::getFromRawEncoding(UintData ? UintData : Loc); |
141 | } |
142 | void setAnnotationEndLoc(SourceLocation L) { |
143 | assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token"); |
144 | UintData = L.getRawEncoding(); |
145 | } |
146 | |
147 | SourceLocation getLastLoc() const { |
148 | return isAnnotation() ? getAnnotationEndLoc() : getLocation(); |
149 | } |
150 | |
151 | SourceLocation getEndLoc() const { |
152 | return isAnnotation() ? getAnnotationEndLoc() |
153 | : getLocation().getLocWithOffset(getLength()); |
154 | } |
155 | |
156 | /// SourceRange of the group of tokens that this annotation token |
157 | /// represents. |
158 | SourceRange getAnnotationRange() const { |
159 | return SourceRange(getLocation(), getAnnotationEndLoc()); |
160 | } |
161 | void setAnnotationRange(SourceRange R) { |
162 | setLocation(R.getBegin()); |
163 | setAnnotationEndLoc(R.getEnd()); |
164 | } |
165 | |
166 | const char *getName() const { return tok::getTokenName(Kind); } |
167 | |
168 | /// Reset all flags to cleared. |
169 | void startToken() { |
170 | Kind = tok::unknown; |
171 | Flags = 0; |
172 | PtrData = nullptr; |
173 | UintData = 0; |
174 | Loc = SourceLocation().getRawEncoding(); |
175 | } |
176 | |
177 | IdentifierInfo *getIdentifierInfo() const { |
178 | assert(isNot(tok::raw_identifier) && |
179 | "getIdentifierInfo() on a tok::raw_identifier token!"); |
180 | assert(!isAnnotation() && |
181 | "getIdentifierInfo() on an annotation token!"); |
182 | if (isLiteral()) return nullptr; |
183 | if (is(tok::eof)) return nullptr; |
184 | return (IdentifierInfo*) PtrData; |
185 | } |
186 | void setIdentifierInfo(IdentifierInfo *II) { |
187 | PtrData = (void*) II; |
188 | } |
189 | |
190 | const void *getEofData() const { |
191 | assert(is(tok::eof)); |
192 | return reinterpret_cast<const void *>(PtrData); |
193 | } |
194 | void setEofData(const void *D) { |
195 | assert(is(tok::eof)); |
196 | assert(!PtrData); |
197 | PtrData = const_cast<void *>(D); |
198 | } |
199 | |
200 | /// getRawIdentifier - For a raw identifier token (i.e., an identifier |
201 | /// lexed in raw mode), returns a reference to the text substring in the |
202 | /// buffer if known. |
203 | StringRef getRawIdentifier() const { |
204 | assert(is(tok::raw_identifier)); |
205 | return StringRef(reinterpret_cast<const char *>(PtrData), getLength()); |
206 | } |
207 | void setRawIdentifierData(const char *Ptr) { |
208 | assert(is(tok::raw_identifier)); |
209 | PtrData = const_cast<char*>(Ptr); |
210 | } |
211 | |
212 | /// getLiteralData - For a literal token (numeric constant, string, etc), this |
213 | /// returns a pointer to the start of it in the text buffer if known, null |
214 | /// otherwise. |
215 | const char *getLiteralData() const { |
216 | assert(isLiteral() && "Cannot get literal data of non-literal"); |
217 | return reinterpret_cast<const char*>(PtrData); |
218 | } |
219 | void setLiteralData(const char *Ptr) { |
220 | assert(isLiteral() && "Cannot set literal data of non-literal"); |
221 | PtrData = const_cast<char*>(Ptr); |
222 | } |
223 | |
224 | void *getAnnotationValue() const { |
225 | assert(isAnnotation() && "Used AnnotVal on non-annotation token"); |
226 | return PtrData; |
227 | } |
228 | void setAnnotationValue(void *val) { |
229 | assert(isAnnotation() && "Used AnnotVal on non-annotation token"); |
230 | PtrData = val; |
231 | } |
232 | |
233 | /// Set the specified flag. |
234 | void setFlag(TokenFlags Flag) { |
235 | Flags |= Flag; |
236 | } |
237 | |
238 | /// Get the specified flag. |
239 | bool getFlag(TokenFlags Flag) const { |
240 | return (Flags & Flag) != 0; |
241 | } |
242 | |
243 | /// Unset the specified flag. |
244 | void clearFlag(TokenFlags Flag) { |
245 | Flags &= ~Flag; |
246 | } |
247 | |
248 | /// Return the internal represtation of the flags. |
249 | /// |
250 | /// This is only intended for low-level operations such as writing tokens to |
251 | /// disk. |
252 | unsigned getFlags() const { |
253 | return Flags; |
254 | } |
255 | |
256 | /// Set a flag to either true or false. |
257 | void setFlagValue(TokenFlags Flag, bool Val) { |
258 | if (Val) |
259 | setFlag(Flag); |
260 | else |
261 | clearFlag(Flag); |
262 | } |
263 | |
264 | /// isAtStartOfLine - Return true if this token is at the start of a line. |
265 | /// |
266 | bool isAtStartOfLine() const { return getFlag(StartOfLine); } |
267 | |
268 | /// Return true if this token has whitespace before it. |
269 | /// |
270 | bool hasLeadingSpace() const { return getFlag(LeadingSpace); } |
271 | |
272 | /// Return true if this identifier token should never |
273 | /// be expanded in the future, due to C99 6.10.3.4p2. |
274 | bool isExpandDisabled() const { return getFlag(DisableExpand); } |
275 | |
276 | /// Return true if we have an ObjC keyword identifier. |
277 | bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const; |
278 | |
279 | /// Return the ObjC keyword kind. |
280 | tok::ObjCKeywordKind getObjCKeywordID() const; |
281 | |
282 | /// Return true if this token has trigraphs or escaped newlines in it. |
283 | bool needsCleaning() const { return getFlag(NeedsCleaning); } |
284 | |
285 | /// Return true if this token has an empty macro before it. |
286 | /// |
287 | bool hasLeadingEmptyMacro() const { return getFlag(LeadingEmptyMacro); } |
288 | |
289 | /// Return true if this token is a string or character literal which |
290 | /// has a ud-suffix. |
291 | bool hasUDSuffix() const { return getFlag(HasUDSuffix); } |
292 | |
293 | /// Returns true if this token contains a universal character name. |
294 | bool hasUCN() const { return getFlag(HasUCN); } |
295 | |
296 | /// Returns true if this token is formed by macro by stringizing or charizing |
297 | /// operator. |
298 | bool stringifiedInMacro() const { return getFlag(StringifiedInMacro); } |
299 | |
300 | /// Returns true if the comma after this token was elided. |
301 | bool commaAfterElided() const { return getFlag(CommaAfterElided); } |
302 | |
303 | /// Returns true if this token is an editor placeholder. |
304 | /// |
305 | /// Editor placeholders are produced by the code-completion engine and are |
306 | /// represented as characters between '<#' and '#>' in the source code. The |
307 | /// lexer uses identifier tokens to represent placeholders. |
308 | bool isEditorPlaceholder() const { return getFlag(IsEditorPlaceholder); } |
309 | }; |
310 | |
311 | /// Information about the conditional stack (\#if directives) |
312 | /// currently active. |
313 | struct PPConditionalInfo { |
314 | /// Location where the conditional started. |
315 | SourceLocation IfLoc; |
316 | |
317 | /// True if this was contained in a skipping directive, e.g., |
318 | /// in a "\#if 0" block. |
319 | bool WasSkipping; |
320 | |
321 | /// True if we have emitted tokens already, and now we're in |
322 | /// an \#else block or something. Only useful in Skipping blocks. |
323 | bool FoundNonSkip; |
324 | |
325 | /// True if we've seen a \#else in this block. If so, |
326 | /// \#elif/\#else directives are not allowed. |
327 | bool FoundElse; |
328 | }; |
329 | |
330 | } // end namespace clang |
331 | |
332 | namespace llvm { |
333 | template <> |
334 | struct isPodLike<clang::Token> { static const bool value = true; }; |
335 | } // end namespace llvm |
336 | |
337 | #endif // LLVM_CLANG_LEX_TOKEN_H |
338 |
Warning: That file was not part of the compilation database. It may have many parsing errors.