Token.h source code [clang/include/clang/Lex/Token.h]

1	//===--- Token.h - Token interface ------------------------------- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file defines the Token interface.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#ifndef LLVM_CLANG_LEX_TOKEN_H
14	#define LLVM_CLANG_LEX_TOKEN_H
15
16	#include "clang/Basic/SourceLocation.h"
17	#include "clang/Basic/TokenKinds.h"
18	#include "llvm/ADT/ArrayRef.h"
19	#include "llvm/ADT/StringRef.h"
20	#include <cassert>
21
22	namespace clang {
23
24	class IdentifierInfo;
25	class LangOptions;
26
27	/// Token - This structure provides full information about a lexed token.
28	/// It is not intended to be space efficient, it is intended to return as much
29	/// information as possible about each returned token. This is expected to be
30	/// compressed into a smaller form if memory footprint is important.
31	///
32	/// The parser can create a special "annotation token" representing a stream of
33	/// tokens that were parsed and semantically resolved, e.g.: "foo::MyClass<int>"
34	/// can be represented by a single typename annotation token that carries
35	/// information about the SourceRange of the tokens and the type object.
36	class Token {
37	/// The location of the token. This is actually a SourceLocation.
38	SourceLocation::UIntTy Loc;
39
40	// Conceptually these next two fields could be in a union. However, this
41	// causes gcc 4.2 to pessimize LexTokenInternal, a very performance critical
42	// routine. Keeping as separate members with casts until a more beautiful fix
43	// presents itself.
44
45	/// UintData - This holds either the length of the token text, when
46	/// a normal token, or the end of the SourceRange when an annotation
47	/// token.
48	SourceLocation::UIntTy UintData;
49
50	/// PtrData - This is a union of four different pointer types, which depends
51	/// on what type of token this is:
52	/// Identifiers, keywords, etc:
53	/// This is an IdentifierInfo, which contains the uniqued identifier*
54	/// spelling.
55	/// Literals: isLiteral() returns true.
56	/// This is a pointer to the start of the token in a text buffer, which
57	/// may be dirty (have trigraphs / escaped newlines).
58	/// Annotations (resolved type names, C++ scopes, etc): isAnnotation().
59	/// This is a pointer to sema-specific data for the annotation token.
60	/// Eof:
61	// This is a pointer to a Decl.
62	/// Other:
63	/// This is null.
64	void *PtrData;
65
66	/// Kind - The actual flavor of token this is.
67	tok::TokenKind Kind;
68
69	/// Flags - Bits we track about this token, members of the TokenFlags enum.
70	unsigned short Flags;
71
72	public:
73	// Various flags set per token:
74	enum TokenFlags {
75	StartOfLine = `0x01`, // At start of line or only after whitespace
76	// (considering the line after macro expansion).
77	LeadingSpace = `0x02`, // Whitespace exists before this token (considering
78	// whitespace after macro expansion).
79	DisableExpand = `0x04`, // This identifier may never be macro expanded.
80	NeedsCleaning = `0x08`, // Contained an escaped newline or trigraph.
81	LeadingEmptyMacro = `0x10`, // Empty macro exists before this token.
82	HasUDSuffix = `0x20`, // This string or character literal has a ud-suffix.
83	HasUCN = `0x40`, // This identifier contains a UCN.
84	IgnoredComma = `0x80`, // This comma is not a macro argument separator (MS).
85	StringifiedInMacro = `0x100`, // This string or character literal is formed by
86	// macro stringizing or charizing operator.
87	CommaAfterElided = `0x200`, // The comma following this token was elided (MS).
88	IsEditorPlaceholder = `0x400`, // This identifier is a placeholder.
89	IsReinjected = `0x800`, // A phase 4 token that was produced before and
90	// re-added, e.g. via EnterTokenStream. Annotation
91	// tokens are not* reinjected.*
92	};
93
94	tok::TokenKind getKind() const { return Kind; }
95	void setKind(tok::TokenKind K) { Kind = K; }
96
97	/// is/isNot - Predicates to check if this token is a specific kind, as in
98	/// "if (Tok.is(tok::l_brace)) {...}".
99	bool is(tok::TokenKind K) const { return Kind == K; }
100	bool isNot(tok::TokenKind K) const { return Kind != K; }
101	bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const {
102	return is(K: K1) \|\| is(K: K2);
103	}
104	template <typename... Ts> bool isOneOf(tok::TokenKind K1, Ts... Ks) const {
105	return is(K: K1) \|\| isOneOf(Ks...);
106	}
107
108	/// Return true if this is a raw identifier (when lexing
109	/// in raw mode) or a non-keyword identifier (when lexing in non-raw mode).
110	bool isAnyIdentifier() const {
111	return tok::isAnyIdentifier(K: getKind());
112	}
113
114	/// Return true if this is a "literal", like a numeric
115	/// constant, string, etc.
116	bool isLiteral() const {
117	return tok::isLiteral(K: getKind());
118	}
119
120	/// Return true if this is any of tok::annot_ kind tokens.*
121	bool isAnnotation() const { return tok::isAnnotation(K: getKind()); }
122
123	/// Return true if the token is a keyword that is parsed in the same
124	/// position as a standard attribute, but that has semantic meaning
125	/// and so cannot be a true attribute.
126	bool isRegularKeywordAttribute() const {
127	return tok::isRegularKeywordAttribute(K: getKind());
128	}
129
130	/// Return a source location identifier for the specified
131	/// offset in the current file.
132	SourceLocation getLocation() const {
133	return SourceLocation::getFromRawEncoding(Encoding: Loc);
134	}
135	unsigned getLength() const {
136	assert(!isAnnotation() && "Annotation tokens have no length field");
137	return UintData;
138	}
139
140	void setLocation(SourceLocation L) { Loc = L.getRawEncoding(); }
141	void setLength(unsigned Len) {
142	assert(!isAnnotation() && "Annotation tokens have no length field");
143	UintData = Len;
144	}
145
146	SourceLocation getAnnotationEndLoc() const {
147	assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token");
148	return SourceLocation::getFromRawEncoding(Encoding: UintData ? UintData : Loc);
149	}
150	void setAnnotationEndLoc(SourceLocation L) {
151	assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token");
152	UintData = L.getRawEncoding();
153	}
154
155	SourceLocation getLastLoc() const {
156	return isAnnotation() ? getAnnotationEndLoc() : getLocation();
157	}
158
159	SourceLocation getEndLoc() const {
160	return isAnnotation() ? getAnnotationEndLoc()
161	: getLocation().getLocWithOffset(Offset: getLength());
162	}
163
164	/// SourceRange of the group of tokens that this annotation token
165	/// represents.
166	SourceRange getAnnotationRange() const {
167	return SourceRange (getLocation(), getAnnotationEndLoc());
168	}
169	void setAnnotationRange(SourceRange R) {
170	setLocation(R.getBegin());
171	setAnnotationEndLoc(R.getEnd());
172	}
173
174	const char getName() const* { return tok::getTokenName(Kind); }
175
176	/// Reset all flags to cleared.
177	void startToken() {
178	Kind = tok::unknown;
179	Flags = `0`;
180	PtrData = nullptr;
181	UintData = `0`;
182	Loc = SourceLocation ().getRawEncoding();
183	}
184
185	bool hasPtrData() const { return PtrData != nullptr; }
186
187	IdentifierInfo getIdentifierInfo() const* {
188	assert(isNot(tok::raw_identifier) &&
189	"getIdentifierInfo() on a tok::raw_identifier token!");
190	assert(!isAnnotation() &&
191	"getIdentifierInfo() on an annotation token!");
192	if (isLiteral()) return nullptr;
193	if (is(K: tok::eof)) return nullptr;
194	return (IdentifierInfo*) PtrData;
195	}
196	void setIdentifierInfo(IdentifierInfo *II) {
197	PtrData = (void*) II;
198	}
199
200	const void getEofData() const* {
201	assert(is(tok::eof));
202	return reinterpret_cast<const void *>(PtrData);
203	}
204	void setEofData(const void *D) {
205	assert(is(tok::eof));
206	assert(!PtrData);
207	PtrData = const_cast<void *>(D);
208	}
209
210	/// getRawIdentifier - For a raw identifier token (i.e., an identifier
211	/// lexed in raw mode), returns a reference to the text substring in the
212	/// buffer if known.
213	StringRef getRawIdentifier() const {
214	assert(is(tok::raw_identifier));
215	return StringRef(reinterpret_cast<const char *>(PtrData), getLength());
216	}
217	void setRawIdentifierData(const char *Ptr) {
218	assert(is(tok::raw_identifier));
219	PtrData = const_cast<char*>(Ptr);
220	}
221
222	/// getLiteralData - For a literal token (numeric constant, string, etc), this
223	/// returns a pointer to the start of it in the text buffer if known, null
224	/// otherwise.
225	const char getLiteralData() const* {
226	assert(isLiteral() && "Cannot get literal data of non-literal");
227	return reinterpret_cast<const char*>(PtrData);
228	}
229	void setLiteralData(const char *Ptr) {
230	assert(isLiteral() && "Cannot set literal data of non-literal");
231	PtrData = const_cast<char*>(Ptr);
232	}
233
234	void getAnnotationValue() const* {
235	assert(isAnnotation() && "Used AnnotVal on non-annotation token");
236	return PtrData;
237	}
238	void setAnnotationValue(void *val) {
239	assert(isAnnotation() && "Used AnnotVal on non-annotation token");
240	PtrData = val;
241	}
242
243	/// Set the specified flag.
244	void setFlag(TokenFlags Flag) {
245	Flags \|= Flag;
246	}
247
248	/// Get the specified flag.
249	bool getFlag(TokenFlags Flag) const {
250	return (Flags & Flag) != `0`;
251	}
252
253	/// Unset the specified flag.
254	void clearFlag(TokenFlags Flag) {
255	Flags &= ~Flag;
256	}
257
258	/// Return the internal represtation of the flags.
259	///
260	/// This is only intended for low-level operations such as writing tokens to
261	/// disk.
262	unsigned getFlags() const {
263	return Flags;
264	}
265
266	/// Set a flag to either true or false.
267	void setFlagValue(TokenFlags Flag, bool Val) {
268	if (Val)
269	setFlag(Flag);
270	else
271	clearFlag(Flag);
272	}
273
274	/// isAtStartOfLine - Return true if this token is at the start of a line.
275	///
276	bool isAtStartOfLine() const { return getFlag(Flag: StartOfLine); }
277
278	/// Return true if this token has whitespace before it.
279	///
280	bool hasLeadingSpace() const { return getFlag(Flag: LeadingSpace); }
281
282	/// Return true if this identifier token should never
283	/// be expanded in the future, due to C99 6.10.3.4p2.
284	bool isExpandDisabled() const { return getFlag(Flag: DisableExpand); }
285
286	/// Return true if we have an ObjC keyword identifier.
287	bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const;
288
289	/// Return the ObjC keyword kind.
290	tok::ObjCKeywordKind getObjCKeywordID() const;
291
292	bool isSimpleTypeSpecifier(const LangOptions &LangOpts) const;
293
294	/// Return true if this token has trigraphs or escaped newlines in it.
295	bool needsCleaning() const { return getFlag(Flag: NeedsCleaning); }
296
297	/// Return true if this token has an empty macro before it.
298	///
299	bool hasLeadingEmptyMacro() const { return getFlag(Flag: LeadingEmptyMacro); }
300
301	/// Return true if this token is a string or character literal which
302	/// has a ud-suffix.
303	bool hasUDSuffix() const { return getFlag(Flag: HasUDSuffix); }
304
305	/// Returns true if this token contains a universal character name.
306	bool hasUCN() const { return getFlag(Flag: HasUCN); }
307
308	/// Returns true if this token is formed by macro by stringizing or charizing
309	/// operator.
310	bool stringifiedInMacro() const { return getFlag(Flag: StringifiedInMacro); }
311
312	/// Returns true if the comma after this token was elided.
313	bool commaAfterElided() const { return getFlag(Flag: CommaAfterElided); }
314
315	/// Returns true if this token is an editor placeholder.
316	///
317	/// Editor placeholders are produced by the code-completion engine and are
318	/// represented as characters between '<#' and '#>' in the source code. The
319	/// lexer uses identifier tokens to represent placeholders.
320	bool isEditorPlaceholder() const { return getFlag(Flag: IsEditorPlaceholder); }
321	};
322
323	/// Information about the conditional stack (\#if directives)
324	/// currently active.
325	struct PPConditionalInfo {
326	/// Location where the conditional started.
327	SourceLocation IfLoc;
328
329	/// True if this was contained in a skipping directive, e.g.,
330	/// in a "\#if 0" block.
331	bool WasSkipping;
332
333	/// True if we have emitted tokens already, and now we're in
334	/// an \#else block or something. Only useful in Skipping blocks.
335	bool FoundNonSkip;
336
337	/// True if we've seen a \#else in this block. If so,
338	/// \#elif/\#else directives are not allowed.
339	bool FoundElse;
340	};
341
342	// Extra information needed for annotation tokens.
343	struct PragmaLoopHintInfo {
344	Token PragmaName;
345	Token Option;
346	ArrayRef<Token> Toks;
347	};
348	} // end namespace clang
349
350	#endif // LLVM_CLANG_LEX_TOKEN_H
351

source code of clang/include/clang/Lex/Token.h