1//===- Preprocessor.h - C Language Family Preprocessor ----------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10/// \file
11/// Defines the clang::Preprocessor interface.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_CLANG_LEX_PREPROCESSOR_H
16#define LLVM_CLANG_LEX_PREPROCESSOR_H
17
18#include "clang/Basic/Builtins.h"
19#include "clang/Basic/Diagnostic.h"
20#include "clang/Basic/IdentifierTable.h"
21#include "clang/Basic/LLVM.h"
22#include "clang/Basic/LangOptions.h"
23#include "clang/Basic/Module.h"
24#include "clang/Basic/SourceLocation.h"
25#include "clang/Basic/SourceManager.h"
26#include "clang/Basic/TokenKinds.h"
27#include "clang/Lex/Lexer.h"
28#include "clang/Lex/MacroInfo.h"
29#include "clang/Lex/ModuleLoader.h"
30#include "clang/Lex/ModuleMap.h"
31#include "clang/Lex/PPCallbacks.h"
32#include "clang/Lex/PTHLexer.h"
33#include "clang/Lex/Token.h"
34#include "clang/Lex/TokenLexer.h"
35#include "llvm/ADT/ArrayRef.h"
36#include "llvm/ADT/DenseMap.h"
37#include "llvm/ADT/FoldingSet.h"
38#include "llvm/ADT/None.h"
39#include "llvm/ADT/Optional.h"
40#include "llvm/ADT/PointerUnion.h"
41#include "llvm/ADT/STLExtras.h"
42#include "llvm/ADT/SmallPtrSet.h"
43#include "llvm/ADT/SmallVector.h"
44#include "llvm/ADT/StringRef.h"
45#include "llvm/ADT/TinyPtrVector.h"
46#include "llvm/ADT/iterator_range.h"
47#include "llvm/Support/Allocator.h"
48#include "llvm/Support/Casting.h"
49#include "llvm/Support/Registry.h"
50#include <cassert>
51#include <cstddef>
52#include <cstdint>
53#include <memory>
54#include <map>
55#include <string>
56#include <utility>
57#include <vector>
58
59namespace llvm {
60
61template<unsigned InternalLen> class SmallString;
62
63} // namespace llvm
64
65namespace clang {
66
67class CodeCompletionHandler;
68class CommentHandler;
69class DirectoryEntry;
70class DirectoryLookup;
71class ExternalPreprocessorSource;
72class FileEntry;
73class FileManager;
74class HeaderSearch;
75class MacroArgs;
76class MemoryBufferCache;
77class PragmaHandler;
78class PragmaNamespace;
79class PreprocessingRecord;
80class PreprocessorLexer;
81class PreprocessorOptions;
82class PTHManager;
83class ScratchBuffer;
84class TargetInfo;
85
86/// Stores token information for comparing actual tokens with
87/// predefined values. Only handles simple tokens and identifiers.
88class TokenValue {
89 tok::TokenKind Kind;
90 IdentifierInfo *II;
91
92public:
93 TokenValue(tok::TokenKind Kind) : Kind(Kind), II(nullptr) {
94 assert(Kind != tok::raw_identifier && "Raw identifiers are not supported.");
95 assert(Kind != tok::identifier &&
96 "Identifiers should be created by TokenValue(IdentifierInfo *)");
97 assert(!tok::isLiteral(Kind) && "Literals are not supported.");
98 assert(!tok::isAnnotation(Kind) && "Annotations are not supported.");
99 }
100
101 TokenValue(IdentifierInfo *II) : Kind(tok::identifier), II(II) {}
102
103 bool operator==(const Token &Tok) const {
104 return Tok.getKind() == Kind &&
105 (!II || II == Tok.getIdentifierInfo());
106 }
107};
108
/// Context in which a macro name is used.
enum MacroUse {
  /// Any use other than in \#define or \#undef.
  MU_Other = 0,

  /// The macro name is the one specified in a \#define.
  MU_Define,

  /// The macro name is the one specified in an \#undef.
  MU_Undef
};
120
121/// Engages in a tight little dance with the lexer to efficiently
122/// preprocess tokens.
123///
124/// Lexers know only about tokens within a single source file, and don't
125/// know anything about preprocessor-level issues like the \#include stack,
126/// token expansion, etc.
127class Preprocessor {
128 friend class VAOptDefinitionContext;
129 friend class VariadicMacroScopeGuard;
130
131 std::shared_ptr<PreprocessorOptions> PPOpts;
132 DiagnosticsEngine *Diags;
133 LangOptions &LangOpts;
134 const TargetInfo *Target = nullptr;
135 const TargetInfo *AuxTarget = nullptr;
136 FileManager &FileMgr;
137 SourceManager &SourceMgr;
138 MemoryBufferCache &PCMCache;
139 std::unique_ptr<ScratchBuffer> ScratchBuf;
140 HeaderSearch &HeaderInfo;
141 ModuleLoader &TheModuleLoader;
142
143 /// External source of macros.
144 ExternalPreprocessorSource *ExternalSource;
145
146 /// An optional PTHManager object used for getting tokens from
147 /// a token cache rather than lexing the original source file.
148 std::unique_ptr<PTHManager> PTH;
149
150 /// A BumpPtrAllocator object used to quickly allocate and release
151 /// objects internal to the Preprocessor.
152 llvm::BumpPtrAllocator BP;
153
154 /// Identifiers for builtin macros and other builtins.
155 IdentifierInfo *Ident__LINE__, *Ident__FILE__; // __LINE__, __FILE__
156 IdentifierInfo *Ident__DATE__, *Ident__TIME__; // __DATE__, __TIME__
157 IdentifierInfo *Ident__INCLUDE_LEVEL__; // __INCLUDE_LEVEL__
158 IdentifierInfo *Ident__BASE_FILE__; // __BASE_FILE__
159 IdentifierInfo *Ident__TIMESTAMP__; // __TIMESTAMP__
160 IdentifierInfo *Ident__COUNTER__; // __COUNTER__
161 IdentifierInfo *Ident_Pragma, *Ident__pragma; // _Pragma, __pragma
162 IdentifierInfo *Ident__identifier; // __identifier
163 IdentifierInfo *Ident__VA_ARGS__; // __VA_ARGS__
164 IdentifierInfo *Ident__VA_OPT__; // __VA_OPT__
165 IdentifierInfo *Ident__has_feature; // __has_feature
166 IdentifierInfo *Ident__has_extension; // __has_extension
167 IdentifierInfo *Ident__has_builtin; // __has_builtin
168 IdentifierInfo *Ident__has_attribute; // __has_attribute
169 IdentifierInfo *Ident__has_include; // __has_include
170 IdentifierInfo *Ident__has_include_next; // __has_include_next
171 IdentifierInfo *Ident__has_warning; // __has_warning
172 IdentifierInfo *Ident__is_identifier; // __is_identifier
173 IdentifierInfo *Ident__building_module; // __building_module
174 IdentifierInfo *Ident__MODULE__; // __MODULE__
175 IdentifierInfo *Ident__has_cpp_attribute; // __has_cpp_attribute
176 IdentifierInfo *Ident__has_c_attribute; // __has_c_attribute
177 IdentifierInfo *Ident__has_declspec; // __has_declspec_attribute
178 IdentifierInfo *Ident__is_target_arch; // __is_target_arch
179 IdentifierInfo *Ident__is_target_vendor; // __is_target_vendor
180 IdentifierInfo *Ident__is_target_os; // __is_target_os
181 IdentifierInfo *Ident__is_target_environment; // __is_target_environment
182
183 SourceLocation DATELoc, TIMELoc;
184
185 // Next __COUNTER__ value, starts at 0.
186 unsigned CounterValue = 0;
187
188 enum {
189 /// Maximum depth of \#includes.
190 MaxAllowedIncludeStackDepth = 200
191 };
192
193 // State that is set before the preprocessor begins.
194 bool KeepComments : 1;
195 bool KeepMacroComments : 1;
196 bool SuppressIncludeNotFoundError : 1;
197
198 // State that changes while the preprocessor runs:
199 bool InMacroArgs : 1; // True if parsing fn macro invocation args.
200
201 /// Whether the preprocessor owns the header search object.
202 bool OwnsHeaderSearch : 1;
203
204 /// True if macro expansion is disabled.
205 bool DisableMacroExpansion : 1;
206
207 /// Temporarily disables DisableMacroExpansion (i.e. enables expansion)
208 /// when parsing preprocessor directives.
209 bool MacroExpansionInDirectivesOverride : 1;
210
211 class ResetMacroExpansionHelper;
212
213 /// Whether we have already loaded macros from the external source.
214 mutable bool ReadMacrosFromExternalSource : 1;
215
216 /// True if pragmas are enabled.
217 bool PragmasEnabled : 1;
218
219 /// True if the current build action is a preprocessing action.
220 bool PreprocessedOutput : 1;
221
222 /// True if we are currently preprocessing a #if or #elif directive
223 bool ParsingIfOrElifDirective;
224
225 /// True if we are pre-expanding macro arguments.
226 bool InMacroArgPreExpansion;
227
228 /// Mapping/lookup information for all identifiers in
229 /// the program, including program keywords.
230 mutable IdentifierTable Identifiers;
231
232 /// This table contains all the selectors in the program.
233 ///
234 /// Unlike IdentifierTable above, this table *isn't* populated by the
235 /// preprocessor. It is declared/expanded here because its role/lifetime is
236 /// conceptually similar to the IdentifierTable. In addition, the current
 /// control flow (in clang::ParseAST()) makes it convenient to put it here.
238 ///
239 /// FIXME: Make sure the lifetime of Identifiers/Selectors *isn't* tied to
240 /// the lifetime of the preprocessor.
241 SelectorTable Selectors;
242
243 /// Information about builtins.
244 Builtin::Context BuiltinInfo;
245
246 /// Tracks all of the pragmas that the client registered
247 /// with this preprocessor.
248 std::unique_ptr<PragmaNamespace> PragmaHandlers;
249
 /// Pragma handlers of the original source are stored here during the
 /// parsing of a model file.
252 std::unique_ptr<PragmaNamespace> PragmaHandlersBackup;
253
254 /// Tracks all of the comment handlers that the client registered
255 /// with this preprocessor.
256 std::vector<CommentHandler *> CommentHandlers;
257
258 /// True if we want to ignore EOF token and continue later on (thus
259 /// avoid tearing the Lexer and etc. down).
260 bool IncrementalProcessing = false;
261
262 /// The kind of translation unit we are processing.
263 TranslationUnitKind TUKind;
264
265 /// The code-completion handler.
266 CodeCompletionHandler *CodeComplete = nullptr;
267
268 /// The file that we're performing code-completion for, if any.
269 const FileEntry *CodeCompletionFile = nullptr;
270
271 /// The offset in file for the code-completion point.
272 unsigned CodeCompletionOffset = 0;
273
274 /// The location for the code-completion point. This gets instantiated
275 /// when the CodeCompletionFile gets \#include'ed for preprocessing.
276 SourceLocation CodeCompletionLoc;
277
278 /// The start location for the file of the code-completion point.
279 ///
280 /// This gets instantiated when the CodeCompletionFile gets \#include'ed
281 /// for preprocessing.
282 SourceLocation CodeCompletionFileLoc;
283
284 /// The source location of the \c import contextual keyword we just
285 /// lexed, if any.
286 SourceLocation ModuleImportLoc;
287
288 /// The module import path that we're currently processing.
289 SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> ModuleImportPath;
290
291 /// Whether the last token we lexed was an '@'.
292 bool LastTokenWasAt = false;
293
294 /// Whether the module import expects an identifier next. Otherwise,
295 /// it expects a '.' or ';'.
296 bool ModuleImportExpectsIdentifier = false;
297
298 /// The source location of the currently-active
299 /// \#pragma clang arc_cf_code_audited begin.
300 SourceLocation PragmaARCCFCodeAuditedLoc;
301
302 /// The source location of the currently-active
303 /// \#pragma clang assume_nonnull begin.
304 SourceLocation PragmaAssumeNonNullLoc;
305
306 /// True if we hit the code-completion point.
307 bool CodeCompletionReached = false;
308
309 /// The code completion token containing the information
310 /// on the stem that is to be code completed.
311 IdentifierInfo *CodeCompletionII = nullptr;
312
313 /// Range for the code completion token.
314 SourceRange CodeCompletionTokenRange;
315
316 /// The directory that the main file should be considered to occupy,
317 /// if it does not correspond to a real file (as happens when building a
318 /// module).
319 const DirectoryEntry *MainFileDir = nullptr;
320
321 /// The number of bytes that we will initially skip when entering the
322 /// main file, along with a flag that indicates whether skipping this number
323 /// of bytes will place the lexer at the start of a line.
324 ///
325 /// This is used when loading a precompiled preamble.
326 std::pair<int, bool> SkipMainFilePreamble;
327
328public:
329 struct PreambleSkipInfo {
330 SourceLocation HashTokenLoc;
331 SourceLocation IfTokenLoc;
332 bool FoundNonSkipPortion;
333 bool FoundElse;
334 SourceLocation ElseLoc;
335
336 PreambleSkipInfo(SourceLocation HashTokenLoc, SourceLocation IfTokenLoc,
337 bool FoundNonSkipPortion, bool FoundElse,
338 SourceLocation ElseLoc)
339 : HashTokenLoc(HashTokenLoc), IfTokenLoc(IfTokenLoc),
340 FoundNonSkipPortion(FoundNonSkipPortion), FoundElse(FoundElse),
341 ElseLoc(ElseLoc) {}
342 };
343
344private:
345 friend class ASTReader;
346 friend class MacroArgs;
347
348 class PreambleConditionalStackStore {
349 enum State {
350 Off = 0,
351 Recording = 1,
352 Replaying = 2,
353 };
354
355 public:
356 PreambleConditionalStackStore() = default;
357
358 void startRecording() { ConditionalStackState = Recording; }
359 void startReplaying() { ConditionalStackState = Replaying; }
360 bool isRecording() const { return ConditionalStackState == Recording; }
361 bool isReplaying() const { return ConditionalStackState == Replaying; }
362
363 ArrayRef<PPConditionalInfo> getStack() const {
364 return ConditionalStack;
365 }
366
367 void doneReplaying() {
368 ConditionalStack.clear();
369 ConditionalStackState = Off;
370 }
371
372 void setStack(ArrayRef<PPConditionalInfo> s) {
373 if (!isRecording() && !isReplaying())
374 return;
375 ConditionalStack.clear();
376 ConditionalStack.append(s.begin(), s.end());
377 }
378
379 bool hasRecordedPreamble() const { return !ConditionalStack.empty(); }
380
381 bool reachedEOFWhileSkipping() const { return SkipInfo.hasValue(); }
382
383 void clearSkipInfo() { SkipInfo.reset(); }
384
385 llvm::Optional<PreambleSkipInfo> SkipInfo;
386
387 private:
388 SmallVector<PPConditionalInfo, 4> ConditionalStack;
389 State ConditionalStackState = Off;
390 } PreambleConditionalStack;
391
392 /// The current top of the stack that we're lexing from if
393 /// not expanding a macro and we are lexing directly from source code.
394 ///
395 /// Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null.
396 std::unique_ptr<Lexer> CurLexer;
397
398 /// The current top of stack that we're lexing from if
399 /// not expanding from a macro and we are lexing from a PTH cache.
400 ///
401 /// Only one of CurLexer, CurPTHLexer, or CurTokenLexer will be non-null.
402 std::unique_ptr<PTHLexer> CurPTHLexer;
403
 /// The current top of the stack that we're lexing from
 /// if not expanding a macro.
406 ///
407 /// This is an alias for either CurLexer or CurPTHLexer.
408 PreprocessorLexer *CurPPLexer = nullptr;
409
410 /// Used to find the current FileEntry, if CurLexer is non-null
411 /// and if applicable.
412 ///
413 /// This allows us to implement \#include_next and find directory-specific
414 /// properties.
415 const DirectoryLookup *CurDirLookup = nullptr;
416
417 /// The current macro we are expanding, if we are expanding a macro.
418 ///
419 /// One of CurLexer and CurTokenLexer must be null.
420 std::unique_ptr<TokenLexer> CurTokenLexer;
421
422 /// The kind of lexer we're currently working with.
423 enum CurLexerKind {
424 CLK_Lexer,
425 CLK_PTHLexer,
426 CLK_TokenLexer,
427 CLK_CachingLexer,
428 CLK_LexAfterModuleImport
429 } CurLexerKind = CLK_Lexer;
430
431 /// If the current lexer is for a submodule that is being built, this
432 /// is that submodule.
433 Module *CurLexerSubmodule = nullptr;
434
435 /// Keeps track of the stack of files currently
436 /// \#included, and macros currently being expanded from, not counting
437 /// CurLexer/CurTokenLexer.
438 struct IncludeStackInfo {
439 enum CurLexerKind CurLexerKind;
440 Module *TheSubmodule;
441 std::unique_ptr<Lexer> TheLexer;
442 std::unique_ptr<PTHLexer> ThePTHLexer;
443 PreprocessorLexer *ThePPLexer;
444 std::unique_ptr<TokenLexer> TheTokenLexer;
445 const DirectoryLookup *TheDirLookup;
446
447 // The following constructors are completely useless copies of the default
448 // versions, only needed to pacify MSVC.
449 IncludeStackInfo(enum CurLexerKind CurLexerKind, Module *TheSubmodule,
450 std::unique_ptr<Lexer> &&TheLexer,
451 std::unique_ptr<PTHLexer> &&ThePTHLexer,
452 PreprocessorLexer *ThePPLexer,
453 std::unique_ptr<TokenLexer> &&TheTokenLexer,
454 const DirectoryLookup *TheDirLookup)
455 : CurLexerKind(std::move(CurLexerKind)),
456 TheSubmodule(std::move(TheSubmodule)), TheLexer(std::move(TheLexer)),
457 ThePTHLexer(std::move(ThePTHLexer)),
458 ThePPLexer(std::move(ThePPLexer)),
459 TheTokenLexer(std::move(TheTokenLexer)),
460 TheDirLookup(std::move(TheDirLookup)) {}
461 };
462 std::vector<IncludeStackInfo> IncludeMacroStack;
463
464 /// Actions invoked when some preprocessor activity is
465 /// encountered (e.g. a file is \#included, etc).
466 std::unique_ptr<PPCallbacks> Callbacks;
467
468 struct MacroExpandsInfo {
469 Token Tok;
470 MacroDefinition MD;
471 SourceRange Range;
472
473 MacroExpandsInfo(Token Tok, MacroDefinition MD, SourceRange Range)
474 : Tok(Tok), MD(MD), Range(Range) {}
475 };
476 SmallVector<MacroExpandsInfo, 2> DelayedMacroExpandsCallbacks;
477
478 /// Information about a name that has been used to define a module macro.
479 struct ModuleMacroInfo {
480 /// The most recent macro directive for this identifier.
481 MacroDirective *MD;
482
483 /// The active module macros for this identifier.
484 llvm::TinyPtrVector<ModuleMacro *> ActiveModuleMacros;
485
486 /// The generation number at which we last updated ActiveModuleMacros.
487 /// \see Preprocessor::VisibleModules.
488 unsigned ActiveModuleMacrosGeneration = 0;
489
490 /// Whether this macro name is ambiguous.
491 bool IsAmbiguous = false;
492
493 /// The module macros that are overridden by this macro.
494 llvm::TinyPtrVector<ModuleMacro *> OverriddenMacros;
495
496 ModuleMacroInfo(MacroDirective *MD) : MD(MD) {}
497 };
498
499 /// The state of a macro for an identifier.
500 class MacroState {
501 mutable llvm::PointerUnion<MacroDirective *, ModuleMacroInfo *> State;
502
503 ModuleMacroInfo *getModuleInfo(Preprocessor &PP,
504 const IdentifierInfo *II) const {
505 if (II->isOutOfDate())
506 PP.updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II));
507 // FIXME: Find a spare bit on IdentifierInfo and store a
508 // HasModuleMacros flag.
509 if (!II->hasMacroDefinition() ||
510 (!PP.getLangOpts().Modules &&
511 !PP.getLangOpts().ModulesLocalVisibility) ||
512 !PP.CurSubmoduleState->VisibleModules.getGeneration())
513 return nullptr;
514
515 auto *Info = State.dyn_cast<ModuleMacroInfo*>();
516 if (!Info) {
517 Info = new (PP.getPreprocessorAllocator())
518 ModuleMacroInfo(State.get<MacroDirective *>());
519 State = Info;
520 }
521
522 if (PP.CurSubmoduleState->VisibleModules.getGeneration() !=
523 Info->ActiveModuleMacrosGeneration)
524 PP.updateModuleMacroInfo(II, *Info);
525 return Info;
526 }
527
528 public:
529 MacroState() : MacroState(nullptr) {}
530 MacroState(MacroDirective *MD) : State(MD) {}
531
532 MacroState(MacroState &&O) noexcept : State(O.State) {
533 O.State = (MacroDirective *)nullptr;
534 }
535
536 MacroState &operator=(MacroState &&O) noexcept {
537 auto S = O.State;
538 O.State = (MacroDirective *)nullptr;
539 State = S;
540 return *this;
541 }
542
543 ~MacroState() {
544 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
545 Info->~ModuleMacroInfo();
546 }
547
548 MacroDirective *getLatest() const {
549 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
550 return Info->MD;
551 return State.get<MacroDirective*>();
552 }
553
554 void setLatest(MacroDirective *MD) {
555 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
556 Info->MD = MD;
557 else
558 State = MD;
559 }
560
561 bool isAmbiguous(Preprocessor &PP, const IdentifierInfo *II) const {
562 auto *Info = getModuleInfo(PP, II);
563 return Info ? Info->IsAmbiguous : false;
564 }
565
566 ArrayRef<ModuleMacro *>
567 getActiveModuleMacros(Preprocessor &PP, const IdentifierInfo *II) const {
568 if (auto *Info = getModuleInfo(PP, II))
569 return Info->ActiveModuleMacros;
570 return None;
571 }
572
573 MacroDirective::DefInfo findDirectiveAtLoc(SourceLocation Loc,
574 SourceManager &SourceMgr) const {
575 // FIXME: Incorporate module macros into the result of this.
576 if (auto *Latest = getLatest())
577 return Latest->findDirectiveAtLoc(Loc, SourceMgr);
578 return {};
579 }
580
581 void overrideActiveModuleMacros(Preprocessor &PP, IdentifierInfo *II) {
582 if (auto *Info = getModuleInfo(PP, II)) {
583 Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
584 Info->ActiveModuleMacros.begin(),
585 Info->ActiveModuleMacros.end());
586 Info->ActiveModuleMacros.clear();
587 Info->IsAmbiguous = false;
588 }
589 }
590
591 ArrayRef<ModuleMacro*> getOverriddenMacros() const {
592 if (auto *Info = State.dyn_cast<ModuleMacroInfo*>())
593 return Info->OverriddenMacros;
594 return None;
595 }
596
597 void setOverriddenMacros(Preprocessor &PP,
598 ArrayRef<ModuleMacro *> Overrides) {
599 auto *Info = State.dyn_cast<ModuleMacroInfo*>();
600 if (!Info) {
601 if (Overrides.empty())
602 return;
603 Info = new (PP.getPreprocessorAllocator())
604 ModuleMacroInfo(State.get<MacroDirective *>());
605 State = Info;
606 }
607 Info->OverriddenMacros.clear();
608 Info->OverriddenMacros.insert(Info->OverriddenMacros.end(),
609 Overrides.begin(), Overrides.end());
610 Info->ActiveModuleMacrosGeneration = 0;
611 }
612 };
613
614 /// For each IdentifierInfo that was associated with a macro, we
615 /// keep a mapping to the history of all macro definitions and #undefs in
616 /// the reverse order (the latest one is in the head of the list).
617 ///
618 /// This mapping lives within the \p CurSubmoduleState.
619 using MacroMap = llvm::DenseMap<const IdentifierInfo *, MacroState>;
620
621 struct SubmoduleState;
622
623 /// Information about a submodule that we're currently building.
624 struct BuildingSubmoduleInfo {
625 /// The module that we are building.
626 Module *M;
627
628 /// The location at which the module was included.
629 SourceLocation ImportLoc;
630
631 /// Whether we entered this submodule via a pragma.
632 bool IsPragma;
633
634 /// The previous SubmoduleState.
635 SubmoduleState *OuterSubmoduleState;
636
637 /// The number of pending module macro names when we started building this.
638 unsigned OuterPendingModuleMacroNames;
639
640 BuildingSubmoduleInfo(Module *M, SourceLocation ImportLoc, bool IsPragma,
641 SubmoduleState *OuterSubmoduleState,
642 unsigned OuterPendingModuleMacroNames)
643 : M(M), ImportLoc(ImportLoc), IsPragma(IsPragma),
644 OuterSubmoduleState(OuterSubmoduleState),
645 OuterPendingModuleMacroNames(OuterPendingModuleMacroNames) {}
646 };
647 SmallVector<BuildingSubmoduleInfo, 8> BuildingSubmoduleStack;
648
649 /// Information about a submodule's preprocessor state.
650 struct SubmoduleState {
651 /// The macros for the submodule.
652 MacroMap Macros;
653
654 /// The set of modules that are visible within the submodule.
655 VisibleModuleSet VisibleModules;
656
657 // FIXME: CounterValue?
658 // FIXME: PragmaPushMacroInfo?
659 };
660 std::map<Module *, SubmoduleState> Submodules;
661
662 /// The preprocessor state for preprocessing outside of any submodule.
663 SubmoduleState NullSubmoduleState;
664
665 /// The current submodule state. Will be \p NullSubmoduleState if we're not
666 /// in a submodule.
667 SubmoduleState *CurSubmoduleState;
668
669 /// The set of known macros exported from modules.
670 llvm::FoldingSet<ModuleMacro> ModuleMacros;
671
672 /// The names of potential module macros that we've not yet processed.
673 llvm::SmallVector<const IdentifierInfo *, 32> PendingModuleMacroNames;
674
675 /// The list of module macros, for each identifier, that are not overridden by
676 /// any other module macro.
677 llvm::DenseMap<const IdentifierInfo *, llvm::TinyPtrVector<ModuleMacro *>>
678 LeafModuleMacros;
679
 /// Macros that we want to warn about because they are not used at the end
 /// of the translation unit.
682 ///
683 /// We store just their SourceLocations instead of
684 /// something like MacroInfo*. The benefit of this is that when we are
685 /// deserializing from PCH, we don't need to deserialize identifier & macros
686 /// just so that we can report that they are unused, we just warn using
687 /// the SourceLocations of this set (that will be filled by the ASTReader).
688 /// We are using SmallPtrSet instead of a vector for faster removal.
689 using WarnUnusedMacroLocsTy = llvm::SmallPtrSet<SourceLocation, 32>;
690 WarnUnusedMacroLocsTy WarnUnusedMacroLocs;
691
692 /// A "freelist" of MacroArg objects that can be
693 /// reused for quick allocation.
694 MacroArgs *MacroArgCache = nullptr;
695
696 /// For each IdentifierInfo used in a \#pragma push_macro directive,
697 /// we keep a MacroInfo stack used to restore the previous macro value.
698 llvm::DenseMap<IdentifierInfo *, std::vector<MacroInfo *>>
699 PragmaPushMacroInfo;
700
701 // Various statistics we track for performance analysis.
702 unsigned NumDirectives = 0;
703 unsigned NumDefined = 0;
704 unsigned NumUndefined = 0;
705 unsigned NumPragma = 0;
706 unsigned NumIf = 0;
707 unsigned NumElse = 0;
708 unsigned NumEndif = 0;
709 unsigned NumEnteredSourceFiles = 0;
710 unsigned MaxIncludeStackDepth = 0;
711 unsigned NumMacroExpanded = 0;
712 unsigned NumFnMacroExpanded = 0;
713 unsigned NumBuiltinMacroExpanded = 0;
714 unsigned NumFastMacroExpanded = 0;
715 unsigned NumTokenPaste = 0;
716 unsigned NumFastTokenPaste = 0;
717 unsigned NumSkipped = 0;
718
719 /// The predefined macros that preprocessor should use from the
720 /// command line etc.
721 std::string Predefines;
722
723 /// The file ID for the preprocessor predefines.
724 FileID PredefinesFileID;
725
726 /// The file ID for the PCH through header.
727 FileID PCHThroughHeaderFileID;
728
729 /// Whether tokens are being skipped until a #pragma hdrstop is seen.
730 bool SkippingUntilPragmaHdrStop = false;
731
732 /// Whether tokens are being skipped until the through header is seen.
733 bool SkippingUntilPCHThroughHeader = false;
734
735 /// \{
736 /// Cache of macro expanders to reduce malloc traffic.
737 enum { TokenLexerCacheSize = 8 };
738 unsigned NumCachedTokenLexers;
739 std::unique_ptr<TokenLexer> TokenLexerCache[TokenLexerCacheSize];
740 /// \}
741
742 /// Keeps macro expanded tokens for TokenLexers.
 ///
 /// Works like a stack; a TokenLexer adds the macro expanded tokens that it is
 /// going to lex to the cache, and when it finishes the tokens are removed
 /// from the end of the cache.
747 SmallVector<Token, 16> MacroExpandedTokens;
748 std::vector<std::pair<TokenLexer *, size_t>> MacroExpandingLexersStack;
749
750 /// A record of the macro definitions and expansions that
751 /// occurred during preprocessing.
752 ///
753 /// This is an optional side structure that can be enabled with
754 /// \c createPreprocessingRecord() prior to preprocessing.
755 PreprocessingRecord *Record = nullptr;
756
757 /// Cached tokens state.
758 using CachedTokensTy = SmallVector<Token, 1>;
759
760 /// Cached tokens are stored here when we do backtracking or
761 /// lookahead. They are "lexed" by the CachingLex() method.
762 CachedTokensTy CachedTokens;
763
764 /// The position of the cached token that CachingLex() should
765 /// "lex" next.
766 ///
767 /// If it points beyond the CachedTokens vector, it means that a normal
768 /// Lex() should be invoked.
769 CachedTokensTy::size_type CachedLexPos = 0;
770
771 /// Stack of backtrack positions, allowing nested backtracks.
772 ///
773 /// The EnableBacktrackAtThisPos() method pushes a position to
774 /// indicate where CachedLexPos should be set when the BackTrack() method is
775 /// invoked (at which point the last position is popped).
776 std::vector<CachedTokensTy::size_type> BacktrackPositions;
777
778 struct MacroInfoChain {
779 MacroInfo MI;
780 MacroInfoChain *Next;
781 };
782
783 /// MacroInfos are managed as a chain for easy disposal. This is the head
784 /// of that list.
785 MacroInfoChain *MIChainHead = nullptr;
786
787 void updateOutOfDateIdentifier(IdentifierInfo &II) const;
788
789public:
790 Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
791 DiagnosticsEngine &diags, LangOptions &opts, SourceManager &SM,
792 MemoryBufferCache &PCMCache,
793 HeaderSearch &Headers, ModuleLoader &TheModuleLoader,
794 IdentifierInfoLookup *IILookup = nullptr,
795 bool OwnsHeaderSearch = false,
796 TranslationUnitKind TUKind = TU_Complete);
797
798 ~Preprocessor();
799
800 /// Initialize the preprocessor using information about the target.
801 ///
802 /// \param Target is owned by the caller and must remain valid for the
803 /// lifetime of the preprocessor.
804 /// \param AuxTarget is owned by the caller and must remain valid for
805 /// the lifetime of the preprocessor.
806 void Initialize(const TargetInfo &Target,
807 const TargetInfo *AuxTarget = nullptr);
808
809 /// Initialize the preprocessor to parse a model file
810 ///
 /// To parse model files the preprocessor of the original source is reused to
 /// preserve the identifier table. However, to avoid some duplicate
 /// information in the preprocessor some cleanup is needed before it is used
814 /// to parse model files. This method does that cleanup.
815 void InitializeForModelFile();
816
817 /// Cleanup after model file parsing
818 void FinalizeForModelFile();
819
820 /// Retrieve the preprocessor options used to initialize this
821 /// preprocessor.
822 PreprocessorOptions &getPreprocessorOpts() const { return *PPOpts; }
823
824 DiagnosticsEngine &getDiagnostics() const { return *Diags; }
825 void setDiagnostics(DiagnosticsEngine &D) { Diags = &D; }
826
827 const LangOptions &getLangOpts() const { return LangOpts; }
828 const TargetInfo &getTargetInfo() const { return *Target; }
829 const TargetInfo *getAuxTargetInfo() const { return AuxTarget; }
830 FileManager &getFileManager() const { return FileMgr; }
831 SourceManager &getSourceManager() const { return SourceMgr; }
832 MemoryBufferCache &getPCMCache() const { return PCMCache; }
833 HeaderSearch &getHeaderSearchInfo() const { return HeaderInfo; }
834
835 IdentifierTable &getIdentifierTable() { return Identifiers; }
836 const IdentifierTable &getIdentifierTable() const { return Identifiers; }
837 SelectorTable &getSelectorTable() { return Selectors; }
838 Builtin::Context &getBuiltinInfo() { return BuiltinInfo; }
839 llvm::BumpPtrAllocator &getPreprocessorAllocator() { return BP; }
840
841 void setPTHManager(PTHManager* pm);
842
843 PTHManager *getPTHManager() { return PTH.get(); }
844
845 void setExternalSource(ExternalPreprocessorSource *Source) {
846 ExternalSource = Source;
847 }
848
849 ExternalPreprocessorSource *getExternalSource() const {
850 return ExternalSource;
851 }
852
853 /// Retrieve the module loader associated with this preprocessor.
854 ModuleLoader &getModuleLoader() const { return TheModuleLoader; }
855
856 bool hadModuleLoaderFatalFailure() const {
857 return TheModuleLoader.HadFatalFailure;
858 }
859
860 /// True if we are currently preprocessing a #if or #elif directive
861 bool isParsingIfOrElifDirective() const {
862 return ParsingIfOrElifDirective;
863 }
864
865 /// Control whether the preprocessor retains comments in output.
866 void SetCommentRetentionState(bool KeepComments, bool KeepMacroComments) {
867 this->KeepComments = KeepComments | KeepMacroComments;
868 this->KeepMacroComments = KeepMacroComments;
869 }
870
871 bool getCommentRetentionState() const { return KeepComments; }
872
873 void setPragmasEnabled(bool Enabled) { PragmasEnabled = Enabled; }
874 bool getPragmasEnabled() const { return PragmasEnabled; }
875
876 void SetSuppressIncludeNotFoundError(bool Suppress) {
877 SuppressIncludeNotFoundError = Suppress;
878 }
879
880 bool GetSuppressIncludeNotFoundError() {
881 return SuppressIncludeNotFoundError;
882 }
883
884 /// Sets whether the preprocessor is responsible for producing output or if
885 /// it is producing tokens to be consumed by Parse and Sema.
886 void setPreprocessedOutput(bool IsPreprocessedOutput) {
887 PreprocessedOutput = IsPreprocessedOutput;
888 }
889
890 /// Returns true if the preprocessor is responsible for generating output,
891 /// false if it is producing tokens to be consumed by Parse and Sema.
892 bool isPreprocessedOutput() const { return PreprocessedOutput; }
893
894 /// Return true if we are lexing directly from the specified lexer.
895 bool isCurrentLexer(const PreprocessorLexer *L) const {
896 return CurPPLexer == L;
897 }
898
899 /// Return the current lexer being lexed from.
900 ///
901 /// Note that this ignores any potentially active macro expansions and _Pragma
902 /// expansions going on at the time.
903 PreprocessorLexer *getCurrentLexer() const { return CurPPLexer; }
904
905 /// Return the current file lexer being lexed from.
906 ///
907 /// Note that this ignores any potentially active macro expansions and _Pragma
908 /// expansions going on at the time.
909 PreprocessorLexer *getCurrentFileLexer() const;
910
911 /// Return the submodule owning the file being lexed. This may not be
912 /// the current module if we have changed modules since entering the file.
913 Module *getCurrentLexerSubmodule() const { return CurLexerSubmodule; }
914
915 /// Returns the FileID for the preprocessor predefines.
916 FileID getPredefinesFileID() const { return PredefinesFileID; }
917
918 /// \{
919 /// Accessors for preprocessor callbacks.
920 ///
921 /// Note that this class takes ownership of any PPCallbacks object given to
922 /// it.
923 PPCallbacks *getPPCallbacks() const { return Callbacks.get(); }
924 void addPPCallbacks(std::unique_ptr<PPCallbacks> C) {
925 if (Callbacks)
926 C = llvm::make_unique<PPChainedCallbacks>(std::move(C),
927 std::move(Callbacks));
928 Callbacks = std::move(C);
929 }
930 /// \}
931
932 bool isMacroDefined(StringRef Id) {
933 return isMacroDefined(&Identifiers.get(Id));
934 }
935 bool isMacroDefined(const IdentifierInfo *II) {
936 return II->hasMacroDefinition() &&
937 (!getLangOpts().Modules || (bool)getMacroDefinition(II));
938 }
939
940 /// Determine whether II is defined as a macro within the module M,
941 /// if that is a module that we've already preprocessed. Does not check for
942 /// macros imported into M.
943 bool isMacroDefinedInLocalModule(const IdentifierInfo *II, Module *M) {
944 if (!II->hasMacroDefinition())
945 return false;
946 auto I = Submodules.find(M);
947 if (I == Submodules.end())
948 return false;
949 auto J = I->second.Macros.find(II);
950 if (J == I->second.Macros.end())
951 return false;
952 auto *MD = J->second.getLatest();
953 return MD && MD->isDefined();
954 }
955
956 MacroDefinition getMacroDefinition(const IdentifierInfo *II) {
957 if (!II->hasMacroDefinition())
958 return {};
959
960 MacroState &S = CurSubmoduleState->Macros[II];
961 auto *MD = S.getLatest();
962 while (MD && isa<VisibilityMacroDirective>(MD))
963 MD = MD->getPrevious();
964 return MacroDefinition(dyn_cast_or_null<DefMacroDirective>(MD),
965 S.getActiveModuleMacros(*this, II),
966 S.isAmbiguous(*this, II));
967 }
968
969 MacroDefinition getMacroDefinitionAtLoc(const IdentifierInfo *II,
970 SourceLocation Loc) {
971 if (!II->hadMacroDefinition())
972 return {};
973
974 MacroState &S = CurSubmoduleState->Macros[II];
975 MacroDirective::DefInfo DI;
976 if (auto *MD = S.getLatest())
977 DI = MD->findDirectiveAtLoc(Loc, getSourceManager());
978 // FIXME: Compute the set of active module macros at the specified location.
979 return MacroDefinition(DI.getDirective(),
980 S.getActiveModuleMacros(*this, II),
981 S.isAmbiguous(*this, II));
982 }
983
984 /// Given an identifier, return its latest non-imported MacroDirective
985 /// if it is \#define'd and not \#undef'd, or null if it isn't \#define'd.
986 MacroDirective *getLocalMacroDirective(const IdentifierInfo *II) const {
987 if (!II->hasMacroDefinition())
988 return nullptr;
989
990 auto *MD = getLocalMacroDirectiveHistory(II);
991 if (!MD || MD->getDefinition().isUndefined())
992 return nullptr;
993
994 return MD;
995 }
996
997 const MacroInfo *getMacroInfo(const IdentifierInfo *II) const {
998 return const_cast<Preprocessor*>(this)->getMacroInfo(II);
999 }
1000
1001 MacroInfo *getMacroInfo(const IdentifierInfo *II) {
1002 if (!II->hasMacroDefinition())
1003 return nullptr;
1004 if (auto MD = getMacroDefinition(II))
1005 return MD.getMacroInfo();
1006 return nullptr;
1007 }
1008
1009 /// Given an identifier, return the latest non-imported macro
1010 /// directive for that identifier.
1011 ///
1012 /// One can iterate over all previous macro directives from the most recent
1013 /// one.
1014 MacroDirective *getLocalMacroDirectiveHistory(const IdentifierInfo *II) const;
1015
1016 /// Add a directive to the macro directive history for this identifier.
1017 void appendMacroDirective(IdentifierInfo *II, MacroDirective *MD);
1018 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II, MacroInfo *MI,
1019 SourceLocation Loc) {
1020 DefMacroDirective *MD = AllocateDefMacroDirective(MI, Loc);
1021 appendMacroDirective(II, MD);
1022 return MD;
1023 }
1024 DefMacroDirective *appendDefMacroDirective(IdentifierInfo *II,
1025 MacroInfo *MI) {
1026 return appendDefMacroDirective(II, MI, MI->getDefinitionLoc());
1027 }
1028
1029 /// Set a MacroDirective that was loaded from a PCH file.
1030 void setLoadedMacroDirective(IdentifierInfo *II, MacroDirective *ED,
1031 MacroDirective *MD);
1032
1033 /// Register an exported macro for a module and identifier.
1034 ModuleMacro *addModuleMacro(Module *Mod, IdentifierInfo *II, MacroInfo *Macro,
1035 ArrayRef<ModuleMacro *> Overrides, bool &IsNew);
1036 ModuleMacro *getModuleMacro(Module *Mod, IdentifierInfo *II);
1037
1038 /// Get the list of leaf (non-overridden) module macros for a name.
1039 ArrayRef<ModuleMacro*> getLeafModuleMacros(const IdentifierInfo *II) const {
1040 if (II->isOutOfDate())
1041 updateOutOfDateIdentifier(const_cast<IdentifierInfo&>(*II));
1042 auto I = LeafModuleMacros.find(II);
1043 if (I != LeafModuleMacros.end())
1044 return I->second;
1045 return None;
1046 }
1047
1048 /// \{
1049 /// Iterators for the macro history table. Currently defined macros have
1050 /// IdentifierInfo::hasMacroDefinition() set and an empty
1051 /// MacroInfo::getUndefLoc() at the head of the list.
1052 using macro_iterator = MacroMap::const_iterator;
1053
1054 macro_iterator macro_begin(bool IncludeExternalMacros = true) const;
1055 macro_iterator macro_end(bool IncludeExternalMacros = true) const;
1056
1057 llvm::iterator_range<macro_iterator>
1058 macros(bool IncludeExternalMacros = true) const {
1059 macro_iterator begin = macro_begin(IncludeExternalMacros);
1060 macro_iterator end = macro_end(IncludeExternalMacros);
1061 return llvm::make_range(begin, end);
1062 }
1063
1064 /// \}
1065
1066 /// Return the name of the macro defined before \p Loc that has
1067 /// spelling \p Tokens. If there are multiple macros with same spelling,
1068 /// return the last one defined.
1069 StringRef getLastMacroWithSpelling(SourceLocation Loc,
1070 ArrayRef<TokenValue> Tokens) const;
1071
1072 const std::string &getPredefines() const { return Predefines; }
1073
1074 /// Set the predefines for this Preprocessor.
1075 ///
1076 /// These predefines are automatically injected when parsing the main file.
1077 void setPredefines(const char *P) { Predefines = P; }
1078 void setPredefines(StringRef P) { Predefines = P; }
1079
1080 /// Return information about the specified preprocessor
1081 /// identifier token.
1082 IdentifierInfo *getIdentifierInfo(StringRef Name) const {
1083 return &Identifiers.get(Name);
1084 }
1085
1086 /// Add the specified pragma handler to this preprocessor.
1087 ///
1088 /// If \p Namespace is non-null, then it is a token required to exist on the
1089 /// pragma line before the pragma string starts, e.g. "STDC" or "GCC".
1090 void AddPragmaHandler(StringRef Namespace, PragmaHandler *Handler);
1091 void AddPragmaHandler(PragmaHandler *Handler) {
1092 AddPragmaHandler(StringRef(), Handler);
1093 }
1094
1095 /// Remove the specific pragma handler from this preprocessor.
1096 ///
1097 /// If \p Namespace is non-null, then it should be the namespace that
1098 /// \p Handler was added to. It is an error to remove a handler that
1099 /// has not been registered.
1100 void RemovePragmaHandler(StringRef Namespace, PragmaHandler *Handler);
1101 void RemovePragmaHandler(PragmaHandler *Handler) {
1102 RemovePragmaHandler(StringRef(), Handler);
1103 }
1104
1105 /// Install empty handlers for all pragmas (making them ignored).
1106 void IgnorePragmas();
1107
1108 /// Add the specified comment handler to the preprocessor.
1109 void addCommentHandler(CommentHandler *Handler);
1110
1111 /// Remove the specified comment handler.
1112 ///
1113 /// It is an error to remove a handler that has not been registered.
1114 void removeCommentHandler(CommentHandler *Handler);
1115
1116 /// Set the code completion handler to the given object.
1117 void setCodeCompletionHandler(CodeCompletionHandler &Handler) {
1118 CodeComplete = &Handler;
1119 }
1120
1121 /// Retrieve the current code-completion handler.
1122 CodeCompletionHandler *getCodeCompletionHandler() const {
1123 return CodeComplete;
1124 }
1125
1126 /// Clear out the code completion handler.
1127 void clearCodeCompletionHandler() {
1128 CodeComplete = nullptr;
1129 }
1130
1131 /// Hook used by the lexer to invoke the "included file" code
1132 /// completion point.
1133 void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled);
1134
1135 /// Hook used by the lexer to invoke the "natural language" code
1136 /// completion point.
1137 void CodeCompleteNaturalLanguage();
1138
1139 /// Set the code completion token for filtering purposes.
1140 void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter) {
1141 CodeCompletionII = Filter;
1142 }
1143
1144 /// Set the code completion token range for detecting replacement range later
1145 /// on.
1146 void setCodeCompletionTokenRange(const SourceLocation Start,
1147 const SourceLocation End) {
1148 CodeCompletionTokenRange = {Start, End};
1149 }
1150 SourceRange getCodeCompletionTokenRange() const {
1151 return CodeCompletionTokenRange;
1152 }
1153
1154 /// Get the code completion token for filtering purposes.
1155 StringRef getCodeCompletionFilter() {
1156 if (CodeCompletionII)
1157 return CodeCompletionII->getName();
1158 return {};
1159 }
1160
1161 /// Retrieve the preprocessing record, or NULL if there is no
1162 /// preprocessing record.
1163 PreprocessingRecord *getPreprocessingRecord() const { return Record; }
1164
1165 /// Create a new preprocessing record, which will keep track of
1166 /// all macro expansions, macro definitions, etc.
1167 void createPreprocessingRecord();
1168
1169 /// Returns true if the FileEntry is the PCH through header.
1170 bool isPCHThroughHeader(const FileEntry *FE);
1171
1172 /// True if creating a PCH with a through header.
1173 bool creatingPCHWithThroughHeader();
1174
1175 /// True if using a PCH with a through header.
1176 bool usingPCHWithThroughHeader();
1177
1178 /// True if creating a PCH with a #pragma hdrstop.
1179 bool creatingPCHWithPragmaHdrStop();
1180
1181 /// True if using a PCH with a #pragma hdrstop.
1182 bool usingPCHWithPragmaHdrStop();
1183
1184 /// Skip tokens until after the #include of the through header or
1185 /// until after a #pragma hdrstop.
1186 void SkipTokensWhileUsingPCH();
1187
1188 /// Process directives while skipping until the through header or
1189 /// #pragma hdrstop is found.
1190 void HandleSkippedDirectiveWhileUsingPCH(Token &Result,
1191 SourceLocation HashLoc);
1192
1193 /// Enter the specified FileID as the main source file,
1194 /// which implicitly adds the builtin defines etc.
1195 void EnterMainSourceFile();
1196
1197 /// Inform the preprocessor callbacks that processing is complete.
1198 void EndSourceFile();
1199
1200 /// Add a source file to the top of the include stack and
1201 /// start lexing tokens from it instead of the current buffer.
1202 ///
1203 /// Emits a diagnostic, doesn't enter the file, and returns true on error.
1204 bool EnterSourceFile(FileID FID, const DirectoryLookup *Dir,
1205 SourceLocation Loc);
1206
1207 /// Add a Macro to the top of the include stack and start lexing
1208 /// tokens from it instead of the current buffer.
1209 ///
1210 /// \param Args specifies the tokens input to a function-like macro.
1211 /// \param ILEnd specifies the location of the ')' for a function-like macro
1212 /// or the identifier for an object-like macro.
1213 void EnterMacro(Token &Tok, SourceLocation ILEnd, MacroInfo *Macro,
1214 MacroArgs *Args);
1215
1216 /// Add a "macro" context to the top of the include stack,
1217 /// which will cause the lexer to start returning the specified tokens.
1218 ///
1219 /// If \p DisableMacroExpansion is true, tokens lexed from the token stream
1220 /// will not be subject to further macro expansion. Otherwise, these tokens
1221 /// will be re-macro-expanded when/if expansion is enabled.
1222 ///
1223 /// If \p OwnsTokens is false, this method assumes that the specified stream
1224 /// of tokens has a permanent owner somewhere, so they do not need to be
1225 /// copied. If it is true, it assumes the array of tokens is allocated with
1226 /// \c new[] and the Preprocessor will delete[] it.
1227private:
1228 void EnterTokenStream(const Token *Toks, unsigned NumToks,
1229 bool DisableMacroExpansion, bool OwnsTokens);
1230
1231public:
1232 void EnterTokenStream(std::unique_ptr<Token[]> Toks, unsigned NumToks,
1233 bool DisableMacroExpansion) {
1234 EnterTokenStream(Toks.release(), NumToks, DisableMacroExpansion, true);
1235 }
1236
1237 void EnterTokenStream(ArrayRef<Token> Toks, bool DisableMacroExpansion) {
1238 EnterTokenStream(Toks.data(), Toks.size(), DisableMacroExpansion, false);
1239 }
1240
1241 /// Pop the current lexer/macro exp off the top of the lexer stack.
1242 ///
1243 /// This should only be used in situations where the current state of the
1244 /// top-of-stack lexer is known.
1245 void RemoveTopOfLexerStack();
1246
1247 /// From the point that this method is called, and until
1248 /// CommitBacktrackedTokens() or Backtrack() is called, the Preprocessor
1249 /// keeps track of the lexed tokens so that a subsequent Backtrack() call will
1250 /// make the Preprocessor re-lex the same tokens.
1251 ///
1252 /// Nested backtracks are allowed, meaning that EnableBacktrackAtThisPos can
1253 /// be called multiple times and CommitBacktrackedTokens/Backtrack calls will
1254 /// be combined with the EnableBacktrackAtThisPos calls in reverse order.
1255 ///
1256 /// NOTE: *DO NOT* forget to call either CommitBacktrackedTokens or Backtrack
1257 /// at some point after EnableBacktrackAtThisPos. If you don't, caching of
1258 /// tokens will continue indefinitely.
1259 ///
1260 void EnableBacktrackAtThisPos();
1261
1262 /// Disable the last EnableBacktrackAtThisPos call.
1263 void CommitBacktrackedTokens();
1264
1265 struct CachedTokensRange {
1266 CachedTokensTy::size_type Begin, End;
1267 };
1268
1269private:
1270 /// A range of cached tokens that should be erased after lexing
1271 /// when backtracking requires the erasure of such cached tokens.
1272 Optional<CachedTokensRange> CachedTokenRangeToErase;
1273
1274public:
1275 /// Returns the range of cached tokens that were lexed since
1276 /// EnableBacktrackAtThisPos() was previously called.
1277 CachedTokensRange LastCachedTokenRange();
1278
1279 /// Erase the range of cached tokens that were lexed since
1280 /// EnableBacktrackAtThisPos() was previously called.
1281 void EraseCachedTokens(CachedTokensRange TokenRange);
1282
1283 /// Make Preprocessor re-lex the tokens that were lexed since
1284 /// EnableBacktrackAtThisPos() was previously called.
1285 void Backtrack();
1286
1287 /// True if EnableBacktrackAtThisPos() was called and
1288 /// caching of tokens is on.
1289 bool isBacktrackEnabled() const { return !BacktrackPositions.empty(); }
1290
1291 /// Lex the next token for this preprocessor.
1292 void Lex(Token &Result);
1293
1294 void LexAfterModuleImport(Token &Result);
1295
1296 void makeModuleVisible(Module *M, SourceLocation Loc);
1297
1298 SourceLocation getModuleImportLoc(Module *M) const {
1299 return CurSubmoduleState->VisibleModules.getImportLoc(M);
1300 }
1301
/// Lex a string literal, which may be the concatenation of multiple
/// string literals and may even come from macro expansion.
/// \returns true on success, false if an error diagnostic has been generated.
1305 bool LexStringLiteral(Token &Result, std::string &String,
1306 const char *DiagnosticTag, bool AllowMacroExpansion) {
1307 if (AllowMacroExpansion)
1308 Lex(Result);
1309 else
1310 LexUnexpandedToken(Result);
1311 return FinishLexStringLiteral(Result, String, DiagnosticTag,
1312 AllowMacroExpansion);
1313 }
1314
1315 /// Complete the lexing of a string literal where the first token has
1316 /// already been lexed (see LexStringLiteral).
1317 bool FinishLexStringLiteral(Token &Result, std::string &String,
1318 const char *DiagnosticTag,
1319 bool AllowMacroExpansion);
1320
1321 /// Lex a token. If it's a comment, keep lexing until we get
1322 /// something not a comment.
1323 ///
1324 /// This is useful in -E -C mode where comments would foul up preprocessor
1325 /// directive handling.
1326 void LexNonComment(Token &Result) {
1327 do
1328 Lex(Result);
1329 while (Result.getKind() == tok::comment);
1330 }
1331
1332 /// Just like Lex, but disables macro expansion of identifier tokens.
1333 void LexUnexpandedToken(Token &Result) {
1334 // Disable macro expansion.
1335 bool OldVal = DisableMacroExpansion;
1336 DisableMacroExpansion = true;
1337 // Lex the token.
1338 Lex(Result);
1339
1340 // Reenable it.
1341 DisableMacroExpansion = OldVal;
1342 }
1343
1344 /// Like LexNonComment, but this disables macro expansion of
1345 /// identifier tokens.
1346 void LexUnexpandedNonComment(Token &Result) {
1347 do
1348 LexUnexpandedToken(Result);
1349 while (Result.getKind() == tok::comment);
1350 }
1351
1352 /// Parses a simple integer literal to get its numeric value. Floating
1353 /// point literals and user defined literals are rejected. Used primarily to
1354 /// handle pragmas that accept integer arguments.
1355 bool parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value);
1356
1357 /// Disables macro expansion everywhere except for preprocessor directives.
1358 void SetMacroExpansionOnlyInDirectives() {
1359 DisableMacroExpansion = true;
1360 MacroExpansionInDirectivesOverride = true;
1361 }
1362
1363 /// Peeks ahead N tokens and returns that token without consuming any
1364 /// tokens.
1365 ///
1366 /// LookAhead(0) returns the next token that would be returned by Lex(),
1367 /// LookAhead(1) returns the token after it, etc. This returns normal
1368 /// tokens after phase 5. As such, it is equivalent to using
1369 /// 'Lex', not 'LexUnexpandedToken'.
1370 const Token &LookAhead(unsigned N) {
1371 if (CachedLexPos + N < CachedTokens.size())
1372 return CachedTokens[CachedLexPos+N];
1373 else
1374 return PeekAhead(N+1);
1375 }
1376
/// When backtracking is enabled and tokens are cached,
/// this allows reverting a specific number of tokens.
///
/// Note that the number of tokens being reverted must not go past the last
/// backtrack position.
1382 void RevertCachedTokens(unsigned N) {
1383 assert(isBacktrackEnabled() &&
1384 "Should only be called when tokens are cached for backtracking");
1385 assert(signed(CachedLexPos) - signed(N) >= signed(BacktrackPositions.back())
1386 && "Should revert tokens up to the last backtrack position, not more");
1387 assert(signed(CachedLexPos) - signed(N) >= 0 &&
1388 "Corrupted backtrack positions ?");
1389 CachedLexPos -= N;
1390 }
1391
/// Enters a token in the token stream to be lexed next.
///
/// If Backtrack() is called afterwards, the token will remain at the
/// insertion point.
1396 void EnterToken(const Token &Tok) {
1397 EnterCachingLexMode();
1398 CachedTokens.insert(CachedTokens.begin()+CachedLexPos, Tok);
1399 }
1400
/// Notify the Preprocessor that, if it is caching tokens (because
/// backtracking is enabled), it should replace the most recent cached tokens
/// with the given annotation token. This function has no effect if
/// backtracking is not enabled.
///
/// Note that this function is purely an optimization, so that the cached
/// tokens don't get re-parsed and re-resolved after a backtrack is
/// invoked.
1409 void AnnotateCachedTokens(const Token &Tok) {
1410 assert(Tok.isAnnotation() && "Expected annotation token");
1411 if (CachedLexPos != 0 && isBacktrackEnabled())
1412 AnnotatePreviousCachedTokens(Tok);
1413 }
1414
1415 /// Get the location of the last cached token, suitable for setting the end
1416 /// location of an annotation token.
1417 SourceLocation getLastCachedTokenLocation() const {
1418 assert(CachedLexPos != 0);
1419 return CachedTokens[CachedLexPos-1].getLastLoc();
1420 }
1421
1422 /// Whether \p Tok is the most recent token (`CachedLexPos - 1`) in
1423 /// CachedTokens.
1424 bool IsPreviousCachedToken(const Token &Tok) const;
1425
/// Replace the token at `CachedLexPos - 1` in CachedTokens with the tokens
/// in \p NewToks.
///
/// Useful when a token needs to be split into smaller ones and the most
/// recent token in CachedTokens must be updated to reflect that.
void ReplacePreviousCachedToken(ArrayRef<Token> NewToks);
1432
1433 /// Replace the last token with an annotation token.
1434 ///
1435 /// Like AnnotateCachedTokens(), this routine replaces an
1436 /// already-parsed (and resolved) token with an annotation
1437 /// token. However, this routine only replaces the last token with
1438 /// the annotation token; it does not affect any other cached
1439 /// tokens. This function has no effect if backtracking is not
1440 /// enabled.
1441 void ReplaceLastTokenWithAnnotation(const Token &Tok) {
1442 assert(Tok.isAnnotation() && "Expected annotation token");
1443 if (CachedLexPos != 0 && isBacktrackEnabled())
1444 CachedTokens[CachedLexPos-1] = Tok;
1445 }
1446
1447 /// Enter an annotation token into the token stream.
1448 void EnterAnnotationToken(SourceRange Range, tok::TokenKind Kind,
1449 void *AnnotationVal);
1450
1451 /// Update the current token to represent the provided
1452 /// identifier, in order to cache an action performed by typo correction.
1453 void TypoCorrectToken(const Token &Tok) {
1454 assert(Tok.getIdentifierInfo() && "Expected identifier token");
1455 if (CachedLexPos != 0 && isBacktrackEnabled())
1456 CachedTokens[CachedLexPos-1] = Tok;
1457 }
1458
1459 /// Recompute the current lexer kind based on the CurLexer/CurPTHLexer/
1460 /// CurTokenLexer pointers.
1461 void recomputeCurLexerKind();
1462
1463 /// Returns true if incremental processing is enabled
1464 bool isIncrementalProcessingEnabled() const { return IncrementalProcessing; }
1465
1466 /// Enables the incremental processing
1467 void enableIncrementalProcessing(bool value = true) {
1468 IncrementalProcessing = value;
1469 }
1470
1471 /// Specify the point at which code-completion will be performed.
1472 ///
1473 /// \param File the file in which code completion should occur. If
1474 /// this file is included multiple times, code-completion will
1475 /// perform completion the first time it is included. If NULL, this
1476 /// function clears out the code-completion point.
1477 ///
1478 /// \param Line the line at which code completion should occur
1479 /// (1-based).
1480 ///
1481 /// \param Column the column at which code completion should occur
1482 /// (1-based).
1483 ///
1484 /// \returns true if an error occurred, false otherwise.
1485 bool SetCodeCompletionPoint(const FileEntry *File,
1486 unsigned Line, unsigned Column);
1487
1488 /// Determine if we are performing code completion.
1489 bool isCodeCompletionEnabled() const { return CodeCompletionFile != nullptr; }
1490
1491 /// Returns the location of the code-completion point.
1492 ///
1493 /// Returns an invalid location if code-completion is not enabled or the file
1494 /// containing the code-completion point has not been lexed yet.
1495 SourceLocation getCodeCompletionLoc() const { return CodeCompletionLoc; }
1496
1497 /// Returns the start location of the file of code-completion point.
1498 ///
1499 /// Returns an invalid location if code-completion is not enabled or the file
1500 /// containing the code-completion point has not been lexed yet.
1501 SourceLocation getCodeCompletionFileLoc() const {
1502 return CodeCompletionFileLoc;
1503 }
1504
1505 /// Returns true if code-completion is enabled and we have hit the
1506 /// code-completion point.
1507 bool isCodeCompletionReached() const { return CodeCompletionReached; }
1508
1509 /// Note that we hit the code-completion point.
1510 void setCodeCompletionReached() {
1511 assert(isCodeCompletionEnabled() && "Code-completion not enabled!");
1512 CodeCompletionReached = true;
1513 // Silence any diagnostics that occur after we hit the code-completion.
1514 getDiagnostics().setSuppressAllDiagnostics(true);
1515 }
1516
1517 /// The location of the currently-active \#pragma clang
1518 /// arc_cf_code_audited begin.
1519 ///
1520 /// Returns an invalid location if there is no such pragma active.
1521 SourceLocation getPragmaARCCFCodeAuditedLoc() const {
1522 return PragmaARCCFCodeAuditedLoc;
1523 }
1524
1525 /// Set the location of the currently-active \#pragma clang
1526 /// arc_cf_code_audited begin. An invalid location ends the pragma.
1527 void setPragmaARCCFCodeAuditedLoc(SourceLocation Loc) {
1528 PragmaARCCFCodeAuditedLoc = Loc;
1529 }
1530
1531 /// The location of the currently-active \#pragma clang
1532 /// assume_nonnull begin.
1533 ///
1534 /// Returns an invalid location if there is no such pragma active.
1535 SourceLocation getPragmaAssumeNonNullLoc() const {
1536 return PragmaAssumeNonNullLoc;
1537 }
1538
1539 /// Set the location of the currently-active \#pragma clang
1540 /// assume_nonnull begin. An invalid location ends the pragma.
1541 void setPragmaAssumeNonNullLoc(SourceLocation Loc) {
1542 PragmaAssumeNonNullLoc = Loc;
1543 }
1544
1545 /// Set the directory in which the main file should be considered
1546 /// to have been found, if it is not a real file.
1547 void setMainFileDir(const DirectoryEntry *Dir) {
1548 MainFileDir = Dir;
1549 }
1550
1551 /// Instruct the preprocessor to skip part of the main source file.
1552 ///
1553 /// \param Bytes The number of bytes in the preamble to skip.
1554 ///
1555 /// \param StartOfLine Whether skipping these bytes puts the lexer at the
1556 /// start of a line.
1557 void setSkipMainFilePreamble(unsigned Bytes, bool StartOfLine) {
1558 SkipMainFilePreamble.first = Bytes;
1559 SkipMainFilePreamble.second = StartOfLine;
1560 }
1561
1562 /// Forwarding function for diagnostics. This emits a diagnostic at
1563 /// the specified Token's location, translating the token's start
1564 /// position in the current buffer into a SourcePosition object for rendering.
1565 DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const {
1566 return Diags->Report(Loc, DiagID);
1567 }
1568
1569 DiagnosticBuilder Diag(const Token &Tok, unsigned DiagID) const {
1570 return Diags->Report(Tok.getLocation(), DiagID);
1571 }
1572
1573 /// Return the 'spelling' of the token at the given
1574 /// location; does not go up to the spelling location or down to the
1575 /// expansion location.
1576 ///
1577 /// \param buffer A buffer which will be used only if the token requires
1578 /// "cleaning", e.g. if it contains trigraphs or escaped newlines
1579 /// \param invalid If non-null, will be set \c true if an error occurs.
1580 StringRef getSpelling(SourceLocation loc,
1581 SmallVectorImpl<char> &buffer,
1582 bool *invalid = nullptr) const {
1583 return Lexer::getSpelling(loc, buffer, SourceMgr, LangOpts, invalid);
1584 }
1585
1586 /// Return the 'spelling' of the Tok token.
1587 ///
1588 /// The spelling of a token is the characters used to represent the token in
1589 /// the source file after trigraph expansion and escaped-newline folding. In
1590 /// particular, this wants to get the true, uncanonicalized, spelling of
1591 /// things like digraphs, UCNs, etc.
1592 ///
1593 /// \param Invalid If non-null, will be set \c true if an error occurs.
1594 std::string getSpelling(const Token &Tok, bool *Invalid = nullptr) const {
1595 return Lexer::getSpelling(Tok, SourceMgr, LangOpts, Invalid);
1596 }
1597
1598 /// Get the spelling of a token into a preallocated buffer, instead
1599 /// of as an std::string.
1600 ///
1601 /// The caller is required to allocate enough space for the token, which is
1602 /// guaranteed to be at least Tok.getLength() bytes long. The length of the
1603 /// actual result is returned.
1604 ///
1605 /// Note that this method may do two possible things: it may either fill in
1606 /// the buffer specified with characters, or it may *change the input pointer*
1607 /// to point to a constant buffer with the data already in it (avoiding a
1608 /// copy). The caller is not allowed to modify the returned buffer pointer
1609 /// if an internal buffer is returned.
1610 unsigned getSpelling(const Token &Tok, const char *&Buffer,
1611 bool *Invalid = nullptr) const {
1612 return Lexer::getSpelling(Tok, Buffer, SourceMgr, LangOpts, Invalid);
1613 }
1614
1615 /// Get the spelling of a token into a SmallVector.
1616 ///
1617 /// Note that the returned StringRef may not point to the
1618 /// supplied buffer if a copy can be avoided.
1619 StringRef getSpelling(const Token &Tok,
1620 SmallVectorImpl<char> &Buffer,
1621 bool *Invalid = nullptr) const;
1622
1623 /// Relex the token at the specified location.
1624 /// \returns true if there was a failure, false on success.
1625 bool getRawToken(SourceLocation Loc, Token &Result,
1626 bool IgnoreWhiteSpace = false) {
1627 return Lexer::getRawToken(Loc, Result, SourceMgr, LangOpts, IgnoreWhiteSpace);
1628 }
1629
1630 /// Given a Token \p Tok that is a numeric constant with length 1,
1631 /// return the character.
1632 char
1633 getSpellingOfSingleCharacterNumericConstant(const Token &Tok,
1634 bool *Invalid = nullptr) const {
1635 assert(Tok.is(tok::numeric_constant) &&
1636 Tok.getLength() == 1 && "Called on unsupported token");
1637 assert(!Tok.needsCleaning() && "Token can't need cleaning with length 1");
1638
1639 // If the token is carrying a literal data pointer, just use it.
1640 if (const char *D = Tok.getLiteralData())
1641 return *D;
1642
1643 // Otherwise, fall back on getCharacterData, which is slower, but always
1644 // works.
1645 return *SourceMgr.getCharacterData(Tok.getLocation(), Invalid);
1646 }
1647
1648 /// Retrieve the name of the immediate macro expansion.
1649 ///
1650 /// This routine starts from a source location, and finds the name of the
1651 /// macro responsible for its immediate expansion. It looks through any
1652 /// intervening macro argument expansions to compute this. It returns a
1653 /// StringRef that refers to the SourceManager-owned buffer of the source
1654 /// where that macro name is spelled. Thus, the result shouldn't out-live
1655 /// the SourceManager.
1656 StringRef getImmediateMacroName(SourceLocation Loc) {
1657 return Lexer::getImmediateMacroName(Loc, SourceMgr, getLangOpts());
1658 }
1659
1660 /// Plop the specified string into a scratch buffer and set the
1661 /// specified token's location and length to it.
1662 ///
1663 /// If specified, the source location provides a location of the expansion
1664 /// point of the token.
1665 void CreateString(StringRef Str, Token &Tok,
1666 SourceLocation ExpansionLocStart = SourceLocation(),
1667 SourceLocation ExpansionLocEnd = SourceLocation());
1668
1669 /// Split the first Length characters out of the token starting at TokLoc
1670 /// and return a location pointing to the split token. Re-lexing from the
1671 /// split token will return the split token rather than the original.
1672 SourceLocation SplitToken(SourceLocation TokLoc, unsigned Length);
1673
/// Computes the source location just past the end of the
/// token at this source location.
///
/// This routine can be used to produce a source location that
/// points just past the end of the token referenced by \p Loc, and
/// is generally used when a diagnostic needs to point just after a
/// token where it expected something different than it received. If
/// the returned source location would not be meaningful (e.g., if
/// it points into a macro), this routine returns an invalid
/// source location.
///
/// \param Offset an offset from the end of the token, where the source
/// location should refer to. The default offset (0) produces a source
/// location pointing just past the end of the token; an offset of 1 produces
/// a source location pointing to the last character in the token, etc.
1689 SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset = 0) {
1690 return Lexer::getLocForEndOfToken(Loc, Offset, SourceMgr, LangOpts);
1691 }
1692
1693 /// Returns true if the given MacroID location points at the first
1694 /// token of the macro expansion.
1695 ///
1696 /// \param MacroBegin If non-null and function returns true, it is set to
1697 /// begin location of the macro.
1698 bool isAtStartOfMacroExpansion(SourceLocation loc,
1699 SourceLocation *MacroBegin = nullptr) const {
1700 return Lexer::isAtStartOfMacroExpansion(loc, SourceMgr, LangOpts,
1701 MacroBegin);
1702 }
1703
1704 /// Returns true if the given MacroID location points at the last
1705 /// token of the macro expansion.
1706 ///
1707 /// \param MacroEnd If non-null and function returns true, it is set to
1708 /// end location of the macro.
1709 bool isAtEndOfMacroExpansion(SourceLocation loc,
1710 SourceLocation *MacroEnd = nullptr) const {
1711 return Lexer::isAtEndOfMacroExpansion(loc, SourceMgr, LangOpts, MacroEnd);
1712 }
1713
1714 /// Print the token to stderr, used for debugging.
1715 void DumpToken(const Token &Tok, bool DumpFlags = false) const;
1716 void DumpLocation(SourceLocation Loc) const;
1717 void DumpMacro(const MacroInfo &MI) const;
1718 void dumpMacroInfo(const IdentifierInfo *II);
1719
1720 /// Given a location that specifies the start of a
1721 /// token, return a new location that specifies a character within the token.
1722 SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,
1723 unsigned Char) const {
1724 return Lexer::AdvanceToTokenCharacter(TokStart, Char, SourceMgr, LangOpts);
1725 }
1726
1727 /// Increment the counters for the number of token paste operations
1728 /// performed.
1729 ///
1730 /// If fast was specified, this is a 'fast paste' case we handled.
1731 void IncrementPasteCounter(bool isFast) {
1732 if (isFast)
1733 ++NumFastTokenPaste;
1734 else
1735 ++NumTokenPaste;
1736 }
1737
1738 void PrintStats();
1739
1740 size_t getTotalMemory() const;
1741
1742 /// When the macro expander pastes together a comment (/##/) in Microsoft
1743 /// mode, this method handles updating the current state, returning the
1744 /// token on the next source line.
1745 void HandleMicrosoftCommentPaste(Token &Tok);
1746
1747 //===--------------------------------------------------------------------===//
1748 // Preprocessor callback methods. These are invoked by a lexer as various
1749 // directives and events are found.
1750
1751 /// Given a tok::raw_identifier token, look up the
1752 /// identifier information for the token and install it into the token,
1753 /// updating the token kind accordingly.
1754 IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const;
1755
1756private:
1757 llvm::DenseMap<IdentifierInfo*,unsigned> PoisonReasons;
1758
1759public:
1760 /// Specifies the reason for poisoning an identifier.
1761 ///
1762 /// If that identifier is accessed while poisoned, then this reason will be
1763 /// used instead of the default "poisoned" diagnostic.
1764 void SetPoisonReason(IdentifierInfo *II, unsigned DiagID);
1765
1766 /// Display reason for poisoned identifier.
1767 void HandlePoisonedIdentifier(Token & Identifier);
1768
1769 void MaybeHandlePoisonedIdentifier(Token & Identifier) {
1770 if(IdentifierInfo * II = Identifier.getIdentifierInfo()) {
1771 if(II->isPoisoned()) {
1772 HandlePoisonedIdentifier(Identifier);
1773 }
1774 }
1775 }
1776
1777private:
/// Identifiers used for SEH handling in Borland. These are only
/// allowed in particular circumstances.
1780 // __except block
1781 IdentifierInfo *Ident__exception_code,
1782 *Ident___exception_code,
1783 *Ident_GetExceptionCode;
1784 // __except filter expression
1785 IdentifierInfo *Ident__exception_info,
1786 *Ident___exception_info,
1787 *Ident_GetExceptionInfo;
1788 // __finally
1789 IdentifierInfo *Ident__abnormal_termination,
1790 *Ident___abnormal_termination,
1791 *Ident_AbnormalTermination;
1792
1793 const char *getCurLexerEndPos();
1794 void diagnoseMissingHeaderInUmbrellaDir(const Module &Mod);
1795
1796public:
1797 void PoisonSEHIdentifiers(bool Poison = true); // Borland
1798
/// Callback invoked when the lexer reads an identifier and has
/// filled in the token's IdentifierInfo member.
1801 ///
1802 /// This callback potentially macro expands it or turns it into a named
1803 /// token (like 'for').
1804 ///
1805 /// \returns true if we actually computed a token, false if we need to
1806 /// lex again.
1807 bool HandleIdentifier(Token &Identifier);
1808
1809 /// Callback invoked when the lexer hits the end of the current file.
1810 ///
1811 /// This either returns the EOF token and returns true, or
1812 /// pops a level off the include stack and returns false, at which point the
1813 /// client should call lex again.
1814 bool HandleEndOfFile(Token &Result, bool isEndOfMacro = false);
1815
1816 /// Callback invoked when the current TokenLexer hits the end of its
1817 /// token stream.
1818 bool HandleEndOfTokenLexer(Token &Result);
1819
1820 /// Callback invoked when the lexer sees a # token at the start of a
1821 /// line.
1822 ///
1823 /// This consumes the directive, modifies the lexer/preprocessor state, and
1824 /// advances the lexer(s) so that the next token read is the correct one.
1825 void HandleDirective(Token &Result);
1826
1827 /// Ensure that the next token is a tok::eod token.
1828 ///
1829 /// If not, emit a diagnostic and consume up until the eod.
1830 /// If \p EnableMacros is true, then we consider macros that expand to zero
1831 /// tokens as being ok.
1832 void CheckEndOfDirective(const char *DirType, bool EnableMacros = false);
1833
1834 /// Read and discard all tokens remaining on the current line until
1835 /// the tok::eod token is found.
1836 void DiscardUntilEndOfDirective();
1837
1838 /// Returns true if the preprocessor has seen a use of
1839 /// __DATE__ or __TIME__ in the file so far.
1840 bool SawDateOrTime() const {
1841 return DATELoc != SourceLocation() || TIMELoc != SourceLocation();
1842 }
/// Returns the current value of the CounterValue member.
// NOTE(review): presumably the value backing the __COUNTER__ builtin macro;
// confirm against the builtin macro expansion code.
unsigned getCounterValue() const { return CounterValue; }
/// Overwrites the CounterValue member with \p V.
void setCounterValue(unsigned V) { CounterValue = V; }
1845
1846 /// Retrieves the module that we're currently building, if any.
1847 Module *getCurrentModule();
1848
1849 /// Allocate a new MacroInfo object with the provided SourceLocation.
1850 MacroInfo *AllocateMacroInfo(SourceLocation L);
1851
1852 /// Turn the specified lexer token into a fully checked and spelled
1853 /// filename, e.g. as an operand of \#include.
1854 ///
1855 /// The caller is expected to provide a buffer that is large enough to hold
1856 /// the spelling of the filename, but is also expected to handle the case
1857 /// when this method decides to use a different buffer.
1858 ///
1859 /// \returns true if the input filename was in <>'s or false if it was
1860 /// in ""'s.
1861 bool GetIncludeFilenameSpelling(SourceLocation Loc,StringRef &Buffer);
1862
1863 /// Given a "foo" or \<foo> reference, look up the indicated file.
1864 ///
1865 /// Returns null on failure. \p isAngled indicates whether the file
1866 /// reference is for system \#include's or not (i.e. using <> instead of "").
1867 const FileEntry *LookupFile(SourceLocation FilenameLoc, StringRef Filename,
1868 bool isAngled, const DirectoryLookup *FromDir,
1869 const FileEntry *FromFile,
1870 const DirectoryLookup *&CurDir,
1871 SmallVectorImpl<char> *SearchPath,
1872 SmallVectorImpl<char> *RelativePath,
1873 ModuleMap::KnownHeader *SuggestedModule,
1874 bool *IsMapped, bool SkipCache = false);
1875
/// Get the DirectoryLookup structure used to find the current
/// FileEntry, if CurLexer is non-null and if applicable.
///
/// This allows us to implement \#include_next and find directory-specific
/// properties.
///
/// \returns the CurDirLookup member unchanged; no lookup is performed here.
const DirectoryLookup *GetCurDirLookup() { return CurDirLookup; }
1882
1883 /// Return true if we're in the top-level file, not in a \#include.
1884 bool isInPrimaryFile() const;
1885
1886 /// Handle cases where the \#include name is expanded
1887 /// from a macro as multiple tokens, which need to be glued together.
1888 ///
1889 /// This occurs for code like:
1890 /// \code
1891 /// \#define FOO <x/y.h>
1892 /// \#include FOO
1893 /// \endcode
1894 /// because in this case, "<x/y.h>" is returned as 7 tokens, not one.
1895 ///
1896 /// This code concatenates and consumes tokens up to the '>' token. It
1897 /// returns false if the > was found, otherwise it returns true if it finds
1898 /// and consumes the EOD marker.
1899 bool ConcatenateIncludeName(SmallString<128> &FilenameBuffer,
1900 SourceLocation &End);
1901
1902 /// Lex an on-off-switch (C99 6.10.6p2) and verify that it is
1903 /// followed by EOD. Return true if the token is not a valid on-off-switch.
1904 bool LexOnOffSwitch(tok::OnOffSwitch &Result);
1905
1906 bool CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef,
1907 bool *ShadowFlag = nullptr);
1908
1909 void EnterSubmodule(Module *M, SourceLocation ImportLoc, bool ForPragma);
1910 Module *LeaveSubmodule(bool ForPragma);
1911
1912private:
1913 friend void TokenLexer::ExpandFunctionArguments();
1914
/// Save the current lexer state as a new entry at the back of
/// IncludeMacroStack.
///
/// CurLexer, CurPTHLexer and CurTokenLexer are handed over with std::move;
/// CurLexerKind, CurLexerSubmodule, CurPPLexer and CurDirLookup are passed
/// as they are. Must not be called while CurLexerKind is CLK_CachingLexer
/// (this is asserted).
void PushIncludeMacroStack() {
  assert(CurLexerKind != CLK_CachingLexer && "cannot push a caching lexer");
  IncludeMacroStack.emplace_back(CurLexerKind, CurLexerSubmodule,
                                 std::move(CurLexer), std::move(CurPTHLexer),
                                 CurPPLexer, std::move(CurTokenLexer),
                                 CurDirLookup);
  // The raw preprocessor-lexer pointer has been recorded in the new stack
  // entry, so clear the current one.
  CurPPLexer = nullptr;
}
1923
1924 void PopIncludeMacroStack() {
1925 CurLexer = std::move(IncludeMacroStack.back().TheLexer);
1926 CurPTHLexer = std::move(IncludeMacroStack.back().ThePTHLexer);
1927 CurPPLexer = IncludeMacroStack.back().ThePPLexer;
1928 CurTokenLexer = std::move(IncludeMacroStack.back().TheTokenLexer);
1929 CurDirLookup = IncludeMacroStack.back().TheDirLookup;
1930 CurLexerSubmodule = IncludeMacroStack.back().TheSubmodule;
1931 CurLexerKind = IncludeMacroStack.back().CurLexerKind;
1932 IncludeMacroStack.pop_back();
1933 }
1934
1935 void PropagateLineStartLeadingSpaceInfo(Token &Result);
1936
1937 /// Determine whether we need to create module macros for #defines in the
1938 /// current context.
1939 bool needModuleMacros() const;
1940
1941 /// Update the set of active module macros and ambiguity flag for a module
1942 /// macro name.
1943 void updateModuleMacroInfo(const IdentifierInfo *II, ModuleMacroInfo &Info);
1944
1945 DefMacroDirective *AllocateDefMacroDirective(MacroInfo *MI,
1946 SourceLocation Loc);
1947 UndefMacroDirective *AllocateUndefMacroDirective(SourceLocation UndefLoc);
1948 VisibilityMacroDirective *AllocateVisibilityMacroDirective(SourceLocation Loc,
1949 bool isPublic);
1950
1951 /// Lex and validate a macro name, which occurs after a
1952 /// \#define or \#undef.
1953 ///
1954 /// \param MacroNameTok Token that represents the name defined or undefined.
/// \param IsDefineUndef Kind of preprocessor directive.
1956 /// \param ShadowFlag Points to flag that is set if macro name shadows
1957 /// a keyword.
1958 ///
1959 /// This emits a diagnostic, sets the token kind to eod,
1960 /// and discards the rest of the macro line if the macro name is invalid.
1961 void ReadMacroName(Token &MacroNameTok, MacroUse IsDefineUndef = MU_Other,
1962 bool *ShadowFlag = nullptr);
1963
1964 /// ReadOptionalMacroParameterListAndBody - This consumes all (i.e. the
1965 /// entire line) of the macro's tokens and adds them to MacroInfo, and while
1966 /// doing so performs certain validity checks including (but not limited to):
1967 /// - # (stringization) is followed by a macro parameter
1968 /// \param MacroNameTok - Token that represents the macro name
1969 /// \param ImmediatelyAfterHeaderGuard - Macro follows an #ifdef header guard
1970 ///
1971 /// Either returns a pointer to a MacroInfo object OR emits a diagnostic and
1972 /// returns a nullptr if an invalid sequence of tokens is encountered.
1973 MacroInfo *ReadOptionalMacroParameterListAndBody(
1974 const Token &MacroNameTok, bool ImmediatelyAfterHeaderGuard);
1975
1976 /// The ( starting an argument list of a macro definition has just been read.
1977 /// Lex the rest of the parameters and the closing ), updating \p MI with
1978 /// what we learn and saving in \p LastTok the last token read.
1979 /// Return true if an error occurs parsing the arg list.
1980 bool ReadMacroParameterList(MacroInfo *MI, Token& LastTok);
1981
1982 /// We just read a \#if or related directive and decided that the
1983 /// subsequent tokens are in the \#if'd out portion of the
1984 /// file. Lex the rest of the file, until we see an \#endif. If \p
1985 /// FoundNonSkipPortion is true, then we have already emitted code for part of
1986 /// this \#if directive, so \#else/\#elif blocks should never be entered. If
1987 /// \p FoundElse is false, then \#else directives are ok, if not, then we have
1988 /// already seen one so a \#else directive is a duplicate. When this returns,
1989 /// the caller can lex the first valid token.
1990 void SkipExcludedConditionalBlock(SourceLocation HashTokenLoc,
1991 SourceLocation IfTokenLoc,
1992 bool FoundNonSkipPortion, bool FoundElse,
1993 SourceLocation ElseLoc = SourceLocation());
1994
1995 /// A fast PTH version of SkipExcludedConditionalBlock.
1996 void PTHSkipExcludedConditionalBlock();
1997
/// Information about the result for evaluating an expression for a
/// preprocessor directive.
///
/// This is a plain pair of flags with no constructors; the member order
/// below is part of its layout.
struct DirectiveEvalResult {
  /// Whether the expression was evaluated as true or not.
  bool Conditional;

  /// True if the expression contained identifiers that were undefined.
  bool IncludedUndefinedIds;
};
2007
2008 /// Evaluate an integer constant expression that may occur after a
2009 /// \#if or \#elif directive and return a \p DirectiveEvalResult object.
2010 ///
2011 /// If the expression is equivalent to "!defined(X)" return X in IfNDefMacro.
2012 DirectiveEvalResult EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro);
2013
2014 /// Install the standard preprocessor pragmas:
2015 /// \#pragma GCC poison/system_header/dependency and \#pragma once.
2016 void RegisterBuiltinPragmas();
2017
2018 /// Register builtin macros such as __LINE__ with the identifier table.
2019 void RegisterBuiltinMacros();
2020
2021 /// If an identifier token is read that is to be expanded as a macro, handle
2022 /// it and return the next token as 'Tok'. If we lexed a token, return true;
2023 /// otherwise the caller should lex again.
2024 bool HandleMacroExpandedIdentifier(Token &Identifier, const MacroDefinition &MD);
2025
/// Cache macro expanded tokens for TokenLexers.
///
/// Works like a stack; a TokenLexer adds the macro expanded tokens that it
/// is going to lex to the cache, and when it finishes the tokens are removed
/// from the end of the cache.
2031 Token *cacheMacroExpandedTokens(TokenLexer *tokLexer,
2032 ArrayRef<Token> tokens);
2033
2034 void removeCachedMacroExpandedTokensOfLastLexer();
2035
2036 /// Determine whether the next preprocessor token to be
2037 /// lexed is a '('. If so, consume the token and return true, if not, this
2038 /// method should have no observable side-effect on the lexed tokens.
2039 bool isNextPPTokenLParen();
2040
2041 /// After reading "MACRO(", this method is invoked to read all of the formal
2042 /// arguments specified for the macro invocation. Returns null on error.
2043 MacroArgs *ReadMacroCallArgumentList(Token &MacroName, MacroInfo *MI,
2044 SourceLocation &MacroEnd);
2045
2046 /// If an identifier token is read that is to be expanded
2047 /// as a builtin macro, handle it and return the next token as 'Tok'.
2048 void ExpandBuiltinMacro(Token &Tok);
2049
2050 /// Read a \c _Pragma directive, slice it up, process it, then
2051 /// return the first token after the directive.
2052 /// This assumes that the \c _Pragma token has just been read into \p Tok.
2053 void Handle_Pragma(Token &Tok);
2054
2055 /// Like Handle_Pragma except the pragma text is not enclosed within
2056 /// a string literal.
2057 void HandleMicrosoft__pragma(Token &Tok);
2058
2059 /// Add a lexer to the top of the include stack and
2060 /// start lexing tokens from it instead of the current buffer.
2061 void EnterSourceFileWithLexer(Lexer *TheLexer, const DirectoryLookup *Dir);
2062
2063 /// Add a lexer to the top of the include stack and
2064 /// start getting tokens from it using the PTH cache.
2065 void EnterSourceFileWithPTH(PTHLexer *PL, const DirectoryLookup *Dir);
2066
/// Set the FileID for the preprocessor predefines.
///
/// This may only be done once: it is asserted that PredefinesFileID is
/// still invalid (i.e. has not been set before) when this is called.
void setPredefinesFileID(FileID FID) {
  assert(PredefinesFileID.isInvalid() && "PredefinesFileID already set!");
  PredefinesFileID = FID;
}
2072
2073 /// Set the FileID for the PCH through header.
2074 void setPCHThroughHeaderFileID(FileID FID);
2075
2076 /// Returns true if we are lexing from a file and not a
2077 /// pragma or a macro.
2078 static bool IsFileLexer(const Lexer* L, const PreprocessorLexer* P) {
2079 return L ? !L->isPragmaLexer() : P != nullptr;
2080 }
2081
2082 static bool IsFileLexer(const IncludeStackInfo& I) {
2083 return IsFileLexer(I.TheLexer.get(), I.ThePPLexer);
2084 }
2085
2086 bool IsFileLexer() const {
2087 return IsFileLexer(CurLexer.get(), CurPPLexer);
2088 }
2089
2090 //===--------------------------------------------------------------------===//
2091 // Caching stuff.
2092 void CachingLex(Token &Result);
2093
2094 bool InCachingLexMode() const {
2095 // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means
2096 // that we are past EOF, not that we are in CachingLex mode.
2097 return !CurPPLexer && !CurTokenLexer && !CurPTHLexer &&
2098 !IncludeMacroStack.empty();
2099 }
2100
2101 void EnterCachingLexMode();
2102
2103 void ExitCachingLexMode() {
2104 if (InCachingLexMode())
2105 RemoveTopOfLexerStack();
2106 }
2107
2108 const Token &PeekAhead(unsigned N);
2109 void AnnotatePreviousCachedTokens(const Token &Tok);
2110
2111 //===--------------------------------------------------------------------===//
2112 /// Handle*Directive - implement the various preprocessor directives. These
2113 /// should side-effect the current preprocessor object so that the next call
2114 /// to Lex() will return the appropriate token next.
2115 void HandleLineDirective();
2116 void HandleDigitDirective(Token &Tok);
2117 void HandleUserDiagnosticDirective(Token &Tok, bool isWarning);
2118 void HandleIdentSCCSDirective(Token &Tok);
2119 void HandleMacroPublicDirective(Token &Tok);
2120 void HandleMacroPrivateDirective();
2121
2122 // File inclusion.
2123 void HandleIncludeDirective(SourceLocation HashLoc,
2124 Token &Tok,
2125 const DirectoryLookup *LookupFrom = nullptr,
2126 const FileEntry *LookupFromFile = nullptr,
2127 bool isImport = false);
2128 void HandleIncludeNextDirective(SourceLocation HashLoc, Token &Tok);
2129 void HandleIncludeMacrosDirective(SourceLocation HashLoc, Token &Tok);
2130 void HandleImportDirective(SourceLocation HashLoc, Token &Tok);
2131 void HandleMicrosoftImportDirective(Token &Tok);
2132
2133public:
2134 /// Check that the given module is available, producing a diagnostic if not.
2135 /// \return \c true if the check failed (because the module is not available).
2136 /// \c false if the module appears to be usable.
2137 static bool checkModuleIsAvailable(const LangOptions &LangOpts,
2138 const TargetInfo &TargetInfo,
2139 DiagnosticsEngine &Diags, Module *M);
2140
2141 // Module inclusion testing.
2142 /// Find the module that owns the source or header file that
2143 /// \p Loc points to. If the location is in a file that was included
2144 /// into a module, or is outside any module, returns nullptr.
2145 Module *getModuleForLocation(SourceLocation Loc);
2146
2147 /// We want to produce a diagnostic at location IncLoc concerning a
2148 /// missing module import.
2149 ///
2150 /// \param IncLoc The location at which the missing import was detected.
2151 /// \param M The desired module.
/// \param MLoc A location within the desired module at which some desired
/// effect occurred (e.g., where a desired entity was declared).
2154 ///
2155 /// \return A file that can be #included to import a module containing MLoc.
2156 /// Null if no such file could be determined or if a #include is not
2157 /// appropriate.
2158 const FileEntry *getModuleHeaderToIncludeForDiagnostics(SourceLocation IncLoc,
2159 Module *M,
2160 SourceLocation MLoc);
2161
/// Returns whether PreambleConditionalStack reports that it is recording.
bool isRecordingPreamble() const {
  return PreambleConditionalStack.isRecording();
}
2165
/// Returns whether PreambleConditionalStack reports that a preamble has
/// been recorded.
bool hasRecordedPreamble() const {
  return PreambleConditionalStack.hasRecordedPreamble();
}
2169
/// Returns the PPConditionalInfo entries currently held by
/// PreambleConditionalStack.
ArrayRef<PPConditionalInfo> getPreambleConditionalStack() const {
  return PreambleConditionalStack.getStack();
}
2173
/// Installs \p s as the stack held by PreambleConditionalStack.
void setRecordedPreambleConditionalStack(ArrayRef<PPConditionalInfo> s) {
  PreambleConditionalStack.setStack(s);
}
2177
/// Puts PreambleConditionalStack into its replaying state, then installs
/// \p s as its stack and \p SkipInfo as its skip information.
///
/// \param s the conditional entries to install.
/// \param SkipInfo optional skip information; stored as given, so it may be
/// empty.
void setReplayablePreambleConditionalStack(ArrayRef<PPConditionalInfo> s,
                                           llvm::Optional<PreambleSkipInfo> SkipInfo) {
  PreambleConditionalStack.startReplaying();
  PreambleConditionalStack.setStack(s);
  PreambleConditionalStack.SkipInfo = SkipInfo;
}
2184
/// Returns a copy of the skip information stored in
/// PreambleConditionalStack; the Optional may be empty.
llvm::Optional<PreambleSkipInfo> getPreambleSkipInfo() const {
  return PreambleConditionalStack.SkipInfo;
}
2188
2189private:
2190 /// After processing predefined file, initialize the conditional stack from
2191 /// the preamble.
2192 void replayPreambleConditionalStack();
2193
2194 // Macro handling.
2195 void HandleDefineDirective(Token &Tok, bool ImmediatelyAfterHeaderGuard);
2196 void HandleUndefDirective();
2197
2198 // Conditional Inclusion.
2199 void HandleIfdefDirective(Token &Result, const Token &HashToken,
2200 bool isIfndef, bool ReadAnyTokensBeforeDirective);
2201 void HandleIfDirective(Token &IfToken, const Token &HashToken,
2202 bool ReadAnyTokensBeforeDirective);
2203 void HandleEndifDirective(Token &EndifToken);
2204 void HandleElseDirective(Token &Result, const Token &HashToken);
2205 void HandleElifDirective(Token &ElifToken, const Token &HashToken);
2206
2207 // Pragmas.
2208 void HandlePragmaDirective(SourceLocation IntroducerLoc,
2209 PragmaIntroducerKind Introducer);
2210
2211public:
2212 void HandlePragmaOnce(Token &OnceTok);
2213 void HandlePragmaMark();
2214 void HandlePragmaPoison();
2215 void HandlePragmaSystemHeader(Token &SysHeaderTok);
2216 void HandlePragmaDependency(Token &DependencyTok);
2217 void HandlePragmaPushMacro(Token &Tok);
2218 void HandlePragmaPopMacro(Token &Tok);
2219 void HandlePragmaIncludeAlias(Token &Tok);
2220 void HandlePragmaModuleBuild(Token &Tok);
2221 void HandlePragmaHdrstop(Token &Tok);
2222 IdentifierInfo *ParsePragmaPushOrPopMacro(Token &Tok);
2223
2224 // Return true and store the first token only if any CommentHandler
2225 // has inserted some tokens and getCommentRetentionState() is false.
2226 bool HandleComment(Token &result, SourceRange Comment);
2227
2228 /// A macro is used, update information about macros that need unused
2229 /// warnings.
2230 void markMacroAsUsed(MacroInfo *MI);
2231};
2232
/// Abstract base class that describes a handler that will receive
/// source ranges for each of the comments encountered in the source file.
class CommentHandler {
public:
  /// Virtual destructor; only declared here, so it is defined out of line.
  virtual ~CommentHandler();

  /// Receives the source range \p Comment of a comment, along with the
  /// preprocessor \p PP.
  ///
  /// \returns true if the handler has pushed any tokens to be read, e.g.
  /// using EnterToken or EnterTokenStream.
  virtual bool HandleComment(Preprocessor &PP, SourceRange Comment) = 0;
};
2243
/// Registry of pragma handlers added by plugins.
2245using PragmaHandlerRegistry = llvm::Registry<PragmaHandler>;
2246
2247} // namespace clang
2248
2249#endif // LLVM_CLANG_LEX_PREPROCESSOR_H
2250