Preprocessor.cpp source code [clang/lib/Lex/Preprocessor.cpp]

1	//===- Preprocessor.cpp - C Language Family Preprocessor Implementation ---===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements the Preprocessor interface.
10	//
11	//===----------------------------------------------------------------------===//
12	//
13	// Options to support:
14	// -H - Print the name of each header file used.
15	// -d[DNI] - Dump various things.
16	// -fworking-directory - #line's with preprocessor's working dir.
17	// -fpreprocessed
18	// -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
19	// -W*
20	// -w
21	//
22	// Messages to emit:
23	// "Multiple include guards may be useful for:\n"
24	//
25	//===----------------------------------------------------------------------===//
26
27	#include "clang/Lex/Preprocessor.h"
28	#include "clang/Basic/Builtins.h"
29	#include "clang/Basic/FileManager.h"
30	#include "clang/Basic/FileSystemStatCache.h"
31	#include "clang/Basic/IdentifierTable.h"
32	#include "clang/Basic/LLVM.h"
33	#include "clang/Basic/LangOptions.h"
34	#include "clang/Basic/Module.h"
35	#include "clang/Basic/SourceLocation.h"
36	#include "clang/Basic/SourceManager.h"
37	#include "clang/Basic/TargetInfo.h"
38	#include "clang/Lex/CodeCompletionHandler.h"
39	#include "clang/Lex/ExternalPreprocessorSource.h"
40	#include "clang/Lex/HeaderSearch.h"
41	#include "clang/Lex/LexDiagnostic.h"
42	#include "clang/Lex/Lexer.h"
43	#include "clang/Lex/LiteralSupport.h"
44	#include "clang/Lex/MacroArgs.h"
45	#include "clang/Lex/MacroInfo.h"
46	#include "clang/Lex/ModuleLoader.h"
47	#include "clang/Lex/Pragma.h"
48	#include "clang/Lex/PreprocessingRecord.h"
49	#include "clang/Lex/PreprocessorLexer.h"
50	#include "clang/Lex/PreprocessorOptions.h"
51	#include "clang/Lex/ScratchBuffer.h"
52	#include "clang/Lex/Token.h"
53	#include "clang/Lex/TokenLexer.h"
54	#include "llvm/ADT/APInt.h"
55	#include "llvm/ADT/ArrayRef.h"
56	#include "llvm/ADT/DenseMap.h"
57	#include "llvm/ADT/STLExtras.h"
58	#include "llvm/ADT/SmallString.h"
59	#include "llvm/ADT/SmallVector.h"
60	#include "llvm/ADT/StringRef.h"
61	#include "llvm/Support/Capacity.h"
62	#include "llvm/Support/ErrorHandling.h"
63	#include "llvm/Support/MemoryBuffer.h"
64	#include "llvm/Support/raw_ostream.h"
65	#include <algorithm>
66	#include <cassert>
67	#include <memory>
68	#include <optional>
69	#include <string>
70	#include <utility>
71	#include <vector>
72
73	using namespace clang;
74
75	/// Minimum distance between two check points, in tokens.
76	static constexpr unsigned CheckPointStepSize = `1024`;
77
78	LLVM_INSTANTIATE_REGISTRY(PragmaHandlerRegistry)
79
80	ExternalPreprocessorSource::~ExternalPreprocessorSource() = default;
81
82	Preprocessor::Preprocessor(std::shared_ptr<PreprocessorOptions> PPOpts,
83	DiagnosticsEngine &diags, const LangOptions &opts,
84	SourceManager &SM, HeaderSearch &Headers,
85	ModuleLoader &TheModuleLoader,
86	IdentifierInfoLookup IILookup, bool* OwnsHeaders,
87	TranslationUnitKind TUKind)
88	: PPOpts (std::move(PPOpts)), Diags(&diags), LangOpts(opts),
89	FileMgr(Headers.getFileMgr()), SourceMgr(SM),
90	ScratchBuf (new ScratchBuffer (SourceMgr)), HeaderInfo(Headers),
91	TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
92	// As the language options may have not been loaded yet (when
93	// deserializing an ASTUnit), adding keywords to the identifier table is
94	// deferred to Preprocessor::Initialize().
95	Identifiers(IILookup), PragmaHandlers (new PragmaNamespace (StringRef())),
96	TUKind(TUKind), SkipMainFilePreamble (`0`, true),
97	CurSubmoduleState(&NullSubmoduleState) {
98	OwnsHeaderSearch = OwnsHeaders;
99
100	// Default to discarding comments.
101	KeepComments = false;
102	KeepMacroComments = false;
103	SuppressIncludeNotFoundError = false;
104
105	// Macro expansion is enabled.
106	DisableMacroExpansion = false;
107	MacroExpansionInDirectivesOverride = false;
108	InMacroArgs = false;
109	ArgMacro = nullptr;
110	InMacroArgPreExpansion = false;
111	NumCachedTokenLexers = `0`;
112	PragmasEnabled = true;
113	ParsingIfOrElifDirective = false;
114	PreprocessedOutput = false;
115
116	// We haven't read anything from the external source.
117	ReadMacrosFromExternalSource = false;
118
119	BuiltinInfo = std::make_unique<Builtin::Context>();
120
121	// "Poison" __VA_ARGS__, __VA_OPT__ which can only appear in the expansion of
122	// a macro. They get unpoisoned where it is allowed.
123	(Ident__VA_ARGS__ = getIdentifierInfo(Name: "__VA_ARGS__"))->setIsPoisoned();
124	SetPoisonReason(II: Ident__VA_ARGS__,diag::DiagID: ext_pp_bad_vaargs_use);
125	(Ident__VA_OPT__ = getIdentifierInfo(Name: "__VA_OPT__"))->setIsPoisoned();
126	SetPoisonReason(II: Ident__VA_OPT__,diag::DiagID: ext_pp_bad_vaopt_use);
127
128	// Initialize the pragma handlers.
129	RegisterBuiltinPragmas();
130
131	// Initialize builtin macros like __LINE__ and friends.
132	RegisterBuiltinMacros();
133
134	if(LangOpts.Borland) {
135	Ident__exception_info = getIdentifierInfo(Name: "_exception_info");
136	Ident___exception_info = getIdentifierInfo(Name: "__exception_info");
137	Ident_GetExceptionInfo = getIdentifierInfo(Name: "GetExceptionInformation");
138	Ident__exception_code = getIdentifierInfo(Name: "_exception_code");
139	Ident___exception_code = getIdentifierInfo(Name: "__exception_code");
140	Ident_GetExceptionCode = getIdentifierInfo(Name: "GetExceptionCode");
141	Ident__abnormal_termination = getIdentifierInfo(Name: "_abnormal_termination");
142	Ident___abnormal_termination = getIdentifierInfo(Name: "__abnormal_termination");
143	Ident_AbnormalTermination = getIdentifierInfo(Name: "AbnormalTermination");
144	} else {
145	Ident__exception_info = Ident__exception_code = nullptr;
146	Ident__abnormal_termination = Ident___exception_info = nullptr;
147	Ident___exception_code = Ident___abnormal_termination = nullptr;
148	Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
149	Ident_AbnormalTermination = nullptr;
150	}
151
152	// Default incremental processing to -fincremental-extensions, clients can
153	// override with `enableIncrementalProcessing` if desired.
154	IncrementalProcessing = LangOpts.IncrementalExtensions;
155
156	// If using a PCH where a #pragma hdrstop is expected, start skipping tokens.
157	if (usingPCHWithPragmaHdrStop())
158	SkippingUntilPragmaHdrStop = true;
159
160	// If using a PCH with a through header, start skipping tokens.
161	if (!this->PPOpts ->PCHThroughHeader.empty() &&
162	!this->PPOpts ->ImplicitPCHInclude.empty())
163	SkippingUntilPCHThroughHeader = true;
164
165	if (this->PPOpts ->GeneratePreamble)
166	PreambleConditionalStack.startRecording();
167
168	MaxTokens = LangOpts.MaxTokens;
169	}
170
171	Preprocessor::~Preprocessor() {
172	assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");
173
174	IncludeMacroStack.clear();
175
176	// Free any cached macro expanders.
177	// This populates MacroArgCache, so all TokenLexers need to be destroyed
178	// before the code below that frees up the MacroArgCache list.
179	std::fill(first: TokenLexerCache, last: TokenLexerCache + NumCachedTokenLexers, value: nullptr);
180	CurTokenLexer.reset();
181
182	// Free any cached MacroArgs.
183	for (MacroArgs *ArgList = MacroArgCache; ArgList;)
184	ArgList = ArgList->deallocate();
185
186	// Delete the header search info, if we own it.
187	if (OwnsHeaderSearch)
188	delete &HeaderInfo;
189	}
190
191	void Preprocessor::Initialize(const TargetInfo &Target,
192	const TargetInfo *AuxTarget) {
193	assert((!this->Target \|\| this->Target == &Target) &&
194	"Invalid override of target information");
195	this->Target = &Target;
196
197	assert((!this->AuxTarget \|\| this->AuxTarget == AuxTarget) &&
198	"Invalid override of aux target information.");
199	this->AuxTarget = AuxTarget;
200
201	// Initialize information about built-ins.
202	BuiltinInfo ->InitializeTarget(Target, AuxTarget);
203	HeaderInfo.setTarget(Target);
204
205	// Populate the identifier table with info about keywords for the current language.
206	Identifiers.AddKeywords(LangOpts);
207
208	// Initialize the __FTL_EVAL_METHOD__ macro to the TargetInfo.
209	setTUFPEvalMethod(getTargetInfo().getFPEvalMethod());
210
211	if (getLangOpts().getFPEvalMethod() == LangOptions::FEM_UnsetOnCommandLine)
212	// Use setting from TargetInfo.
213	setCurrentFPEvalMethod(PragmaLoc: SourceLocation (), Val: Target.getFPEvalMethod());
214	else
215	// Set initial value of __FLT_EVAL_METHOD__ from the command line.
216	setCurrentFPEvalMethod(PragmaLoc: SourceLocation (), Val: getLangOpts().getFPEvalMethod());
217	}
218
219	void Preprocessor::InitializeForModelFile() {
220	NumEnteredSourceFiles = `0`;
221
222	// Reset pragmas
223	PragmaHandlersBackup = std::move(PragmaHandlers);
224	PragmaHandlers = std::make_unique<PragmaNamespace>(args: StringRef());
225	RegisterBuiltinPragmas();
226
227	// Reset PredefinesFileID
228	PredefinesFileID = FileID ();
229	}
230
231	void Preprocessor::FinalizeForModelFile() {
232	NumEnteredSourceFiles = `1`;
233
234	PragmaHandlers = std::move(PragmaHandlersBackup);
235	}
236
237	void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
238	llvm::errs() << tok::getTokenName(Kind: Tok.getKind());
239
240	if (!Tok.isAnnotation())
241	llvm::errs() << " '" << getSpelling(Tok) << "'";
242
243	if (!DumpFlags) return;
244
245	llvm::errs() << "\t";
246	if (Tok.isAtStartOfLine())
247	llvm::errs() << " [StartOfLine]";
248	if (Tok.hasLeadingSpace())
249	llvm::errs() << " [LeadingSpace]";
250	if (Tok.isExpandDisabled())
251	llvm::errs() << " [ExpandDisabled]";
252	if (Tok.needsCleaning()) {
253	const char *Start = SourceMgr.getCharacterData(SL: Tok.getLocation());
254	llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
255	<< "']";
256	}
257
258	llvm::errs() << "\tLoc=<";
259	DumpLocation(Loc: Tok.getLocation());
260	llvm::errs() << ">";
261	}
262
263	void Preprocessor::DumpLocation(SourceLocation Loc) const {
264	Loc.print(OS&: llvm::errs(), SM: SourceMgr);
265	}
266
267	void Preprocessor::DumpMacro(const MacroInfo &MI) const {
268	llvm::errs() << "MACRO: ";
269	for (unsigned i = `0`, e = MI.getNumTokens(); i != e; ++i) {
270	DumpToken(Tok: MI.getReplacementToken(Tok: i));
271	llvm::errs() << " ";
272	}
273	llvm::errs() << "\n";
274	}
275
276	void Preprocessor::PrintStats() {
277	llvm::errs() << "\n*** Preprocessor Stats:\n";
278	llvm::errs() << NumDirectives << " directives found:\n";
279	llvm::errs() << " " << NumDefined << " #define.\n";
280	llvm::errs() << " " << NumUndefined << " #undef.\n";
281	llvm::errs() << " #include/#include_next/#import:\n";
282	llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n";
283	llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n";
284	llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n";
285	llvm::errs() << " " << NumElse << " #else/#elif/#elifdef/#elifndef.\n";
286	llvm::errs() << " " << NumEndif << " #endif.\n";
287	llvm::errs() << " " << NumPragma << " #pragma.\n";
288	llvm::errs() << NumSkipped << " #if/#ifndef#ifdef regions skipped\n";
289
290	llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
291	<< NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
292	<< NumFastMacroExpanded << " on the fast path.\n";
293	llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
294	<< " token paste (##) operations performed, "
295	<< NumFastTokenPaste << " on the fast path.\n";
296
297	llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";
298
299	llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory();
300	llvm::errs() << "\n Macro Expanded Tokens: "
301	<< llvm::capacity_in_bytes(X: MacroExpandedTokens);
302	llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity();
303	// FIXME: List information for all submodules.
304	llvm::errs() << "\n Macros: "
305	<< llvm::capacity_in_bytes(X: CurSubmoduleState->Macros);
306	llvm::errs() << "\n #pragma push_macro Info: "
307	<< llvm::capacity_in_bytes(X: PragmaPushMacroInfo);
308	llvm::errs() << "\n Poison Reasons: "
309	<< llvm::capacity_in_bytes(X: PoisonReasons);
310	llvm::errs() << "\n Comment Handlers: "
311	<< llvm::capacity_in_bytes(x: CommentHandlers) << "\n";
312	}
313
314	Preprocessor::macro_iterator
315	Preprocessor::macro_begin(bool IncludeExternalMacros) const {
316	if (IncludeExternalMacros && ExternalSource &&
317	!ReadMacrosFromExternalSource) {
318	ReadMacrosFromExternalSource = true;
319	ExternalSource->ReadDefinedMacros();
320	}
321
322	// Make sure we cover all macros in visible modules.
323	for (const ModuleMacro &Macro : ModuleMacros)
324	CurSubmoduleState->Macros.insert(KV: std::make_pair(x: Macro.II, y: MacroState ()));
325
326	return CurSubmoduleState->Macros.begin();
327	}
328
329	size_t Preprocessor::getTotalMemory() const {
330	return BP.getTotalMemory()
331	+ llvm::capacity_in_bytes(X: MacroExpandedTokens)
332	+ Predefines.capacity() / Predefines buffer. /
333	// FIXME: Include sizes from all submodules, and include MacroInfo sizes,
334	// and ModuleMacros.
335	+ llvm::capacity_in_bytes(X: CurSubmoduleState->Macros)
336	+ llvm::capacity_in_bytes(X: PragmaPushMacroInfo)
337	+ llvm::capacity_in_bytes(X: PoisonReasons)
338	+ llvm::capacity_in_bytes(x: CommentHandlers);
339	}
340
341	Preprocessor::macro_iterator
342	Preprocessor::macro_end(bool IncludeExternalMacros) const {
343	if (IncludeExternalMacros && ExternalSource &&
344	!ReadMacrosFromExternalSource) {
345	ReadMacrosFromExternalSource = true;
346	ExternalSource->ReadDefinedMacros();
347	}
348
349	return CurSubmoduleState->Macros.end();
350	}
351
352	/// Compares macro tokens with a specified token value sequence.
353	static bool MacroDefinitionEquals(const MacroInfo *MI,
354	ArrayRef<TokenValue> Tokens) {
355	return Tokens.size() == MI->getNumTokens() &&
356	std::equal(first1: Tokens.begin(), last1: Tokens.end(), first2: MI->tokens_begin());
357	}
358
359	StringRef Preprocessor::getLastMacroWithSpelling(
360	SourceLocation Loc,
361	ArrayRef<TokenValue> Tokens) const {
362	SourceLocation BestLocation;
363	StringRef BestSpelling;
364	for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end();
365	I != E; ++I) {
366	const MacroDirective::DefInfo
367	Def = I ->second.findDirectiveAtLoc(Loc, SourceMgr);
368	if (!Def \|\| !Def.getMacroInfo())
369	continue;
370	if (!Def.getMacroInfo()->isObjectLike())
371	continue;
372	if (!MacroDefinitionEquals(MI: Def.getMacroInfo(), Tokens))
373	continue;
374	SourceLocation Location = Def.getLocation();
375	// Choose the macro defined latest.
376	if (BestLocation.isInvalid() \|\|
377	(Location.isValid() &&
378	SourceMgr.isBeforeInTranslationUnit(LHS: BestLocation, RHS: Location))) {
379	BestLocation = Location;
380	BestSpelling = I ->first->getName();
381	}
382	}
383	return BestSpelling;
384	}
385
386	void Preprocessor::recomputeCurLexerKind() {
387	if (CurLexer)
388	CurLexerCallback = CurLexer ->isDependencyDirectivesLexer()
389	? CLK_DependencyDirectivesLexer
390	: CLK_Lexer;
391	else if (CurTokenLexer)
392	CurLexerCallback = CLK_TokenLexer;
393	else
394	CurLexerCallback = CLK_CachingLexer;
395	}
396
397	bool Preprocessor::SetCodeCompletionPoint(FileEntryRef File,
398	unsigned CompleteLine,
399	unsigned CompleteColumn) {
400	assert(CompleteLine && CompleteColumn && "Starts from 1:1");
401	assert(!CodeCompletionFile && "Already set");
402
403	// Load the actual file's contents.
404	std::optional<llvm::MemoryBufferRef> Buffer =
405	SourceMgr.getMemoryBufferForFileOrNone(File);
406	if (!Buffer)
407	return true;
408
409	// Find the byte position of the truncation point.
410	const char *Position = Buffer ->getBufferStart();
411	for (unsigned Line = `1`; Line < CompleteLine; ++Line) {
412	for (; *Position; ++Position) {
413	if (Position != `'\r'` && Position != `'\n'`)
414	continue;
415
416	// Eat \r\n or \n\r as a single line.
417	if ((Position[`1`] == `'\r'` \|\| Position[`1`] == `'\n'`) &&
418	Position[`0`] != Position[`1`])
419	++Position;
420	++Position;
421	break;
422	}
423	}
424
425	Position += CompleteColumn - `1`;
426
427	// If pointing inside the preamble, adjust the position at the beginning of
428	// the file after the preamble.
429	if (SkipMainFilePreamble.first &&
430	SourceMgr.getFileEntryForID(FID: SourceMgr.getMainFileID()) == File) {
431	if (Position - Buffer ->getBufferStart() < SkipMainFilePreamble.first)
432	Position = Buffer ->getBufferStart() + SkipMainFilePreamble.first;
433	}
434
435	if (Position > Buffer ->getBufferEnd())
436	Position = Buffer ->getBufferEnd();
437
438	CodeCompletionFile = File;
439	CodeCompletionOffset = Position - Buffer ->getBufferStart();
440
441	auto NewBuffer = llvm::WritableMemoryBuffer::getNewUninitMemBuffer(
442	Size: Buffer ->getBufferSize() + `1`, BufferName: Buffer ->getBufferIdentifier());
443	char *NewBuf = NewBuffer ->getBufferStart();
444	char *NewPos = std::copy(first: Buffer ->getBufferStart(), last: Position, result: NewBuf);
445	*NewPos = `'\0'`;
446	std::copy(first: Position, last: Buffer ->getBufferEnd(), result: NewPos+`1`);
447	SourceMgr.overrideFileContents(SourceFile: File, Buffer: std::move(NewBuffer));
448
449	return false;
450	}
451
452	void Preprocessor::CodeCompleteIncludedFile(llvm::StringRef Dir,
453	bool IsAngled) {
454	setCodeCompletionReached();
455	if (CodeComplete)
456	CodeComplete->CodeCompleteIncludedFile(Dir, IsAngled);
457	}
458
459	void Preprocessor::CodeCompleteNaturalLanguage() {
460	setCodeCompletionReached();
461	if (CodeComplete)
462	CodeComplete->CodeCompleteNaturalLanguage();
463	}
464
465	/// getSpelling - This method is used to get the spelling of a token into a
466	/// SmallVector. Note that the returned StringRef may not point to the
467	/// supplied buffer if a copy can be avoided.
468	StringRef Preprocessor::getSpelling(const Token &Tok,
469	SmallVectorImpl<char> &Buffer,
470	bool Invalid) const* {
471	// NOTE: this has to be checked before* testing for an IdentifierInfo.*
472	if (Tok.isNot(K: tok::raw_identifier) && !Tok.hasUCN()) {
473	// Try the fast path.
474	if (const IdentifierInfo *II = Tok.getIdentifierInfo())
475	return II->getName();
476	}
477
478	// Resize the buffer if we need to copy into it.
479	if (Tok.needsCleaning())
480	Buffer.resize(N: Tok.getLength());
481
482	const char *Ptr = Buffer.data();
483	unsigned Len = getSpelling(Tok, Buffer&: Ptr, Invalid);
484	return StringRef(Ptr, Len);
485	}
486
487	/// CreateString - Plop the specified string into a scratch buffer and return a
488	/// location for it. If specified, the source location provides a source
489	/// location for the token.
490	void Preprocessor::CreateString(StringRef Str, Token &Tok,
491	SourceLocation ExpansionLocStart,
492	SourceLocation ExpansionLocEnd) {
493	Tok.setLength(Str.size());
494
495	const char *DestPtr;
496	SourceLocation Loc = ScratchBuf ->getToken(Buf: Str.data(), Len: Str.size(), DestPtr);
497
498	if (ExpansionLocStart.isValid())
499	Loc = SourceMgr.createExpansionLoc(SpellingLoc: Loc, ExpansionLocStart,
500	ExpansionLocEnd, Length: Str.size());
501	Tok.setLocation(Loc);
502
503	// If this is a raw identifier or a literal token, set the pointer data.
504	if (Tok.is(K: tok::raw_identifier))
505	Tok.setRawIdentifierData(DestPtr);
506	else if (Tok.isLiteral())
507	Tok.setLiteralData(DestPtr);
508	}
509
510	SourceLocation Preprocessor::SplitToken(SourceLocation Loc, unsigned Length) {
511	auto &SM = getSourceManager();
512	SourceLocation SpellingLoc = SM.getSpellingLoc(Loc);
513	std::pair<FileID, unsigned> LocInfo = SM.getDecomposedLoc(Loc: SpellingLoc);
514	bool Invalid = false;
515	StringRef Buffer = SM.getBufferData(FID: LocInfo.first, Invalid: &Invalid);
516	if (Invalid)
517	return SourceLocation ();
518
519	// FIXME: We could consider re-using spelling for tokens we see repeatedly.
520	const char *DestPtr;
521	SourceLocation Spelling =
522	ScratchBuf ->getToken(Buf: Buffer.data() + LocInfo.second, Len: Length, DestPtr);
523	return SM.createTokenSplitLoc(SpellingLoc: Spelling, TokenStart: Loc, TokenEnd: Loc.getLocWithOffset(Offset: Length));
524	}
525
526	Module *Preprocessor::getCurrentModule() {
527	if (!getLangOpts().isCompilingModule())
528	return nullptr;
529
530	return getHeaderSearchInfo().lookupModule(ModuleName: getLangOpts().CurrentModule);
531	}
532
533	Module *Preprocessor::getCurrentModuleImplementation() {
534	if (!getLangOpts().isCompilingModuleImplementation())
535	return nullptr;
536
537	return getHeaderSearchInfo().lookupModule(ModuleName: getLangOpts().ModuleName);
538	}
539
540	//===----------------------------------------------------------------------===//
541	// Preprocessor Initialization Methods
542	//===----------------------------------------------------------------------===//
543
544	/// EnterMainSourceFile - Enter the specified FileID as the main source file,
545	/// which implicitly adds the builtin defines etc.
546	void Preprocessor::EnterMainSourceFile() {
547	// We do not allow the preprocessor to reenter the main file. Doing so will
548	// cause FileID's to accumulate information from both runs (e.g. #line
549	// information) and predefined macros aren't guaranteed to be set properly.
550	assert(NumEnteredSourceFiles == `0` && "Cannot reenter the main file!");
551	FileID MainFileID = SourceMgr.getMainFileID();
552
553	// If MainFileID is loaded it means we loaded an AST file, no need to enter
554	// a main file.
555	if (!SourceMgr.isLoadedFileID(FID: MainFileID)) {
556	// Enter the main file source buffer.
557	EnterSourceFile(FID: MainFileID, Dir: nullptr, Loc: SourceLocation ());
558
559	// If we've been asked to skip bytes in the main file (e.g., as part of a
560	// precompiled preamble), do so now.
561	if (SkipMainFilePreamble.first > `0`)
562	CurLexer ->SetByteOffset(Offset: SkipMainFilePreamble.first,
563	StartOfLine: SkipMainFilePreamble.second);
564
565	// Tell the header info that the main file was entered. If the file is later
566	// #imported, it won't be re-entered.
567	if (OptionalFileEntryRef FE = SourceMgr.getFileEntryRefForID(FID: MainFileID))
568	markIncluded(File: *FE);
569	}
570
571	// Preprocess Predefines to populate the initial preprocessor state.
572	std::unique_ptr<llvm::MemoryBuffer> SB =
573	llvm::MemoryBuffer::getMemBufferCopy(InputData: Predefines, BufferName: "<built-in>");
574	assert(SB && "Cannot create predefined source buffer");
575	FileID FID = SourceMgr.createFileID(Buffer: std::move(SB));
576	assert(FID.isValid() && "Could not create FileID for predefines?");
577	setPredefinesFileID(FID);
578
579	// Start parsing the predefines.
580	EnterSourceFile(FID, Dir: nullptr, Loc: SourceLocation ());
581
582	if (!PPOpts ->PCHThroughHeader.empty()) {
583	// Lookup and save the FileID for the through header. If it isn't found
584	// in the search path, it's a fatal error.
585	OptionalFileEntryRef File = LookupFile(
586	FilenameLoc: SourceLocation (), Filename: PPOpts ->PCHThroughHeader,
587	/isAngled=/false, /FromDir=/nullptr, /FromFile=/nullptr,
588	/CurDir=/nullptr, /SearchPath=/nullptr, /RelativePath=/nullptr,
589	/SuggestedModule=/nullptr, /IsMapped=/nullptr,
590	/IsFrameworkFound=/nullptr);
591	if (!File) {
592	Diag(SourceLocation(), diag::err_pp_through_header_not_found)
593	<< PPOpts ->PCHThroughHeader;
594	return;
595	}
596	setPCHThroughHeaderFileID(
597	SourceMgr.createFileID(SourceFile: *File, IncludePos: SourceLocation (), FileCharacter: SrcMgr::C_User));
598	}
599
600	// Skip tokens from the Predefines and if needed the main file.
601	if ((usingPCHWithThroughHeader() && SkippingUntilPCHThroughHeader) \|\|
602	(usingPCHWithPragmaHdrStop() && SkippingUntilPragmaHdrStop))
603	SkipTokensWhileUsingPCH();
604	}
605
606	void Preprocessor::setPCHThroughHeaderFileID(FileID FID) {
607	assert(PCHThroughHeaderFileID.isInvalid() &&
608	"PCHThroughHeaderFileID already set!");
609	PCHThroughHeaderFileID = FID;
610	}
611
612	bool Preprocessor::isPCHThroughHeader(const FileEntry *FE) {
613	assert(PCHThroughHeaderFileID.isValid() &&
614	"Invalid PCH through header FileID");
615	return FE == SourceMgr.getFileEntryForID(FID: PCHThroughHeaderFileID);
616	}
617
618	bool Preprocessor::creatingPCHWithThroughHeader() {
619	return TUKind == TU_Prefix && !PPOpts ->PCHThroughHeader.empty() &&
620	PCHThroughHeaderFileID.isValid();
621	}
622
623	bool Preprocessor::usingPCHWithThroughHeader() {
624	return TUKind != TU_Prefix && !PPOpts ->PCHThroughHeader.empty() &&
625	PCHThroughHeaderFileID.isValid();
626	}
627
628	bool Preprocessor::creatingPCHWithPragmaHdrStop() {
629	return TUKind == TU_Prefix && PPOpts ->PCHWithHdrStop;
630	}
631
632	bool Preprocessor::usingPCHWithPragmaHdrStop() {
633	return TUKind != TU_Prefix && PPOpts ->PCHWithHdrStop;
634	}
635
636	/// Skip tokens until after the #include of the through header or
637	/// until after a #pragma hdrstop is seen. Tokens in the predefines file
638	/// and the main file may be skipped. If the end of the predefines file
639	/// is reached, skipping continues into the main file. If the end of the
640	/// main file is reached, it's a fatal error.
641	void Preprocessor::SkipTokensWhileUsingPCH() {
642	bool ReachedMainFileEOF = false;
643	bool UsingPCHThroughHeader = SkippingUntilPCHThroughHeader;
644	bool UsingPragmaHdrStop = SkippingUntilPragmaHdrStop;
645	Token Tok;
646	while (true) {
647	bool InPredefines =
648	(CurLexer && CurLexer ->getFileID() == getPredefinesFileID());
649	CurLexerCallback(*this, Tok);
650	if (Tok.is(K: tok::eof) && !InPredefines) {
651	ReachedMainFileEOF = true;
652	break;
653	}
654	if (UsingPCHThroughHeader && !SkippingUntilPCHThroughHeader)
655	break;
656	if (UsingPragmaHdrStop && !SkippingUntilPragmaHdrStop)
657	break;
658	}
659	if (ReachedMainFileEOF) {
660	if (UsingPCHThroughHeader)
661	Diag(SourceLocation(), diag::err_pp_through_header_not_seen)
662	<< PPOpts ->PCHThroughHeader << `1`;
663	else if (!PPOpts ->PCHWithHdrStopCreate)
664	Diag(SourceLocation(), diag::err_pp_pragma_hdrstop_not_seen);
665	}
666	}
667
668	void Preprocessor::replayPreambleConditionalStack() {
669	// Restore the conditional stack from the preamble, if there is one.
670	if (PreambleConditionalStack.isReplaying()) {
671	assert(CurPPLexer &&
672	"CurPPLexer is null when calling replayPreambleConditionalStack.");
673	CurPPLexer->setConditionalLevels(PreambleConditionalStack.getStack());
674	PreambleConditionalStack.doneReplaying();
675	if (PreambleConditionalStack.reachedEOFWhileSkipping())
676	SkipExcludedConditionalBlock(
677	HashTokenLoc: PreambleConditionalStack.SkipInfo ->HashTokenLoc,
678	IfTokenLoc: PreambleConditionalStack.SkipInfo ->IfTokenLoc,
679	FoundNonSkipPortion: PreambleConditionalStack.SkipInfo ->FoundNonSkipPortion,
680	FoundElse: PreambleConditionalStack.SkipInfo ->FoundElse,
681	ElseLoc: PreambleConditionalStack.SkipInfo ->ElseLoc);
682	}
683	}
684
685	void Preprocessor::EndSourceFile() {
686	// Notify the client that we reached the end of the source file.
687	if (Callbacks)
688	Callbacks ->EndOfMainFile();
689	}
690
691	//===----------------------------------------------------------------------===//
692	// Lexer Event Handling.
693	//===----------------------------------------------------------------------===//
694
695	/// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
696	/// identifier information for the token and install it into the token,
697	/// updating the token kind accordingly.
698	IdentifierInfo Preprocessor::LookUpIdentifierInfo(Token &Identifier) const* {
699	assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!");
700
701	// Look up this token, see if it is a macro, or if it is a language keyword.
702	IdentifierInfo *II;
703	if (!Identifier.needsCleaning() && !Identifier.hasUCN()) {
704	// No cleaning needed, just use the characters from the lexed buffer.
705	II = getIdentifierInfo(Name: Identifier.getRawIdentifier());
706	} else {
707	// Cleaning needed, alloca a buffer, clean into it, then use the buffer.
708	SmallString<`64`> IdentifierBuffer;
709	StringRef CleanedStr = getSpelling(Tok: Identifier, Buffer&: IdentifierBuffer);
710
711	if (Identifier.hasUCN()) {
712	SmallString<`64`> UCNIdentifierBuffer;
713	expandUCNs(Buf&: UCNIdentifierBuffer, Input: CleanedStr);
714	II = getIdentifierInfo(Name: UCNIdentifierBuffer);
715	} else {
716	II = getIdentifierInfo(Name: CleanedStr);
717	}
718	}
719
720	// Update the token info (identifier info and appropriate token kind).
721	// FIXME: the raw_identifier may contain leading whitespace which is removed
722	// from the cleaned identifier token. The SourceLocation should be updated to
723	// refer to the non-whitespace character. For instance, the text "\\\nB" (a
724	// line continuation before 'B') is parsed as a single tok::raw_identifier and
725	// is cleaned to tok::identifier "B". After cleaning the token's length is
726	// still 3 and the SourceLocation refers to the location of the backslash.
727	Identifier.setIdentifierInfo(II);
728	Identifier.setKind(II->getTokenID());
729
730	return II;
731	}
732
733	void Preprocessor::SetPoisonReason(IdentifierInfo II, unsigned* DiagID) {
734	PoisonReasons [II] = DiagID;
735	}
736
737	void Preprocessor::PoisonSEHIdentifiers(bool Poison) {
738	assert(Ident__exception_code && Ident__exception_info);
739	assert(Ident___exception_code && Ident___exception_info);
740	Ident__exception_code->setIsPoisoned(Poison);
741	Ident___exception_code->setIsPoisoned(Poison);
742	Ident_GetExceptionCode->setIsPoisoned(Poison);
743	Ident__exception_info->setIsPoisoned(Poison);
744	Ident___exception_info->setIsPoisoned(Poison);
745	Ident_GetExceptionInfo->setIsPoisoned(Poison);
746	Ident__abnormal_termination->setIsPoisoned(Poison);
747	Ident___abnormal_termination->setIsPoisoned(Poison);
748	Ident_AbnormalTermination->setIsPoisoned(Poison);
749	}
750
751	void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) {
752	assert(Identifier.getIdentifierInfo() &&
753	"Can't handle identifiers without identifier info!");
754	llvm::DenseMap<IdentifierInfo,unsigned*>::const_iterator it =
755	PoisonReasons.find(Val: Identifier.getIdentifierInfo());
756	if(it == PoisonReasons.end())
757	Diag(Identifier, diag::err_pp_used_poisoned_id);
758	else
759	Diag(Tok: Identifier,DiagID: it ->second) << Identifier.getIdentifierInfo();
760	}
761
762	void Preprocessor::updateOutOfDateIdentifier(IdentifierInfo &II) const {
763	assert(II.isOutOfDate() && "not out of date");
764	getExternalSource()->updateOutOfDateIdentifier(II);
765	}
766
767	/// HandleIdentifier - This callback is invoked when the lexer reads an
768	/// identifier. This callback looks up the identifier in the map and/or
769	/// potentially macro expands it or turns it into a named token (like 'for').
770	///
771	/// Note that callers of this method are guarded by checking the
772	/// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the
773	/// IdentifierInfo methods that compute these properties will need to change to
774	/// match.
775	bool Preprocessor::HandleIdentifier(Token &Identifier) {
776	assert(Identifier.getIdentifierInfo() &&
777	"Can't handle identifiers without identifier info!");
778
779	IdentifierInfo &II = *Identifier.getIdentifierInfo();
780
781	// If the information about this identifier is out of date, update it from
782	// the external source.
783	// We have to treat __VA_ARGS__ in a special way, since it gets
784	// serialized with isPoisoned = true, but our preprocessor may have
785	// unpoisoned it if we're defining a C99 macro.
786	if (II.isOutOfDate()) {
787	bool CurrentIsPoisoned = false;
788	const bool IsSpecialVariadicMacro =
789	&II == Ident__VA_ARGS__ \|\| &II == Ident__VA_OPT__;
790	if (IsSpecialVariadicMacro)
791	CurrentIsPoisoned = II.isPoisoned();
792
793	updateOutOfDateIdentifier(II);
794	Identifier.setKind(II.getTokenID());
795
796	if (IsSpecialVariadicMacro)
797	II.setIsPoisoned(CurrentIsPoisoned);
798	}
799
800	// If this identifier was poisoned, and if it was not produced from a macro
801	// expansion, emit an error.
802	if (II.isPoisoned() && CurPPLexer) {
803	HandlePoisonedIdentifier(Identifier);
804	}
805
806	// If this is a macro to be expanded, do it.
807	if (const MacroDefinition MD = getMacroDefinition(II: &II)) {
808	const auto *MI = MD.getMacroInfo();
809	assert(MI && "macro definition with no macro info?");
810	if (!DisableMacroExpansion) {
811	if (!Identifier.isExpandDisabled() && MI->isEnabled()) {
812	// C99 6.10.3p10: If the preprocessing token immediately after the
813	// macro name isn't a '(', this macro should not be expanded.
814	if (!MI->isFunctionLike() \|\| isNextPPTokenLParen())
815	return HandleMacroExpandedIdentifier(Identifier, MD);
816	} else {
817	// C99 6.10.3.4p2 says that a disabled macro may never again be
818	// expanded, even if it's in a context where it could be expanded in the
819	// future.
820	Identifier.setFlag(Token::DisableExpand);
821	if (MI->isObjectLike() \|\| isNextPPTokenLParen())
822	Diag(Tok: Identifier, diag::DiagID: pp_disabled_macro_expansion);
823	}
824	}
825	}
826
827	// If this identifier is a keyword in a newer Standard or proposed Standard,
828	// produce a warning. Don't warn if we're not considering macro expansion,
829	// since this identifier might be the name of a macro.
830	// FIXME: This warning is disabled in cases where it shouldn't be, like
831	// "#define constexpr constexpr", "int constexpr;"
832	if (II.isFutureCompatKeyword() && !DisableMacroExpansion) {
833	Diag(Tok: Identifier, DiagID: getIdentifierTable().getFutureCompatDiagKind(II, LangOpts: getLangOpts()))
834	<< II.getName();
835	// Don't diagnose this keyword again in this translation unit.
836	II.setIsFutureCompatKeyword(false);
837	}
838
839	// If this is an extension token, diagnose its use.
840	// We avoid diagnosing tokens that originate from macro definitions.
841	// FIXME: This warning is disabled in cases where it shouldn't be,
842	// like "#define TY typeof", "TY(1) x".
843	if (II.isExtensionToken() && !DisableMacroExpansion)
844	Diag(Identifier, diag::ext_token_used);
845
846	// If this is the 'import' contextual keyword following an '@', note
847	// that the next token indicates a module name.
848	//
849	// Note that we do not treat 'import' as a contextual
850	// keyword when we're in a caching lexer, because caching lexers only get
851	// used in contexts where import declarations are disallowed.
852	//
853	// Likewise if this is the standard C++ import keyword.
854	if (((LastTokenWasAt && II.isModulesImport()) \|\|
855	Identifier.is(K: tok::kw_import)) &&
856	!InMacroArgs && !DisableMacroExpansion &&
857	(getLangOpts().Modules \|\| getLangOpts().DebuggerSupport) &&
858	CurLexerCallback != CLK_CachingLexer) {
859	ModuleImportLoc = Identifier.getLocation();
860	NamedModuleImportPath.clear();
861	IsAtImport = true;
862	ModuleImportExpectsIdentifier = true;
863	CurLexerCallback = CLK_LexAfterModuleImport;
864	}
865	return true;
866	}
867
868	void Preprocessor::Lex(Token &Result) {
869	++LexLevel;
870
871	// We loop here until a lex function returns a token; this avoids recursion.
872	while (!CurLexerCallback(*this, Result))
873	;
874
875	if (Result.is(K: tok::unknown) && TheModuleLoader.HadFatalFailure)
876	return;
877
878	if (Result.is(K: tok::code_completion) && Result.getIdentifierInfo()) {
879	// Remember the identifier before code completion token.
880	setCodeCompletionIdentifierInfo(Result.getIdentifierInfo());
881	setCodeCompletionTokenRange(Start: Result.getLocation(), End: Result.getEndLoc());
882	// Set IdenfitierInfo to null to avoid confusing code that handles both
883	// identifiers and completion tokens.
884	Result.setIdentifierInfo(nullptr);
885	}
886
887	// Update StdCXXImportSeqState to track our position within a C++20 import-seq
888	// if this token is being produced as a result of phase 4 of translation.
889	// Update TrackGMFState to decide if we are currently in a Global Module
890	// Fragment. GMF state updates should precede StdCXXImportSeq ones, since GMF state
891	// depends on the prevailing StdCXXImportSeq state in two cases.
892	if (getLangOpts().CPlusPlusModules && LexLevel == `1` &&
893	!Result.getFlag(Flag: Token::IsReinjected)) {
894	switch (Result.getKind()) {
895	case tok::l_paren: case tok::l_square: case tok::l_brace:
896	StdCXXImportSeqState.handleOpenBracket();
897	break;
898	case tok::r_paren: case tok::r_square:
899	StdCXXImportSeqState.handleCloseBracket();
900	break;
901	case tok::r_brace:
902	StdCXXImportSeqState.handleCloseBrace();
903	break;
904	// This token is injected to represent the translation of '#include "a.h"'
905	// into "import a.h;". Mimic the notional ';'.
906	case tok::annot_module_include:
907	case tok::semi:
908	TrackGMFState.handleSemi();
909	StdCXXImportSeqState.handleSemi();
910	ModuleDeclState.handleSemi();
911	break;
912	case tok::header_name:
913	case tok::annot_header_unit:
914	StdCXXImportSeqState.handleHeaderName();
915	break;
916	case tok::kw_export:
917	TrackGMFState.handleExport();
918	StdCXXImportSeqState.handleExport();
919	ModuleDeclState.handleExport();
920	break;
921	case tok::colon:
922	ModuleDeclState.handleColon();
923	break;
924	case tok::period:
925	ModuleDeclState.handlePeriod();
926	break;
927	case tok::identifier:
928	// Check "import" and "module" when there is no open bracket. The two
929	// identifiers are not meaningful with open brackets.
930	if (StdCXXImportSeqState.atTopLevel()) {
931	if (Result.getIdentifierInfo()->isModulesImport()) {
932	TrackGMFState.handleImport(AfterTopLevelTokenSeq: StdCXXImportSeqState.afterTopLevelSeq());
933	StdCXXImportSeqState.handleImport();
934	if (StdCXXImportSeqState.afterImportSeq()) {
935	ModuleImportLoc = Result.getLocation();
936	NamedModuleImportPath.clear();
937	IsAtImport = false;
938	ModuleImportExpectsIdentifier = true;
939	CurLexerCallback = CLK_LexAfterModuleImport;
940	}
941	break;
942	} else if (Result.getIdentifierInfo() == getIdentifierInfo(Name: "module")) {
943	TrackGMFState.handleModule(AfterTopLevelTokenSeq: StdCXXImportSeqState.afterTopLevelSeq());
944	ModuleDeclState.handleModule();
945	break;
946	}
947	}
948	ModuleDeclState.handleIdentifier(Identifier: Result.getIdentifierInfo());
949	if (ModuleDeclState.isModuleCandidate())
950	break;
951	[[fallthrough]];
952	default:
953	TrackGMFState.handleMisc();
954	StdCXXImportSeqState.handleMisc();
955	ModuleDeclState.handleMisc();
956	break;
957	}
958	}
959
960	if (CurLexer && ++CheckPointCounter == CheckPointStepSize) {
961	CheckPoints [CurLexer ->getFileID()].push_back(Elt: CurLexer ->BufferPtr);
962	CheckPointCounter = `0`;
963	}
964
965	LastTokenWasAt = Result.is(K: tok::at);
966	--LexLevel;
967
968	if ((LexLevel == `0` \|\| PreprocessToken) &&
969	!Result.getFlag(Flag: Token::IsReinjected)) {
970	if (LexLevel == `0`)
971	++TokenCount;
972	if (OnToken)
973	OnToken (Result);
974	}
975	}
976
977	void Preprocessor::LexTokensUntilEOF(std::vector<Token> *Tokens) {
978	while (`1`) {
979	Token Tok;
980	Lex(Result&: Tok);
981	if (Tok.isOneOf(K1: tok::unknown, Ks: tok::eof, Ks: tok::eod,
982	Ks: tok::annot_repl_input_end))
983	break;
984	if (Tokens != nullptr)
985	Tokens->push_back(x: Tok);
986	}
987	}
988
989	/// Lex a header-name token (including one formed from header-name-tokens if
990	/// \p AllowConcatenation is \c true).
991	///
992	/// \param FilenameTok Filled in with the next token. On success, this will
993	/// be either a header_name token. On failure, it will be whatever other
994	/// token was found instead.
995	/// \param AllowMacroExpansion If \c true, allow the header name to be formed
996	/// by macro expansion (concatenating tokens as necessary if the first
997	/// token is a '<').
998	/// \return \c true if we reached EOD or EOF while looking for a > token in
999	/// a concatenated header name and diagnosed it. \c false otherwise.
1000	bool Preprocessor::LexHeaderName(Token &FilenameTok, bool AllowMacroExpansion) {
1001	// Lex using header-name tokenization rules if tokens are being lexed from
1002	// a file. Just grab a token normally if we're in a macro expansion.
1003	if (CurPPLexer)
1004	CurPPLexer->LexIncludeFilename(FilenameTok);
1005	else
1006	Lex(Result&: FilenameTok);
1007
1008	// This could be a <foo/bar.h> file coming from a macro expansion. In this
1009	// case, glue the tokens together into an angle_string_literal token.
1010	SmallString<`128`> FilenameBuffer;
1011	if (FilenameTok.is(K: tok::less) && AllowMacroExpansion) {
1012	bool StartOfLine = FilenameTok.isAtStartOfLine();
1013	bool LeadingSpace = FilenameTok.hasLeadingSpace();
1014	bool LeadingEmptyMacro = FilenameTok.hasLeadingEmptyMacro();
1015
1016	SourceLocation Start = FilenameTok.getLocation();
1017	SourceLocation End;
1018	FilenameBuffer.push_back(Elt: `'<'`);
1019
1020	// Consume tokens until we find a '>'.
1021	// FIXME: A header-name could be formed starting or ending with an
1022	// alternative token. It's not clear whether that's ill-formed in all
1023	// cases.
1024	while (FilenameTok.isNot(K: tok::greater)) {
1025	Lex(Result&: FilenameTok);
1026	if (FilenameTok.isOneOf(K1: tok::eod, K2: tok::eof)) {
1027	Diag(FilenameTok.getLocation(), diag::err_expected) << tok::greater;
1028	Diag(Start, diag::note_matching) << tok::less;
1029	return true;
1030	}
1031
1032	End = FilenameTok.getLocation();
1033
1034	// FIXME: Provide code completion for #includes.
1035	if (FilenameTok.is(K: tok::code_completion)) {
1036	setCodeCompletionReached();
1037	Lex(Result&: FilenameTok);
1038	continue;
1039	}
1040
1041	// Append the spelling of this token to the buffer. If there was a space
1042	// before it, add it now.
1043	if (FilenameTok.hasLeadingSpace())
1044	FilenameBuffer.push_back(Elt: `' '`);
1045
1046	// Get the spelling of the token, directly into FilenameBuffer if
1047	// possible.
1048	size_t PreAppendSize = FilenameBuffer.size();
1049	FilenameBuffer.resize(N: PreAppendSize + FilenameTok.getLength());
1050
1051	const char *BufPtr = &FilenameBuffer [PreAppendSize];
1052	unsigned ActualLen = getSpelling(Tok: FilenameTok, Buffer&: BufPtr);
1053
1054	// If the token was spelled somewhere else, copy it into FilenameBuffer.
1055	if (BufPtr != &FilenameBuffer [PreAppendSize])
1056	memcpy(dest: &FilenameBuffer [PreAppendSize], src: BufPtr, n: ActualLen);
1057
1058	// Resize FilenameBuffer to the correct size.
1059	if (FilenameTok.getLength() != ActualLen)
1060	FilenameBuffer.resize(N: PreAppendSize + ActualLen);
1061	}
1062
1063	FilenameTok.startToken();
1064	FilenameTok.setKind(tok::header_name);
1065	FilenameTok.setFlagValue(Flag: Token::StartOfLine, Val: StartOfLine);
1066	FilenameTok.setFlagValue(Flag: Token::LeadingSpace, Val: LeadingSpace);
1067	FilenameTok.setFlagValue(Flag: Token::LeadingEmptyMacro, Val: LeadingEmptyMacro);
1068	CreateString(Str: FilenameBuffer, Tok&: FilenameTok, ExpansionLocStart: Start, ExpansionLocEnd: End);
1069	} else if (FilenameTok.is(K: tok::string_literal) && AllowMacroExpansion) {
1070	// Convert a string-literal token of the form " h-char-sequence "
1071	// (produced by macro expansion) into a header-name token.
1072	//
1073	// The rules for header-names don't quite match the rules for
1074	// string-literals, but all the places where they differ result in
1075	// undefined behavior, so we can and do treat them the same.
1076	//
1077	// A string-literal with a prefix or suffix is not translated into a
1078	// header-name. This could theoretically be observable via the C++20
1079	// context-sensitive header-name formation rules.
1080	StringRef Str = getSpelling(Tok: FilenameTok, Buffer&: FilenameBuffer);
1081	if (Str.size() >= `2` && Str.front() == `'"'` && Str.back() == `'"'`)
1082	FilenameTok.setKind(tok::header_name);
1083	}
1084
1085	return false;
1086	}
1087
1088	/// Collect the tokens of a C++20 pp-import-suffix.
1089	void Preprocessor::CollectPpImportSuffix(SmallVectorImpl<Token> &Toks) {
1090	// FIXME: For error recovery, consider recognizing attribute syntax here
1091	// and terminating / diagnosing a missing semicolon if we find anything
1092	// else? (Can we leave that to the parser?)
1093	unsigned BracketDepth = `0`;
1094	while (true) {
1095	Toks.emplace_back();
1096	Lex(Result&: Toks.back());
1097
1098	switch (Toks.back().getKind()) {
1099	case tok::l_paren: case tok::l_square: case tok::l_brace:
1100	++BracketDepth;
1101	break;
1102
1103	case tok::r_paren: case tok::r_square: case tok::r_brace:
1104	if (BracketDepth == `0`)
1105	return;
1106	--BracketDepth;
1107	break;
1108
1109	case tok::semi:
1110	if (BracketDepth == `0`)
1111	return;
1112	break;
1113
1114	case tok::eof:
1115	return;
1116
1117	default:
1118	break;
1119	}
1120	}
1121	}
1122
1123
1124	/// Lex a token following the 'import' contextual keyword.
1125	///
1126	/// pp-import: [C++20]
1127	/// import header-name pp-import-suffix[opt] ;
1128	/// import header-name-tokens pp-import-suffix[opt] ;
1129	/// [ObjC] @ import module-name ;
1130	/// [Clang] import module-name ;
1131	///
1132	/// header-name-tokens:
1133	/// string-literal
1134	/// < [any sequence of preprocessing-tokens other than >] >
1135	///
1136	/// module-name:
1137	/// module-name-qualifier[opt] identifier
1138	///
1139	/// module-name-qualifier
1140	/// module-name-qualifier[opt] identifier .
1141	///
1142	/// We respond to a pp-import by importing macros from the named module.
1143	bool Preprocessor::LexAfterModuleImport(Token &Result) {
1144	// Figure out what kind of lexer we actually have.
1145	recomputeCurLexerKind();
1146
1147	// Lex the next token. The header-name lexing rules are used at the start of
1148	// a pp-import.
1149	//
1150	// For now, we only support header-name imports in C++20 mode.
1151	// FIXME: Should we allow this in all language modes that support an import
1152	// declaration as an extension?
1153	if (NamedModuleImportPath.empty() && getLangOpts().CPlusPlusModules) {
1154	if (LexHeaderName(FilenameTok&: Result))
1155	return true;
1156
1157	if (Result.is(K: tok::colon) && ModuleDeclState.isNamedModule()) {
1158	std::string Name = ModuleDeclState.getPrimaryName().str();
1159	Name += ":";
1160	NamedModuleImportPath.push_back(
1161	Elt: {getIdentifierInfo(Name), Result.getLocation()});
1162	CurLexerCallback = CLK_LexAfterModuleImport;
1163	return true;
1164	}
1165	} else {
1166	Lex(Result);
1167	}
1168
1169	// Allocate a holding buffer for a sequence of tokens and introduce it into
1170	// the token stream.
1171	auto EnterTokens = [this](ArrayRef<Token> Toks) {
1172	auto ToksCopy = std::make_unique<Token[]>(num: Toks.size());
1173	std::copy(first: Toks.begin(), last: Toks.end(), result: ToksCopy.get());
1174	EnterTokenStream(Toks: std::move(ToksCopy), NumToks: Toks.size(),
1175	/DisableMacroExpansion/ true, /IsReinject/ false);
1176	};
1177
1178	bool ImportingHeader = Result.is(K: tok::header_name);
1179	// Check for a header-name.
1180	SmallVector<Token, `32`> Suffix;
1181	if (ImportingHeader) {
1182	// Enter the header-name token into the token stream; a Lex action cannot
1183	// both return a token and cache tokens (doing so would corrupt the token
1184	// cache if the call to Lex comes from CachingLex / PeekAhead).
1185	Suffix.push_back(Elt: Result);
1186
1187	// Consume the pp-import-suffix and expand any macros in it now. We'll add
1188	// it back into the token stream later.
1189	CollectPpImportSuffix(Toks&: Suffix);
1190	if (Suffix.back().isNot(K: tok::semi)) {
1191	// This is not a pp-import after all.
1192	EnterTokens (Suffix);
1193	return false;
1194	}
1195
1196	// C++2a [cpp.module]p1:
1197	// The ';' preprocessing-token terminating a pp-import shall not have
1198	// been produced by macro replacement.
1199	SourceLocation SemiLoc = Suffix.back().getLocation();
1200	if (SemiLoc.isMacroID())
1201	Diag(SemiLoc, diag::err_header_import_semi_in_macro);
1202
1203	// Reconstitute the import token.
1204	Token ImportTok;
1205	ImportTok.startToken();
1206	ImportTok.setKind(tok::kw_import);
1207	ImportTok.setLocation(ModuleImportLoc);
1208	ImportTok.setIdentifierInfo(getIdentifierInfo(Name: "import"));
1209	ImportTok.setLength(`6`);
1210
1211	auto Action = HandleHeaderIncludeOrImport(
1212	/HashLoc/ SourceLocation (), IncludeTok&: ImportTok, FilenameTok&: Suffix.front(), EndLoc: SemiLoc);
1213	switch (Action.Kind) {
1214	case ImportAction::None:
1215	break;
1216
1217	case ImportAction::ModuleBegin:
1218	// Let the parser know we're textually entering the module.
1219	Suffix.emplace_back();
1220	Suffix.back().startToken();
1221	Suffix.back().setKind(tok::annot_module_begin);
1222	Suffix.back().setLocation(SemiLoc);
1223	Suffix.back().setAnnotationEndLoc(SemiLoc);
1224	Suffix.back().setAnnotationValue(Action.ModuleForHeader);
1225	[[fallthrough]];
1226
1227	case ImportAction::ModuleImport:
1228	case ImportAction::HeaderUnitImport:
1229	case ImportAction::SkippedModuleImport:
1230	// We chose to import (or textually enter) the file. Convert the
1231	// header-name token into a header unit annotation token.
1232	Suffix [`0`].setKind(tok::annot_header_unit);
1233	Suffix [`0`].setAnnotationEndLoc(Suffix [`0`].getLocation());
1234	Suffix [`0`].setAnnotationValue(Action.ModuleForHeader);
1235	// FIXME: Call the moduleImport callback?
1236	break;
1237	case ImportAction::Failure:
1238	assert(TheModuleLoader.HadFatalFailure &&
1239	"This should be an early exit only to a fatal error");
1240	Result.setKind(tok::eof);
1241	CurLexer ->cutOffLexing();
1242	EnterTokens (Suffix);
1243	return true;
1244	}
1245
1246	EnterTokens (Suffix);
1247	return false;
1248	}
1249
1250	// The token sequence
1251	//
1252	// import identifier (. identifier)*
1253	//
1254	// indicates a module import directive. We already saw the 'import'
1255	// contextual keyword, so now we're looking for the identifiers.
1256	if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) {
1257	// We expected to see an identifier here, and we did; continue handling
1258	// identifiers.
1259	NamedModuleImportPath.push_back(
1260	Elt: std::make_pair(x: Result.getIdentifierInfo(), y: Result.getLocation()));
1261	ModuleImportExpectsIdentifier = false;
1262	CurLexerCallback = CLK_LexAfterModuleImport;
1263	return true;
1264	}
1265
1266	// If we're expecting a '.' or a ';', and we got a '.', then wait until we
1267	// see the next identifier. (We can also see a '[[' that begins an
1268	// attribute-specifier-seq here under the Standard C++ Modules.)
1269	if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) {
1270	ModuleImportExpectsIdentifier = true;
1271	CurLexerCallback = CLK_LexAfterModuleImport;
1272	return true;
1273	}
1274
1275	// If we didn't recognize a module name at all, this is not a (valid) import.
1276	if (NamedModuleImportPath.empty() \|\| Result.is(K: tok::eof))
1277	return true;
1278
1279	// Consume the pp-import-suffix and expand any macros in it now, if we're not
1280	// at the semicolon already.
1281	SourceLocation SemiLoc = Result.getLocation();
1282	if (Result.isNot(K: tok::semi)) {
1283	Suffix.push_back(Elt: Result);
1284	CollectPpImportSuffix(Toks&: Suffix);
1285	if (Suffix.back().isNot(K: tok::semi)) {
1286	// This is not an import after all.
1287	EnterTokens (Suffix);
1288	return false;
1289	}
1290	SemiLoc = Suffix.back().getLocation();
1291	}
1292
1293	// Under the standard C++ Modules, the dot is just part of the module name,
1294	// and not a real hierarchy separator. Flatten such module names now.
1295	//
1296	// FIXME: Is this the right level to be performing this transformation?
1297	std::string FlatModuleName;
1298	if (getLangOpts().CPlusPlusModules) {
1299	for (auto &Piece : NamedModuleImportPath) {
1300	// If the FlatModuleName ends with colon, it implies it is a partition.
1301	if (!FlatModuleName.empty() && FlatModuleName.back() != `':'`)
1302	FlatModuleName += ".";
1303	FlatModuleName += Piece.first->getName();
1304	}
1305	SourceLocation FirstPathLoc = NamedModuleImportPath [`0`].second;
1306	NamedModuleImportPath.clear();
1307	NamedModuleImportPath.push_back(
1308	Elt: std::make_pair(x: getIdentifierInfo(Name: FlatModuleName), y&: FirstPathLoc));
1309	}
1310
1311	Module Imported = nullptr*;
1312	// We don't/shouldn't load the standard c++20 modules when preprocessing.
1313	if (getLangOpts().Modules && !isInImportingCXXNamedModules()) {
1314	Imported = TheModuleLoader.loadModule(ImportLoc: ModuleImportLoc,
1315	Path: NamedModuleImportPath,
1316	Visibility: Module::Hidden,
1317	/IsInclusionDirective=/false);
1318	if (Imported)
1319	makeModuleVisible(M: Imported, Loc: SemiLoc);
1320	}
1321
1322	if (Callbacks)
1323	Callbacks ->moduleImport(ImportLoc: ModuleImportLoc, Path: NamedModuleImportPath, Imported);
1324
1325	if (!Suffix.empty()) {
1326	EnterTokens (Suffix);
1327	return false;
1328	}
1329	return true;
1330	}
1331
1332	void Preprocessor::makeModuleVisible(Module *M, SourceLocation Loc) {
1333	CurSubmoduleState->VisibleModules.setVisible(
1334	M, Loc, Vis: [](Module *) {},
1335	Cb: [&](ArrayRef<Module > Path, Module Conflict, StringRef Message) {
1336	// FIXME: Include the path in the diagnostic.
1337	// FIXME: Include the import location for the conflicting module.
1338	Diag(ModuleImportLoc, diag::warn_module_conflict)
1339	<< Path [`0`]->getFullModuleName()
1340	<< Conflict->getFullModuleName()
1341	<< Message;
1342	});
1343
1344	// Add this module to the imports list of the currently-built submodule.
1345	if (!BuildingSubmoduleStack.empty() && M != BuildingSubmoduleStack.back().M)
1346	BuildingSubmoduleStack.back().M->Imports.insert(X: M);
1347	}
1348
1349	bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String,
1350	const char *DiagnosticTag,
1351	bool AllowMacroExpansion) {
1352	// We need at least one string literal.
1353	if (Result.isNot(K: tok::string_literal)) {
1354	Diag(Result, diag::err_expected_string_literal)
1355	<< /Source='in...'/`0` << DiagnosticTag;
1356	return false;
1357	}
1358
1359	// Lex string literal tokens, optionally with macro expansion.
1360	SmallVector<Token, `4`> StrToks;
1361	do {
1362	StrToks.push_back(Elt: Result);
1363
1364	if (Result.hasUDSuffix())
1365	Diag(Result, diag::err_invalid_string_udl);
1366
1367	if (AllowMacroExpansion)
1368	Lex(Result);
1369	else
1370	LexUnexpandedToken(Result);
1371	} while (Result.is(K: tok::string_literal));
1372
1373	// Concatenate and parse the strings.
1374	StringLiteralParser Literal(StrToks, *this);
1375	assert(Literal.isOrdinary() && "Didn't allow wide strings in");
1376
1377	if (Literal.hadError)
1378	return false;
1379
1380	if (Literal.Pascal) {
1381	Diag(StrToks [`0`].getLocation(), diag::err_expected_string_literal)
1382	<< /Source='in...'/`0` << DiagnosticTag;
1383	return false;
1384	}
1385
1386	String = std::string (Literal.GetString());
1387	return true;
1388	}
1389
1390	bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) {
1391	assert(Tok.is(tok::numeric_constant));
1392	SmallString<`8`> IntegerBuffer;
1393	bool NumberInvalid = false;
1394	StringRef Spelling = getSpelling(Tok, Buffer&: IntegerBuffer, Invalid: &NumberInvalid);
1395	if (NumberInvalid)
1396	return false;
1397	NumericLiteralParser Literal(Spelling, Tok.getLocation(), getSourceManager(),
1398	getLangOpts(), getTargetInfo(),
1399	getDiagnostics());
1400	if (Literal.hadError \|\| !Literal.isIntegerLiteral() \|\| Literal.hasUDSuffix())
1401	return false;
1402	llvm::APInt APVal(`64`, `0`);
1403	if (Literal.GetIntegerValue(Val&: APVal))
1404	return false;
1405	Lex(Result&: Tok);
1406	Value = APVal.getLimitedValue();
1407	return true;
1408	}
1409
1410	void Preprocessor::addCommentHandler(CommentHandler *Handler) {
1411	assert(Handler && "NULL comment handler");
1412	assert(!llvm::is_contained(CommentHandlers, Handler) &&
1413	"Comment handler already registered");
1414	CommentHandlers.push_back(x: Handler);
1415	}
1416
1417	void Preprocessor::removeCommentHandler(CommentHandler *Handler) {
1418	std::vector<CommentHandler *>::iterator Pos =
1419	llvm::find(Range&: CommentHandlers, Val: Handler);
1420	assert(Pos != CommentHandlers.end() && "Comment handler not registered");
1421	CommentHandlers.erase(position: Pos);
1422	}
1423
1424	bool Preprocessor::HandleComment(Token &result, SourceRange Comment) {
1425	bool AnyPendingTokens = false;
1426	for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(),
1427	HEnd = CommentHandlers.end();
1428	H != HEnd; ++H) {
1429	if ((H)->HandleComment(PP&: this, Comment))
1430	AnyPendingTokens = true;
1431	}
1432	if (!AnyPendingTokens \|\| getCommentRetentionState())
1433	return false;
1434	Lex(Result&: result);
1435	return true;
1436	}
1437
1438	void Preprocessor::emitMacroDeprecationWarning(const Token &Identifier) const {
1439	const MacroAnnotations &A =
1440	getMacroAnnotations(II: Identifier.getIdentifierInfo());
1441	assert(A.DeprecationInfo &&
1442	"Macro deprecation warning without recorded annotation!");
1443	const MacroAnnotationInfo &Info = *A.DeprecationInfo;
1444	if (Info.Message.empty())
1445	Diag(Identifier, diag::warn_pragma_deprecated_macro_use)
1446	<< Identifier.getIdentifierInfo() << `0`;
1447	else
1448	Diag(Identifier, diag::warn_pragma_deprecated_macro_use)
1449	<< Identifier.getIdentifierInfo() << `1` << Info.Message;
1450	Diag(Info.Location, diag::note_pp_macro_annotation) << `0`;
1451	}
1452
1453	void Preprocessor::emitRestrictExpansionWarning(const Token &Identifier) const {
1454	const MacroAnnotations &A =
1455	getMacroAnnotations(II: Identifier.getIdentifierInfo());
1456	assert(A.RestrictExpansionInfo &&
1457	"Macro restricted expansion warning without recorded annotation!");
1458	const MacroAnnotationInfo &Info = *A.RestrictExpansionInfo;
1459	if (Info.Message.empty())
1460	Diag(Identifier, diag::warn_pragma_restrict_expansion_macro_use)
1461	<< Identifier.getIdentifierInfo() << `0`;
1462	else
1463	Diag(Identifier, diag::warn_pragma_restrict_expansion_macro_use)
1464	<< Identifier.getIdentifierInfo() << `1` << Info.Message;
1465	Diag(Info.Location, diag::note_pp_macro_annotation) << `1`;
1466	}
1467
1468	void Preprocessor::emitRestrictInfNaNWarning(const Token &Identifier,
1469	unsigned DiagSelection) const {
1470	Diag(Identifier, diag::warn_fp_nan_inf_when_disabled) << DiagSelection << `1`;
1471	}
1472
1473	void Preprocessor::emitFinalMacroWarning(const Token &Identifier,
1474	bool IsUndef) const {
1475	const MacroAnnotations &A =
1476	getMacroAnnotations(II: Identifier.getIdentifierInfo());
1477	assert(A.FinalAnnotationLoc &&
1478	"Final macro warning without recorded annotation!");
1479
1480	Diag(Identifier, diag::warn_pragma_final_macro)
1481	<< Identifier.getIdentifierInfo() << (IsUndef ? `0` : `1`);
1482	Diag(*A.FinalAnnotationLoc, diag::note_pp_macro_annotation) << `2`;
1483	}
1484
1485	bool Preprocessor::isSafeBufferOptOut(const SourceManager &SourceMgr,
1486	const SourceLocation &Loc) const {
1487	// Try to find a region in `SafeBufferOptOutMap` where `Loc` is in:
1488	auto FirstRegionEndingAfterLoc = llvm::partition_point(
1489	Range: SafeBufferOptOutMap,
1490	P: [&SourceMgr,
1491	&Loc](const std::pair<SourceLocation, SourceLocation> &Region) {
1492	return SourceMgr.isBeforeInTranslationUnit(LHS: Region.second, RHS: Loc);
1493	});
1494
1495	if (FirstRegionEndingAfterLoc != SafeBufferOptOutMap.end()) {
1496	// To test if the start location of the found region precedes `Loc`:
1497	return SourceMgr.isBeforeInTranslationUnit(LHS: FirstRegionEndingAfterLoc->first,
1498	RHS: Loc);
1499	}
1500	// If we do not find a region whose end location passes `Loc`, we want to
1501	// check if the current region is still open:
1502	if (!SafeBufferOptOutMap.empty() &&
1503	SafeBufferOptOutMap.back().first == SafeBufferOptOutMap.back().second)
1504	return SourceMgr.isBeforeInTranslationUnit(LHS: SafeBufferOptOutMap.back().first,
1505	RHS: Loc);
1506	return false;
1507	}
1508
1509	bool Preprocessor::enterOrExitSafeBufferOptOutRegion(
1510	bool isEnter, const SourceLocation &Loc) {
1511	if (isEnter) {
1512	if (isPPInSafeBufferOptOutRegion())
1513	return true; // invalid enter action
1514	InSafeBufferOptOutRegion = true;
1515	CurrentSafeBufferOptOutStart = Loc;
1516
1517	// To set the start location of a new region:
1518
1519	if (!SafeBufferOptOutMap.empty()) {
1520	[[maybe_unused]] auto *PrevRegion = &SafeBufferOptOutMap.back();
1521	assert(PrevRegion->first != PrevRegion->second &&
1522	"Shall not begin a safe buffer opt-out region before closing the "
1523	"previous one.");
1524	}
1525	// If the start location equals to the end location, we call the region a
1526	// open region or a unclosed region (i.e., end location has not been set
1527	// yet).
1528	SafeBufferOptOutMap.emplace_back(Args: Loc, Args: Loc);
1529	} else {
1530	if (!isPPInSafeBufferOptOutRegion())
1531	return true; // invalid enter action
1532	InSafeBufferOptOutRegion = false;
1533
1534	// To set the end location of the current open region:
1535
1536	assert(!SafeBufferOptOutMap.empty() &&
1537	"Misordered safe buffer opt-out regions");
1538	auto *CurrRegion = &SafeBufferOptOutMap.back();
1539	assert(CurrRegion->first == CurrRegion->second &&
1540	"Set end location to a closed safe buffer opt-out region");
1541	CurrRegion->second = Loc;
1542	}
1543	return false;
1544	}
1545
1546	bool Preprocessor::isPPInSafeBufferOptOutRegion() {
1547	return InSafeBufferOptOutRegion;
1548	}
1549	bool Preprocessor::isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc) {
1550	StartLoc = CurrentSafeBufferOptOutStart;
1551	return InSafeBufferOptOutRegion;
1552	}
1553
1554	ModuleLoader::~ModuleLoader() = default;
1555
1556	CommentHandler::~CommentHandler() = default;
1557
1558	EmptylineHandler::~EmptylineHandler() = default;
1559
1560	CodeCompletionHandler::~CodeCompletionHandler() = default;
1561
1562	void Preprocessor::createPreprocessingRecord() {
1563	if (Record)
1564	return;
1565
1566	Record = new PreprocessingRecord (getSourceManager());
1567	addPPCallbacks(C: std::unique_ptr<PPCallbacks>(Record));
1568	}
1569
1570	const char Preprocessor::getCheckPoint(FileID FID, const* char Start) const* {
1571	if (auto It = CheckPoints.find(Val: FID); It != CheckPoints.end()) {
1572	const SmallVector<const char *> &FileCheckPoints = It ->second;
1573	const char Last = nullptr*;
1574	// FIXME: Do better than a linear search.
1575	for (const char *P : FileCheckPoints) {
1576	if (P > Start)
1577	break;
1578	Last = P;
1579	}
1580	return Last;
1581	}
1582
1583	return nullptr;
1584	}
1585

source code of clang/lib/Lex/Preprocessor.cpp