1//===--- FileIndex.cpp - Indexes for files. ------------------------ C++-*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "FileIndex.h"
10#include "CollectMacros.h"
11#include "ParsedAST.h"
12#include "clang-include-cleaner/Record.h"
13#include "index/Index.h"
14#include "index/MemIndex.h"
15#include "index/Merge.h"
16#include "index/Ref.h"
17#include "index/Relation.h"
18#include "index/Serialization.h"
19#include "index/Symbol.h"
20#include "index/SymbolCollector.h"
21#include "index/SymbolID.h"
22#include "index/SymbolOrigin.h"
23#include "index/dex/Dex.h"
24#include "support/Logger.h"
25#include "support/MemoryTree.h"
26#include "support/Path.h"
27#include "clang/AST/ASTContext.h"
28#include "clang/Index/IndexingAction.h"
29#include "clang/Index/IndexingOptions.h"
30#include "clang/Lex/Preprocessor.h"
31#include "llvm/ADT/DenseMap.h"
32#include "llvm/ADT/STLExtras.h"
33#include "llvm/ADT/StringMap.h"
34#include "llvm/ADT/StringRef.h"
35#include <algorithm>
36#include <memory>
37#include <optional>
38#include <tuple>
39#include <utility>
40#include <vector>
41
42namespace clang {
43namespace clangd {
44namespace {
45
46SlabTuple indexSymbols(ASTContext &AST, Preprocessor &PP,
47 llvm::ArrayRef<Decl *> DeclsToIndex,
48 const MainFileMacros *MacroRefsToIndex,
49 const include_cleaner::PragmaIncludes &PI,
50 bool IsIndexMainAST, llvm::StringRef Version,
51 bool CollectMainFileRefs) {
52 SymbolCollector::Options CollectorOpts;
53 CollectorOpts.CollectIncludePath = true;
54 CollectorOpts.PragmaIncludes = &PI;
55 CollectorOpts.CountReferences = false;
56 CollectorOpts.Origin =
57 IsIndexMainAST ? SymbolOrigin::Open : SymbolOrigin::Preamble;
58 CollectorOpts.CollectMainFileRefs = CollectMainFileRefs;
59 // We want stdlib implementation details in the index only if we've opened the
60 // file in question. This does means xrefs won't work, though.
61 CollectorOpts.CollectReserved = IsIndexMainAST;
62
63 index::IndexingOptions IndexOpts;
64 // We only need declarations, because we don't count references.
65 IndexOpts.SystemSymbolFilter =
66 index::IndexingOptions::SystemSymbolFilterKind::DeclarationsOnly;
67 // We index function-local classes and its member functions only.
68 IndexOpts.IndexFunctionLocals = true;
69 if (IsIndexMainAST) {
70 // We only collect refs when indexing main AST.
71 CollectorOpts.RefFilter = RefKind::All;
72 // Comments for main file can always be obtained from sema, do not store
73 // them in the index.
74 CollectorOpts.StoreAllDocumentation = false;
75 } else {
76 IndexOpts.IndexMacrosInPreprocessor = true;
77 CollectorOpts.CollectMacro = true;
78 CollectorOpts.StoreAllDocumentation = true;
79 }
80
81 SymbolCollector Collector(std::move(CollectorOpts));
82 Collector.setPreprocessor(PP);
83 index::indexTopLevelDecls(Ctx&: AST, PP, Decls: DeclsToIndex, DataConsumer&: Collector, Opts: IndexOpts);
84 if (MacroRefsToIndex)
85 Collector.handleMacros(MacroRefsToIndex: *MacroRefsToIndex);
86
87 const auto &SM = AST.getSourceManager();
88 const auto MainFileEntry = SM.getFileEntryRefForID(FID: SM.getMainFileID());
89 std::string FileName =
90 std::string(MainFileEntry ? MainFileEntry->getName() : "");
91
92 auto Syms = Collector.takeSymbols();
93 auto Refs = Collector.takeRefs();
94 auto Relations = Collector.takeRelations();
95
96 vlog(Fmt: "indexed {0} AST for {1} version {2}:\n"
97 " symbol slab: {3} symbols, {4} bytes\n"
98 " ref slab: {5} symbols, {6} refs, {7} bytes\n"
99 " relations slab: {8} relations, {9} bytes",
100 Vals: IsIndexMainAST ? "file" : "preamble", Vals&: FileName, Vals&: Version, Vals: Syms.size(),
101 Vals: Syms.bytes(), Vals: Refs.size(), Vals: Refs.numRefs(), Vals: Refs.bytes(),
102 Vals: Relations.size(), Vals: Relations.bytes());
103 return std::make_tuple(args: std::move(Syms), args: std::move(Refs),
104 args: std::move(Relations));
105}
106
107// We keep only the node "U" and its edges. Any node other than "U" will be
108// empty in the resultant graph.
109IncludeGraph getSubGraph(llvm::StringRef URI, const IncludeGraph &FullGraph) {
110 IncludeGraph IG;
111
112 auto Entry = IG.try_emplace(Key: URI).first;
113 auto &Node = Entry->getValue();
114 Node = FullGraph.lookup(Key: Entry->getKey());
115 Node.URI = Entry->getKey();
116
117 // URIs inside nodes must point into the keys of the same IncludeGraph.
118 for (auto &Include : Node.DirectIncludes) {
119 auto I = IG.try_emplace(Key: Include).first;
120 I->getValue().URI = I->getKey();
121 Include = I->getKey();
122 }
123 return IG;
124}
125} // namespace
126
127FileShardedIndex::FileShardedIndex(IndexFileIn Input)
128 : Index(std::move(Input)) {
129 // Used to build RelationSlabs.
130 llvm::DenseMap<SymbolID, FileShard *> SymbolIDToFile;
131
132 // Attribute each Symbol to both their declaration and definition locations.
133 if (Index.Symbols) {
134 for (const auto &S : *Index.Symbols) {
135 auto It = Shards.try_emplace(Key: S.CanonicalDeclaration.FileURI);
136 It.first->getValue().Symbols.insert(V: &S);
137 SymbolIDToFile[S.ID] = &It.first->getValue();
138 // Only bother if definition file is different than declaration file.
139 if (S.Definition &&
140 S.Definition.FileURI != S.CanonicalDeclaration.FileURI) {
141 auto It = Shards.try_emplace(Key: S.Definition.FileURI);
142 It.first->getValue().Symbols.insert(V: &S);
143 }
144 }
145 }
146 // Attribute references into each file they occured in.
147 if (Index.Refs) {
148 for (const auto &SymRefs : *Index.Refs) {
149 for (const auto &R : SymRefs.second) {
150 const auto It = Shards.try_emplace(Key: R.Location.FileURI);
151 It.first->getValue().Refs.insert(V: &R);
152 RefToSymID[&R] = SymRefs.first;
153 }
154 }
155 }
156 // The Subject and/or Object shards might be part of multiple TUs. In
157 // such cases there will be a race and the last TU to write the shard
158 // will win and all the other relations will be lost. To avoid this,
159 // we store relations in both shards. A race might still happen if the
160 // same translation unit produces different relations under different
161 // configurations, but that's something clangd doesn't handle in general.
162 if (Index.Relations) {
163 for (const auto &R : *Index.Relations) {
164 // FIXME: RelationSlab shouldn't contain dangling relations.
165 FileShard *SubjectFile = SymbolIDToFile.lookup(Val: R.Subject);
166 FileShard *ObjectFile = SymbolIDToFile.lookup(Val: R.Object);
167 if (SubjectFile)
168 SubjectFile->Relations.insert(V: &R);
169 if (ObjectFile && ObjectFile != SubjectFile)
170 ObjectFile->Relations.insert(V: &R);
171 }
172 }
173 // Store only the direct includes of a file in a shard.
174 if (Index.Sources) {
175 const auto &FullGraph = *Index.Sources;
176 for (const auto &It : FullGraph) {
177 auto ShardIt = Shards.try_emplace(Key: It.first());
178 ShardIt.first->getValue().IG = getSubGraph(URI: It.first(), FullGraph);
179 }
180 }
181}
182std::vector<llvm::StringRef> FileShardedIndex::getAllSources() const {
183 // It should be enough to construct a vector with {Shards.keys().begin(),
184 // Shards.keys().end()} but MSVC fails to compile that.
185 std::vector<PathRef> Result;
186 Result.reserve(n: Shards.size());
187 for (auto Key : Shards.keys())
188 Result.push_back(x: Key);
189 return Result;
190}
191
192std::optional<IndexFileIn>
193FileShardedIndex::getShard(llvm::StringRef Uri) const {
194 auto It = Shards.find(Key: Uri);
195 if (It == Shards.end())
196 return std::nullopt;
197
198 IndexFileIn IF;
199 IF.Sources = It->getValue().IG;
200 IF.Cmd = Index.Cmd;
201
202 SymbolSlab::Builder SymB;
203 for (const auto *S : It->getValue().Symbols)
204 SymB.insert(S: *S);
205 IF.Symbols = std::move(SymB).build();
206
207 RefSlab::Builder RefB;
208 for (const auto *Ref : It->getValue().Refs) {
209 auto SID = RefToSymID.lookup(Val: Ref);
210 RefB.insert(ID: SID, S: *Ref);
211 }
212 IF.Refs = std::move(RefB).build();
213
214 RelationSlab::Builder RelB;
215 for (const auto *Rel : It->getValue().Relations) {
216 RelB.insert(R: *Rel);
217 }
218 IF.Relations = std::move(RelB).build();
219 // Explicit move here is needed by some compilers.
220 return std::move(IF);
221}
222
223SlabTuple indexMainDecls(ParsedAST &AST) {
224 return indexSymbols(
225 AST&: AST.getASTContext(), PP&: AST.getPreprocessor(), DeclsToIndex: AST.getLocalTopLevelDecls(),
226 MacroRefsToIndex: &AST.getMacros(), PI: AST.getPragmaIncludes(),
227 /*IsIndexMainAST=*/true, Version: AST.version(), /*CollectMainFileRefs=*/true);
228}
229
230SlabTuple indexHeaderSymbols(llvm::StringRef Version, ASTContext &AST,
231 Preprocessor &PP,
232 const include_cleaner::PragmaIncludes &PI) {
233 std::vector<Decl *> DeclsToIndex(
234 AST.getTranslationUnitDecl()->decls().begin(),
235 AST.getTranslationUnitDecl()->decls().end());
236 return indexSymbols(AST, PP, DeclsToIndex,
237 /*MainFileMacros=*/MacroRefsToIndex: nullptr, PI,
238 /*IsIndexMainAST=*/false, Version,
239 /*CollectMainFileRefs=*/false);
240}
241
242FileSymbols::FileSymbols(IndexContents IdxContents)
243 : IdxContents(IdxContents) {}
244
245void FileSymbols::update(llvm::StringRef Key,
246 std::unique_ptr<SymbolSlab> Symbols,
247 std::unique_ptr<RefSlab> Refs,
248 std::unique_ptr<RelationSlab> Relations,
249 bool CountReferences) {
250 std::lock_guard<std::mutex> Lock(Mutex);
251 ++Version;
252 if (!Symbols)
253 SymbolsSnapshot.erase(Key);
254 else
255 SymbolsSnapshot[Key] = std::move(Symbols);
256 if (!Refs) {
257 RefsSnapshot.erase(Key);
258 } else {
259 RefSlabAndCountReferences Item;
260 Item.CountReferences = CountReferences;
261 Item.Slab = std::move(Refs);
262 RefsSnapshot[Key] = std::move(Item);
263 }
264 if (!Relations)
265 RelationsSnapshot.erase(Key);
266 else
267 RelationsSnapshot[Key] = std::move(Relations);
268}
269
270std::unique_ptr<SymbolIndex>
271FileSymbols::buildIndex(IndexType Type, DuplicateHandling DuplicateHandle,
272 size_t *Version) {
273 std::vector<std::shared_ptr<SymbolSlab>> SymbolSlabs;
274 std::vector<std::shared_ptr<RefSlab>> RefSlabs;
275 std::vector<std::shared_ptr<RelationSlab>> RelationSlabs;
276 llvm::StringSet<> Files;
277 std::vector<RefSlab *> MainFileRefs;
278 {
279 std::lock_guard<std::mutex> Lock(Mutex);
280 for (const auto &FileAndSymbols : SymbolsSnapshot) {
281 SymbolSlabs.push_back(x: FileAndSymbols.second);
282 Files.insert(key: FileAndSymbols.first());
283 }
284 for (const auto &FileAndRefs : RefsSnapshot) {
285 RefSlabs.push_back(x: FileAndRefs.second.Slab);
286 Files.insert(key: FileAndRefs.first());
287 if (FileAndRefs.second.CountReferences)
288 MainFileRefs.push_back(x: RefSlabs.back().get());
289 }
290 for (const auto &FileAndRelations : RelationsSnapshot) {
291 Files.insert(key: FileAndRelations.first());
292 RelationSlabs.push_back(x: FileAndRelations.second);
293 }
294
295 if (Version)
296 *Version = this->Version;
297 }
298 std::vector<const Symbol *> AllSymbols;
299 std::vector<Symbol> SymsStorage;
300 switch (DuplicateHandle) {
301 case DuplicateHandling::Merge: {
302 llvm::DenseMap<SymbolID, Symbol> Merged;
303 for (const auto &Slab : SymbolSlabs) {
304 for (const auto &Sym : *Slab) {
305 assert(Sym.References == 0 &&
306 "Symbol with non-zero references sent to FileSymbols");
307 auto I = Merged.try_emplace(Key: Sym.ID, Args: Sym);
308 if (!I.second)
309 I.first->second = mergeSymbol(L: I.first->second, R: Sym);
310 }
311 }
312 for (const RefSlab *Refs : MainFileRefs)
313 for (const auto &Sym : *Refs) {
314 auto It = Merged.find(Val: Sym.first);
315 // This might happen while background-index is still running.
316 if (It == Merged.end())
317 continue;
318 It->getSecond().References += Sym.second.size();
319 }
320 SymsStorage.reserve(n: Merged.size());
321 for (auto &Sym : Merged) {
322 SymsStorage.push_back(x: std::move(Sym.second));
323 AllSymbols.push_back(x: &SymsStorage.back());
324 }
325 break;
326 }
327 case DuplicateHandling::PickOne: {
328 llvm::DenseSet<SymbolID> AddedSymbols;
329 for (const auto &Slab : SymbolSlabs)
330 for (const auto &Sym : *Slab) {
331 assert(Sym.References == 0 &&
332 "Symbol with non-zero references sent to FileSymbols");
333 if (AddedSymbols.insert(V: Sym.ID).second)
334 AllSymbols.push_back(x: &Sym);
335 }
336 break;
337 }
338 }
339
340 std::vector<Ref> RefsStorage; // Contiguous ranges for each SymbolID.
341 llvm::DenseMap<SymbolID, llvm::ArrayRef<Ref>> AllRefs;
342 {
343 llvm::DenseMap<SymbolID, llvm::SmallVector<Ref, 4>> MergedRefs;
344 size_t Count = 0;
345 for (const auto &RefSlab : RefSlabs)
346 for (const auto &Sym : *RefSlab) {
347 MergedRefs[Sym.first].append(in_start: Sym.second.begin(), in_end: Sym.second.end());
348 Count += Sym.second.size();
349 }
350 RefsStorage.reserve(n: Count);
351 AllRefs.reserve(NumEntries: MergedRefs.size());
352 for (auto &Sym : MergedRefs) {
353 auto &SymRefs = Sym.second;
354 // Sorting isn't required, but yields more stable results over rebuilds.
355 llvm::sort(C&: SymRefs);
356 llvm::copy(Range&: SymRefs, Out: back_inserter(x&: RefsStorage));
357 AllRefs.try_emplace(
358 Key: Sym.first,
359 Args: llvm::ArrayRef<Ref>(&RefsStorage[RefsStorage.size() - SymRefs.size()],
360 SymRefs.size()));
361 }
362 }
363
364 std::vector<Relation> AllRelations;
365 for (const auto &RelationSlab : RelationSlabs) {
366 for (const auto &R : *RelationSlab)
367 AllRelations.push_back(x: R);
368 }
369 // Sort relations and remove duplicates that could arise due to
370 // relations being stored in both the shards containing their
371 // subject and object.
372 llvm::sort(C&: AllRelations);
373 AllRelations.erase(first: std::unique(first: AllRelations.begin(), last: AllRelations.end()),
374 last: AllRelations.end());
375
376 size_t StorageSize =
377 RefsStorage.size() * sizeof(Ref) + SymsStorage.size() * sizeof(Symbol);
378 for (const auto &Slab : SymbolSlabs)
379 StorageSize += Slab->bytes();
380 for (const auto &RefSlab : RefSlabs)
381 StorageSize += RefSlab->bytes();
382
383 // Index must keep the slabs and contiguous ranges alive.
384 switch (Type) {
385 case IndexType::Light:
386 return std::make_unique<MemIndex>(
387 args: llvm::make_pointee_range(Range&: AllSymbols), args: std::move(AllRefs),
388 args: std::move(AllRelations), args: std::move(Files), args&: IdxContents,
389 args: std::make_tuple(args: std::move(SymbolSlabs), args: std::move(RefSlabs),
390 args: std::move(RefsStorage), args: std::move(SymsStorage)),
391 args&: StorageSize);
392 case IndexType::Heavy:
393 return std::make_unique<dex::Dex>(
394 args: llvm::make_pointee_range(Range&: AllSymbols), args: std::move(AllRefs),
395 args: std::move(AllRelations), args: std::move(Files), args&: IdxContents,
396 args: std::make_tuple(args: std::move(SymbolSlabs), args: std::move(RefSlabs),
397 args: std::move(RefsStorage), args: std::move(SymsStorage)),
398 args&: StorageSize);
399 }
400 llvm_unreachable("Unknown clangd::IndexType");
401}
402
403void FileSymbols::profile(MemoryTree &MT) const {
404 std::lock_guard<std::mutex> Lock(Mutex);
405 for (const auto &SymSlab : SymbolsSnapshot) {
406 MT.detail(Name: SymSlab.first())
407 .child(Name: "symbols")
408 .addUsage(Increment: SymSlab.second->bytes());
409 }
410 for (const auto &RefSlab : RefsSnapshot) {
411 MT.detail(Name: RefSlab.first())
412 .child(Name: "references")
413 .addUsage(Increment: RefSlab.second.Slab->bytes());
414 }
415 for (const auto &RelSlab : RelationsSnapshot) {
416 MT.detail(Name: RelSlab.first())
417 .child(Name: "relations")
418 .addUsage(Increment: RelSlab.second->bytes());
419 }
420}
421
422FileIndex::FileIndex()
423 : MergedIndex(&MainFileIndex, &PreambleIndex),
424 PreambleSymbols(IndexContents::Symbols | IndexContents::Relations),
425 PreambleIndex(std::make_unique<MemIndex>()),
426 MainFileSymbols(IndexContents::All),
427 MainFileIndex(std::make_unique<MemIndex>()) {}
428
429void FileIndex::updatePreamble(IndexFileIn IF) {
430 FileShardedIndex ShardedIndex(std::move(IF));
431 for (auto Uri : ShardedIndex.getAllSources()) {
432 auto IF = ShardedIndex.getShard(Uri);
433 // We are using the key received from ShardedIndex, so it should always
434 // exist.
435 assert(IF);
436 PreambleSymbols.update(
437 Key: Uri, Symbols: std::make_unique<SymbolSlab>(args: std::move(*IF->Symbols)),
438 Refs: std::make_unique<RefSlab>(),
439 Relations: std::make_unique<RelationSlab>(args: std::move(*IF->Relations)),
440 /*CountReferences=*/false);
441 }
442 size_t IndexVersion = 0;
443 auto NewIndex = PreambleSymbols.buildIndex(
444 Type: IndexType::Heavy, DuplicateHandle: DuplicateHandling::PickOne, Version: &IndexVersion);
445 {
446 std::lock_guard<std::mutex> Lock(UpdateIndexMu);
447 if (IndexVersion <= PreambleIndexVersion) {
448 // We lost the race, some other thread built a later version.
449 return;
450 }
451 PreambleIndexVersion = IndexVersion;
452 PreambleIndex.reset(std::move(NewIndex));
453 vlog(
454 Fmt: "Build dynamic index for header symbols with estimated memory usage of "
455 "{0} bytes",
456 Vals: PreambleIndex.estimateMemoryUsage());
457 }
458}
459
460void FileIndex::updatePreamble(PathRef Path, llvm::StringRef Version,
461 ASTContext &AST, Preprocessor &PP,
462 const include_cleaner::PragmaIncludes &PI) {
463 IndexFileIn IF;
464 std::tie(args&: IF.Symbols, args: std::ignore, args&: IF.Relations) =
465 indexHeaderSymbols(Version, AST, PP, PI);
466 updatePreamble(IF: std::move(IF));
467}
468
469void FileIndex::updateMain(PathRef Path, ParsedAST &AST) {
470 auto Contents = indexMainDecls(AST);
471 MainFileSymbols.update(
472 Key: URI::create(AbsolutePath: Path).toString(),
473 Symbols: std::make_unique<SymbolSlab>(args: std::move(std::get<0>(t&: Contents))),
474 Refs: std::make_unique<RefSlab>(args: std::move(std::get<1>(t&: Contents))),
475 Relations: std::make_unique<RelationSlab>(args: std::move(std::get<2>(t&: Contents))),
476 /*CountReferences=*/true);
477 size_t IndexVersion = 0;
478 auto NewIndex = MainFileSymbols.buildIndex(
479 Type: IndexType::Light, DuplicateHandle: DuplicateHandling::Merge, Version: &IndexVersion);
480 {
481 std::lock_guard<std::mutex> Lock(UpdateIndexMu);
482 if (IndexVersion <= MainIndexVersion) {
483 // We lost the race, some other thread built a later version.
484 return;
485 }
486 MainIndexVersion = IndexVersion;
487 MainFileIndex.reset(std::move(NewIndex));
488 vlog(
489 Fmt: "Build dynamic index for main-file symbols with estimated memory usage "
490 "of {0} bytes",
491 Vals: MainFileIndex.estimateMemoryUsage());
492 }
493}
494
495void FileIndex::profile(MemoryTree &MT) const {
496 PreambleSymbols.profile(MT&: MT.child(Name: "preamble").child(Name: "slabs"));
497 MT.child(Name: "preamble")
498 .child(Name: "index")
499 .addUsage(Increment: PreambleIndex.estimateMemoryUsage());
500 MainFileSymbols.profile(MT&: MT.child(Name: "main_file").child(Name: "slabs"));
501 MT.child(Name: "main_file")
502 .child(Name: "index")
503 .addUsage(Increment: MainFileIndex.estimateMemoryUsage());
504}
505} // namespace clangd
506} // namespace clang
507

source code of clang-tools-extra/clangd/index/FileIndex.cpp