1//===--- GlobalCompilationDatabase.cpp ---------------------------*- C++-*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "GlobalCompilationDatabase.h"
10#include "Config.h"
11#include "FS.h"
12#include "SourceCode.h"
13#include "support/Logger.h"
14#include "support/Path.h"
15#include "support/Threading.h"
16#include "support/ThreadsafeFS.h"
17#include "clang/Tooling/ArgumentsAdjusters.h"
18#include "clang/Tooling/CompilationDatabase.h"
19#include "clang/Tooling/CompilationDatabasePluginRegistry.h"
20#include "clang/Tooling/JSONCompilationDatabase.h"
21#include "clang/Tooling/Tooling.h"
22#include "llvm/ADT/PointerIntPair.h"
23#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/ScopeExit.h"
25#include "llvm/ADT/SmallString.h"
26#include "llvm/ADT/StringMap.h"
27#include "llvm/Support/Path.h"
28#include "llvm/Support/VirtualFileSystem.h"
29#include "llvm/TargetParser/Host.h"
30#include <atomic>
31#include <chrono>
32#include <condition_variable>
33#include <mutex>
34#include <optional>
35#include <string>
36#include <tuple>
37#include <vector>
38
39namespace clang {
40namespace clangd {
41namespace {
42
43// Runs the given action on all parent directories of filename, starting from
44// deepest directory and going up to root. Stops whenever action succeeds.
45void actOnAllParentDirectories(PathRef FileName,
46 llvm::function_ref<bool(PathRef)> Action) {
47 for (auto Path = absoluteParent(Path: FileName); !Path.empty() && !Action(Path);
48 Path = absoluteParent(Path))
49 ;
50}
51
52} // namespace
53
54tooling::CompileCommand
55GlobalCompilationDatabase::getFallbackCommand(PathRef File) const {
56 std::vector<std::string> Argv = {"clang"};
57 // Clang treats .h files as C by default and files without extension as linker
58 // input, resulting in unhelpful diagnostics.
59 // Parsing as Objective C++ is friendly to more cases.
60 auto FileExtension = llvm::sys::path::extension(path: File);
61 if (FileExtension.empty() || FileExtension == ".h")
62 Argv.push_back(x: "-xobjective-c++-header");
63 Argv.push_back(x: std::string(File));
64 tooling::CompileCommand Cmd(llvm::sys::path::parent_path(path: File),
65 llvm::sys::path::filename(path: File), std::move(Argv),
66 /*Output=*/"");
67 Cmd.Heuristic = "clangd fallback";
68 return Cmd;
69}
70
71// Loads and caches the CDB from a single directory.
72//
73// This class is threadsafe, which is to say we have independent locks for each
74// directory we're searching for a CDB.
75// Loading is deferred until first access.
76//
77// The DirectoryBasedCDB keeps a map from path => DirectoryCache.
78// Typical usage is to:
79// - 1) determine all the paths that might be searched
80// - 2) acquire the map lock and get-or-create all the DirectoryCache entries
81// - 3) release the map lock and query the caches as desired
82class DirectoryBasedGlobalCompilationDatabase::DirectoryCache {
83 using stopwatch = std::chrono::steady_clock;
84
85 // CachedFile is used to read a CDB file on disk (e.g. compile_commands.json).
86 // It specializes in being able to quickly bail out if the file is unchanged,
87 // which is the common case.
88 // Internally, it stores file metadata so a stat() can verify it's unchanged.
89 // We don't actually cache the content as it's not needed - if the file is
90 // unchanged then the previous CDB is valid.
91 struct CachedFile {
92 CachedFile(llvm::StringRef Parent, llvm::StringRef Rel) {
93 llvm::SmallString<256> Path = Parent;
94 llvm::sys::path::append(path&: Path, a: Rel);
95 this->Path = Path.str().str();
96 }
97 std::string Path;
98 size_t Size = NoFileCached;
99 llvm::sys::TimePoint<> ModifiedTime;
100 FileDigest ContentHash;
101
102 static constexpr size_t NoFileCached = -1;
103
104 struct LoadResult {
105 enum {
106 FileNotFound,
107 TransientError,
108 FoundSameData,
109 FoundNewData,
110 } Result;
111 std::unique_ptr<llvm::MemoryBuffer> Buffer; // Set only if FoundNewData
112 };
113
114 LoadResult load(llvm::vfs::FileSystem &FS, bool HasOldData);
115 };
116
117 // If we've looked for a CDB here and found none, the time when that happened.
118 // (Atomics make it possible for get() to return without taking a lock)
119 std::atomic<stopwatch::rep> NoCDBAt = {
120 stopwatch::time_point::min().time_since_epoch().count()};
121
122 // Guards the following cache state.
123 std::mutex Mu;
124 // When was the cache last known to be in sync with disk state?
125 stopwatch::time_point CachePopulatedAt = stopwatch::time_point::min();
126 // Whether a new CDB has been loaded but not broadcast yet.
127 bool NeedsBroadcast = false;
128 // Last loaded CDB, meaningful if CachePopulatedAt was ever set.
129 // shared_ptr so we can overwrite this when callers are still using the CDB.
130 std::shared_ptr<tooling::CompilationDatabase> CDB;
131 // File metadata for the CDB files we support tracking directly.
132 CachedFile CompileCommandsJson;
133 CachedFile BuildCompileCommandsJson;
134 CachedFile CompileFlagsTxt;
135 // CachedFile member corresponding to CDB.
136 // CDB | ACF | Scenario
137 // null | null | no CDB found, or initial empty cache
138 // set | null | CDB was loaded via generic plugin interface
139 // null | set | found known CDB file, but parsing it failed
140 // set | set | CDB was parsed from a known file
141 CachedFile *ActiveCachedFile = nullptr;
142
143public:
144 DirectoryCache(llvm::StringRef Path)
145 : CompileCommandsJson(Path, "compile_commands.json"),
146 BuildCompileCommandsJson(Path, "build/compile_commands.json"),
147 CompileFlagsTxt(Path, "compile_flags.txt"), Path(Path) {
148 assert(llvm::sys::path::is_absolute(Path));
149 }
150
151 // Absolute canonical path that we're the cache for. (Not case-folded).
152 const std::string Path;
153
154 // Get the CDB associated with this directory.
155 // ShouldBroadcast:
156 // - as input, signals whether the caller is willing to broadcast a
157 // newly-discovered CDB. (e.g. to trigger background indexing)
158 // - as output, signals whether the caller should do so.
159 // (If a new CDB is discovered and ShouldBroadcast is false, we mark the
160 // CDB as needing broadcast, and broadcast it next time we can).
161 std::shared_ptr<const tooling::CompilationDatabase>
162 get(const ThreadsafeFS &TFS, bool &ShouldBroadcast,
163 stopwatch::time_point FreshTime, stopwatch::time_point FreshTimeMissing) {
164 // Fast path for common case without taking lock.
165 if (stopwatch::time_point(stopwatch::duration(NoCDBAt.load())) >
166 FreshTimeMissing) {
167 ShouldBroadcast = false;
168 return nullptr;
169 }
170
171 std::lock_guard<std::mutex> Lock(Mu);
172 auto RequestBroadcast = llvm::make_scope_exit(F: [&, OldCDB(CDB.get())] {
173 // If we loaded a new CDB, it should be broadcast at some point.
174 if (CDB != nullptr && CDB.get() != OldCDB)
175 NeedsBroadcast = true;
176 else if (CDB == nullptr) // nothing to broadcast anymore!
177 NeedsBroadcast = false;
178 // If we have something to broadcast, then do so iff allowed.
179 if (!ShouldBroadcast)
180 return;
181 ShouldBroadcast = NeedsBroadcast;
182 NeedsBroadcast = false;
183 });
184
185 // If our cache is valid, serve from it.
186 if (CachePopulatedAt > FreshTime)
187 return CDB;
188
189 if (/*MayCache=*/load(FS&: *TFS.view(/*CWD=*/std::nullopt))) {
190 // Use new timestamp, as loading may be slow.
191 CachePopulatedAt = stopwatch::now();
192 NoCDBAt.store(i: (CDB ? stopwatch::time_point::min() : CachePopulatedAt)
193 .time_since_epoch()
194 .count());
195 }
196
197 return CDB;
198 }
199
200private:
201 // Updates `CDB` from disk state. Returns false on failure.
202 bool load(llvm::vfs::FileSystem &FS);
203};
204
205DirectoryBasedGlobalCompilationDatabase::DirectoryCache::CachedFile::LoadResult
206DirectoryBasedGlobalCompilationDatabase::DirectoryCache::CachedFile::load(
207 llvm::vfs::FileSystem &FS, bool HasOldData) {
208 auto Stat = FS.status(Path);
209 if (!Stat || !Stat->isRegularFile()) {
210 Size = NoFileCached;
211 ContentHash = {};
212 return {.Result: LoadResult::FileNotFound, .Buffer: nullptr};
213 }
214 // If both the size and mtime match, presume unchanged without reading.
215 if (HasOldData && Stat->getLastModificationTime() == ModifiedTime &&
216 Stat->getSize() == Size)
217 return {.Result: LoadResult::FoundSameData, .Buffer: nullptr};
218 auto Buf = FS.getBufferForFile(Name: Path);
219 if (!Buf || (*Buf)->getBufferSize() != Stat->getSize()) {
220 // Don't clear the cache - possible we're seeing inconsistent size as the
221 // file is being recreated. If it ends up identical later, great!
222 //
223 // This isn't a complete solution: if we see a partial file but stat/read
224 // agree on its size, we're ultimately going to have spurious CDB reloads.
225 // May be worth fixing if generators don't write atomically (CMake does).
226 elog(Fmt: "Failed to read {0}: {1}", Vals&: Path,
227 Vals: Buf ? "size changed" : Buf.getError().message());
228 return {.Result: LoadResult::TransientError, .Buffer: nullptr};
229 }
230
231 FileDigest NewContentHash = digest(Content: (*Buf)->getBuffer());
232 if (HasOldData && NewContentHash == ContentHash) {
233 // mtime changed but data is the same: avoid rebuilding the CDB.
234 ModifiedTime = Stat->getLastModificationTime();
235 return {.Result: LoadResult::FoundSameData, .Buffer: nullptr};
236 }
237
238 Size = (*Buf)->getBufferSize();
239 ModifiedTime = Stat->getLastModificationTime();
240 ContentHash = NewContentHash;
241 return {.Result: LoadResult::FoundNewData, .Buffer: std::move(*Buf)};
242}
243
244// Adapt CDB-loading functions to a common interface for DirectoryCache::load().
245static std::unique_ptr<tooling::CompilationDatabase>
246parseJSON(PathRef Path, llvm::StringRef Data, std::string &Error) {
247 if (auto CDB = tooling::JSONCompilationDatabase::loadFromBuffer(
248 DatabaseString: Data, ErrorMessage&: Error, Syntax: tooling::JSONCommandLineSyntax::AutoDetect)) {
249 // FS used for expanding response files.
250 // FIXME: ExpandResponseFilesDatabase appears not to provide the usual
251 // thread-safety guarantees, as the access to FS is not locked!
252 // For now, use the real FS, which is known to be threadsafe (if we don't
253 // use/change working directory, which ExpandResponseFilesDatabase doesn't).
254 // NOTE: response files have to be expanded before inference because
255 // inference needs full command line to check/fix driver mode and file type.
256 auto FS = llvm::vfs::getRealFileSystem();
257 return tooling::inferMissingCompileCommands(
258 expandResponseFiles(Base: std::move(CDB), FS: std::move(FS)));
259 }
260 return nullptr;
261}
262static std::unique_ptr<tooling::CompilationDatabase>
263parseFixed(PathRef Path, llvm::StringRef Data, std::string &Error) {
264 return tooling::FixedCompilationDatabase::loadFromBuffer(
265 Directory: llvm::sys::path::parent_path(path: Path), Data, ErrorMsg&: Error);
266}
267
268bool DirectoryBasedGlobalCompilationDatabase::DirectoryCache::load(
269 llvm::vfs::FileSystem &FS) {
270 dlog("Probing directory {0}", Path);
271 std::string Error;
272
273 // Load from the specially-supported compilation databases (JSON + Fixed).
274 // For these, we know the files they read and cache their metadata so we can
275 // cheaply validate whether they've changed, and hot-reload if they have.
276 // (As a bonus, these are also VFS-clean)!
277 struct CDBFile {
278 CachedFile *File;
279 // Wrapper for {Fixed,JSON}CompilationDatabase::loadFromBuffer.
280 std::unique_ptr<tooling::CompilationDatabase> (*Parser)(
281 PathRef,
282 /*Data*/ llvm::StringRef,
283 /*ErrorMsg*/ std::string &);
284 };
285 for (const auto &Entry : {CDBFile{.File: &CompileCommandsJson, .Parser: parseJSON},
286 CDBFile{.File: &BuildCompileCommandsJson, .Parser: parseJSON},
287 CDBFile{.File: &CompileFlagsTxt, .Parser: parseFixed}}) {
288 bool Active = ActiveCachedFile == Entry.File;
289 auto Loaded = Entry.File->load(FS, HasOldData: Active);
290 switch (Loaded.Result) {
291 case CachedFile::LoadResult::FileNotFound:
292 if (Active) {
293 log(Fmt: "Unloaded compilation database from {0}", Vals&: Entry.File->Path);
294 ActiveCachedFile = nullptr;
295 CDB = nullptr;
296 }
297 // Continue looking at other candidates.
298 break;
299 case CachedFile::LoadResult::TransientError:
300 // File existed but we couldn't read it. Reuse the cache, retry later.
301 return false; // Load again next time.
302 case CachedFile::LoadResult::FoundSameData:
303 assert(Active && "CachedFile may not return 'same data' if !HasOldData");
304 // This is the critical file, and it hasn't changed.
305 return true;
306 case CachedFile::LoadResult::FoundNewData:
307 // We have a new CDB!
308 CDB = Entry.Parser(Entry.File->Path, Loaded.Buffer->getBuffer(), Error);
309 if (CDB)
310 log(Fmt: "{0} compilation database from {1}", Vals: Active ? "Reloaded" : "Loaded",
311 Vals&: Entry.File->Path);
312 else
313 elog(Fmt: "Failed to load compilation database from {0}: {1}",
314 Vals&: Entry.File->Path, Vals&: Error);
315 ActiveCachedFile = Entry.File;
316 return true;
317 }
318 }
319
320 // Fall back to generic handling of compilation databases.
321 // We don't know what files they read, so can't efficiently check whether
322 // they need to be reloaded. So we never do that.
323 // FIXME: the interface doesn't provide a way to virtualize FS access.
324
325 // Don't try these more than once. If we've scanned before, we're done.
326 if (CachePopulatedAt > stopwatch::time_point::min())
327 return true;
328 for (const auto &Entry :
329 tooling::CompilationDatabasePluginRegistry::entries()) {
330 // Avoid duplicating the special cases handled above.
331 if (Entry.getName() == "fixed-compilation-database" ||
332 Entry.getName() == "json-compilation-database")
333 continue;
334 auto Plugin = Entry.instantiate();
335 if (auto CDB = Plugin->loadFromDirectory(Directory: Path, ErrorMessage&: Error)) {
336 log(Fmt: "Loaded compilation database from {0} with plugin {1}", Vals: Path,
337 Vals: Entry.getName());
338 this->CDB = std::move(CDB);
339 return true;
340 }
341 // Don't log Error here, it's usually just "couldn't find <file>".
342 }
343 dlog("No compilation database at {0}", Path);
344 return true;
345}
346
347DirectoryBasedGlobalCompilationDatabase::
348 DirectoryBasedGlobalCompilationDatabase(const Options &Opts)
349 : Opts(Opts), Broadcaster(std::make_unique<BroadcastThread>(args&: *this)) {
350 if (!this->Opts.ContextProvider)
351 this->Opts.ContextProvider = [](llvm::StringRef) {
352 return Context::current().clone();
353 };
354}
355
356DirectoryBasedGlobalCompilationDatabase::
357 ~DirectoryBasedGlobalCompilationDatabase() = default;
358
359std::optional<tooling::CompileCommand>
360DirectoryBasedGlobalCompilationDatabase::getCompileCommand(PathRef File) const {
361 CDBLookupRequest Req;
362 Req.FileName = File;
363 Req.ShouldBroadcast = true;
364 auto Now = std::chrono::steady_clock::now();
365 Req.FreshTime = Now - Opts.RevalidateAfter;
366 Req.FreshTimeMissing = Now - Opts.RevalidateMissingAfter;
367
368 auto Res = lookupCDB(Request: Req);
369 if (!Res) {
370 log(Fmt: "Failed to find compilation database for {0}", Vals&: File);
371 return std::nullopt;
372 }
373
374 auto Candidates = Res->CDB->getCompileCommands(FilePath: File);
375 if (!Candidates.empty())
376 return std::move(Candidates.front());
377
378 return std::nullopt;
379}
380
381std::vector<DirectoryBasedGlobalCompilationDatabase::DirectoryCache *>
382DirectoryBasedGlobalCompilationDatabase::getDirectoryCaches(
383 llvm::ArrayRef<llvm::StringRef> Dirs) const {
384 std::vector<std::string> FoldedDirs;
385 FoldedDirs.reserve(n: Dirs.size());
386 for (const auto &Dir : Dirs) {
387#ifndef NDEBUG
388 if (!llvm::sys::path::is_absolute(path: Dir))
389 elog(Fmt: "Trying to cache CDB for relative {0}");
390#endif
391 FoldedDirs.push_back(x: maybeCaseFoldPath(Path: Dir));
392 }
393
394 std::vector<DirectoryCache *> Ret;
395 Ret.reserve(n: Dirs.size());
396
397 std::lock_guard<std::mutex> Lock(DirCachesMutex);
398 for (unsigned I = 0; I < Dirs.size(); ++I)
399 Ret.push_back(x: &DirCaches.try_emplace(Key: FoldedDirs[I], Args: Dirs[I]).first->second);
400 return Ret;
401}
402
403std::optional<DirectoryBasedGlobalCompilationDatabase::CDBLookupResult>
404DirectoryBasedGlobalCompilationDatabase::lookupCDB(
405 CDBLookupRequest Request) const {
406 assert(llvm::sys::path::is_absolute(Request.FileName) &&
407 "path must be absolute");
408
409 std::string Storage;
410 std::vector<llvm::StringRef> SearchDirs;
411 if (Opts.CompileCommandsDir) // FIXME: unify this case with config.
412 SearchDirs = {*Opts.CompileCommandsDir};
413 else {
414 WithContext WithProvidedContext(Opts.ContextProvider(Request.FileName));
415 const auto &Spec = Config::current().CompileFlags.CDBSearch;
416 switch (Spec.Policy) {
417 case Config::CDBSearchSpec::NoCDBSearch:
418 return std::nullopt;
419 case Config::CDBSearchSpec::FixedDir:
420 Storage = *Spec.FixedCDBPath;
421 SearchDirs = {Storage};
422 break;
423 case Config::CDBSearchSpec::Ancestors:
424 // Traverse the canonical version to prevent false positives. i.e.:
425 // src/build/../a.cc can detect a CDB in /src/build if not
426 // canonicalized.
427 Storage = removeDots(File: Request.FileName);
428 actOnAllParentDirectories(FileName: Storage, Action: [&](llvm::StringRef Dir) {
429 SearchDirs.push_back(x: Dir);
430 return false;
431 });
432 }
433 }
434
435 std::shared_ptr<const tooling::CompilationDatabase> CDB = nullptr;
436 bool ShouldBroadcast = false;
437 DirectoryCache *DirCache = nullptr;
438 for (DirectoryCache *Candidate : getDirectoryCaches(Dirs: SearchDirs)) {
439 bool CandidateShouldBroadcast = Request.ShouldBroadcast;
440 if ((CDB = Candidate->get(TFS: Opts.TFS, ShouldBroadcast&: CandidateShouldBroadcast,
441 FreshTime: Request.FreshTime, FreshTimeMissing: Request.FreshTimeMissing))) {
442 DirCache = Candidate;
443 ShouldBroadcast = CandidateShouldBroadcast;
444 break;
445 }
446 }
447
448 if (!CDB)
449 return std::nullopt;
450
451 CDBLookupResult Result;
452 Result.CDB = std::move(CDB);
453 Result.PI.SourceRoot = DirCache->Path;
454
455 if (ShouldBroadcast)
456 broadcastCDB(Res: Result);
457 return Result;
458}
459
460// The broadcast thread announces files with new compile commands to the world.
461// Primarily this is used to enqueue them for background indexing.
462//
463// It's on a separate thread because:
464// - otherwise it would block the first parse of the initial file
465// - we need to enumerate all files in the CDB, of which there are many
466// - we (will) have to evaluate config for every file in the CDB, which is slow
467class DirectoryBasedGlobalCompilationDatabase::BroadcastThread {
468 class Filter;
469 DirectoryBasedGlobalCompilationDatabase &Parent;
470
471 std::mutex Mu;
472 std::condition_variable CV;
473 // Shutdown flag (CV is notified after writing).
474 // This is atomic so that broadcasts can also observe it and abort early.
475 std::atomic<bool> ShouldStop = {false};
476 struct Task {
477 CDBLookupResult Lookup;
478 Context Ctx;
479 };
480 std::deque<Task> Queue;
481 std::optional<Task> ActiveTask;
482 std::thread Thread; // Must be last member.
483
484 // Thread body: this is just the basic queue procesing boilerplate.
485 void run() {
486 std::unique_lock<std::mutex> Lock(Mu);
487 while (true) {
488 bool Stopping = false;
489 CV.wait(lock&: Lock, p: [&] {
490 return (Stopping = ShouldStop.load(m: std::memory_order_acquire)) ||
491 !Queue.empty();
492 });
493 if (Stopping) {
494 Queue.clear();
495 CV.notify_all();
496 return;
497 }
498 ActiveTask = std::move(Queue.front());
499 Queue.pop_front();
500
501 Lock.unlock();
502 {
503 WithContext WithCtx(std::move(ActiveTask->Ctx));
504 process(T: ActiveTask->Lookup);
505 }
506 Lock.lock();
507 ActiveTask.reset();
508 CV.notify_all();
509 }
510 }
511
512 // Inspects a new CDB and broadcasts the files it owns.
513 void process(const CDBLookupResult &T);
514
515public:
516 BroadcastThread(DirectoryBasedGlobalCompilationDatabase &Parent)
517 : Parent(Parent), Thread([this] { run(); }) {}
518
519 void enqueue(CDBLookupResult Lookup) {
520 {
521 assert(!Lookup.PI.SourceRoot.empty());
522 std::lock_guard<std::mutex> Lock(Mu);
523 // New CDB takes precedence over any queued one for the same directory.
524 llvm::erase_if(C&: Queue, P: [&](const Task &T) {
525 return T.Lookup.PI.SourceRoot == Lookup.PI.SourceRoot;
526 });
527 Queue.push_back(x: {.Lookup: std::move(Lookup), .Ctx: Context::current().clone()});
528 }
529 CV.notify_all();
530 }
531
532 bool blockUntilIdle(Deadline Timeout) {
533 std::unique_lock<std::mutex> Lock(Mu);
534 return wait(Lock, CV, D: Timeout,
535 F: [&] { return Queue.empty() && !ActiveTask; });
536 }
537
538 ~BroadcastThread() {
539 {
540 std::lock_guard<std::mutex> Lock(Mu);
541 ShouldStop.store(i: true, m: std::memory_order_release);
542 }
543 CV.notify_all();
544 Thread.join();
545 }
546};
547
548// The DirBasedCDB associates each file with a specific CDB.
549// When a CDB is discovered, it may claim to describe files that we associate
550// with a different CDB. We do not want to broadcast discovery of these, and
551// trigger background indexing of them.
552//
553// We must filter the list, and check whether they are associated with this CDB.
554// This class attempts to do so efficiently.
555//
556// Roughly, it:
557// - loads the config for each file, and determines the relevant search path
558// - gathers all directories that are part of any search path
559// - (lazily) checks for a CDB in each such directory at most once
560// - walks the search path for each file and determines whether to include it.
561class DirectoryBasedGlobalCompilationDatabase::BroadcastThread::Filter {
562 llvm::StringRef ThisDir;
563 DirectoryBasedGlobalCompilationDatabase &Parent;
564
565 // Keep track of all directories we might check for CDBs.
566 struct DirInfo {
567 DirectoryCache *Cache = nullptr;
568 enum { Unknown, Missing, TargetCDB, OtherCDB } State = Unknown;
569 DirInfo *Parent = nullptr;
570 };
571 llvm::StringMap<DirInfo> Dirs;
572
573 // A search path starts at a directory, and either includes ancestors or not.
574 using SearchPath = llvm::PointerIntPair<DirInfo *, 1>;
575
576 // Add all ancestor directories of FilePath to the tracked set.
577 // Returns the immediate parent of the file.
578 DirInfo *addParents(llvm::StringRef FilePath) {
579 DirInfo *Leaf = nullptr;
580 DirInfo *Child = nullptr;
581 actOnAllParentDirectories(FileName: FilePath, Action: [&](llvm::StringRef Dir) {
582 auto &Info = Dirs[Dir];
583 // If this is the first iteration, then this node is the overall result.
584 if (!Leaf)
585 Leaf = &Info;
586 // Fill in the parent link from the previous iteration to this parent.
587 if (Child)
588 Child->Parent = &Info;
589 // Keep walking, whether we inserted or not, if parent link is missing.
590 // (If it's present, parent links must be present up to the root, so stop)
591 Child = &Info;
592 return Info.Parent != nullptr;
593 });
594 return Leaf;
595 }
596
597 // Populates DirInfo::Cache (and State, if it is TargetCDB).
598 void grabCaches() {
599 // Fast path out if there were no files, or CDB loading is off.
600 if (Dirs.empty())
601 return;
602
603 std::vector<llvm::StringRef> DirKeys;
604 std::vector<DirInfo *> DirValues;
605 DirKeys.reserve(n: Dirs.size() + 1);
606 DirValues.reserve(n: Dirs.size());
607 for (auto &E : Dirs) {
608 DirKeys.push_back(x: E.first());
609 DirValues.push_back(x: &E.second);
610 }
611
612 // Also look up the cache entry for the CDB we're broadcasting.
613 // Comparing DirectoryCache pointers is more robust than checking string
614 // equality, e.g. reuses the case-sensitivity handling.
615 DirKeys.push_back(x: ThisDir);
616 auto DirCaches = Parent.getDirectoryCaches(Dirs: DirKeys);
617 const DirectoryCache *ThisCache = DirCaches.back();
618 DirCaches.pop_back();
619 DirKeys.pop_back();
620
621 for (unsigned I = 0; I < DirKeys.size(); ++I) {
622 DirValues[I]->Cache = DirCaches[I];
623 if (DirCaches[I] == ThisCache)
624 DirValues[I]->State = DirInfo::TargetCDB;
625 }
626 }
627
628 // Should we include a file from this search path?
629 bool shouldInclude(SearchPath P) {
630 DirInfo *Info = P.getPointer();
631 if (!Info)
632 return false;
633 if (Info->State == DirInfo::Unknown) {
634 assert(Info->Cache && "grabCaches() should have filled this");
635 // Given that we know that CDBs have been moved/generated, don't trust
636 // caches. (This should be rare, so it's OK to add a little latency).
637 constexpr auto IgnoreCache = std::chrono::steady_clock::time_point::max();
638 // Don't broadcast CDBs discovered while broadcasting!
639 bool ShouldBroadcast = false;
640 bool Exists =
641 nullptr != Info->Cache->get(TFS: Parent.Opts.TFS, ShouldBroadcast,
642 /*FreshTime=*/IgnoreCache,
643 /*FreshTimeMissing=*/IgnoreCache);
644 Info->State = Exists ? DirInfo::OtherCDB : DirInfo::Missing;
645 }
646 // If we have a CDB, include the file if it's the target CDB only.
647 if (Info->State != DirInfo::Missing)
648 return Info->State == DirInfo::TargetCDB;
649 // If we have no CDB and no relevant parent, don't include the file.
650 if (!P.getInt() || !Info->Parent)
651 return false;
652 // Walk up to the next parent.
653 return shouldInclude(P: SearchPath(Info->Parent, 1));
654 }
655
656public:
657 Filter(llvm::StringRef ThisDir,
658 DirectoryBasedGlobalCompilationDatabase &Parent)
659 : ThisDir(ThisDir), Parent(Parent) {}
660
661 std::vector<std::string> filter(std::vector<std::string> AllFiles,
662 std::atomic<bool> &ShouldStop) {
663 std::vector<std::string> Filtered;
664 // Allow for clean early-exit of the slow parts.
665 auto ExitEarly = [&] {
666 if (ShouldStop.load(m: std::memory_order_acquire)) {
667 log(Fmt: "Giving up on broadcasting CDB, as we're shutting down");
668 Filtered.clear();
669 return true;
670 }
671 return false;
672 };
673 // Compute search path for each file.
674 std::vector<SearchPath> SearchPaths(AllFiles.size());
675 for (unsigned I = 0; I < AllFiles.size(); ++I) {
676 if (Parent.Opts.CompileCommandsDir) { // FIXME: unify with config
677 SearchPaths[I].setPointer(&Dirs[*Parent.Opts.CompileCommandsDir]);
678 continue;
679 }
680 if (ExitEarly()) // loading config may be slow
681 return Filtered;
682 WithContext WithProvidedContent(Parent.Opts.ContextProvider(AllFiles[I]));
683 const Config::CDBSearchSpec &Spec =
684 Config::current().CompileFlags.CDBSearch;
685 switch (Spec.Policy) {
686 case Config::CDBSearchSpec::NoCDBSearch:
687 break;
688 case Config::CDBSearchSpec::Ancestors:
689 SearchPaths[I].setInt(/*Recursive=*/1);
690 SearchPaths[I].setPointer(addParents(FilePath: AllFiles[I]));
691 break;
692 case Config::CDBSearchSpec::FixedDir:
693 SearchPaths[I].setPointer(&Dirs[*Spec.FixedCDBPath]);
694 break;
695 }
696 }
697 // Get the CDB cache for each dir on the search path, but don't load yet.
698 grabCaches();
699 // Now work out which files we want to keep, loading CDBs where needed.
700 for (unsigned I = 0; I < AllFiles.size(); ++I) {
701 if (ExitEarly()) // loading CDBs may be slow
702 return Filtered;
703 if (shouldInclude(P: SearchPaths[I]))
704 Filtered.push_back(x: std::move(AllFiles[I]));
705 }
706 return Filtered;
707 }
708};
709
710void DirectoryBasedGlobalCompilationDatabase::BroadcastThread::process(
711 const CDBLookupResult &T) {
712 vlog(Fmt: "Broadcasting compilation database from {0}", Vals: T.PI.SourceRoot);
713 std::vector<std::string> GovernedFiles =
714 Filter(T.PI.SourceRoot, Parent).filter(AllFiles: T.CDB->getAllFiles(), ShouldStop);
715 if (!GovernedFiles.empty())
716 Parent.OnCommandChanged.broadcast(V: std::move(GovernedFiles));
717}
718
719void DirectoryBasedGlobalCompilationDatabase::broadcastCDB(
720 CDBLookupResult Result) const {
721 assert(Result.CDB && "Trying to broadcast an invalid CDB!");
722 Broadcaster->enqueue(Lookup: Result);
723}
724
725bool DirectoryBasedGlobalCompilationDatabase::blockUntilIdle(
726 Deadline Timeout) const {
727 return Broadcaster->blockUntilIdle(Timeout);
728}
729
730std::optional<ProjectInfo>
731DirectoryBasedGlobalCompilationDatabase::getProjectInfo(PathRef File) const {
732 CDBLookupRequest Req;
733 Req.FileName = File;
734 Req.ShouldBroadcast = false;
735 Req.FreshTime = Req.FreshTimeMissing =
736 std::chrono::steady_clock::time_point::min();
737 auto Res = lookupCDB(Request: Req);
738 if (!Res)
739 return std::nullopt;
740 return Res->PI;
741}
742
743OverlayCDB::OverlayCDB(const GlobalCompilationDatabase *Base,
744 std::vector<std::string> FallbackFlags,
745 CommandMangler Mangler)
746 : DelegatingCDB(Base), Mangler(std::move(Mangler)),
747 FallbackFlags(std::move(FallbackFlags)) {}
748
749std::optional<tooling::CompileCommand>
750OverlayCDB::getCompileCommand(PathRef File) const {
751 std::optional<tooling::CompileCommand> Cmd;
752 {
753 std::lock_guard<std::mutex> Lock(Mutex);
754 auto It = Commands.find(Key: removeDots(File));
755 if (It != Commands.end())
756 Cmd = It->second;
757 }
758 if (Cmd) {
759 // FS used for expanding response files.
760 // FIXME: ExpandResponseFiles appears not to provide the usual
761 // thread-safety guarantees, as the access to FS is not locked!
762 // For now, use the real FS, which is known to be threadsafe (if we don't
763 // use/change working directory, which ExpandResponseFiles doesn't).
764 auto FS = llvm::vfs::getRealFileSystem();
765 auto Tokenizer = llvm::Triple(llvm::sys::getProcessTriple()).isOSWindows()
766 ? llvm::cl::TokenizeWindowsCommandLine
767 : llvm::cl::TokenizeGNUCommandLine;
768 // Compile command pushed via LSP protocol may have response files that need
769 // to be expanded before further processing. For CDB for files it happens in
770 // the main CDB when reading it from the JSON file.
771 tooling::addExpandedResponseFiles(CommandLine&: Cmd->CommandLine, WorkingDir: Cmd->Directory,
772 Tokenizer, FS&: *FS);
773 }
774 if (!Cmd)
775 Cmd = DelegatingCDB::getCompileCommand(File);
776 if (!Cmd)
777 return std::nullopt;
778 if (Mangler)
779 Mangler(*Cmd, File);
780 return Cmd;
781}
782
783tooling::CompileCommand OverlayCDB::getFallbackCommand(PathRef File) const {
784 auto Cmd = DelegatingCDB::getFallbackCommand(File);
785 std::lock_guard<std::mutex> Lock(Mutex);
786 Cmd.CommandLine.insert(position: Cmd.CommandLine.end(), first: FallbackFlags.begin(),
787 last: FallbackFlags.end());
788 if (Mangler)
789 Mangler(Cmd, File);
790 return Cmd;
791}
792
793void OverlayCDB::setCompileCommand(PathRef File,
794 std::optional<tooling::CompileCommand> Cmd) {
795 // We store a canonical version internally to prevent mismatches between set
796 // and get compile commands. Also it assures clients listening to broadcasts
797 // doesn't receive different names for the same file.
798 std::string CanonPath = removeDots(File);
799 {
800 std::unique_lock<std::mutex> Lock(Mutex);
801 if (Cmd)
802 Commands[CanonPath] = std::move(*Cmd);
803 else
804 Commands.erase(Key: CanonPath);
805 }
806 OnCommandChanged.broadcast(V: {CanonPath});
807}
808
809DelegatingCDB::DelegatingCDB(const GlobalCompilationDatabase *Base)
810 : Base(Base) {
811 if (Base)
812 BaseChanged = Base->watch(L: [this](const std::vector<std::string> Changes) {
813 OnCommandChanged.broadcast(V: Changes);
814 });
815}
816
817DelegatingCDB::DelegatingCDB(std::unique_ptr<GlobalCompilationDatabase> Base)
818 : DelegatingCDB(Base.get()) {
819 BaseOwner = std::move(Base);
820}
821
822std::optional<tooling::CompileCommand>
823DelegatingCDB::getCompileCommand(PathRef File) const {
824 if (!Base)
825 return std::nullopt;
826 return Base->getCompileCommand(File);
827}
828
829std::optional<ProjectInfo> DelegatingCDB::getProjectInfo(PathRef File) const {
830 if (!Base)
831 return std::nullopt;
832 return Base->getProjectInfo(File);
833}
834
835tooling::CompileCommand DelegatingCDB::getFallbackCommand(PathRef File) const {
836 if (!Base)
837 return GlobalCompilationDatabase::getFallbackCommand(File);
838 return Base->getFallbackCommand(File);
839}
840
841bool DelegatingCDB::blockUntilIdle(Deadline D) const {
842 if (!Base)
843 return true;
844 return Base->blockUntilIdle(D);
845}
846
847} // namespace clangd
848} // namespace clang
849

// Source code of clang-tools-extra/clangd/GlobalCompilationDatabase.cpp