1//===-- StdLib.cpp ----------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8#include "StdLib.h"
9#include <fstream>
10#include <memory>
11#include <optional>
12#include <string>
13#include <vector>
14
15#include "Compiler.h"
16#include "Config.h"
17#include "SymbolCollector.h"
18#include "index/IndexAction.h"
19#include "support/Logger.h"
20#include "support/ThreadsafeFS.h"
21#include "support/Trace.h"
22#include "clang/Basic/LangOptions.h"
23#include "clang/Frontend/CompilerInvocation.h"
24#include "clang/Lex/PreprocessorOptions.h"
25#include "clang/Tooling/Inclusions/StandardLibrary.h"
26#include "llvm/ADT/IntrusiveRefCntPtr.h"
27#include "llvm/ADT/StringRef.h"
28#include "llvm/Support/MemoryBuffer.h"
29#include "llvm/Support/Path.h"
30
31namespace clang {
32namespace clangd {
33namespace {
34
35enum Lang { C, CXX };
36
37Lang langFromOpts(const LangOptions &LO) { return LO.CPlusPlus ? CXX : C; }
38llvm::StringLiteral mandatoryHeader(Lang L) {
39 switch (L) {
40 case C:
41 return "stdio.h";
42 case CXX:
43 return "vector";
44 }
45 llvm_unreachable("unhandled Lang");
46}
47
48LangStandard::Kind standardFromOpts(const LangOptions &LO) {
49 if (LO.CPlusPlus) {
50 if (LO.CPlusPlus23)
51 return LangStandard::lang_cxx23;
52 if (LO.CPlusPlus20)
53 return LangStandard::lang_cxx20;
54 if (LO.CPlusPlus17)
55 return LangStandard::lang_cxx17;
56 if (LO.CPlusPlus14)
57 return LangStandard::lang_cxx14;
58 if (LO.CPlusPlus11)
59 return LangStandard::lang_cxx11;
60 return LangStandard::lang_cxx98;
61 }
62 if (LO.C23)
63 return LangStandard::lang_c23;
64 // C17 has no new features, so treat {C11,C17} as C17.
65 if (LO.C11)
66 return LangStandard::lang_c17;
67 return LangStandard::lang_c99;
68}
69
70std::string buildUmbrella(llvm::StringLiteral Mandatory,
71 llvm::ArrayRef<tooling::stdlib::Header> Headers) {
72 std::string Result;
73 llvm::raw_string_ostream OS(Result);
74
75 // We __has_include guard all our #includes to avoid errors when using older
76 // stdlib version that don't have headers for the newest language standards.
77 // But make sure we get *some* error if things are totally broken.
78 OS << llvm::formatv(
79 Fmt: "#if !__has_include(<{0}>)\n"
80 "#error Mandatory header <{0}> not found in standard library!\n"
81 "#endif\n",
82 Vals&: Mandatory);
83
84 for (auto Header : Headers) {
85 OS << llvm::formatv(Fmt: "#if __has_include({0})\n"
86 "#include {0}\n"
87 "#endif\n",
88 Vals&: Header);
89 }
90 OS.flush();
91 return Result;
92}
93
94} // namespace
95
96llvm::StringRef getStdlibUmbrellaHeader(const LangOptions &LO) {
97 // The umbrella header is the same for all versions of each language.
98 // Headers that are unsupported in old lang versions are usually guarded by
99 // #if. Some headers may be not present in old stdlib versions, the umbrella
100 // header guards with __has_include for this purpose.
101 Lang L = langFromOpts(LO);
102 switch (L) {
103 case CXX:
104 static std::string *UmbrellaCXX = new std::string(buildUmbrella(
105 Mandatory: mandatoryHeader(L),
106 Headers: tooling::stdlib::Header::all(L: tooling::stdlib::Lang::CXX)));
107 return *UmbrellaCXX;
108 case C:
109 static std::string *UmbrellaC = new std::string(
110 buildUmbrella(Mandatory: mandatoryHeader(L),
111 Headers: tooling::stdlib::Header::all(L: tooling::stdlib::Lang::C)));
112 return *UmbrellaC;
113 }
114 llvm_unreachable("invalid Lang in langFromOpts");
115}
116
117namespace {
118
119// Including the standard library leaks unwanted transitively included symbols.
120//
121// We want to drop these, they're a bit tricky to identify:
122// - we don't want to limit to symbols on our list, as our list has only
123// top-level symbols (and there may be legitimate stdlib extensions).
124// - we can't limit to only symbols defined in known stdlib headers, as stdlib
125// internal structure is murky
126// - we can't strictly require symbols to come from a particular path, e.g.
127// libstdc++ is mostly under /usr/include/c++/10/...
128// but std::ctype_base is under /usr/include/<platform>/c++/10/...
129// We require the symbol to come from a header that is *either* from
130// the standard library path (as identified by the location of <vector>), or
131// another header that defines a symbol from our stdlib list.
132SymbolSlab filter(SymbolSlab Slab, const StdLibLocation &Loc) {
133 SymbolSlab::Builder Result;
134
135 static auto &StandardHeaders = *[] {
136 auto *Set = new llvm::DenseSet<llvm::StringRef>();
137 for (auto Header : tooling::stdlib::Header::all(L: tooling::stdlib::Lang::CXX))
138 Set->insert(V: Header.name());
139 for (auto Header : tooling::stdlib::Header::all(L: tooling::stdlib::Lang::C))
140 Set->insert(V: Header.name());
141 return Set;
142 }();
143
144 // Form prefixes like file:///usr/include/c++/10/
145 // These can be trivially prefix-compared with URIs in the indexed symbols.
146 llvm::SmallVector<std::string> StdLibURIPrefixes;
147 for (const auto &Path : Loc.Paths) {
148 StdLibURIPrefixes.push_back(Elt: URI::create(AbsolutePath: Path).toString());
149 if (StdLibURIPrefixes.back().back() != '/')
150 StdLibURIPrefixes.back().push_back(c: '/');
151 }
152 // For each header URI, is it *either* prefixed by StdLibURIPrefixes *or*
153 // owner of a symbol whose insertable header is in StandardHeaders?
154 // Pointer key because strings in a SymbolSlab are interned.
155 llvm::DenseMap<const char *, bool> GoodHeader;
156 for (const Symbol &S : Slab) {
157 if (!S.IncludeHeaders.empty() &&
158 StandardHeaders.contains(V: S.IncludeHeaders.front().IncludeHeader)) {
159 GoodHeader[S.CanonicalDeclaration.FileURI] = true;
160 GoodHeader[S.Definition.FileURI] = true;
161 continue;
162 }
163 for (const char *URI :
164 {S.CanonicalDeclaration.FileURI, S.Definition.FileURI}) {
165 auto R = GoodHeader.try_emplace(Key: URI, Args: false);
166 if (R.second) {
167 R.first->second = llvm::any_of(
168 Range&: StdLibURIPrefixes,
169 P: [&, URIStr(llvm::StringRef(URI))](const std::string &Prefix) {
170 return URIStr.starts_with(Prefix);
171 });
172 }
173 }
174 }
175#ifndef NDEBUG
176 for (const auto &Good : GoodHeader)
177 if (Good.second && *Good.first)
178 dlog("Stdlib header: {0}", Good.first);
179#endif
180 // Empty URIs aren't considered good. (Definition can be blank).
181 auto IsGoodHeader = [&](const char *C) { return *C && GoodHeader.lookup(Val: C); };
182
183 for (const Symbol &S : Slab) {
184 if (!(IsGoodHeader(S.CanonicalDeclaration.FileURI) ||
185 IsGoodHeader(S.Definition.FileURI))) {
186 dlog("Ignoring wrong-header symbol {0}{1} in {2}", S.Scope, S.Name,
187 S.CanonicalDeclaration.FileURI);
188 continue;
189 }
190 Result.insert(S);
191 }
192
193 return std::move(Result).build();
194}
195
196} // namespace
197
198SymbolSlab indexStandardLibrary(llvm::StringRef HeaderSources,
199 std::unique_ptr<CompilerInvocation> CI,
200 const StdLibLocation &Loc,
201 const ThreadsafeFS &TFS) {
202 if (CI->getFrontendOpts().Inputs.size() != 1 ||
203 !CI->getPreprocessorOpts().ImplicitPCHInclude.empty()) {
204 elog(Fmt: "Indexing standard library failed: bad CompilerInvocation");
205 assert(false && "indexing stdlib with a dubious CompilerInvocation!");
206 return SymbolSlab();
207 }
208 const FrontendInputFile &Input = CI->getFrontendOpts().Inputs.front();
209 trace::Span Tracer("StandardLibraryIndex");
210 LangStandard::Kind LangStd = standardFromOpts(LO: CI->getLangOpts());
211 log(Fmt: "Indexing {0} standard library in the context of {1}",
212 Vals: LangStandard::getLangStandardForKind(K: LangStd).getName(), Vals: Input.getFile());
213
214 SymbolSlab Symbols;
215 IgnoreDiagnostics IgnoreDiags;
216 // CompilerInvocation is taken from elsewhere, and may map a dirty buffer.
217 CI->getPreprocessorOpts().clearRemappedFiles();
218 auto Clang = prepareCompilerInstance(
219 std::move(CI), /*Preamble=*/nullptr,
220 MainFile: llvm::MemoryBuffer::getMemBuffer(InputData: HeaderSources, BufferName: Input.getFile()),
221 TFS.view(/*CWD=*/std::nullopt), IgnoreDiags);
222 if (!Clang) {
223 elog(Fmt: "Standard Library Index: Couldn't build compiler instance");
224 return Symbols;
225 }
226
227 SymbolCollector::Options IndexOpts;
228 IndexOpts.Origin = SymbolOrigin::StdLib;
229 IndexOpts.CollectMainFileSymbols = false;
230 IndexOpts.CollectMainFileRefs = false;
231 IndexOpts.CollectMacro = true;
232 IndexOpts.StoreAllDocumentation = true;
233 // Sadly we can't use IndexOpts.FileFilter to restrict indexing scope.
234 // Files from outside the StdLibLocation may define true std symbols anyway.
235 // We end up "blessing" such headers, and can only do that by indexing
236 // everything first.
237
238 // Refs, relations, include graph in the stdlib mostly aren't useful.
239 auto Action = createStaticIndexingAction(
240 Opts: IndexOpts, SymbolsCallback: [&](SymbolSlab S) { Symbols = std::move(S); }, RefsCallback: nullptr,
241 RelationsCallback: nullptr, IncludeGraphCallback: nullptr);
242
243 if (!Action->BeginSourceFile(CI&: *Clang, Input)) {
244 elog(Fmt: "Standard Library Index: BeginSourceFile() failed");
245 return Symbols;
246 }
247
248 if (llvm::Error Err = Action->Execute()) {
249 elog(Fmt: "Standard Library Index: Execute failed: {0}", Vals: std::move(Err));
250 return Symbols;
251 }
252
253 Action->EndSourceFile();
254
255 unsigned SymbolsBeforeFilter = Symbols.size();
256 Symbols = filter(Slab: std::move(Symbols), Loc);
257 bool Errors = Clang->hasDiagnostics() &&
258 Clang->getDiagnostics().hasUncompilableErrorOccurred();
259 log(Fmt: "Indexed {0} standard library{3}: {1} symbols, {2} filtered",
260 Vals: LangStandard::getLangStandardForKind(K: LangStd).getName(), Vals: Symbols.size(),
261 Vals: SymbolsBeforeFilter - Symbols.size(),
262 Vals: Errors ? " (incomplete due to errors)" : "");
263 SPAN_ATTACH(Tracer, "symbols", int(Symbols.size()));
264 return Symbols;
265}
266
267SymbolSlab indexStandardLibrary(std::unique_ptr<CompilerInvocation> Invocation,
268 const StdLibLocation &Loc,
269 const ThreadsafeFS &TFS) {
270 llvm::StringRef Header = getStdlibUmbrellaHeader(LO: Invocation->getLangOpts());
271 return indexStandardLibrary(HeaderSources: Header, CI: std::move(Invocation), Loc, TFS);
272}
273
274bool StdLibSet::isBest(const LangOptions &LO) const {
275 return standardFromOpts(LO) >=
276 Best[langFromOpts(LO)].load(m: std::memory_order_acquire);
277}
278
279std::optional<StdLibLocation> StdLibSet::add(const LangOptions &LO,
280 const HeaderSearch &HS) {
281 Lang L = langFromOpts(LO);
282 int OldVersion = Best[L].load(m: std::memory_order_acquire);
283 int NewVersion = standardFromOpts(LO);
284 dlog("Index stdlib? {0}",
285 LangStandard::getLangStandardForKind(standardFromOpts(LO)).getName());
286
287 if (!Config::current().Index.StandardLibrary) {
288 dlog("No: disabled in config");
289 return std::nullopt;
290 }
291
292 if (NewVersion <= OldVersion) {
293 dlog("No: have {0}, {1}>={2}",
294 LangStandard::getLangStandardForKind(
295 static_cast<LangStandard::Kind>(NewVersion))
296 .getName(),
297 OldVersion, NewVersion);
298 return std::nullopt;
299 }
300
301 // We'd like to index a standard library here if there is one.
302 // Check for the existence of <vector> on the search path.
303 // We could cache this, but we only get here repeatedly when there's no
304 // stdlib, and even then only once per preamble build.
305 llvm::StringLiteral ProbeHeader = mandatoryHeader(L);
306 llvm::SmallString<256> Path; // Scratch space.
307 llvm::SmallVector<std::string> SearchPaths;
308 auto RecordHeaderPath = [&](llvm::StringRef HeaderPath) {
309 llvm::StringRef DirPath = llvm::sys::path::parent_path(path: HeaderPath);
310 if (!HS.getFileMgr().getVirtualFileSystem().getRealPath(Path: DirPath, Output&: Path))
311 SearchPaths.emplace_back(Args&: Path);
312 };
313 for (const auto &DL :
314 llvm::make_range(x: HS.search_dir_begin(), y: HS.search_dir_end())) {
315 switch (DL.getLookupType()) {
316 case DirectoryLookup::LT_NormalDir: {
317 Path = DL.getDirRef()->getName();
318 llvm::sys::path::append(path&: Path, a: ProbeHeader);
319 llvm::vfs::Status Stat;
320 if (!HS.getFileMgr().getNoncachedStatValue(Path, Result&: Stat) &&
321 Stat.isRegularFile())
322 RecordHeaderPath(Path);
323 break;
324 }
325 case DirectoryLookup::LT_Framework:
326 // stdlib can't be a framework (framework includes must have a slash)
327 continue;
328 case DirectoryLookup::LT_HeaderMap:
329 llvm::StringRef Target =
330 DL.getHeaderMap()->lookupFilename(Filename: ProbeHeader, DestPath&: Path);
331 if (!Target.empty())
332 RecordHeaderPath(Target);
333 break;
334 }
335 }
336 if (SearchPaths.empty())
337 return std::nullopt;
338
339 dlog("Found standard library in {0}", llvm::join(SearchPaths, ", "));
340
341 while (!Best[L].compare_exchange_weak(i1&: OldVersion, i2: NewVersion,
342 m: std::memory_order_acq_rel))
343 if (OldVersion >= NewVersion) {
344 dlog("No: lost the race");
345 return std::nullopt; // Another thread won the race while we were
346 // checking.
347 }
348
349 dlog("Yes, index stdlib!");
350 return StdLibLocation{.Paths: std::move(SearchPaths)};
351}
352
353} // namespace clangd
354} // namespace clang
355

source code of clang-tools-extra/clangd/index/StdLib.cpp