1//===-- SerializationTests.cpp - Binary and YAML serialization unit tests -===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "Headers.h"
10#include "RIFF.h"
11#include "index/Index.h"
12#include "index/Serialization.h"
13#include "support/Logger.h"
14#include "clang/Tooling/CompilationDatabase.h"
15#include "llvm/ADT/ScopeExit.h"
16#include "llvm/ADT/StringExtras.h"
17#include "llvm/Support/Compression.h"
18#include "llvm/Support/Error.h"
19#include "llvm/Support/ScopedPrinter.h"
20#include "gmock/gmock.h"
21#include "gtest/gtest.h"
22#ifdef LLVM_ON_UNIX
23#include <sys/resource.h>
24#endif
25
26using ::testing::ElementsAre;
27using ::testing::Pair;
28using ::testing::UnorderedElementsAre;
29using ::testing::UnorderedElementsAreArray;
30
31namespace clang {
32namespace clangd {
33namespace {
34
// YAML fixture shared by the tests in this file. It contains two symbols, one
// set of refs, one relation, one compile command and one include-graph source.
// Nested mappings must be indented below their parent key; with every line at
// the same level, Start/End/Location would have no children and Line/Column
// would appear as repeated sibling keys.
const char *YAML = R"(
---
!Symbol
ID: 057557CEBF6E6B2D
Name: 'Foo1'
Scope: 'clang::'
SymInfo:
  Kind: Function
  Lang: Cpp
CanonicalDeclaration:
  FileURI: file:///path/foo.h
  Start:
    Line: 1
    Column: 0
  End:
    Line: 1
    Column: 1
Origin: 128
Flags: 129
Documentation: 'Foo doc'
ReturnType: 'int'
IncludeHeaders:
  - Header: 'include1'
    References: 7
  - Header: 'include2'
    References: 3
...
---
!Symbol
ID: 057557CEBF6E6B2E
Name: 'Foo2'
Scope: 'clang::'
SymInfo:
  Kind: Function
  Lang: Cpp
CanonicalDeclaration:
  FileURI: file:///path/bar.h
  Start:
    Line: 1
    Column: 0
  End:
    Line: 1
    Column: 1
Flags: 2
Signature: '-sig'
CompletionSnippetSuffix: '-snippet'
...
!Refs
ID: 057557CEBF6E6B2D
References:
  - Kind: 4
    Location:
      FileURI: file:///path/foo.cc
      Start:
        Line: 5
        Column: 3
      End:
        Line: 5
        Column: 8
...
--- !Relations
Subject:
  ID: 6481EE7AF2841756
Predicate: 0
Object:
  ID: 6512AEC512EA3A2D
...
--- !Cmd
Directory: 'testdir'
CommandLine:
  - 'cmd1'
  - 'cmd2'
...
--- !Source
URI: 'file:///path/source1.cpp'
Flags: 1
Digest: EED8F5EAF25C453C
DirectIncludes:
  - 'file:///path/inc1.h'
  - 'file:///path/inc2.h'
...
)";
117
118MATCHER_P(ID, I, "") { return arg.ID == cantFail(SymbolID::fromStr(I)); }
119MATCHER_P(QName, Name, "") { return (arg.Scope + arg.Name).str() == Name; }
120MATCHER_P2(IncludeHeaderWithRef, IncludeHeader, References, "") {
121 return (arg.IncludeHeader == IncludeHeader) && (arg.References == References);
122}
123
124TEST(SerializationTest, NoCrashOnEmptyYAML) {
125 EXPECT_TRUE(bool(readIndexFile("")));
126}
127
128TEST(SerializationTest, YAMLConversions) {
129 auto ParsedYAML = readIndexFile(YAML);
130 ASSERT_TRUE(bool(ParsedYAML)) << ParsedYAML.takeError();
131 ASSERT_TRUE(bool(ParsedYAML->Symbols));
132 EXPECT_THAT(
133 *ParsedYAML->Symbols,
134 UnorderedElementsAre(ID("057557CEBF6E6B2D"), ID("057557CEBF6E6B2E")));
135
136 auto Sym1 = *ParsedYAML->Symbols->find(
137 cantFail(SymbolID::fromStr("057557CEBF6E6B2D")));
138 auto Sym2 = *ParsedYAML->Symbols->find(
139 cantFail(SymbolID::fromStr("057557CEBF6E6B2E")));
140
141 EXPECT_THAT(Sym1, QName("clang::Foo1"));
142 EXPECT_EQ(Sym1.Signature, "");
143 EXPECT_EQ(Sym1.Documentation, "Foo doc");
144 EXPECT_EQ(Sym1.ReturnType, "int");
145 EXPECT_EQ(StringRef(Sym1.CanonicalDeclaration.FileURI), "file:///path/foo.h");
146 EXPECT_EQ(Sym1.Origin, static_cast<SymbolOrigin>(1 << 7));
147 EXPECT_EQ(static_cast<uint8_t>(Sym1.Flags), 129);
148 EXPECT_TRUE(Sym1.Flags & Symbol::IndexedForCodeCompletion);
149 EXPECT_FALSE(Sym1.Flags & Symbol::Deprecated);
150 EXPECT_THAT(Sym1.IncludeHeaders,
151 UnorderedElementsAre(IncludeHeaderWithRef("include1", 7u),
152 IncludeHeaderWithRef("include2", 3u)));
153
154 EXPECT_THAT(Sym2, QName("clang::Foo2"));
155 EXPECT_EQ(Sym2.Signature, "-sig");
156 EXPECT_EQ(Sym2.ReturnType, "");
157 EXPECT_EQ(llvm::StringRef(Sym2.CanonicalDeclaration.FileURI),
158 "file:///path/bar.h");
159 EXPECT_FALSE(Sym2.Flags & Symbol::IndexedForCodeCompletion);
160 EXPECT_TRUE(Sym2.Flags & Symbol::Deprecated);
161
162 ASSERT_TRUE(bool(ParsedYAML->Refs));
163 EXPECT_THAT(
164 *ParsedYAML->Refs,
165 UnorderedElementsAre(Pair(cantFail(SymbolID::fromStr("057557CEBF6E6B2D")),
166 ::testing::SizeIs(1))));
167 auto Ref1 = ParsedYAML->Refs->begin()->second.front();
168 EXPECT_EQ(Ref1.Kind, RefKind::Reference);
169 EXPECT_EQ(StringRef(Ref1.Location.FileURI), "file:///path/foo.cc");
170
171 SymbolID Base = cantFail(SymbolID::fromStr("6481EE7AF2841756"));
172 SymbolID Derived = cantFail(SymbolID::fromStr("6512AEC512EA3A2D"));
173 ASSERT_TRUE(bool(ParsedYAML->Relations));
174 EXPECT_THAT(
175 *ParsedYAML->Relations,
176 UnorderedElementsAre(Relation{Base, RelationKind::BaseOf, Derived}));
177
178 ASSERT_TRUE(bool(ParsedYAML->Cmd));
179 auto &Cmd = *ParsedYAML->Cmd;
180 ASSERT_EQ(Cmd.Directory, "testdir");
181 EXPECT_THAT(Cmd.CommandLine, ElementsAre("cmd1", "cmd2"));
182
183 ASSERT_TRUE(bool(ParsedYAML->Sources));
184 const auto *URI = "file:///path/source1.cpp";
185 ASSERT_TRUE(ParsedYAML->Sources->count(URI));
186 auto IGNDeserialized = ParsedYAML->Sources->lookup(URI);
187 EXPECT_EQ(llvm::toHex(IGNDeserialized.Digest), "EED8F5EAF25C453C");
188 EXPECT_THAT(IGNDeserialized.DirectIncludes,
189 ElementsAre("file:///path/inc1.h", "file:///path/inc2.h"));
190 EXPECT_EQ(IGNDeserialized.URI, URI);
191 EXPECT_EQ(IGNDeserialized.Flags, IncludeGraphNode::SourceFlag(1));
192}
193
194std::vector<std::string> YAMLFromSymbols(const SymbolSlab &Slab) {
195 std::vector<std::string> Result;
196 for (const auto &Sym : Slab)
197 Result.push_back(toYAML(Sym));
198 return Result;
199}
200std::vector<std::string> YAMLFromRefs(const RefSlab &Slab) {
201 std::vector<std::string> Result;
202 for (const auto &Refs : Slab)
203 Result.push_back(toYAML(Refs));
204 return Result;
205}
206
207std::vector<std::string> YAMLFromRelations(const RelationSlab &Slab) {
208 std::vector<std::string> Result;
209 for (const auto &Rel : Slab)
210 Result.push_back(toYAML(Rel));
211 return Result;
212}
213
214TEST(SerializationTest, BinaryConversions) {
215 auto In = readIndexFile(YAML);
216 EXPECT_TRUE(bool(In)) << In.takeError();
217
218 // Write to binary format, and parse again.
219 IndexFileOut Out(*In);
220 Out.Format = IndexFileFormat::RIFF;
221 std::string Serialized = llvm::to_string(Out);
222
223 auto In2 = readIndexFile(Serialized);
224 ASSERT_TRUE(bool(In2)) << In.takeError();
225 ASSERT_TRUE(In2->Symbols);
226 ASSERT_TRUE(In2->Refs);
227 ASSERT_TRUE(In2->Relations);
228
229 // Assert the YAML serializations match, for nice comparisons and diffs.
230 EXPECT_THAT(YAMLFromSymbols(*In2->Symbols),
231 UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols)));
232 EXPECT_THAT(YAMLFromRefs(*In2->Refs),
233 UnorderedElementsAreArray(YAMLFromRefs(*In->Refs)));
234 EXPECT_THAT(YAMLFromRelations(*In2->Relations),
235 UnorderedElementsAreArray(YAMLFromRelations(*In->Relations)));
236}
237
238TEST(SerializationTest, SrcsTest) {
239 auto In = readIndexFile(YAML);
240 EXPECT_TRUE(bool(In)) << In.takeError();
241
242 std::string TestContent("TestContent");
243 IncludeGraphNode IGN;
244 IGN.Digest = digest(TestContent);
245 IGN.DirectIncludes = {"inc1", "inc2"};
246 IGN.URI = "URI";
247 IGN.Flags |= IncludeGraphNode::SourceFlag::IsTU;
248 IGN.Flags |= IncludeGraphNode::SourceFlag::HadErrors;
249 IncludeGraph Sources;
250 Sources[IGN.URI] = IGN;
251 // Write to binary format, and parse again.
252 IndexFileOut Out(*In);
253 Out.Format = IndexFileFormat::RIFF;
254 Out.Sources = &Sources;
255 {
256 std::string Serialized = llvm::to_string(Out);
257
258 auto In = readIndexFile(Serialized);
259 ASSERT_TRUE(bool(In)) << In.takeError();
260 ASSERT_TRUE(In->Symbols);
261 ASSERT_TRUE(In->Refs);
262 ASSERT_TRUE(In->Sources);
263 ASSERT_TRUE(In->Sources->count(IGN.URI));
264 // Assert the YAML serializations match, for nice comparisons and diffs.
265 EXPECT_THAT(YAMLFromSymbols(*In->Symbols),
266 UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols)));
267 EXPECT_THAT(YAMLFromRefs(*In->Refs),
268 UnorderedElementsAreArray(YAMLFromRefs(*In->Refs)));
269 auto IGNDeserialized = In->Sources->lookup(IGN.URI);
270 EXPECT_EQ(IGNDeserialized.Digest, IGN.Digest);
271 EXPECT_EQ(IGNDeserialized.DirectIncludes, IGN.DirectIncludes);
272 EXPECT_EQ(IGNDeserialized.URI, IGN.URI);
273 EXPECT_EQ(IGNDeserialized.Flags, IGN.Flags);
274 }
275}
276
277TEST(SerializationTest, CmdlTest) {
278 auto In = readIndexFile(YAML);
279 EXPECT_TRUE(bool(In)) << In.takeError();
280
281 tooling::CompileCommand Cmd;
282 Cmd.Directory = "testdir";
283 Cmd.CommandLine.push_back("cmd1");
284 Cmd.CommandLine.push_back("cmd2");
285 Cmd.Filename = "ignored";
286 Cmd.Heuristic = "ignored";
287 Cmd.Output = "ignored";
288
289 IndexFileOut Out(*In);
290 Out.Format = IndexFileFormat::RIFF;
291 Out.Cmd = &Cmd;
292 {
293 std::string Serialized = llvm::to_string(Out);
294
295 auto In = readIndexFile(Serialized);
296 ASSERT_TRUE(bool(In)) << In.takeError();
297 ASSERT_TRUE(In->Cmd);
298
299 const tooling::CompileCommand &SerializedCmd = In->Cmd.getValue();
300 EXPECT_EQ(SerializedCmd.CommandLine, Cmd.CommandLine);
301 EXPECT_EQ(SerializedCmd.Directory, Cmd.Directory);
302 EXPECT_NE(SerializedCmd.Filename, Cmd.Filename);
303 EXPECT_NE(SerializedCmd.Heuristic, Cmd.Heuristic);
304 EXPECT_NE(SerializedCmd.Output, Cmd.Output);
305 }
306}
307
308// rlimit is part of POSIX.
309// ASan uses a lot of address space, so we can't apply strict limits.
310#if LLVM_ON_UNIX && !LLVM_ADDRESS_SANITIZER_BUILD
311class ScopedMemoryLimit {
312 struct rlimit OriginalLimit;
313 bool Succeeded = false;
314
315public:
316 ScopedMemoryLimit(rlim_t Bytes) {
317 if (!getrlimit(RLIMIT_AS, &OriginalLimit)) {
318 struct rlimit NewLimit = OriginalLimit;
319 NewLimit.rlim_cur = Bytes;
320 Succeeded = !setrlimit(RLIMIT_AS, &NewLimit);
321 }
322 if (!Succeeded)
323 log("Failed to set rlimit");
324 }
325
326 ~ScopedMemoryLimit() {
327 if (Succeeded)
328 setrlimit(RLIMIT_AS, &OriginalLimit);
329 }
330};
331#else
332class ScopedMemoryLimit {
333public:
334 ScopedMemoryLimit(unsigned Bytes) { log("rlimit unsupported"); }
335};
336#endif
337
338// Test that our deserialization detects invalid array sizes without allocating.
339// If this detection fails, the test should allocate a huge array and crash.
340TEST(SerializationTest, NoCrashOnBadArraySize) {
341 // This test is tricky because we need to construct a subtly invalid file.
342 // First, create a valid serialized file.
343 auto In = readIndexFile(YAML);
344 ASSERT_FALSE(!In) << In.takeError();
345 IndexFileOut Out(*In);
346 Out.Format = IndexFileFormat::RIFF;
347 std::string Serialized = llvm::to_string(Out);
348
349 // Low-level parse it again and find the `srcs` chunk we're going to corrupt.
350 auto Parsed = riff::readFile(Serialized);
351 ASSERT_FALSE(!Parsed) << Parsed.takeError();
352 auto Srcs = llvm::find_if(Parsed->Chunks, [](riff::Chunk C) {
353 return C.ID == riff::fourCC("srcs");
354 });
355 ASSERT_NE(Srcs, Parsed->Chunks.end());
356
357 // Srcs consists of a sequence of IncludeGraphNodes. In our case, just one.
358 // The node has:
359 // - 1 byte: flags (1)
360 // - varint(stringID): URI
361 // - 8 byte: file digest
362 // - varint: DirectIncludes.length
363 // - repeated varint(stringID): DirectIncludes
364 // We want to set DirectIncludes.length to a huge number.
365 // The offset isn't trivial to find, so we use the file digest.
366 std::string FileDigest = llvm::fromHex("EED8F5EAF25C453C");
367 unsigned Pos = Srcs->Data.find_first_of(FileDigest);
368 ASSERT_NE(Pos, StringRef::npos) << "Couldn't locate file digest";
369 Pos += FileDigest.size();
370
371 // Varints are little-endian base-128 numbers, where the top-bit of each byte
372 // indicates whether there are more. ffffffff0f -> 0xffffffff.
373 std::string CorruptSrcs =
374 (Srcs->Data.take_front(Pos) + llvm::fromHex("ffffffff0f") +
375 "some_random_garbage")
376 .str();
377 Srcs->Data = CorruptSrcs;
378
379 // Try to crash rather than hang on large allocation.
380 ScopedMemoryLimit MemLimit(1000 * 1024 * 1024); // 1GB
381
382 std::string CorruptFile = llvm::to_string(*Parsed);
383 auto CorruptParsed = readIndexFile(CorruptFile);
384 ASSERT_TRUE(!CorruptParsed);
385 EXPECT_EQ(llvm::toString(CorruptParsed.takeError()),
386 "malformed or truncated include uri");
387}
388
389// Check we detect invalid string table size size without allocating it first.
390// If this detection fails, the test should allocate a huge array and crash.
391TEST(SerializationTest, NoCrashOnBadStringTableSize) {
392 if (!llvm::zlib::isAvailable()) {
393 log("skipping test, no zlib");
394 return;
395 }
396
397 // First, create a valid serialized file.
398 auto In = readIndexFile(YAML);
399 ASSERT_FALSE(!In) << In.takeError();
400 IndexFileOut Out(*In);
401 Out.Format = IndexFileFormat::RIFF;
402 std::string Serialized = llvm::to_string(Out);
403
404 // Low-level parse it again, we're going to replace the `stri` chunk.
405 auto Parsed = riff::readFile(Serialized);
406 ASSERT_FALSE(!Parsed) << Parsed.takeError();
407 auto Stri = llvm::find_if(Parsed->Chunks, [](riff::Chunk C) {
408 return C.ID == riff::fourCC("stri");
409 });
410 ASSERT_NE(Stri, Parsed->Chunks.end());
411
412 // stri consists of an 8 byte uncompressed-size, and then compressed data.
413 // We'll claim our small amount of data expands to 4GB
414 std::string CorruptStri =
415 (llvm::fromHex("ffffffff") + Stri->Data.drop_front(4)).str();
416 Stri->Data = CorruptStri;
417 std::string FileDigest = llvm::fromHex("EED8F5EAF25C453C");
418
419 // Try to crash rather than hang on large allocation.
420 ScopedMemoryLimit MemLimit(1000 * 1024 * 1024); // 1GB
421
422 std::string CorruptFile = llvm::to_string(*Parsed);
423 auto CorruptParsed = readIndexFile(CorruptFile);
424 ASSERT_TRUE(!CorruptParsed);
425 EXPECT_THAT(llvm::toString(CorruptParsed.takeError()),
426 testing::HasSubstr("bytes is implausible"));
427}
428
429} // namespace
430} // namespace clangd
431} // namespace clang
432