1//===--- Dex.h - Dex Symbol Index Implementation ----------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This defines Dex - a symbol index implementation based on query iterators
11/// over symbol tokens, such as fuzzy matching trigrams, scopes, types, etc.
12/// While consuming more memory and having longer build stage due to
13/// preprocessing, Dex will have substantially lower latency. It will also allow
14/// efficient symbol searching which is crucial for operations like code
15/// completion, and can be very important for a number of different code
16/// transformations which will be eventually supported by Clangd.
17///
18//===----------------------------------------------------------------------===//
19
20#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
21#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
22
23#include "index/dex/Iterator.h"
24#include "index/Index.h"
25#include "index/Relation.h"
26#include "index/dex/PostingList.h"
27#include "index/dex/Token.h"
28#include "llvm/ADT/StringSet.h"
29
30namespace clang {
31namespace clangd {
32namespace dex {
33
34/// In-memory Dex trigram-based index implementation.
35class Dex : public SymbolIndex {
36public:
37 // All data must outlive this index.
38 template <typename SymbolRange, typename RefsRange, typename RelationsRange>
39 Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations)
40 : Corpus(0) {
41 for (auto &&Sym : Symbols)
42 this->Symbols.push_back(&Sym);
43 for (auto &&Ref : Refs)
44 this->Refs.try_emplace(Ref.first, Ref.second);
45 for (auto &&Rel : Relations)
46 this->Relations[std::make_pair(Rel.Subject,
47 static_cast<uint8_t>(Rel.Predicate))]
48 .push_back(Rel.Object);
49 buildIndex();
50 }
51 // Symbols and Refs are owned by BackingData, Index takes ownership.
52 template <typename SymbolRange, typename RefsRange, typename RelationsRange,
53 typename Payload>
54 Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations,
55 Payload &&BackingData, size_t BackingDataSize)
56 : Dex(std::forward<SymbolRange>(Symbols), std::forward<RefsRange>(Refs),
57 std::forward<RelationsRange>(Relations)) {
58 KeepAlive = std::shared_ptr<void>(
59 std::make_shared<Payload>(std::move(BackingData)), nullptr);
60 this->BackingDataSize = BackingDataSize;
61 }
62
63 template <typename SymbolRange, typename RefsRange, typename RelationsRange,
64 typename FileRange, typename Payload>
65 Dex(SymbolRange &&Symbols, RefsRange &&Refs, RelationsRange &&Relations,
66 FileRange &&Files, IndexContents IdxContents, Payload &&BackingData,
67 size_t BackingDataSize)
68 : Dex(std::forward<SymbolRange>(Symbols), std::forward<RefsRange>(Refs),
69 std::forward<RelationsRange>(Relations),
70 std::forward<Payload>(BackingData), BackingDataSize) {
71 this->Files = std::forward<FileRange>(Files);
72 this->IdxContents = IdxContents;
73 }
74
75 /// Builds an index from slabs. The index takes ownership of the slab.
76 static std::unique_ptr<SymbolIndex> build(SymbolSlab, RefSlab, RelationSlab);
77
78 bool
79 fuzzyFind(const FuzzyFindRequest &Req,
80 llvm::function_ref<void(const Symbol &)> Callback) const override;
81
82 void lookup(const LookupRequest &Req,
83 llvm::function_ref<void(const Symbol &)> Callback) const override;
84
85 bool refs(const RefsRequest &Req,
86 llvm::function_ref<void(const Ref &)> Callback) const override;
87
88 void relations(const RelationsRequest &Req,
89 llvm::function_ref<void(const SymbolID &, const Symbol &)>
90 Callback) const override;
91
92 llvm::unique_function<IndexContents(llvm::StringRef) const>
93 indexedFiles() const override;
94
95 size_t estimateMemoryUsage() const override;
96
97private:
98 void buildIndex();
99 std::unique_ptr<Iterator> iterator(const Token &Tok) const;
100 std::unique_ptr<Iterator>
101 createFileProximityIterator(llvm::ArrayRef<std::string> ProximityPaths) const;
102 std::unique_ptr<Iterator>
103 createTypeBoostingIterator(llvm::ArrayRef<std::string> Types) const;
104
105 /// Stores symbols sorted in the descending order of symbol quality..
106 std::vector<const Symbol *> Symbols;
107 /// SymbolQuality[I] is the quality of Symbols[I].
108 std::vector<float> SymbolQuality;
109 llvm::DenseMap<SymbolID, const Symbol *> LookupTable;
110 /// Inverted index is a mapping from the search token to the posting list,
111 /// which contains all items which can be characterized by such search token.
112 /// For example, if the search token is scope "std::", the corresponding
113 /// posting list would contain all indices of symbols defined in namespace
114 /// std. Inverted index is used to retrieve posting lists which are processed
115 /// during the fuzzyFind process.
116 llvm::DenseMap<Token, PostingList> InvertedIndex;
117 dex::Corpus Corpus;
118 llvm::DenseMap<SymbolID, llvm::ArrayRef<Ref>> Refs;
119 static_assert(sizeof(RelationKind) == sizeof(uint8_t),
120 "RelationKind should be of same size as a uint8_t");
121 llvm::DenseMap<std::pair<SymbolID, uint8_t>, std::vector<SymbolID>> Relations;
122 std::shared_ptr<void> KeepAlive; // poor man's move-only std::any
123 // Set of files which were used during this index build.
124 llvm::StringSet<> Files;
125 // Contents of the index (symbols, references, etc.)
126 IndexContents IdxContents;
127 // Size of memory retained by KeepAlive.
128 size_t BackingDataSize = 0;
129};
130
131/// Returns Search Token for a number of parent directories of given Path.
132/// Should be used within the index build process.
133///
134/// This function is exposed for testing only.
135llvm::SmallVector<llvm::StringRef, 5> generateProximityURIs(llvm::StringRef);
136
137} // namespace dex
138} // namespace clangd
139} // namespace clang
140
141#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
142

source code of clang-tools-extra/clangd/index/dex/Dex.h