1 | //===- HashTable.h - PDB Hash Table -----------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H |
10 | #define LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H |
11 | |
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/iterator.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include <cstdint>
#include <iterator>
#include <optional>
#include <utility>
#include <vector>
23 | |
24 | namespace llvm { |
25 | |
26 | namespace pdb { |
27 | |
28 | Error readSparseBitVector(BinaryStreamReader &Stream, SparseBitVector<> &V); |
29 | Error writeSparseBitVector(BinaryStreamWriter &Writer, SparseBitVector<> &Vec); |
30 | |
31 | template <typename ValueT> class HashTable; |
32 | |
33 | template <typename ValueT> |
34 | class HashTableIterator |
35 | : public iterator_facade_base<HashTableIterator<ValueT>, |
36 | std::forward_iterator_tag, |
37 | const std::pair<uint32_t, ValueT>> { |
38 | using BaseT = typename HashTableIterator::iterator_facade_base; |
39 | friend HashTable<ValueT>; |
40 | |
41 | HashTableIterator(const HashTable<ValueT> &Map, uint32_t Index, |
42 | bool IsEnd) |
43 | : Map(&Map), Index(Index), IsEnd(IsEnd) {} |
44 | |
45 | public: |
46 | HashTableIterator(const HashTable<ValueT> &Map) : Map(&Map) { |
47 | int I = Map.Present.find_first(); |
48 | if (I == -1) { |
49 | Index = 0; |
50 | IsEnd = true; |
51 | } else { |
52 | Index = static_cast<uint32_t>(I); |
53 | IsEnd = false; |
54 | } |
55 | } |
56 | |
57 | HashTableIterator(const HashTableIterator &R) = default; |
58 | HashTableIterator &operator=(const HashTableIterator &R) { |
59 | Map = R.Map; |
60 | return *this; |
61 | } |
62 | bool operator==(const HashTableIterator &R) const { |
63 | if (IsEnd && R.IsEnd) |
64 | return true; |
65 | if (IsEnd != R.IsEnd) |
66 | return false; |
67 | |
68 | return (Map == R.Map) && (Index == R.Index); |
69 | } |
70 | const std::pair<uint32_t, ValueT> &operator*() const { |
71 | assert(Map->Present.test(Index)); |
72 | return Map->Buckets[Index]; |
73 | } |
74 | |
75 | // Implement postfix op++ in terms of prefix op++ by using the superclass |
76 | // implementation. |
77 | using BaseT::operator++; |
78 | HashTableIterator &operator++() { |
79 | while (Index < Map->Buckets.size()) { |
80 | ++Index; |
81 | if (Map->Present.test(Index)) |
82 | return *this; |
83 | } |
84 | |
85 | IsEnd = true; |
86 | return *this; |
87 | } |
88 | |
89 | private: |
90 | bool isEnd() const { return IsEnd; } |
91 | uint32_t index() const { return Index; } |
92 | |
93 | const HashTable<ValueT> *Map; |
94 | uint32_t Index; |
95 | bool IsEnd; |
96 | }; |
97 | |
98 | template <typename ValueT> |
99 | class HashTable { |
100 | struct { |
101 | support::ulittle32_t ; |
102 | support::ulittle32_t ; |
103 | }; |
104 | |
105 | using BucketList = std::vector<std::pair<uint32_t, ValueT>>; |
106 | |
107 | public: |
108 | using const_iterator = HashTableIterator<ValueT>; |
109 | friend const_iterator; |
110 | |
111 | HashTable() { Buckets.resize(8); } |
112 | explicit HashTable(uint32_t Capacity) { |
113 | Buckets.resize(Capacity); |
114 | } |
115 | |
116 | Error load(BinaryStreamReader &Stream) { |
117 | const Header *H; |
118 | if (auto EC = Stream.readObject(H)) |
119 | return EC; |
120 | if (H->Capacity == 0) |
121 | return make_error<RawError>(Args: raw_error_code::corrupt_file, |
122 | Args: "Invalid Hash Table Capacity" ); |
123 | if (H->Size > maxLoad(capacity: H->Capacity)) |
124 | return make_error<RawError>(Args: raw_error_code::corrupt_file, |
125 | Args: "Invalid Hash Table Size" ); |
126 | |
127 | Buckets.resize(H->Capacity); |
128 | |
129 | if (auto EC = readSparseBitVector(Stream, V&: Present)) |
130 | return EC; |
131 | if (Present.count() != H->Size) |
132 | return make_error<RawError>(Args: raw_error_code::corrupt_file, |
133 | Args: "Present bit vector does not match size!" ); |
134 | |
135 | if (auto EC = readSparseBitVector(Stream, V&: Deleted)) |
136 | return EC; |
137 | if (Present.intersects(RHS: Deleted)) |
138 | return make_error<RawError>(Args: raw_error_code::corrupt_file, |
139 | Args: "Present bit vector intersects deleted!" ); |
140 | |
141 | for (uint32_t P : Present) { |
142 | if (auto EC = Stream.readInteger(Buckets[P].first)) |
143 | return EC; |
144 | const ValueT *Value; |
145 | if (auto EC = Stream.readObject(Value)) |
146 | return EC; |
147 | Buckets[P].second = *Value; |
148 | } |
149 | |
150 | return Error::success(); |
151 | } |
152 | |
153 | uint32_t calculateSerializedLength() const { |
154 | uint32_t Size = sizeof(Header); |
155 | |
156 | constexpr int BitsPerWord = 8 * sizeof(uint32_t); |
157 | |
158 | int NumBitsP = Present.find_last() + 1; |
159 | int NumBitsD = Deleted.find_last() + 1; |
160 | |
161 | uint32_t NumWordsP = alignTo(Value: NumBitsP, Align: BitsPerWord) / BitsPerWord; |
162 | uint32_t NumWordsD = alignTo(Value: NumBitsD, Align: BitsPerWord) / BitsPerWord; |
163 | |
164 | // Present bit set number of words (4 bytes), followed by that many actual |
165 | // words (4 bytes each). |
166 | Size += sizeof(uint32_t); |
167 | Size += NumWordsP * sizeof(uint32_t); |
168 | |
169 | // Deleted bit set number of words (4 bytes), followed by that many actual |
170 | // words (4 bytes each). |
171 | Size += sizeof(uint32_t); |
172 | Size += NumWordsD * sizeof(uint32_t); |
173 | |
174 | // One (Key, ValueT) pair for each entry Present. |
175 | Size += (sizeof(uint32_t) + sizeof(ValueT)) * size(); |
176 | |
177 | return Size; |
178 | } |
179 | |
180 | Error commit(BinaryStreamWriter &Writer) const { |
181 | Header H; |
182 | H.Size = size(); |
183 | H.Capacity = capacity(); |
184 | if (auto EC = Writer.writeObject(H)) |
185 | return EC; |
186 | |
187 | if (auto EC = writeSparseBitVector(Writer, Vec&: Present)) |
188 | return EC; |
189 | |
190 | if (auto EC = writeSparseBitVector(Writer, Vec&: Deleted)) |
191 | return EC; |
192 | |
193 | for (const auto &Entry : *this) { |
194 | if (auto EC = Writer.writeInteger(Entry.first)) |
195 | return EC; |
196 | if (auto EC = Writer.writeObject(Entry.second)) |
197 | return EC; |
198 | } |
199 | return Error::success(); |
200 | } |
201 | |
202 | void clear() { |
203 | Buckets.resize(8); |
204 | Present.clear(); |
205 | Deleted.clear(); |
206 | } |
207 | |
208 | bool empty() const { return size() == 0; } |
209 | uint32_t capacity() const { return Buckets.size(); } |
210 | uint32_t size() const { return Present.count(); } |
211 | |
212 | const_iterator begin() const { return const_iterator(*this); } |
213 | const_iterator end() const { return const_iterator(*this, 0, true); } |
214 | |
215 | /// Find the entry whose key has the specified hash value, using the specified |
216 | /// traits defining hash function and equality. |
217 | template <typename Key, typename TraitsT> |
218 | const_iterator find_as(const Key &K, TraitsT &Traits) const { |
219 | uint32_t H = Traits.hashLookupKey(K) % capacity(); |
220 | uint32_t I = H; |
221 | std::optional<uint32_t> FirstUnused; |
222 | do { |
223 | if (isPresent(K: I)) { |
224 | if (Traits.storageKeyToLookupKey(Buckets[I].first) == K) |
225 | return const_iterator(*this, I, false); |
226 | } else { |
227 | if (!FirstUnused) |
228 | FirstUnused = I; |
229 | // Insertion occurs via linear probing from the slot hint, and will be |
230 | // inserted at the first empty / deleted location. Therefore, if we are |
231 | // probing and find a location that is neither present nor deleted, then |
232 | // nothing must have EVER been inserted at this location, and thus it is |
233 | // not possible for a matching value to occur later. |
234 | if (!isDeleted(K: I)) |
235 | break; |
236 | } |
237 | I = (I + 1) % capacity(); |
238 | } while (I != H); |
239 | |
240 | // The only way FirstUnused would not be set is if every single entry in the |
241 | // table were Present. But this would violate the load factor constraints |
242 | // that we impose, so it should never happen. |
243 | assert(FirstUnused); |
244 | return const_iterator(*this, *FirstUnused, true); |
245 | } |
246 | |
247 | /// Set the entry using a key type that the specified Traits can convert |
248 | /// from a real key to an internal key. |
249 | template <typename Key, typename TraitsT> |
250 | bool set_as(const Key &K, ValueT V, TraitsT &Traits) { |
251 | return set_as_internal(K, std::move(V), Traits, std::nullopt); |
252 | } |
253 | |
254 | template <typename Key, typename TraitsT> |
255 | ValueT get(const Key &K, TraitsT &Traits) const { |
256 | auto Iter = find_as(K, Traits); |
257 | assert(Iter != end()); |
258 | return (*Iter).second; |
259 | } |
260 | |
261 | protected: |
262 | bool isPresent(uint32_t K) const { return Present.test(Idx: K); } |
263 | bool isDeleted(uint32_t K) const { return Deleted.test(Idx: K); } |
264 | |
265 | BucketList Buckets; |
266 | mutable SparseBitVector<> Present; |
267 | mutable SparseBitVector<> Deleted; |
268 | |
269 | private: |
270 | /// Set the entry using a key type that the specified Traits can convert |
271 | /// from a real key to an internal key. |
272 | template <typename Key, typename TraitsT> |
273 | bool set_as_internal(const Key &K, ValueT V, TraitsT &Traits, |
274 | std::optional<uint32_t> InternalKey) { |
275 | auto Entry = find_as(K, Traits); |
276 | if (Entry != end()) { |
277 | assert(isPresent(Entry.index())); |
278 | assert(Traits.storageKeyToLookupKey(Buckets[Entry.index()].first) == K); |
279 | // We're updating, no need to do anything special. |
280 | Buckets[Entry.index()].second = V; |
281 | return false; |
282 | } |
283 | |
284 | auto &B = Buckets[Entry.index()]; |
285 | assert(!isPresent(Entry.index())); |
286 | assert(Entry.isEnd()); |
287 | B.first = InternalKey ? *InternalKey : Traits.lookupKeyToStorageKey(K); |
288 | B.second = V; |
289 | Present.set(Entry.index()); |
290 | Deleted.reset(Idx: Entry.index()); |
291 | |
292 | grow(Traits); |
293 | |
294 | assert((find_as(K, Traits)) != end()); |
295 | return true; |
296 | } |
297 | |
298 | static uint32_t maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; } |
299 | |
300 | template <typename TraitsT> |
301 | void grow(TraitsT &Traits) { |
302 | uint32_t S = size(); |
303 | uint32_t MaxLoad = maxLoad(capacity: capacity()); |
304 | if (S < maxLoad(capacity: capacity())) |
305 | return; |
306 | assert(capacity() != UINT32_MAX && "Can't grow Hash table!" ); |
307 | |
308 | uint32_t NewCapacity = (capacity() <= INT32_MAX) ? MaxLoad * 2 : UINT32_MAX; |
309 | |
310 | // Growing requires rebuilding the table and re-hashing every item. Make a |
311 | // copy with a larger capacity, insert everything into the copy, then swap |
312 | // it in. |
313 | HashTable NewMap(NewCapacity); |
314 | for (auto I : Present) { |
315 | auto LookupKey = Traits.storageKeyToLookupKey(Buckets[I].first); |
316 | NewMap.set_as_internal(LookupKey, Buckets[I].second, Traits, |
317 | Buckets[I].first); |
318 | } |
319 | |
320 | Buckets.swap(NewMap.Buckets); |
321 | std::swap(Present, NewMap.Present); |
322 | std::swap(Deleted, NewMap.Deleted); |
323 | assert(capacity() == NewCapacity); |
324 | assert(size() == S); |
325 | } |
326 | }; |
327 | |
328 | } // end namespace pdb |
329 | |
330 | } // end namespace llvm |
331 | |
332 | #endif // LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H |
333 | |