1 | //===- HashTable.h - PDB Hash Table -----------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H |
10 | #define LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H |
11 | |
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/iterator.h"
#include "llvm/DebugInfo/PDB/Native/RawError.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include <cstdint>
#include <iterator>
#include <optional>
#include <utility>
#include <vector>
23 | |
24 | namespace llvm { |
25 | |
26 | namespace pdb { |
27 | |
28 | Error readSparseBitVector(BinaryStreamReader &Stream, SparseBitVector<> &V); |
29 | Error writeSparseBitVector(BinaryStreamWriter &Writer, SparseBitVector<> &Vec); |
30 | |
31 | template <typename ValueT> class HashTable; |
32 | |
33 | template <typename ValueT> |
34 | class HashTableIterator |
35 | : public iterator_facade_base<HashTableIterator<ValueT>, |
36 | std::forward_iterator_tag, |
37 | const std::pair<uint32_t, ValueT>> { |
38 | using BaseT = typename HashTableIterator::iterator_facade_base; |
39 | friend HashTable<ValueT>; |
40 | |
41 | HashTableIterator(const HashTable<ValueT> &Map, uint32_t Index, |
42 | bool IsEnd) |
43 | : Map(&Map), Index(Index), IsEnd(IsEnd) {} |
44 | |
45 | public: |
46 | HashTableIterator(const HashTable<ValueT> &Map) : Map(&Map) { |
47 | int I = Map.Present.find_first(); |
48 | if (I == -1) { |
49 | Index = 0; |
50 | IsEnd = true; |
51 | } else { |
52 | Index = static_cast<uint32_t>(I); |
53 | IsEnd = false; |
54 | } |
55 | } |
56 | |
57 | HashTableIterator(const HashTableIterator &R) = default; |
58 | HashTableIterator &operator=(const HashTableIterator &R) { |
59 | Map = R.Map; |
60 | return *this; |
61 | } |
62 | bool operator==(const HashTableIterator &R) const { |
63 | if (IsEnd && R.IsEnd) |
64 | return true; |
65 | if (IsEnd != R.IsEnd) |
66 | return false; |
67 | |
68 | return (Map == R.Map) && (Index == R.Index); |
69 | } |
70 | const std::pair<uint32_t, ValueT> &operator*() const { |
71 | assert(Map->Present.test(Index)); |
72 | return Map->Buckets[Index]; |
73 | } |
74 | |
75 | // Implement postfix op++ in terms of prefix op++ by using the superclass |
76 | // implementation. |
77 | using BaseT::operator++; |
78 | HashTableIterator &operator++() { |
79 | while (Index < Map->Buckets.size()) { |
80 | ++Index; |
81 | if (Map->Present.test(Index)) |
82 | return *this; |
83 | } |
84 | |
85 | IsEnd = true; |
86 | return *this; |
87 | } |
88 | |
89 | private: |
90 | bool isEnd() const { return IsEnd; } |
91 | uint32_t index() const { return Index; } |
92 | |
93 | const HashTable<ValueT> *Map; |
94 | uint32_t Index; |
95 | bool IsEnd; |
96 | }; |
97 | |
98 | template <typename ValueT> |
99 | class HashTable { |
100 | struct { |
101 | support::ulittle32_t ; |
102 | support::ulittle32_t ; |
103 | }; |
104 | |
105 | using BucketList = std::vector<std::pair<uint32_t, ValueT>>; |
106 | |
107 | public: |
108 | using const_iterator = HashTableIterator<ValueT>; |
109 | friend const_iterator; |
110 | |
111 | HashTable() { Buckets.resize(8); } |
112 | explicit HashTable(uint32_t Capacity) { |
113 | Buckets.resize(Capacity); |
114 | } |
115 | |
116 | Error load(BinaryStreamReader &Stream) { |
117 | const Header *H; |
118 | if (auto EC = Stream.readObject(H)) |
119 | return EC; |
120 | if (H->Capacity == 0) |
121 | return make_error<RawError>(Args: raw_error_code::corrupt_file, |
122 | Args: "Invalid Hash Table Capacity" ); |
123 | if (H->Size > maxLoad(capacity: H->Capacity)) |
124 | return make_error<RawError>(Args: raw_error_code::corrupt_file, |
125 | Args: "Invalid Hash Table Size" ); |
126 | |
127 | Buckets.resize(H->Capacity); |
128 | |
129 | if (auto EC = readSparseBitVector(Stream, V&: Present)) |
130 | return EC; |
131 | if (Present.count() != H->Size) |
132 | return make_error<RawError>(Args: raw_error_code::corrupt_file, |
133 | Args: "Present bit vector does not match size!" ); |
134 | |
135 | if (auto EC = readSparseBitVector(Stream, V&: Deleted)) |
136 | return EC; |
137 | if (Present.intersects(RHS: Deleted)) |
138 | return make_error<RawError>(Args: raw_error_code::corrupt_file, |
139 | Args: "Present bit vector intersects deleted!" ); |
140 | |
141 | for (uint32_t P : Present) { |
142 | if (auto EC = Stream.readInteger(Buckets[P].first)) |
143 | return EC; |
144 | const ValueT *Value; |
145 | if (auto EC = Stream.readObject(Value)) |
146 | return EC; |
147 | Buckets[P].second = *Value; |
148 | } |
149 | |
150 | return Error::success(); |
151 | } |
152 | |
153 | uint32_t calculateSerializedLength() const { |
154 | uint32_t Size = sizeof(Header); |
155 | |
156 | constexpr int BitsPerWord = 8 * sizeof(uint32_t); |
157 | |
158 | int NumBitsP = Present.find_last() + 1; |
159 | int NumBitsD = Deleted.find_last() + 1; |
160 | |
161 | uint32_t NumWordsP = alignTo(Value: NumBitsP, Align: BitsPerWord) / BitsPerWord; |
162 | uint32_t NumWordsD = alignTo(Value: NumBitsD, Align: BitsPerWord) / BitsPerWord; |
163 | |
164 | // Present bit set number of words (4 bytes), followed by that many actual |
165 | // words (4 bytes each). |
166 | Size += sizeof(uint32_t); |
167 | Size += NumWordsP * sizeof(uint32_t); |
168 | |
169 | // Deleted bit set number of words (4 bytes), followed by that many actual |
170 | // words (4 bytes each). |
171 | Size += sizeof(uint32_t); |
172 | Size += NumWordsD * sizeof(uint32_t); |
173 | |
174 | // One (Key, ValueT) pair for each entry Present. |
175 | Size += (sizeof(uint32_t) + sizeof(ValueT)) * size(); |
176 | |
177 | return Size; |
178 | } |
179 | |
180 | Error commit(BinaryStreamWriter &Writer) const { |
181 | Header H; |
182 | H.Size = size(); |
183 | H.Capacity = capacity(); |
184 | if (auto EC = Writer.writeObject(H)) |
185 | return EC; |
186 | |
187 | if (auto EC = writeSparseBitVector(Writer, Vec&: Present)) |
188 | return EC; |
189 | |
190 | if (auto EC = writeSparseBitVector(Writer, Vec&: Deleted)) |
191 | return EC; |
192 | |
193 | for (const auto &Entry : *this) { |
194 | if (auto EC = Writer.writeInteger(Entry.first)) |
195 | return EC; |
196 | if (auto EC = Writer.writeObject(Entry.second)) |
197 | return EC; |
198 | } |
199 | return Error::success(); |
200 | } |
201 | |
202 | void clear() { |
203 | Buckets.resize(8); |
204 | Present.clear(); |
205 | Deleted.clear(); |
206 | } |
207 | |
208 | bool empty() const { return size() == 0; } |
209 | uint32_t capacity() const { return Buckets.size(); } |
210 | uint32_t size() const { return Present.count(); } |
211 | |
212 | const_iterator begin() const { return const_iterator(*this); } |
213 | const_iterator end() const { return const_iterator(*this, 0, true); } |
214 | |
215 | /// Find the entry whose key has the specified hash value, using the specified |
216 | /// traits defining hash function and equality. |
217 | template <typename Key, typename TraitsT> |
218 | const_iterator find_as(const Key &K, TraitsT &Traits) const { |
219 | uint32_t H = Traits.hashLookupKey(K) % capacity(); |
220 | uint32_t I = H; |
221 | std::optional<uint32_t> FirstUnused; |
222 | do { |
223 | if (isPresent(K: I)) { |
224 | if (Traits.storageKeyToLookupKey(Buckets[I].first) == K) |
225 | return const_iterator(*this, I, false); |
226 | } else { |
227 | if (!FirstUnused) |
228 | FirstUnused = I; |
229 | // Insertion occurs via linear probing from the slot hint, and will be |
230 | // inserted at the first empty / deleted location. Therefore, if we are |
231 | // probing and find a location that is neither present nor deleted, then |
232 | // nothing must have EVER been inserted at this location, and thus it is |
233 | // not possible for a matching value to occur later. |
234 | if (!isDeleted(K: I)) |
235 | break; |
236 | } |
237 | I = (I + 1) % capacity(); |
238 | } while (I != H); |
239 | |
240 | // The only way FirstUnused would not be set is if every single entry in the |
241 | // table were Present. But this would violate the load factor constraints |
242 | // that we impose, so it should never happen. |
243 | assert(FirstUnused); |
244 | return const_iterator(*this, *FirstUnused, true); |
245 | } |
246 | |
247 | /// Set the entry using a key type that the specified Traits can convert |
248 | /// from a real key to an internal key. |
249 | template <typename Key, typename TraitsT> |
250 | bool set_as(const Key &K, ValueT V, TraitsT &Traits) { |
251 | return set_as_internal(K, std::move(V), Traits, std::nullopt); |
252 | } |
253 | |
254 | template <typename Key, typename TraitsT> |
255 | ValueT get(const Key &K, TraitsT &Traits) const { |
256 | auto Iter = find_as(K, Traits); |
257 | assert(Iter != end()); |
258 | return (*Iter).second; |
259 | } |
260 | |
261 | protected: |
262 | bool isPresent(uint32_t K) const { return Present.test(Idx: K); } |
263 | bool isDeleted(uint32_t K) const { return Deleted.test(Idx: K); } |
264 | |
265 | BucketList Buckets; |
266 | mutable SparseBitVector<> Present; |
267 | mutable SparseBitVector<> Deleted; |
268 | |
269 | private: |
270 | /// Set the entry using a key type that the specified Traits can convert |
271 | /// from a real key to an internal key. |
272 | template <typename Key, typename TraitsT> |
273 | bool set_as_internal(const Key &K, ValueT V, TraitsT &Traits, |
274 | std::optional<uint32_t> InternalKey) { |
275 | auto Entry = find_as(K, Traits); |
276 | if (Entry != end()) { |
277 | assert(isPresent(Entry.index())); |
278 | assert(Traits.storageKeyToLookupKey(Buckets[Entry.index()].first) == K); |
279 | // We're updating, no need to do anything special. |
280 | Buckets[Entry.index()].second = V; |
281 | return false; |
282 | } |
283 | |
284 | auto &B = Buckets[Entry.index()]; |
285 | assert(!isPresent(Entry.index())); |
286 | assert(Entry.isEnd()); |
287 | B.first = InternalKey ? *InternalKey : Traits.lookupKeyToStorageKey(K); |
288 | B.second = V; |
289 | Present.set(Entry.index()); |
290 | Deleted.reset(Idx: Entry.index()); |
291 | |
292 | grow(Traits); |
293 | |
294 | assert((find_as(K, Traits)) != end()); |
295 | return true; |
296 | } |
297 | |
298 | static uint32_t maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; } |
299 | |
300 | template <typename TraitsT> |
301 | void grow(TraitsT &Traits) { |
302 | uint32_t S = size(); |
303 | uint32_t MaxLoad = maxLoad(capacity: capacity()); |
304 | if (S < maxLoad(capacity: capacity())) |
305 | return; |
306 | assert(capacity() != UINT32_MAX && "Can't grow Hash table!" ); |
307 | |
308 | uint32_t NewCapacity = (capacity() <= INT32_MAX) ? MaxLoad * 2 : UINT32_MAX; |
309 | |
310 | // Growing requires rebuilding the table and re-hashing every item. Make a |
311 | // copy with a larger capacity, insert everything into the copy, then swap |
312 | // it in. |
313 | HashTable NewMap(NewCapacity); |
314 | for (auto I : Present) { |
315 | auto LookupKey = Traits.storageKeyToLookupKey(Buckets[I].first); |
316 | NewMap.set_as_internal(LookupKey, Buckets[I].second, Traits, |
317 | Buckets[I].first); |
318 | } |
319 | |
320 | Buckets.swap(NewMap.Buckets); |
321 | std::swap(Present, NewMap.Present); |
322 | std::swap(Deleted, NewMap.Deleted); |
323 | assert(capacity() == NewCapacity); |
324 | assert(size() == S); |
325 | } |
326 | }; |
327 | |
328 | } // end namespace pdb |
329 | |
330 | } // end namespace llvm |
331 | |
332 | #endif // LLVM_DEBUGINFO_PDB_NATIVE_HASHTABLE_H |
333 | |