1 | //===- TypeHashing.h ---------------------------------------------*- C++-*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H |
10 | #define LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H |
11 | |
12 | #include "llvm/ADT/ArrayRef.h" |
13 | #include "llvm/ADT/Hashing.h" |
14 | #include "llvm/ADT/StringRef.h" |
15 | |
16 | #include "llvm/DebugInfo/CodeView/CVRecord.h" |
17 | #include "llvm/DebugInfo/CodeView/TypeCollection.h" |
18 | #include "llvm/DebugInfo/CodeView/TypeIndex.h" |
19 | |
20 | #include "llvm/Support/FormatProviders.h" |
21 | |
22 | #include <type_traits> |
23 | |
24 | namespace llvm { |
25 | class raw_ostream; |
26 | namespace codeview { |
27 | |
28 | /// A locally hashed type represents a straightforward hash code of a serialized |
29 | /// record. The record is simply serialized, and then the bytes are hashed by |
30 | /// a standard algorithm. This is sufficient for the case of de-duplicating |
31 | /// records within a single sequence of types, because if two records both have |
32 | /// a back-reference to the same type in the same stream, they will both have |
33 | /// the same numeric value for the TypeIndex of the back reference. |
34 | struct LocallyHashedType { |
35 | hash_code Hash; |
36 | ArrayRef<uint8_t> RecordData; |
37 | |
38 | /// Given a type, compute its local hash. |
39 | static LocallyHashedType hashType(ArrayRef<uint8_t> RecordData); |
40 | |
41 | /// Given a sequence of types, compute all of the local hashes. |
42 | template <typename Range> |
43 | static std::vector<LocallyHashedType> hashTypes(Range &&Records) { |
44 | std::vector<LocallyHashedType> Hashes; |
45 | Hashes.reserve(n: std::distance(std::begin(Records), std::end(Records))); |
46 | for (const auto &R : Records) |
47 | Hashes.push_back(hashType(RecordData: R)); |
48 | |
49 | return Hashes; |
50 | } |
51 | |
52 | static std::vector<LocallyHashedType> |
53 | hashTypeCollection(TypeCollection &Types) { |
54 | std::vector<LocallyHashedType> Hashes; |
55 | Types.ForEachRecord(Func: [&Hashes](TypeIndex TI, const CVType &Type) { |
56 | Hashes.push_back(x: hashType(RecordData: Type.RecordData)); |
57 | }); |
58 | return Hashes; |
59 | } |
60 | }; |
61 | |
62 | enum class GlobalTypeHashAlg : uint16_t { |
63 | SHA1 = 0, // standard 20-byte SHA1 hash |
64 | SHA1_8, // last 8-bytes of standard SHA1 hash |
65 | BLAKE3, // truncated 8-bytes BLAKE3 |
66 | }; |
67 | |
68 | /// A globally hashed type represents a hash value that is sufficient to |
69 | /// uniquely identify a record across multiple type streams or type sequences. |
70 | /// This works by, for any given record A which references B, replacing the |
71 | /// TypeIndex that refers to B with a previously-computed global hash for B. As |
72 | /// this is a recursive algorithm (e.g. the global hash of B also depends on the |
73 | /// global hashes of the types that B refers to), a global hash can uniquely |
74 | /// identify that A occurs in another stream that has a completely |
75 | /// different graph structure. Although the hash itself is slower to compute, |
76 | /// probing is much faster with a globally hashed type, because the hash itself |
77 | /// is considered "as good as" the original type. Since type records can be |
78 | /// quite large, this makes the equality comparison of the hash much faster than |
79 | /// equality comparison of a full record. |
80 | struct GloballyHashedType { |
81 | GloballyHashedType() = default; |
82 | GloballyHashedType(StringRef H) |
83 | : GloballyHashedType(ArrayRef<uint8_t>(H.bytes_begin(), H.bytes_end())) {} |
84 | GloballyHashedType(ArrayRef<uint8_t> H) { |
85 | assert(H.size() == 8); |
86 | ::memcpy(dest: Hash.data(), src: H.data(), n: 8); |
87 | } |
88 | std::array<uint8_t, 8> Hash; |
89 | |
90 | bool empty() const { return *(const uint64_t*)Hash.data() == 0; } |
91 | |
92 | friend inline bool operator==(const GloballyHashedType &L, |
93 | const GloballyHashedType &R) { |
94 | return L.Hash == R.Hash; |
95 | } |
96 | |
97 | friend inline bool operator!=(const GloballyHashedType &L, |
98 | const GloballyHashedType &R) { |
99 | return !(L.Hash == R.Hash); |
100 | } |
101 | |
102 | /// Given a sequence of bytes representing a record, compute a global hash for |
103 | /// this record. Due to the nature of global hashes incorporating the hashes |
104 | /// of referenced records, this function requires a list of types and ids |
105 | /// that RecordData might reference, indexable by TypeIndex. |
106 | static GloballyHashedType hashType(ArrayRef<uint8_t> RecordData, |
107 | ArrayRef<GloballyHashedType> PreviousTypes, |
108 | ArrayRef<GloballyHashedType> PreviousIds); |
109 | |
110 | /// Given a sequence of bytes representing a record, compute a global hash for |
111 | /// this record. Due to the nature of global hashes incorporating the hashes |
112 | /// of referenced records, this function requires a list of types and ids |
113 | /// that RecordData might reference, indexable by TypeIndex. |
114 | static GloballyHashedType hashType(CVType Type, |
115 | ArrayRef<GloballyHashedType> PreviousTypes, |
116 | ArrayRef<GloballyHashedType> PreviousIds) { |
117 | return hashType(RecordData: Type.RecordData, PreviousTypes, PreviousIds); |
118 | } |
119 | |
120 | /// Given a sequence of combined type and ID records, compute global hashes |
121 | /// for each of them, returning the results in a vector of hashed types. |
122 | template <typename Range> |
123 | static std::vector<GloballyHashedType> hashTypes(Range &&Records) { |
124 | std::vector<GloballyHashedType> Hashes; |
125 | bool UnresolvedRecords = false; |
126 | for (const auto &R : Records) { |
127 | GloballyHashedType H = hashType(R, Hashes, Hashes); |
128 | if (H.empty()) |
129 | UnresolvedRecords = true; |
130 | Hashes.push_back(x: H); |
131 | } |
132 | |
133 | // In some rare cases, there might be records with forward references in the |
134 | // stream. Several passes might be needed to fully hash each record in the |
135 | // Type stream. However this occurs on very small OBJs generated by MASM, |
136 | // with a dozen records at most. Therefore this codepath isn't |
137 | // time-critical, as it isn't taken in 99% of cases. |
138 | while (UnresolvedRecords) { |
139 | UnresolvedRecords = false; |
140 | auto HashIt = Hashes.begin(); |
141 | for (const auto &R : Records) { |
142 | if (HashIt->empty()) { |
143 | GloballyHashedType H = hashType(R, Hashes, Hashes); |
144 | if (H.empty()) |
145 | UnresolvedRecords = true; |
146 | else |
147 | *HashIt = H; |
148 | } |
149 | ++HashIt; |
150 | } |
151 | } |
152 | |
153 | return Hashes; |
154 | } |
155 | |
156 | /// Given a sequence of combined type and ID records, compute global hashes |
157 | /// for each of them, returning the results in a vector of hashed types. |
158 | template <typename Range> |
159 | static std::vector<GloballyHashedType> |
160 | hashIds(Range &&Records, ArrayRef<GloballyHashedType> TypeHashes) { |
161 | std::vector<GloballyHashedType> IdHashes; |
162 | for (const auto &R : Records) |
163 | IdHashes.push_back(hashType(R, TypeHashes, IdHashes)); |
164 | |
165 | return IdHashes; |
166 | } |
167 | |
168 | static std::vector<GloballyHashedType> |
169 | hashTypeCollection(TypeCollection &Types) { |
170 | std::vector<GloballyHashedType> Hashes; |
171 | Types.ForEachRecord(Func: [&Hashes](TypeIndex TI, const CVType &Type) { |
172 | Hashes.push_back(x: hashType(RecordData: Type.RecordData, PreviousTypes: Hashes, PreviousIds: Hashes)); |
173 | }); |
174 | return Hashes; |
175 | } |
176 | }; |
177 | static_assert(std::is_trivially_copyable<GloballyHashedType>::value, |
178 | "GloballyHashedType must be trivially copyable so that we can " |
179 | "reinterpret_cast arrays of hash data to arrays of " |
180 | "GloballyHashedType" ); |
181 | } // namespace codeview |
182 | |
183 | template <> struct DenseMapInfo<codeview::LocallyHashedType> { |
184 | static codeview::LocallyHashedType Empty; |
185 | static codeview::LocallyHashedType Tombstone; |
186 | |
187 | static codeview::LocallyHashedType getEmptyKey() { return Empty; } |
188 | |
189 | static codeview::LocallyHashedType getTombstoneKey() { return Tombstone; } |
190 | |
191 | static unsigned getHashValue(codeview::LocallyHashedType Val) { |
192 | return Val.Hash; |
193 | } |
194 | |
195 | static bool isEqual(codeview::LocallyHashedType LHS, |
196 | codeview::LocallyHashedType RHS) { |
197 | if (LHS.Hash != RHS.Hash) |
198 | return false; |
199 | return LHS.RecordData == RHS.RecordData; |
200 | } |
201 | }; |
202 | |
203 | template <> struct DenseMapInfo<codeview::GloballyHashedType> { |
204 | static codeview::GloballyHashedType Empty; |
205 | static codeview::GloballyHashedType Tombstone; |
206 | |
207 | static codeview::GloballyHashedType getEmptyKey() { return Empty; } |
208 | |
209 | static codeview::GloballyHashedType getTombstoneKey() { return Tombstone; } |
210 | |
211 | static unsigned getHashValue(codeview::GloballyHashedType Val) { |
212 | return *reinterpret_cast<const unsigned *>(Val.Hash.data()); |
213 | } |
214 | |
215 | static bool isEqual(codeview::GloballyHashedType LHS, |
216 | codeview::GloballyHashedType RHS) { |
217 | return LHS == RHS; |
218 | } |
219 | }; |
220 | |
221 | template <> struct format_provider<codeview::LocallyHashedType> { |
222 | public: |
223 | static void format(const codeview::LocallyHashedType &V, |
224 | llvm::raw_ostream &Stream, StringRef Style) { |
225 | write_hex(S&: Stream, N: V.Hash, Style: HexPrintStyle::Upper, Width: 8); |
226 | } |
227 | }; |
228 | |
229 | template <> struct format_provider<codeview::GloballyHashedType> { |
230 | public: |
231 | static void format(const codeview::GloballyHashedType &V, |
232 | llvm::raw_ostream &Stream, StringRef Style) { |
233 | for (uint8_t B : V.Hash) { |
234 | write_hex(S&: Stream, N: B, Style: HexPrintStyle::Upper, Width: 2); |
235 | } |
236 | } |
237 | }; |
238 | |
239 | } // namespace llvm |
240 | |
241 | #endif |
242 | |