1//===- TypeHashing.h ---------------------------------------------*- C++-*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H
10#define LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H
11
12#include "llvm/ADT/ArrayRef.h"
13#include "llvm/ADT/Hashing.h"
14#include "llvm/ADT/StringRef.h"
15
16#include "llvm/DebugInfo/CodeView/CVRecord.h"
17#include "llvm/DebugInfo/CodeView/TypeCollection.h"
18#include "llvm/DebugInfo/CodeView/TypeIndex.h"
19
20#include "llvm/Support/FormatProviders.h"
21
22#include <type_traits>
23
24namespace llvm {
25class raw_ostream;
26namespace codeview {
27
28/// A locally hashed type represents a straightforward hash code of a serialized
29/// record. The record is simply serialized, and then the bytes are hashed by
30/// a standard algorithm. This is sufficient for the case of de-duplicating
31/// records within a single sequence of types, because if two records both have
32/// a back-reference to the same type in the same stream, they will both have
33/// the same numeric value for the TypeIndex of the back reference.
34struct LocallyHashedType {
35 hash_code Hash;
36 ArrayRef<uint8_t> RecordData;
37
38 /// Given a type, compute its local hash.
39 static LocallyHashedType hashType(ArrayRef<uint8_t> RecordData);
40
41 /// Given a sequence of types, compute all of the local hashes.
42 template <typename Range>
43 static std::vector<LocallyHashedType> hashTypes(Range &&Records) {
44 std::vector<LocallyHashedType> Hashes;
45 Hashes.reserve(n: std::distance(std::begin(Records), std::end(Records)));
46 for (const auto &R : Records)
47 Hashes.push_back(hashType(RecordData: R));
48
49 return Hashes;
50 }
51
52 static std::vector<LocallyHashedType>
53 hashTypeCollection(TypeCollection &Types) {
54 std::vector<LocallyHashedType> Hashes;
55 Types.ForEachRecord(Func: [&Hashes](TypeIndex TI, const CVType &Type) {
56 Hashes.push_back(x: hashType(RecordData: Type.RecordData));
57 });
58 return Hashes;
59 }
60};
61
62enum class GlobalTypeHashAlg : uint16_t {
63 SHA1 = 0, // standard 20-byte SHA1 hash
64 SHA1_8, // last 8-bytes of standard SHA1 hash
65 BLAKE3, // truncated 8-bytes BLAKE3
66};
67
68/// A globally hashed type represents a hash value that is sufficient to
69/// uniquely identify a record across multiple type streams or type sequences.
70/// This works by, for any given record A which references B, replacing the
71/// TypeIndex that refers to B with a previously-computed global hash for B. As
72/// this is a recursive algorithm (e.g. the global hash of B also depends on the
73/// global hashes of the types that B refers to), a global hash can uniquely
74/// identify that A occurs in another stream that has a completely
75/// different graph structure. Although the hash itself is slower to compute,
76/// probing is much faster with a globally hashed type, because the hash itself
77/// is considered "as good as" the original type. Since type records can be
78/// quite large, this makes the equality comparison of the hash much faster than
79/// equality comparison of a full record.
80struct GloballyHashedType {
81 GloballyHashedType() = default;
82 GloballyHashedType(StringRef H)
83 : GloballyHashedType(ArrayRef<uint8_t>(H.bytes_begin(), H.bytes_end())) {}
84 GloballyHashedType(ArrayRef<uint8_t> H) {
85 assert(H.size() == 8);
86 ::memcpy(dest: Hash.data(), src: H.data(), n: 8);
87 }
88 std::array<uint8_t, 8> Hash;
89
90 bool empty() const { return *(const uint64_t*)Hash.data() == 0; }
91
92 friend inline bool operator==(const GloballyHashedType &L,
93 const GloballyHashedType &R) {
94 return L.Hash == R.Hash;
95 }
96
97 friend inline bool operator!=(const GloballyHashedType &L,
98 const GloballyHashedType &R) {
99 return !(L.Hash == R.Hash);
100 }
101
102 /// Given a sequence of bytes representing a record, compute a global hash for
103 /// this record. Due to the nature of global hashes incorporating the hashes
104 /// of referenced records, this function requires a list of types and ids
105 /// that RecordData might reference, indexable by TypeIndex.
106 static GloballyHashedType hashType(ArrayRef<uint8_t> RecordData,
107 ArrayRef<GloballyHashedType> PreviousTypes,
108 ArrayRef<GloballyHashedType> PreviousIds);
109
110 /// Given a sequence of bytes representing a record, compute a global hash for
111 /// this record. Due to the nature of global hashes incorporating the hashes
112 /// of referenced records, this function requires a list of types and ids
113 /// that RecordData might reference, indexable by TypeIndex.
114 static GloballyHashedType hashType(CVType Type,
115 ArrayRef<GloballyHashedType> PreviousTypes,
116 ArrayRef<GloballyHashedType> PreviousIds) {
117 return hashType(RecordData: Type.RecordData, PreviousTypes, PreviousIds);
118 }
119
120 /// Given a sequence of combined type and ID records, compute global hashes
121 /// for each of them, returning the results in a vector of hashed types.
122 template <typename Range>
123 static std::vector<GloballyHashedType> hashTypes(Range &&Records) {
124 std::vector<GloballyHashedType> Hashes;
125 bool UnresolvedRecords = false;
126 for (const auto &R : Records) {
127 GloballyHashedType H = hashType(R, Hashes, Hashes);
128 if (H.empty())
129 UnresolvedRecords = true;
130 Hashes.push_back(x: H);
131 }
132
133 // In some rare cases, there might be records with forward references in the
134 // stream. Several passes might be needed to fully hash each record in the
135 // Type stream. However this occurs on very small OBJs generated by MASM,
136 // with a dozen records at most. Therefore this codepath isn't
137 // time-critical, as it isn't taken in 99% of cases.
138 while (UnresolvedRecords) {
139 UnresolvedRecords = false;
140 auto HashIt = Hashes.begin();
141 for (const auto &R : Records) {
142 if (HashIt->empty()) {
143 GloballyHashedType H = hashType(R, Hashes, Hashes);
144 if (H.empty())
145 UnresolvedRecords = true;
146 else
147 *HashIt = H;
148 }
149 ++HashIt;
150 }
151 }
152
153 return Hashes;
154 }
155
156 /// Given a sequence of combined type and ID records, compute global hashes
157 /// for each of them, returning the results in a vector of hashed types.
158 template <typename Range>
159 static std::vector<GloballyHashedType>
160 hashIds(Range &&Records, ArrayRef<GloballyHashedType> TypeHashes) {
161 std::vector<GloballyHashedType> IdHashes;
162 for (const auto &R : Records)
163 IdHashes.push_back(hashType(R, TypeHashes, IdHashes));
164
165 return IdHashes;
166 }
167
168 static std::vector<GloballyHashedType>
169 hashTypeCollection(TypeCollection &Types) {
170 std::vector<GloballyHashedType> Hashes;
171 Types.ForEachRecord(Func: [&Hashes](TypeIndex TI, const CVType &Type) {
172 Hashes.push_back(x: hashType(RecordData: Type.RecordData, PreviousTypes: Hashes, PreviousIds: Hashes));
173 });
174 return Hashes;
175 }
176};
177static_assert(std::is_trivially_copyable<GloballyHashedType>::value,
178 "GloballyHashedType must be trivially copyable so that we can "
179 "reinterpret_cast arrays of hash data to arrays of "
180 "GloballyHashedType");
181} // namespace codeview
182
183template <> struct DenseMapInfo<codeview::LocallyHashedType> {
184 static codeview::LocallyHashedType Empty;
185 static codeview::LocallyHashedType Tombstone;
186
187 static codeview::LocallyHashedType getEmptyKey() { return Empty; }
188
189 static codeview::LocallyHashedType getTombstoneKey() { return Tombstone; }
190
191 static unsigned getHashValue(codeview::LocallyHashedType Val) {
192 return Val.Hash;
193 }
194
195 static bool isEqual(codeview::LocallyHashedType LHS,
196 codeview::LocallyHashedType RHS) {
197 if (LHS.Hash != RHS.Hash)
198 return false;
199 return LHS.RecordData == RHS.RecordData;
200 }
201};
202
203template <> struct DenseMapInfo<codeview::GloballyHashedType> {
204 static codeview::GloballyHashedType Empty;
205 static codeview::GloballyHashedType Tombstone;
206
207 static codeview::GloballyHashedType getEmptyKey() { return Empty; }
208
209 static codeview::GloballyHashedType getTombstoneKey() { return Tombstone; }
210
211 static unsigned getHashValue(codeview::GloballyHashedType Val) {
212 return *reinterpret_cast<const unsigned *>(Val.Hash.data());
213 }
214
215 static bool isEqual(codeview::GloballyHashedType LHS,
216 codeview::GloballyHashedType RHS) {
217 return LHS == RHS;
218 }
219};
220
221template <> struct format_provider<codeview::LocallyHashedType> {
222public:
223 static void format(const codeview::LocallyHashedType &V,
224 llvm::raw_ostream &Stream, StringRef Style) {
225 write_hex(S&: Stream, N: V.Hash, Style: HexPrintStyle::Upper, Width: 8);
226 }
227};
228
229template <> struct format_provider<codeview::GloballyHashedType> {
230public:
231 static void format(const codeview::GloballyHashedType &V,
232 llvm::raw_ostream &Stream, StringRef Style) {
233 for (uint8_t B : V.Hash) {
234 write_hex(S&: Stream, N: B, Style: HexPrintStyle::Upper, Width: 2);
235 }
236 }
237};
238
239} // namespace llvm
240
241#endif
242

source code of llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h