1 | //===- InstrProfWriter.cpp - Instrumented profiling writer ----------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains support for writing profiling data for clang's |
10 | // instrumentation based PGO and coverage. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "llvm/ProfileData/InstrProfWriter.h" |
15 | #include "llvm/ADT/STLExtras.h" |
16 | #include "llvm/ADT/SetVector.h" |
17 | #include "llvm/ADT/StringRef.h" |
18 | #include "llvm/IR/ProfileSummary.h" |
19 | #include "llvm/ProfileData/InstrProf.h" |
20 | #include "llvm/ProfileData/MemProf.h" |
21 | #include "llvm/ProfileData/ProfileCommon.h" |
22 | #include "llvm/Support/Endian.h" |
23 | #include "llvm/Support/EndianStream.h" |
24 | #include "llvm/Support/Error.h" |
25 | #include "llvm/Support/MemoryBuffer.h" |
26 | #include "llvm/Support/OnDiskHashTable.h" |
27 | #include "llvm/Support/raw_ostream.h" |
28 | #include <cstdint> |
29 | #include <memory> |
30 | #include <string> |
31 | #include <tuple> |
32 | #include <utility> |
33 | #include <vector> |
34 | |
35 | using namespace llvm; |
36 | |
// A struct to define how the data stream should be patched. For Indexed
// profiling, only uint64_t data type is needed. Instances are consumed by
// ProfOStream::patch to overwrite fields that were reserved (written as
// zero placeholders) earlier in the stream.
struct PatchItem {
  uint64_t Pos; // Where to patch (absolute stream offset).
  uint64_t *D;  // Pointer to an array of source data.
  int N;        // Number of elements in \c D array.
};
44 | |
45 | namespace llvm { |
46 | |
47 | // A wrapper class to abstract writer stream with support of bytes |
48 | // back patching. |
49 | class ProfOStream { |
50 | public: |
51 | ProfOStream(raw_fd_ostream &FD) |
52 | : IsFDOStream(true), OS(FD), LE(FD, llvm::endianness::little) {} |
53 | ProfOStream(raw_string_ostream &STR) |
54 | : IsFDOStream(false), OS(STR), LE(STR, llvm::endianness::little) {} |
55 | |
56 | uint64_t tell() { return OS.tell(); } |
57 | void write(uint64_t V) { LE.write<uint64_t>(Val: V); } |
58 | void writeByte(uint8_t V) { LE.write<uint8_t>(Val: V); } |
59 | |
60 | // \c patch can only be called when all data is written and flushed. |
61 | // For raw_string_ostream, the patch is done on the target string |
62 | // directly and it won't be reflected in the stream's internal buffer. |
63 | void patch(PatchItem *P, int NItems) { |
64 | using namespace support; |
65 | |
66 | if (IsFDOStream) { |
67 | raw_fd_ostream &FDOStream = static_cast<raw_fd_ostream &>(OS); |
68 | const uint64_t LastPos = FDOStream.tell(); |
69 | for (int K = 0; K < NItems; K++) { |
70 | FDOStream.seek(off: P[K].Pos); |
71 | for (int I = 0; I < P[K].N; I++) |
72 | write(V: P[K].D[I]); |
73 | } |
74 | // Reset the stream to the last position after patching so that users |
75 | // don't accidentally overwrite data. This makes it consistent with |
76 | // the string stream below which replaces the data directly. |
77 | FDOStream.seek(off: LastPos); |
78 | } else { |
79 | raw_string_ostream &SOStream = static_cast<raw_string_ostream &>(OS); |
80 | std::string &Data = SOStream.str(); // with flush |
81 | for (int K = 0; K < NItems; K++) { |
82 | for (int I = 0; I < P[K].N; I++) { |
83 | uint64_t Bytes = |
84 | endian::byte_swap<uint64_t, llvm::endianness::little>(value: P[K].D[I]); |
85 | Data.replace(pos: P[K].Pos + I * sizeof(uint64_t), n1: sizeof(uint64_t), |
86 | s: (const char *)&Bytes, n2: sizeof(uint64_t)); |
87 | } |
88 | } |
89 | } |
90 | } |
91 | |
92 | // If \c OS is an instance of \c raw_fd_ostream, this field will be |
93 | // true. Otherwise, \c OS will be an raw_string_ostream. |
94 | bool IsFDOStream; |
95 | raw_ostream &OS; |
96 | support::endian::Writer LE; |
97 | }; |
98 | |
99 | class InstrProfRecordWriterTrait { |
100 | public: |
101 | using key_type = StringRef; |
102 | using key_type_ref = StringRef; |
103 | |
104 | using data_type = const InstrProfWriter::ProfilingData *const; |
105 | using data_type_ref = const InstrProfWriter::ProfilingData *const; |
106 | |
107 | using hash_value_type = uint64_t; |
108 | using offset_type = uint64_t; |
109 | |
110 | llvm::endianness ValueProfDataEndianness = llvm::endianness::little; |
111 | InstrProfSummaryBuilder *SummaryBuilder; |
112 | InstrProfSummaryBuilder *CSSummaryBuilder; |
113 | |
114 | InstrProfRecordWriterTrait() = default; |
115 | |
116 | static hash_value_type ComputeHash(key_type_ref K) { |
117 | return IndexedInstrProf::ComputeHash(K); |
118 | } |
119 | |
120 | static std::pair<offset_type, offset_type> |
121 | EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) { |
122 | using namespace support; |
123 | |
124 | endian::Writer LE(Out, llvm::endianness::little); |
125 | |
126 | offset_type N = K.size(); |
127 | LE.write<offset_type>(Val: N); |
128 | |
129 | offset_type M = 0; |
130 | for (const auto &ProfileData : *V) { |
131 | const InstrProfRecord &ProfRecord = ProfileData.second; |
132 | M += sizeof(uint64_t); // The function hash |
133 | M += sizeof(uint64_t); // The size of the Counts vector |
134 | M += ProfRecord.Counts.size() * sizeof(uint64_t); |
135 | M += sizeof(uint64_t); // The size of the Bitmap vector |
136 | M += ProfRecord.BitmapBytes.size() * sizeof(uint64_t); |
137 | |
138 | // Value data |
139 | M += ValueProfData::getSize(Record: ProfileData.second); |
140 | } |
141 | LE.write<offset_type>(Val: M); |
142 | |
143 | return std::make_pair(x&: N, y&: M); |
144 | } |
145 | |
146 | void EmitKey(raw_ostream &Out, key_type_ref K, offset_type N) { |
147 | Out.write(Ptr: K.data(), Size: N); |
148 | } |
149 | |
150 | void EmitData(raw_ostream &Out, key_type_ref, data_type_ref V, offset_type) { |
151 | using namespace support; |
152 | |
153 | endian::Writer LE(Out, llvm::endianness::little); |
154 | for (const auto &ProfileData : *V) { |
155 | const InstrProfRecord &ProfRecord = ProfileData.second; |
156 | if (NamedInstrProfRecord::hasCSFlagInHash(FuncHash: ProfileData.first)) |
157 | CSSummaryBuilder->addRecord(ProfRecord); |
158 | else |
159 | SummaryBuilder->addRecord(ProfRecord); |
160 | |
161 | LE.write<uint64_t>(Val: ProfileData.first); // Function hash |
162 | LE.write<uint64_t>(Val: ProfRecord.Counts.size()); |
163 | for (uint64_t I : ProfRecord.Counts) |
164 | LE.write<uint64_t>(Val: I); |
165 | |
166 | LE.write<uint64_t>(Val: ProfRecord.BitmapBytes.size()); |
167 | for (uint64_t I : ProfRecord.BitmapBytes) |
168 | LE.write<uint64_t>(Val: I); |
169 | |
170 | // Write value data |
171 | std::unique_ptr<ValueProfData> VDataPtr = |
172 | ValueProfData::serializeFrom(Record: ProfileData.second); |
173 | uint32_t S = VDataPtr->getSize(); |
174 | VDataPtr->swapBytesFromHost(Endianness: ValueProfDataEndianness); |
175 | Out.write(Ptr: (const char *)VDataPtr.get(), Size: S); |
176 | } |
177 | } |
178 | }; |
179 | |
180 | } // end namespace llvm |
181 | |
// Construct a writer. \p Sparse controls whether all-zero function records
// are skipped on output (see shouldEncodeData); the two trace parameters
// bound the temporal profile reservoir and per-trace length.
InstrProfWriter::InstrProfWriter(bool Sparse,
                                 uint64_t TemporalProfTraceReservoirSize,
                                 uint64_t MaxTemporalProfTraceLength)
    : Sparse(Sparse), MaxTemporalProfTraceLength(MaxTemporalProfTraceLength),
      TemporalProfTraceReservoirSize(TemporalProfTraceReservoirSize),
      InfoObj(new InstrProfRecordWriterTrait()) {}
188 | |
189 | InstrProfWriter::~InstrProfWriter() { delete InfoObj; } |
190 | |
// Internal interface for testing purpose only.
void InstrProfWriter::setValueProfDataEndianness(llvm::endianness Endianness) {
  // Forwarded to the writer trait, which byte-swaps the serialized value
  // profile payload accordingly in EmitData.
  InfoObj->ValueProfDataEndianness = Endianness;
}
195 | |
// Toggle sparse output; when set, shouldEncodeData drops entries whose
// counters and bitmap bytes are all zero.
void InstrProfWriter::setOutputSparse(bool Sparse) {
  this->Sparse = Sparse;
}
199 | |
200 | void InstrProfWriter::addRecord(NamedInstrProfRecord &&I, uint64_t Weight, |
201 | function_ref<void(Error)> Warn) { |
202 | auto Name = I.Name; |
203 | auto Hash = I.Hash; |
204 | addRecord(Name, Hash, I: std::move(I), Weight, Warn); |
205 | } |
206 | |
207 | void InstrProfWriter::overlapRecord(NamedInstrProfRecord &&Other, |
208 | OverlapStats &Overlap, |
209 | OverlapStats &FuncLevelOverlap, |
210 | const OverlapFuncFilters &FuncFilter) { |
211 | auto Name = Other.Name; |
212 | auto Hash = Other.Hash; |
213 | Other.accumulateCounts(Sum&: FuncLevelOverlap.Test); |
214 | if (!FunctionData.contains(Key: Name)) { |
215 | Overlap.addOneUnique(UniqueFunc: FuncLevelOverlap.Test); |
216 | return; |
217 | } |
218 | if (FuncLevelOverlap.Test.CountSum < 1.0f) { |
219 | Overlap.Overlap.NumEntries += 1; |
220 | return; |
221 | } |
222 | auto &ProfileDataMap = FunctionData[Name]; |
223 | bool NewFunc; |
224 | ProfilingData::iterator Where; |
225 | std::tie(args&: Where, args&: NewFunc) = |
226 | ProfileDataMap.insert(KV: std::make_pair(x&: Hash, y: InstrProfRecord())); |
227 | if (NewFunc) { |
228 | Overlap.addOneMismatch(MismatchFunc: FuncLevelOverlap.Test); |
229 | return; |
230 | } |
231 | InstrProfRecord &Dest = Where->second; |
232 | |
233 | uint64_t ValueCutoff = FuncFilter.ValueCutoff; |
234 | if (!FuncFilter.NameFilter.empty() && Name.contains(Other: FuncFilter.NameFilter)) |
235 | ValueCutoff = 0; |
236 | |
237 | Dest.overlap(Other, Overlap, FuncLevelOverlap, ValueCutoff); |
238 | } |
239 | |
240 | void InstrProfWriter::addRecord(StringRef Name, uint64_t Hash, |
241 | InstrProfRecord &&I, uint64_t Weight, |
242 | function_ref<void(Error)> Warn) { |
243 | auto &ProfileDataMap = FunctionData[Name]; |
244 | |
245 | bool NewFunc; |
246 | ProfilingData::iterator Where; |
247 | std::tie(args&: Where, args&: NewFunc) = |
248 | ProfileDataMap.insert(KV: std::make_pair(x&: Hash, y: InstrProfRecord())); |
249 | InstrProfRecord &Dest = Where->second; |
250 | |
251 | auto MapWarn = [&](instrprof_error E) { |
252 | Warn(make_error<InstrProfError>(Args&: E)); |
253 | }; |
254 | |
255 | if (NewFunc) { |
256 | // We've never seen a function with this name and hash, add it. |
257 | Dest = std::move(I); |
258 | if (Weight > 1) |
259 | Dest.scale(N: Weight, D: 1, Warn: MapWarn); |
260 | } else { |
261 | // We're updating a function we've seen before. |
262 | Dest.merge(Other&: I, Weight, Warn: MapWarn); |
263 | } |
264 | |
265 | Dest.sortValueData(); |
266 | } |
267 | |
268 | void InstrProfWriter::addMemProfRecord( |
269 | const Function::GUID Id, const memprof::IndexedMemProfRecord &Record) { |
270 | auto Result = MemProfRecordData.insert(KV: {Id, Record}); |
271 | // If we inserted a new record then we are done. |
272 | if (Result.second) { |
273 | return; |
274 | } |
275 | memprof::IndexedMemProfRecord &Existing = Result.first->second; |
276 | Existing.merge(Other: Record); |
277 | } |
278 | |
279 | bool InstrProfWriter::addMemProfFrame(const memprof::FrameId Id, |
280 | const memprof::Frame &Frame, |
281 | function_ref<void(Error)> Warn) { |
282 | auto Result = MemProfFrameData.insert(KV: {Id, Frame}); |
283 | // If a mapping already exists for the current frame id and it does not |
284 | // match the new mapping provided then reset the existing contents and bail |
285 | // out. We don't support the merging of memprof data whose Frame -> Id |
286 | // mapping across profiles is inconsistent. |
287 | if (!Result.second && Result.first->second != Frame) { |
288 | Warn(make_error<InstrProfError>(Args: instrprof_error::malformed, |
289 | Args: "frame to id mapping mismatch" )); |
290 | return false; |
291 | } |
292 | return true; |
293 | } |
294 | |
295 | void InstrProfWriter::addBinaryIds(ArrayRef<llvm::object::BuildID> BIs) { |
296 | llvm::append_range(C&: BinaryIds, R&: BIs); |
297 | } |
298 | |
299 | void InstrProfWriter::addTemporalProfileTrace(TemporalProfTraceTy Trace) { |
300 | if (Trace.FunctionNameRefs.size() > MaxTemporalProfTraceLength) |
301 | Trace.FunctionNameRefs.resize(new_size: MaxTemporalProfTraceLength); |
302 | if (Trace.FunctionNameRefs.empty()) |
303 | return; |
304 | |
305 | if (TemporalProfTraceStreamSize < TemporalProfTraceReservoirSize) { |
306 | // Simply append the trace if we have not yet hit our reservoir size limit. |
307 | TemporalProfTraces.push_back(Elt: std::move(Trace)); |
308 | } else { |
309 | // Otherwise, replace a random trace in the stream. |
310 | std::uniform_int_distribution<uint64_t> Distribution( |
311 | 0, TemporalProfTraceStreamSize); |
312 | uint64_t RandomIndex = Distribution(RNG); |
313 | if (RandomIndex < TemporalProfTraces.size()) |
314 | TemporalProfTraces[RandomIndex] = std::move(Trace); |
315 | } |
316 | ++TemporalProfTraceStreamSize; |
317 | } |
318 | |
319 | void InstrProfWriter::addTemporalProfileTraces( |
320 | SmallVectorImpl<TemporalProfTraceTy> &SrcTraces, uint64_t SrcStreamSize) { |
321 | // Assume that the source has the same reservoir size as the destination to |
322 | // avoid needing to record it in the indexed profile format. |
323 | bool IsDestSampled = |
324 | (TemporalProfTraceStreamSize > TemporalProfTraceReservoirSize); |
325 | bool IsSrcSampled = (SrcStreamSize > TemporalProfTraceReservoirSize); |
326 | if (!IsDestSampled && IsSrcSampled) { |
327 | // If one of the traces are sampled, ensure that it belongs to Dest. |
328 | std::swap(LHS&: TemporalProfTraces, RHS&: SrcTraces); |
329 | std::swap(a&: TemporalProfTraceStreamSize, b&: SrcStreamSize); |
330 | std::swap(a&: IsDestSampled, b&: IsSrcSampled); |
331 | } |
332 | if (!IsSrcSampled) { |
333 | // If the source stream is not sampled, we add each source trace normally. |
334 | for (auto &Trace : SrcTraces) |
335 | addTemporalProfileTrace(Trace: std::move(Trace)); |
336 | return; |
337 | } |
338 | // Otherwise, we find the traces that would have been removed if we added |
339 | // the whole source stream. |
340 | SmallSetVector<uint64_t, 8> IndicesToReplace; |
341 | for (uint64_t I = 0; I < SrcStreamSize; I++) { |
342 | std::uniform_int_distribution<uint64_t> Distribution( |
343 | 0, TemporalProfTraceStreamSize); |
344 | uint64_t RandomIndex = Distribution(RNG); |
345 | if (RandomIndex < TemporalProfTraces.size()) |
346 | IndicesToReplace.insert(X: RandomIndex); |
347 | ++TemporalProfTraceStreamSize; |
348 | } |
349 | // Then we insert a random sample of the source traces. |
350 | llvm::shuffle(first: SrcTraces.begin(), last: SrcTraces.end(), g&: RNG); |
351 | for (const auto &[Index, Trace] : llvm::zip(t&: IndicesToReplace, u&: SrcTraces)) |
352 | TemporalProfTraces[Index] = std::move(Trace); |
353 | } |
354 | |
355 | void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW, |
356 | function_ref<void(Error)> Warn) { |
357 | for (auto &I : IPW.FunctionData) |
358 | for (auto &Func : I.getValue()) |
359 | addRecord(Name: I.getKey(), Hash: Func.first, I: std::move(Func.second), Weight: 1, Warn); |
360 | |
361 | BinaryIds.reserve(n: BinaryIds.size() + IPW.BinaryIds.size()); |
362 | for (auto &I : IPW.BinaryIds) |
363 | addBinaryIds(BIs: I); |
364 | |
365 | addTemporalProfileTraces(SrcTraces&: IPW.TemporalProfTraces, |
366 | SrcStreamSize: IPW.TemporalProfTraceStreamSize); |
367 | |
368 | MemProfFrameData.reserve(NumEntries: IPW.MemProfFrameData.size()); |
369 | for (auto &I : IPW.MemProfFrameData) { |
370 | // If we weren't able to add the frame mappings then it doesn't make sense |
371 | // to try to merge the records from this profile. |
372 | if (!addMemProfFrame(Id: I.first, Frame: I.second, Warn)) |
373 | return; |
374 | } |
375 | |
376 | MemProfRecordData.reserve(NumEntries: IPW.MemProfRecordData.size()); |
377 | for (auto &I : IPW.MemProfRecordData) { |
378 | addMemProfRecord(Id: I.first, Record: I.second); |
379 | } |
380 | } |
381 | |
382 | bool InstrProfWriter::shouldEncodeData(const ProfilingData &PD) { |
383 | if (!Sparse) |
384 | return true; |
385 | for (const auto &Func : PD) { |
386 | const InstrProfRecord &IPR = Func.second; |
387 | if (llvm::any_of(Range: IPR.Counts, P: [](uint64_t Count) { return Count > 0; })) |
388 | return true; |
389 | if (llvm::any_of(Range: IPR.BitmapBytes, P: [](uint8_t Byte) { return Byte > 0; })) |
390 | return true; |
391 | } |
392 | return false; |
393 | } |
394 | |
395 | static void setSummary(IndexedInstrProf::Summary *TheSummary, |
396 | ProfileSummary &PS) { |
397 | using namespace IndexedInstrProf; |
398 | |
399 | const std::vector<ProfileSummaryEntry> &Res = PS.getDetailedSummary(); |
400 | TheSummary->NumSummaryFields = Summary::NumKinds; |
401 | TheSummary->NumCutoffEntries = Res.size(); |
402 | TheSummary->set(K: Summary::MaxFunctionCount, V: PS.getMaxFunctionCount()); |
403 | TheSummary->set(K: Summary::MaxBlockCount, V: PS.getMaxCount()); |
404 | TheSummary->set(K: Summary::MaxInternalBlockCount, V: PS.getMaxInternalCount()); |
405 | TheSummary->set(K: Summary::TotalBlockCount, V: PS.getTotalCount()); |
406 | TheSummary->set(K: Summary::TotalNumBlocks, V: PS.getNumCounts()); |
407 | TheSummary->set(K: Summary::TotalNumFunctions, V: PS.getNumFunctions()); |
408 | for (unsigned I = 0; I < Res.size(); I++) |
409 | TheSummary->setEntry(I, E: Res[I]); |
410 | } |
411 | |
412 | Error InstrProfWriter::writeImpl(ProfOStream &OS) { |
413 | using namespace IndexedInstrProf; |
414 | using namespace support; |
415 | |
416 | OnDiskChainedHashTableGenerator<InstrProfRecordWriterTrait> Generator; |
417 | |
418 | InstrProfSummaryBuilder ISB(ProfileSummaryBuilder::DefaultCutoffs); |
419 | InfoObj->SummaryBuilder = &ISB; |
420 | InstrProfSummaryBuilder CSISB(ProfileSummaryBuilder::DefaultCutoffs); |
421 | InfoObj->CSSummaryBuilder = &CSISB; |
422 | |
423 | // Populate the hash table generator. |
424 | SmallVector<std::pair<StringRef, const ProfilingData *>, 0> OrderedData; |
425 | for (const auto &I : FunctionData) |
426 | if (shouldEncodeData(PD: I.getValue())) |
427 | OrderedData.emplace_back(Args: (I.getKey()), Args: &I.getValue()); |
428 | llvm::sort(C&: OrderedData, Comp: less_first()); |
429 | for (const auto &I : OrderedData) |
430 | Generator.insert(Key: I.first, Data: I.second); |
431 | |
432 | // Write the header. |
433 | IndexedInstrProf::Header ; |
434 | Header.Magic = IndexedInstrProf::Magic; |
435 | Header.Version = IndexedInstrProf::ProfVersion::CurrentVersion; |
436 | if (static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation)) |
437 | Header.Version |= VARIANT_MASK_IR_PROF; |
438 | if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive)) |
439 | Header.Version |= VARIANT_MASK_CSIR_PROF; |
440 | if (static_cast<bool>(ProfileKind & |
441 | InstrProfKind::FunctionEntryInstrumentation)) |
442 | Header.Version |= VARIANT_MASK_INSTR_ENTRY; |
443 | if (static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage)) |
444 | Header.Version |= VARIANT_MASK_BYTE_COVERAGE; |
445 | if (static_cast<bool>(ProfileKind & InstrProfKind::FunctionEntryOnly)) |
446 | Header.Version |= VARIANT_MASK_FUNCTION_ENTRY_ONLY; |
447 | if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf)) |
448 | Header.Version |= VARIANT_MASK_MEMPROF; |
449 | if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile)) |
450 | Header.Version |= VARIANT_MASK_TEMPORAL_PROF; |
451 | |
452 | Header.Unused = 0; |
453 | Header.HashType = static_cast<uint64_t>(IndexedInstrProf::HashType); |
454 | Header.HashOffset = 0; |
455 | Header.MemProfOffset = 0; |
456 | Header.BinaryIdOffset = 0; |
457 | Header.TemporalProfTracesOffset = 0; |
458 | int N = sizeof(IndexedInstrProf::Header) / sizeof(uint64_t); |
459 | |
460 | // Only write out all the fields except 'HashOffset', 'MemProfOffset', |
461 | // 'BinaryIdOffset' and `TemporalProfTracesOffset`. We need to remember the |
462 | // offset of these fields to allow back patching later. |
463 | for (int I = 0; I < N - 4; I++) |
464 | OS.write(V: reinterpret_cast<uint64_t *>(&Header)[I]); |
465 | |
466 | // Save the location of Header.HashOffset field in \c OS. |
467 | uint64_t HashTableStartFieldOffset = OS.tell(); |
468 | // Reserve the space for HashOffset field. |
469 | OS.write(V: 0); |
470 | |
471 | // Save the location of MemProf profile data. This is stored in two parts as |
472 | // the schema and as a separate on-disk chained hashtable. |
473 | uint64_t MemProfSectionOffset = OS.tell(); |
474 | // Reserve space for the MemProf table field to be patched later if this |
475 | // profile contains memory profile information. |
476 | OS.write(V: 0); |
477 | |
478 | // Save the location of binary ids section. |
479 | uint64_t BinaryIdSectionOffset = OS.tell(); |
480 | // Reserve space for the BinaryIdOffset field to be patched later if this |
481 | // profile contains binary ids. |
482 | OS.write(V: 0); |
483 | |
484 | uint64_t TemporalProfTracesOffset = OS.tell(); |
485 | OS.write(V: 0); |
486 | |
487 | // Reserve space to write profile summary data. |
488 | uint32_t NumEntries = ProfileSummaryBuilder::DefaultCutoffs.size(); |
489 | uint32_t SummarySize = Summary::getSize(NumSumFields: Summary::NumKinds, NumCutoffEntries: NumEntries); |
490 | // Remember the summary offset. |
491 | uint64_t SummaryOffset = OS.tell(); |
492 | for (unsigned I = 0; I < SummarySize / sizeof(uint64_t); I++) |
493 | OS.write(V: 0); |
494 | uint64_t CSSummaryOffset = 0; |
495 | uint64_t CSSummarySize = 0; |
496 | if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive)) { |
497 | CSSummaryOffset = OS.tell(); |
498 | CSSummarySize = SummarySize / sizeof(uint64_t); |
499 | for (unsigned I = 0; I < CSSummarySize; I++) |
500 | OS.write(V: 0); |
501 | } |
502 | |
503 | // Write the hash table. |
504 | uint64_t HashTableStart = Generator.Emit(Out&: OS.OS, InfoObj&: *InfoObj); |
505 | |
506 | // Write the MemProf profile data if we have it. This includes a simple schema |
507 | // with the format described below followed by the hashtable: |
508 | // uint64_t RecordTableOffset = RecordTableGenerator.Emit |
509 | // uint64_t FramePayloadOffset = Stream offset before emitting the frame table |
510 | // uint64_t FrameTableOffset = FrameTableGenerator.Emit |
511 | // uint64_t Num schema entries |
512 | // uint64_t Schema entry 0 |
513 | // uint64_t Schema entry 1 |
514 | // .... |
515 | // uint64_t Schema entry N - 1 |
516 | // OnDiskChainedHashTable MemProfRecordData |
517 | // OnDiskChainedHashTable MemProfFrameData |
518 | uint64_t MemProfSectionStart = 0; |
519 | if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf)) { |
520 | MemProfSectionStart = OS.tell(); |
521 | OS.write(V: 0ULL); // Reserve space for the memprof record table offset. |
522 | OS.write(V: 0ULL); // Reserve space for the memprof frame payload offset. |
523 | OS.write(V: 0ULL); // Reserve space for the memprof frame table offset. |
524 | |
525 | auto Schema = memprof::PortableMemInfoBlock::getSchema(); |
526 | OS.write(V: static_cast<uint64_t>(Schema.size())); |
527 | for (const auto Id : Schema) { |
528 | OS.write(V: static_cast<uint64_t>(Id)); |
529 | } |
530 | |
531 | auto RecordWriter = std::make_unique<memprof::RecordWriterTrait>(); |
532 | RecordWriter->Schema = &Schema; |
533 | OnDiskChainedHashTableGenerator<memprof::RecordWriterTrait> |
534 | RecordTableGenerator; |
535 | for (auto &I : MemProfRecordData) { |
536 | // Insert the key (func hash) and value (memprof record). |
537 | RecordTableGenerator.insert(Key: I.first, Data&: I.second); |
538 | } |
539 | // Release the memory of this MapVector as it is no longer needed. |
540 | MemProfRecordData.clear(); |
541 | |
542 | // The call to Emit invokes RecordWriterTrait::EmitData which destructs |
543 | // the memprof record copies owned by the RecordTableGenerator. This works |
544 | // because the RecordTableGenerator is not used after this point. |
545 | uint64_t RecordTableOffset = |
546 | RecordTableGenerator.Emit(Out&: OS.OS, InfoObj&: *RecordWriter); |
547 | |
548 | uint64_t FramePayloadOffset = OS.tell(); |
549 | |
550 | auto FrameWriter = std::make_unique<memprof::FrameWriterTrait>(); |
551 | OnDiskChainedHashTableGenerator<memprof::FrameWriterTrait> |
552 | FrameTableGenerator; |
553 | for (auto &I : MemProfFrameData) { |
554 | // Insert the key (frame id) and value (frame contents). |
555 | FrameTableGenerator.insert(Key: I.first, Data&: I.second); |
556 | } |
557 | // Release the memory of this MapVector as it is no longer needed. |
558 | MemProfFrameData.clear(); |
559 | |
560 | uint64_t FrameTableOffset = FrameTableGenerator.Emit(Out&: OS.OS, InfoObj&: *FrameWriter); |
561 | |
562 | PatchItem PatchItems[] = { |
563 | {.Pos: MemProfSectionStart, .D: &RecordTableOffset, .N: 1}, |
564 | {.Pos: MemProfSectionStart + sizeof(uint64_t), .D: &FramePayloadOffset, .N: 1}, |
565 | {.Pos: MemProfSectionStart + 2 * sizeof(uint64_t), .D: &FrameTableOffset, .N: 1}, |
566 | }; |
567 | OS.patch(P: PatchItems, NItems: 3); |
568 | } |
569 | |
570 | // BinaryIdSection has two parts: |
571 | // 1. uint64_t BinaryIdsSectionSize |
572 | // 2. list of binary ids that consist of: |
573 | // a. uint64_t BinaryIdLength |
574 | // b. uint8_t BinaryIdData |
575 | // c. uint8_t Padding (if necessary) |
576 | uint64_t BinaryIdSectionStart = OS.tell(); |
577 | // Calculate size of binary section. |
578 | uint64_t BinaryIdsSectionSize = 0; |
579 | |
580 | // Remove duplicate binary ids. |
581 | llvm::sort(C&: BinaryIds); |
582 | BinaryIds.erase(first: std::unique(first: BinaryIds.begin(), last: BinaryIds.end()), |
583 | last: BinaryIds.end()); |
584 | |
585 | for (auto BI : BinaryIds) { |
586 | // Increment by binary id length data type size. |
587 | BinaryIdsSectionSize += sizeof(uint64_t); |
588 | // Increment by binary id data length, aligned to 8 bytes. |
589 | BinaryIdsSectionSize += alignToPowerOf2(Value: BI.size(), Align: sizeof(uint64_t)); |
590 | } |
591 | // Write binary ids section size. |
592 | OS.write(V: BinaryIdsSectionSize); |
593 | |
594 | for (auto BI : BinaryIds) { |
595 | uint64_t BILen = BI.size(); |
596 | // Write binary id length. |
597 | OS.write(V: BILen); |
598 | // Write binary id data. |
599 | for (unsigned K = 0; K < BILen; K++) |
600 | OS.writeByte(V: BI[K]); |
601 | // Write padding if necessary. |
602 | uint64_t PaddingSize = alignToPowerOf2(Value: BILen, Align: sizeof(uint64_t)) - BILen; |
603 | for (unsigned K = 0; K < PaddingSize; K++) |
604 | OS.writeByte(V: 0); |
605 | } |
606 | |
607 | uint64_t TemporalProfTracesSectionStart = 0; |
608 | if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile)) { |
609 | TemporalProfTracesSectionStart = OS.tell(); |
610 | OS.write(V: TemporalProfTraces.size()); |
611 | OS.write(V: TemporalProfTraceStreamSize); |
612 | for (auto &Trace : TemporalProfTraces) { |
613 | OS.write(V: Trace.Weight); |
614 | OS.write(V: Trace.FunctionNameRefs.size()); |
615 | for (auto &NameRef : Trace.FunctionNameRefs) |
616 | OS.write(V: NameRef); |
617 | } |
618 | } |
619 | |
620 | // Allocate space for data to be serialized out. |
621 | std::unique_ptr<IndexedInstrProf::Summary> TheSummary = |
622 | IndexedInstrProf::allocSummary(TotalSize: SummarySize); |
623 | // Compute the Summary and copy the data to the data |
624 | // structure to be serialized out (to disk or buffer). |
625 | std::unique_ptr<ProfileSummary> PS = ISB.getSummary(); |
626 | setSummary(TheSummary: TheSummary.get(), PS&: *PS); |
627 | InfoObj->SummaryBuilder = nullptr; |
628 | |
629 | // For Context Sensitive summary. |
630 | std::unique_ptr<IndexedInstrProf::Summary> TheCSSummary = nullptr; |
631 | if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive)) { |
632 | TheCSSummary = IndexedInstrProf::allocSummary(TotalSize: SummarySize); |
633 | std::unique_ptr<ProfileSummary> CSPS = CSISB.getSummary(); |
634 | setSummary(TheSummary: TheCSSummary.get(), PS&: *CSPS); |
635 | } |
636 | InfoObj->CSSummaryBuilder = nullptr; |
637 | |
638 | // Now do the final patch: |
639 | PatchItem PatchItems[] = { |
640 | // Patch the Header.HashOffset field. |
641 | {.Pos: HashTableStartFieldOffset, .D: &HashTableStart, .N: 1}, |
642 | // Patch the Header.MemProfOffset (=0 for profiles without MemProf |
643 | // data). |
644 | {.Pos: MemProfSectionOffset, .D: &MemProfSectionStart, .N: 1}, |
645 | // Patch the Header.BinaryIdSectionOffset. |
646 | {.Pos: BinaryIdSectionOffset, .D: &BinaryIdSectionStart, .N: 1}, |
647 | // Patch the Header.TemporalProfTracesOffset (=0 for profiles without |
648 | // traces). |
649 | {.Pos: TemporalProfTracesOffset, .D: &TemporalProfTracesSectionStart, .N: 1}, |
650 | // Patch the summary data. |
651 | {.Pos: SummaryOffset, .D: reinterpret_cast<uint64_t *>(TheSummary.get()), |
652 | .N: (int)(SummarySize / sizeof(uint64_t))}, |
653 | {.Pos: CSSummaryOffset, .D: reinterpret_cast<uint64_t *>(TheCSSummary.get()), |
654 | .N: (int)CSSummarySize}}; |
655 | |
656 | OS.patch(P: PatchItems, NItems: std::size(PatchItems)); |
657 | |
658 | for (const auto &I : FunctionData) |
659 | for (const auto &F : I.getValue()) |
660 | if (Error E = validateRecord(Func: F.second)) |
661 | return E; |
662 | |
663 | return Error::success(); |
664 | } |
665 | |
666 | Error InstrProfWriter::write(raw_fd_ostream &OS) { |
667 | // Write the hash table. |
668 | ProfOStream POS(OS); |
669 | return writeImpl(OS&: POS); |
670 | } |
671 | |
672 | Error InstrProfWriter::write(raw_string_ostream &OS) { |
673 | ProfOStream POS(OS); |
674 | return writeImpl(OS&: POS); |
675 | } |
676 | |
677 | std::unique_ptr<MemoryBuffer> InstrProfWriter::writeBuffer() { |
678 | std::string Data; |
679 | raw_string_ostream OS(Data); |
680 | // Write the hash table. |
681 | if (Error E = write(OS)) |
682 | return nullptr; |
683 | // Return this in an aligned memory buffer. |
684 | return MemoryBuffer::getMemBufferCopy(InputData: Data); |
685 | } |
686 | |
// Human-readable names of the value profile kinds, expanded from the shared
// .inc definition file; indexed by value-kind number (see writeRecordInText).
static const char *ValueProfKindStr[] = {
#define VALUE_PROF_KIND(Enumerator, Value, Descr) #Enumerator,
#include "llvm/ProfileData/InstrProfData.inc"
};
691 | |
692 | Error InstrProfWriter::validateRecord(const InstrProfRecord &Func) { |
693 | for (uint32_t VK = 0; VK <= IPVK_Last; VK++) { |
694 | uint32_t NS = Func.getNumValueSites(ValueKind: VK); |
695 | if (!NS) |
696 | continue; |
697 | for (uint32_t S = 0; S < NS; S++) { |
698 | uint32_t ND = Func.getNumValueDataForSite(ValueKind: VK, Site: S); |
699 | std::unique_ptr<InstrProfValueData[]> VD = Func.getValueForSite(ValueKind: VK, Site: S); |
700 | DenseSet<uint64_t> SeenValues; |
701 | for (uint32_t I = 0; I < ND; I++) |
702 | if ((VK != IPVK_IndirectCallTarget) && !SeenValues.insert(V: VD[I].Value).second) |
703 | return make_error<InstrProfError>(Args: instrprof_error::invalid_prof); |
704 | } |
705 | } |
706 | |
707 | return Error::success(); |
708 | } |
709 | |
710 | void InstrProfWriter::writeRecordInText(StringRef Name, uint64_t Hash, |
711 | const InstrProfRecord &Func, |
712 | InstrProfSymtab &Symtab, |
713 | raw_fd_ostream &OS) { |
714 | OS << Name << "\n" ; |
715 | OS << "# Func Hash:\n" << Hash << "\n" ; |
716 | OS << "# Num Counters:\n" << Func.Counts.size() << "\n" ; |
717 | OS << "# Counter Values:\n" ; |
718 | for (uint64_t Count : Func.Counts) |
719 | OS << Count << "\n" ; |
720 | |
721 | if (Func.BitmapBytes.size() > 0) { |
722 | OS << "# Num Bitmap Bytes:\n$" << Func.BitmapBytes.size() << "\n" ; |
723 | OS << "# Bitmap Byte Values:\n" ; |
724 | for (uint8_t Byte : Func.BitmapBytes) { |
725 | OS << "0x" ; |
726 | OS.write_hex(N: Byte); |
727 | OS << "\n" ; |
728 | } |
729 | OS << "\n" ; |
730 | } |
731 | |
732 | uint32_t NumValueKinds = Func.getNumValueKinds(); |
733 | if (!NumValueKinds) { |
734 | OS << "\n" ; |
735 | return; |
736 | } |
737 | |
738 | OS << "# Num Value Kinds:\n" << Func.getNumValueKinds() << "\n" ; |
739 | for (uint32_t VK = 0; VK < IPVK_Last + 1; VK++) { |
740 | uint32_t NS = Func.getNumValueSites(ValueKind: VK); |
741 | if (!NS) |
742 | continue; |
743 | OS << "# ValueKind = " << ValueProfKindStr[VK] << ":\n" << VK << "\n" ; |
744 | OS << "# NumValueSites:\n" << NS << "\n" ; |
745 | for (uint32_t S = 0; S < NS; S++) { |
746 | uint32_t ND = Func.getNumValueDataForSite(ValueKind: VK, Site: S); |
747 | OS << ND << "\n" ; |
748 | std::unique_ptr<InstrProfValueData[]> VD = Func.getValueForSite(ValueKind: VK, Site: S); |
749 | for (uint32_t I = 0; I < ND; I++) { |
750 | if (VK == IPVK_IndirectCallTarget) |
751 | OS << Symtab.getFuncOrVarNameIfDefined(MD5Hash: VD[I].Value) << ":" |
752 | << VD[I].Count << "\n" ; |
753 | else |
754 | OS << VD[I].Value << ":" << VD[I].Count << "\n" ; |
755 | } |
756 | } |
757 | } |
758 | |
759 | OS << "\n" ; |
760 | } |
761 | |
762 | Error InstrProfWriter::writeText(raw_fd_ostream &OS) { |
763 | // Check CS first since it implies an IR level profile. |
764 | if (static_cast<bool>(ProfileKind & InstrProfKind::ContextSensitive)) |
765 | OS << "# CSIR level Instrumentation Flag\n:csir\n" ; |
766 | else if (static_cast<bool>(ProfileKind & InstrProfKind::IRInstrumentation)) |
767 | OS << "# IR level Instrumentation Flag\n:ir\n" ; |
768 | |
769 | if (static_cast<bool>(ProfileKind & |
770 | InstrProfKind::FunctionEntryInstrumentation)) |
771 | OS << "# Always instrument the function entry block\n:entry_first\n" ; |
772 | if (static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage)) |
773 | OS << "# Instrument block coverage\n:single_byte_coverage\n" ; |
774 | InstrProfSymtab Symtab; |
775 | |
776 | using FuncPair = detail::DenseMapPair<uint64_t, InstrProfRecord>; |
777 | using RecordType = std::pair<StringRef, FuncPair>; |
778 | SmallVector<RecordType, 4> OrderedFuncData; |
779 | |
780 | for (const auto &I : FunctionData) { |
781 | if (shouldEncodeData(PD: I.getValue())) { |
782 | if (Error E = Symtab.addFuncName(FuncName: I.getKey())) |
783 | return E; |
784 | for (const auto &Func : I.getValue()) |
785 | OrderedFuncData.push_back(Elt: std::make_pair(x: I.getKey(), y: Func)); |
786 | } |
787 | } |
788 | |
789 | if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile)) |
790 | writeTextTemporalProfTraceData(OS, Symtab); |
791 | |
792 | llvm::sort(C&: OrderedFuncData, Comp: [](const RecordType &A, const RecordType &B) { |
793 | return std::tie(args: A.first, args: A.second.first) < |
794 | std::tie(args: B.first, args: B.second.first); |
795 | }); |
796 | |
797 | for (const auto &record : OrderedFuncData) { |
798 | const StringRef &Name = record.first; |
799 | const FuncPair &Func = record.second; |
800 | writeRecordInText(Name, Hash: Func.first, Func: Func.second, Symtab, OS); |
801 | } |
802 | |
803 | for (const auto &record : OrderedFuncData) { |
804 | const FuncPair &Func = record.second; |
805 | if (Error E = validateRecord(Func: Func.second)) |
806 | return E; |
807 | } |
808 | |
809 | return Error::success(); |
810 | } |
811 | |
812 | void InstrProfWriter::writeTextTemporalProfTraceData(raw_fd_ostream &OS, |
813 | InstrProfSymtab &Symtab) { |
814 | OS << ":temporal_prof_traces\n" ; |
815 | OS << "# Num Temporal Profile Traces:\n" << TemporalProfTraces.size() << "\n" ; |
816 | OS << "# Temporal Profile Trace Stream Size:\n" |
817 | << TemporalProfTraceStreamSize << "\n" ; |
818 | for (auto &Trace : TemporalProfTraces) { |
819 | OS << "# Weight:\n" << Trace.Weight << "\n" ; |
820 | for (auto &NameRef : Trace.FunctionNameRefs) |
821 | OS << Symtab.getFuncOrVarName(MD5Hash: NameRef) << "," ; |
822 | OS << "\n" ; |
823 | } |
824 | OS << "\n" ; |
825 | } |
826 | |