//===-- DecodedThread.cpp -------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "DecodedThread.h"
#include "TraceCursorIntelPT.h"
#include <intel-pt.h>
#include <memory>
#include <optional>

using namespace lldb;
using namespace lldb_private;
using namespace lldb_private::trace_intel_pt;
using namespace llvm;

char IntelPTError::ID;

IntelPTError::IntelPTError(int libipt_error_code, lldb::addr_t address)
    : m_libipt_error_code(libipt_error_code), m_address(address) {
  assert(libipt_error_code < 0);
}

void IntelPTError::log(llvm::raw_ostream &OS) const {
  OS << pt_errstr(pt_errcode(m_libipt_error_code));
  if (m_address != LLDB_INVALID_ADDRESS && m_address > 0)
    OS << formatv(": {0:x+16}", m_address);
}

bool DecodedThread::TSCRange::InRange(uint64_t item_index) const {
  return item_index >= first_item_index &&
         item_index < first_item_index + items_count;
}

bool DecodedThread::NanosecondsRange::InRange(uint64_t item_index) const {
  return item_index >= first_item_index &&
         item_index < first_item_index + items_count;
}

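// Interpolate the time of an item by assuming that the items of this range
// are evenly distributed between the start of this range and the start of the
// next one (or an extrapolated end of range when there is no next range).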
double DecodedThread::NanosecondsRange::GetInterpolatedTime(
    uint64_t item_index, uint64_t begin_of_time_nanos,
    const LinuxPerfZeroTscConversion &tsc_conversion) const {
  uint64_t items_since_last_tsc = item_index - first_item_index;

  auto interpolate = [&](uint64_t next_range_start_ns) {
    if (next_range_start_ns == nanos) {
      // If the resolution of the conversion formula is bad enough to consider
      // these two timestamps as equal, then we just increase the next one by 1
      // for correction.
      next_range_start_ns++;
    }
    long double duration_per_item =
        static_cast<long double>(next_range_start_ns - nanos) / items_count;
    return (nanos - begin_of_time_nanos) +
           items_since_last_tsc * duration_per_item;
  };

  if (!next_range) {
    // If this is the last TSC range, we have to extrapolate. In this case, we
    // assume that each instruction took one TSC, which is what an instruction
    // would take if no parallelism is achieved and the frequency multiplier is
    // 1.
    return interpolate(tsc_conversion.ToNanos(tsc + items_count));
  }
  if (items_count < (next_range->tsc - tsc)) {
    // If the number of items in this range is less than the total TSC duration
    // of this range, i.e. each instruction took longer than 1 TSC, then we can
    // assume that something else happened between these TSCs (e.g. a context
    // switch, a change to kernel mode, decoding errors, etc.). In this case,
    // we also assume that each instruction took 1 TSC. A proper way to improve
    // this would be to analyze the next events in the trace looking for
    // context switches or trace disablement events, but for now, as we only
    // want an approximation, we keep it simple. We are also guaranteed that
    // the time in nanos of the next range is different from the current one,
    // just because of the definition of a NanosecondsRange.
    return interpolate(
        std::min(tsc_conversion.ToNanos(tsc + items_count), next_range->nanos));
  }

  // In this case, each item took less than 1 TSC, so some parallelism was
  // achieved, which is an indication that we didn't suffer any kind of
  // interruption.
  return interpolate(next_range->nanos);
}

uint64_t DecodedThread::GetItemsCount() const { return m_item_data.size(); }

lldb::addr_t
DecodedThread::GetInstructionLoadAddress(uint64_t item_index) const {
  return std::get<lldb::addr_t>(m_item_data[item_index]);
}

lldb::addr_t
DecodedThread::GetSyncPointOffsetByIndex(uint64_t item_index) const {
  return m_psb_offsets.find(item_index)->second;
}

ThreadSP DecodedThread::GetThread() { return m_thread_sp; }

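// Append the underlying storage for a new trace item and extend the TSC and
// nanoseconds ranges that are currently open so that they also cover the new
// item.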
template <typename Data>
DecodedThread::TraceItemStorage &
DecodedThread::CreateNewTraceItem(lldb::TraceItemKind kind, Data &&data) {
  m_item_data.emplace_back(data);

  if (m_last_tsc)
    (*m_last_tsc)->second.items_count++;
  if (m_last_nanoseconds)
    (*m_last_nanoseconds)->second.items_count++;

  return m_item_data.back();
}

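// Record the PSB packet offset for the item index about to be created and
// emit a synchronization point event at that index.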
void DecodedThread::NotifySyncPoint(lldb::addr_t psb_offset) {
  m_psb_offsets.try_emplace(GetItemsCount(), psb_offset);
  AppendEvent(lldb::eTraceEventSyncPoint);
}

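// Open a new TSC range at the upcoming item index unless the TSC value didn't
// change, in which case nothing needs to be recorded. If a TSC-to-wall-clock
// conversion is available, also open a new nanoseconds range and chain it to
// the previous one, which is what later makes timestamp interpolation
// possible.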
void DecodedThread::NotifyTsc(TSC tsc) {
  if (m_last_tsc && (*m_last_tsc)->second.tsc == tsc)
    return;
  if (m_last_tsc)
    assert(tsc >= (*m_last_tsc)->second.tsc &&
           "We can't have decreasing times");

  m_last_tsc =
      m_tscs.emplace(GetItemsCount(), TSCRange{tsc, 0, GetItemsCount()}).first;

  if (m_tsc_conversion) {
    uint64_t nanos = m_tsc_conversion->ToNanos(tsc);
    if (!m_last_nanoseconds || (*m_last_nanoseconds)->second.nanos != nanos) {
      m_last_nanoseconds =
          m_nanoseconds
              .emplace(GetItemsCount(), NanosecondsRange{nanos, tsc, nullptr, 0,
                                                         GetItemsCount()})
              .first;
      if (*m_last_nanoseconds != m_nanoseconds.begin()) {
        auto prev_range = prev(*m_last_nanoseconds);
        prev_range->second.next_range = &(*m_last_nanoseconds)->second;
      }
    }
  }
  AppendEvent(lldb::eTraceEventHWClockTick);
}

void DecodedThread::NotifyCPU(lldb::cpu_id_t cpu_id) {
  if (!m_last_cpu || *m_last_cpu != cpu_id) {
    m_cpus.emplace(GetItemsCount(), cpu_id);
    m_last_cpu = cpu_id;
    AppendEvent(lldb::eTraceEventCPUChanged);
  }
}

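// The following lookups take advantage of the maps being keyed by the index
// of the first item each entry applies to: the entry covering a given item is
// the one right before upper_bound(item_index), if any.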
lldb::cpu_id_t DecodedThread::GetCPUByIndex(uint64_t item_index) const {
  auto it = m_cpus.upper_bound(item_index);
  return it == m_cpus.begin() ? LLDB_INVALID_CPU_ID : prev(it)->second;
}

std::optional<DecodedThread::TSCRange>
DecodedThread::GetTSCRangeByIndex(uint64_t item_index) const {
  auto next_it = m_tscs.upper_bound(item_index);
  if (next_it == m_tscs.begin())
    return std::nullopt;
  return prev(next_it)->second;
}

std::optional<DecodedThread::NanosecondsRange>
DecodedThread::GetNanosecondsRangeByIndex(uint64_t item_index) {
  auto next_it = m_nanoseconds.upper_bound(item_index);
  if (next_it == m_nanoseconds.begin())
    return std::nullopt;
  return prev(next_it)->second;
}

uint64_t DecodedThread::GetTotalInstructionCount() const {
  return m_insn_count;
}

void DecodedThread::AppendEvent(lldb::TraceEvent event) {
  CreateNewTraceItem(lldb::eTraceItemKindEvent, event);
  m_events_stats.RecordEvent(event);
}

void DecodedThread::AppendInstruction(const pt_insn &insn) {
  CreateNewTraceItem(lldb::eTraceItemKindInstruction, insn.ip);
  m_insn_count++;
}

void DecodedThread::AppendError(const IntelPTError &error) {
  CreateNewTraceItem(lldb::eTraceItemKindError, error.message());
  m_error_stats.RecordError(/*fatal=*/false);
}

void DecodedThread::AppendCustomError(StringRef err, bool fatal) {
  CreateNewTraceItem(lldb::eTraceItemKindError, err.str());
  m_error_stats.RecordError(fatal);
}

lldb::TraceEvent DecodedThread::GetEventByIndex(int item_index) const {
  return std::get<lldb::TraceEvent>(m_item_data[item_index]);
}

const DecodedThread::EventsStats &DecodedThread::GetEventsStats() const {
  return m_events_stats;
}

void DecodedThread::EventsStats::RecordEvent(lldb::TraceEvent event) {
  events_counts[event]++;
  total_count++;
}

uint64_t DecodedThread::ErrorStats::GetTotalCount() const {
  uint64_t total = 0;
  for (const auto &[kind, count] : libipt_errors)
    total += count;

  return total + other_errors + fatal_errors;
}

void DecodedThread::ErrorStats::RecordError(bool fatal) {
  if (fatal)
    fatal_errors++;
  else
    other_errors++;
}

void DecodedThread::ErrorStats::RecordError(int libipt_error_code) {
  libipt_errors[pt_errstr(pt_errcode(libipt_error_code))]++;
}

const DecodedThread::ErrorStats &DecodedThread::GetErrorStats() const {
  return m_error_stats;
}

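// The kind of an item is implied by the alternative its variant storage
// holds: a string for errors, a TraceEvent for events, and a load address for
// instructions.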
lldb::TraceItemKind
DecodedThread::GetItemKindByIndex(uint64_t item_index) const {
  return std::visit(
      llvm::makeVisitor(
          [](const std::string &) { return lldb::eTraceItemKindError; },
          [](lldb::TraceEvent) { return lldb::eTraceItemKindEvent; },
          [](lldb::addr_t) { return lldb::eTraceItemKindInstruction; }),
      m_item_data[item_index]);
}

llvm::StringRef DecodedThread::GetErrorByIndex(uint64_t item_index) const {
  if (item_index >= m_item_data.size())
    return llvm::StringRef();
  return std::get<std::string>(m_item_data[item_index]);
}

DecodedThread::DecodedThread(
    ThreadSP thread_sp,
    const std::optional<LinuxPerfZeroTscConversion> &tsc_conversion)
    : m_thread_sp(thread_sp), m_tsc_conversion(tsc_conversion) {}

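// This is only an approximation: it accounts for the item storage and the
// entries of the TSC, nanoseconds and CPU maps, but not for dynamic
// allocations made by error strings nor for the maps' internal bookkeeping.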
size_t DecodedThread::CalculateApproximateMemoryUsage() const {
  return sizeof(TraceItemStorage) * m_item_data.size() +
         (sizeof(uint64_t) + sizeof(TSC)) * m_tscs.size() +
         (sizeof(uint64_t) + sizeof(uint64_t)) * m_nanoseconds.size() +
         (sizeof(uint64_t) + sizeof(lldb::cpu_id_t)) * m_cpus.size();
}
