1 | //===-- TraceIntelPTMultiCpuDecoder.cpp -----------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "TraceIntelPTMultiCpuDecoder.h" |
10 | #include "TraceIntelPT.h" |
11 | #include "llvm/Support/Error.h" |
12 | #include <optional> |
13 | |
14 | using namespace lldb; |
15 | using namespace lldb_private; |
16 | using namespace lldb_private::trace_intel_pt; |
17 | using namespace llvm; |
18 | |
19 | TraceIntelPTMultiCpuDecoder::TraceIntelPTMultiCpuDecoder( |
20 | TraceIntelPTSP trace_sp) |
21 | : m_trace_wp(trace_sp) { |
22 | for (Process *proc : trace_sp->GetAllProcesses()) { |
23 | for (ThreadSP thread_sp : proc->GetThreadList().Threads()) { |
24 | m_tids.insert(thread_sp->GetID()); |
25 | } |
26 | } |
27 | } |
28 | |
29 | TraceIntelPTSP TraceIntelPTMultiCpuDecoder::GetTrace() { |
30 | return m_trace_wp.lock(); |
31 | } |
32 | |
33 | bool TraceIntelPTMultiCpuDecoder::TracesThread(lldb::tid_t tid) const { |
34 | return m_tids.count(x: tid); |
35 | } |
36 | |
37 | Expected<std::optional<uint64_t>> TraceIntelPTMultiCpuDecoder::FindLowestTSC() { |
38 | std::optional<uint64_t> lowest_tsc; |
39 | TraceIntelPTSP trace_sp = GetTrace(); |
40 | |
41 | Error err = GetTrace()->OnAllCpusBinaryDataRead( |
42 | IntelPTDataKinds::kIptTrace, |
43 | [&](const DenseMap<cpu_id_t, ArrayRef<uint8_t>> &buffers) -> Error { |
44 | for (auto &cpu_id_to_buffer : buffers) { |
45 | Expected<std::optional<uint64_t>> tsc = |
46 | FindLowestTSCInTrace(trace_intel_pt&: *trace_sp, buffer: cpu_id_to_buffer.second); |
47 | if (!tsc) |
48 | return tsc.takeError(); |
49 | if (*tsc && (!lowest_tsc || *lowest_tsc > **tsc)) |
50 | lowest_tsc = **tsc; |
51 | } |
52 | return Error::success(); |
53 | }); |
54 | if (err) |
55 | return std::move(err); |
56 | return lowest_tsc; |
57 | } |
58 | |
59 | Expected<DecodedThreadSP> TraceIntelPTMultiCpuDecoder::Decode(Thread &thread) { |
60 | if (Error err = CorrelateContextSwitchesAndIntelPtTraces()) |
61 | return std::move(err); |
62 | |
63 | TraceIntelPTSP trace_sp = GetTrace(); |
64 | |
65 | return trace_sp->GetThreadTimer(tid: thread.GetID()) |
66 | .TimeTask(name: "Decoding instructions" , task: [&]() -> Expected<DecodedThreadSP> { |
67 | auto it = m_decoded_threads.find(Val: thread.GetID()); |
68 | if (it != m_decoded_threads.end()) |
69 | return it->second; |
70 | |
71 | DecodedThreadSP decoded_thread_sp = std::make_shared<DecodedThread>( |
72 | args: thread.shared_from_this(), args: trace_sp->GetPerfZeroTscConversion()); |
73 | |
74 | Error err = trace_sp->OnAllCpusBinaryDataRead( |
75 | IntelPTDataKinds::kIptTrace, |
76 | [&](const DenseMap<cpu_id_t, ArrayRef<uint8_t>> &buffers) -> Error { |
77 | auto it = |
78 | m_continuous_executions_per_thread->find(Val: thread.GetID()); |
79 | if (it != m_continuous_executions_per_thread->end()) |
80 | return DecodeSystemWideTraceForThread( |
81 | decoded_thread&: *decoded_thread_sp, trace_intel_pt&: *trace_sp, buffers, executions: it->second); |
82 | |
83 | return Error::success(); |
84 | }); |
85 | if (err) |
86 | return std::move(err); |
87 | |
88 | m_decoded_threads.try_emplace(Key: thread.GetID(), Args&: decoded_thread_sp); |
89 | return decoded_thread_sp; |
90 | }); |
91 | } |
92 | |
93 | static Expected<std::vector<PSBBlock>> GetPSBBlocksForCPU(TraceIntelPT &trace, |
94 | cpu_id_t cpu_id) { |
95 | std::vector<PSBBlock> psb_blocks; |
96 | Error err = trace.OnCpuBinaryDataRead( |
97 | cpu_id, IntelPTDataKinds::kIptTrace, |
98 | [&](ArrayRef<uint8_t> data) -> Error { |
99 | Expected<std::vector<PSBBlock>> split_trace = |
100 | SplitTraceIntoPSBBlock(trace_intel_pt&: trace, buffer: data, /*expect_tscs=*/true); |
101 | if (!split_trace) |
102 | return split_trace.takeError(); |
103 | |
104 | psb_blocks = std::move(*split_trace); |
105 | return Error::success(); |
106 | }); |
107 | if (err) |
108 | return std::move(err); |
109 | return psb_blocks; |
110 | } |
111 | |
112 | Expected<DenseMap<lldb::tid_t, std::vector<IntelPTThreadContinousExecution>>> |
113 | TraceIntelPTMultiCpuDecoder::DoCorrelateContextSwitchesAndIntelPtTraces() { |
114 | DenseMap<lldb::tid_t, std::vector<IntelPTThreadContinousExecution>> |
115 | continuous_executions_per_thread; |
116 | TraceIntelPTSP trace_sp = GetTrace(); |
117 | |
118 | std::optional<LinuxPerfZeroTscConversion> conv_opt = |
119 | trace_sp->GetPerfZeroTscConversion(); |
120 | if (!conv_opt) |
121 | return createStringError( |
122 | EC: inconvertibleErrorCode(), |
123 | Msg: "TSC to nanoseconds conversion values were not found" ); |
124 | |
125 | LinuxPerfZeroTscConversion tsc_conversion = *conv_opt; |
126 | |
127 | for (cpu_id_t cpu_id : trace_sp->GetTracedCpus()) { |
128 | Expected<std::vector<PSBBlock>> psb_blocks = |
129 | GetPSBBlocksForCPU(*trace_sp, cpu_id); |
130 | if (!psb_blocks) |
131 | return psb_blocks.takeError(); |
132 | |
133 | m_total_psb_blocks += psb_blocks->size(); |
134 | // We'll be iterating through the thread continuous executions and the intel |
135 | // pt subtraces sorted by time. |
136 | auto it = psb_blocks->begin(); |
137 | auto on_new_thread_execution = |
138 | [&](const ThreadContinuousExecution &thread_execution) { |
139 | IntelPTThreadContinousExecution execution(thread_execution); |
140 | |
141 | for (; it != psb_blocks->end() && |
142 | *it->tsc < thread_execution.GetEndTSC(); |
143 | it++) { |
144 | if (*it->tsc > thread_execution.GetStartTSC()) { |
145 | execution.psb_blocks.push_back(*it); |
146 | } else { |
147 | m_unattributed_psb_blocks++; |
148 | } |
149 | } |
150 | continuous_executions_per_thread[thread_execution.tid].push_back( |
151 | execution); |
152 | }; |
153 | Error err = trace_sp->OnCpuBinaryDataRead( |
154 | cpu_id, IntelPTDataKinds::kPerfContextSwitchTrace, |
155 | [&](ArrayRef<uint8_t> data) -> Error { |
156 | Expected<std::vector<ThreadContinuousExecution>> executions = |
157 | DecodePerfContextSwitchTrace(data, cpu_id, tsc_conversion); |
158 | if (!executions) |
159 | return executions.takeError(); |
160 | for (const ThreadContinuousExecution &exec : *executions) |
161 | on_new_thread_execution(exec); |
162 | return Error::success(); |
163 | }); |
164 | if (err) |
165 | return std::move(err); |
166 | |
167 | m_unattributed_psb_blocks += psb_blocks->end() - it; |
168 | } |
169 | // We now sort the executions of each thread to have them ready for |
170 | // instruction decoding |
171 | for (auto &tid_executions : continuous_executions_per_thread) |
172 | std::sort(first: tid_executions.second.begin(), last: tid_executions.second.end()); |
173 | |
174 | return continuous_executions_per_thread; |
175 | } |
176 | |
177 | Error TraceIntelPTMultiCpuDecoder::CorrelateContextSwitchesAndIntelPtTraces() { |
178 | if (m_setup_error) |
179 | return createStringError(EC: inconvertibleErrorCode(), Msg: m_setup_error->c_str()); |
180 | |
181 | if (m_continuous_executions_per_thread) |
182 | return Error::success(); |
183 | |
184 | Error err = GetTrace()->GetGlobalTimer().TimeTask( |
185 | name: "Context switch and Intel PT traces correlation" , task: [&]() -> Error { |
186 | if (auto correlation = DoCorrelateContextSwitchesAndIntelPtTraces()) { |
187 | m_continuous_executions_per_thread.emplace(args: std::move(*correlation)); |
188 | return Error::success(); |
189 | } else { |
190 | return correlation.takeError(); |
191 | } |
192 | }); |
193 | if (err) { |
194 | m_setup_error = toString(E: std::move(err)); |
195 | return createStringError(EC: inconvertibleErrorCode(), Msg: m_setup_error->c_str()); |
196 | } |
197 | return Error::success(); |
198 | } |
199 | |
200 | size_t TraceIntelPTMultiCpuDecoder::GetNumContinuousExecutionsForThread( |
201 | lldb::tid_t tid) const { |
202 | if (!m_continuous_executions_per_thread) |
203 | return 0; |
204 | auto it = m_continuous_executions_per_thread->find(Val: tid); |
205 | if (it == m_continuous_executions_per_thread->end()) |
206 | return 0; |
207 | return it->second.size(); |
208 | } |
209 | |
210 | size_t TraceIntelPTMultiCpuDecoder::GetTotalContinuousExecutionsCount() const { |
211 | if (!m_continuous_executions_per_thread) |
212 | return 0; |
213 | size_t count = 0; |
214 | for (const auto &kv : *m_continuous_executions_per_thread) |
215 | count += kv.second.size(); |
216 | return count; |
217 | } |
218 | |
219 | size_t |
220 | TraceIntelPTMultiCpuDecoder::GePSBBlocksCountForThread(lldb::tid_t tid) const { |
221 | if (!m_continuous_executions_per_thread) |
222 | return 0; |
223 | size_t count = 0; |
224 | auto it = m_continuous_executions_per_thread->find(Val: tid); |
225 | if (it == m_continuous_executions_per_thread->end()) |
226 | return 0; |
227 | for (const IntelPTThreadContinousExecution &execution : it->second) |
228 | count += execution.psb_blocks.size(); |
229 | return count; |
230 | } |
231 | |
232 | size_t TraceIntelPTMultiCpuDecoder::GetUnattributedPSBBlocksCount() const { |
233 | return m_unattributed_psb_blocks; |
234 | } |
235 | |
236 | size_t TraceIntelPTMultiCpuDecoder::GetTotalPSBBlocksCount() const { |
237 | return m_total_psb_blocks; |
238 | } |
239 | |