1 | //===-- TraceDumper.h -------------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
#include "lldb/Symbol/SymbolContext.h"
#include "lldb/Target/TraceCursor.h"
#include <deque>
#include <functional>
#include <memory>
#include <optional>
#include <string>
#include <vector>
12 | |
13 | #ifndef LLDB_TARGET_TRACE_INSTRUCTION_DUMPER_H |
14 | #define LLDB_TARGET_TRACE_INSTRUCTION_DUMPER_H |
15 | |
16 | namespace lldb_private { |
17 | |
/// Settings consumed by \a TraceDumper that control how a trace is walked and
/// what gets printed for each item.
struct TraceDumperOptions {
  /// When \b true, traverse from the oldest instruction towards the newest
  /// one. When \b false, the traversal begins at the most recent instruction.
  bool forwards{false};
  /// Emit bare instruction addresses, leaving out disassembly and symbol
  /// information.
  bool raw{false};
  /// Produce the output in json format.
  bool json{false};
  /// Pretty print the output; only relevant when dumping in JSON format.
  bool pretty_print_json{false};
  /// Print, if available, the timestamp in nanoseconds of every trace item.
  bool show_timestamps{false};
  /// Include the events that happened between instructions.
  bool show_events{false};
  /// Print events exclusively, omitting every instruction.
  bool only_events{false};
  /// Print the instruction kind next to each instruction.
  bool show_control_flow_kind{false};
  /// Custom id at which the traversal starts, if provided.
  std::optional<uint64_t> id;
  /// Number of instructions to skip counting from the starting position of
  /// the cursor, if provided.
  std::optional<size_t> skip;
};
47 | |
48 | /// Class used to dump the instructions of a \a TraceCursor using its current |
49 | /// state and granularity. |
50 | class TraceDumper { |
51 | public: |
/// Helper struct that bundles the symbol, disassembly and address information
/// of a single instruction.
struct SymbolInfo {
  /// Symbol context of the instruction.
  SymbolContext sc;
  /// Address of the instruction.
  Address address;
  /// Disassembler associated with the instruction -- presumably the one that
  /// produced \a instruction; TODO confirm against the implementation.
  lldb::DisassemblerSP disassembler;
  /// The disassembled instruction.
  lldb::InstructionSP instruction;
  /// Execution context associated with the instruction.
  lldb_private::ExecutionContext exe_ctx;
};
61 | |
/// Helper struct that holds all the information we know about a trace item.
///
/// The \a std::optional members are presumably left empty whenever the
/// corresponding piece of information is not available for the item -- verify
/// against the code that fills this struct.
struct TraceItem {
  /// Id of the trace item.
  lldb::user_id_t id;
  /// Load address of the item.
  lldb::addr_t load_address;
  /// Timestamp of the item, if known.
  std::optional<double> timestamp;
  /// Hardware clock value of the item, if known.
  std::optional<uint64_t> hw_clock;
  /// Sync point metadata of the item, if any.
  std::optional<std::string> sync_point_metadata;
  /// Error message, if any. NOTE(review): this is a non-owning
  /// \a llvm::StringRef, so the referenced storage must outlive the item.
  std::optional<llvm::StringRef> error;
  /// Trace event, if any.
  std::optional<lldb::TraceEvent> event;
  /// Symbol information of the item, if any.
  std::optional<SymbolInfo> symbol_info;
  /// Symbol information of the previous item, if any.
  std::optional<SymbolInfo> prev_symbol_info;
  /// Id of the CPU associated with the item, if known.
  std::optional<lldb::cpu_id_t> cpu_id;
};
75 | |
76 | /// An object representing a traced function call. |
77 | /// |
78 | /// A function call is represented using segments and subcalls. |
79 | /// |
80 | /// TracedSegment: |
81 | /// A traced segment is a maximal list of consecutive traced instructions |
82 | /// that belong to the same function call. A traced segment will end in |
83 | /// three possible ways: |
/// - With a call to a function deeper in the callstack. In this case,
/// most of the time this nested call will return and execution will
/// resume with the next segment of this segment's owning function
/// call. More on this later.
88 | /// - Abruptly due to end of trace. In this case, we weren't able to trace |
89 | /// the end of this function call. |
90 | /// - Simply a return higher in the callstack. |
91 | /// |
/// In terms of implementation details, a segment can be represented with
/// the beginning and ending instruction IDs from the instruction trace.
94 | /// |
95 | /// UntracedPrefixSegment: |
96 | /// It might happen that we didn't trace the beginning of a function and we |
97 | /// saw it for the first time as part of a return. As a way to signal these |
98 | /// cases, we have a placeholder UntracedPrefixSegment class that completes the |
99 | /// callgraph. |
100 | /// |
101 | /// Example: |
102 | /// We might have this piece of execution: |
103 | /// |
104 | /// main() [offset 0x00 to 0x20] [traced instruction ids 1 to 4] |
105 | /// foo() [offset 0x00 to 0x80] [traced instruction ids 5 to 20] # main |
106 | /// invoked foo |
107 | /// main() [offset 0x24 to 0x40] [traced instruction ids 21 to 30] |
108 | /// |
109 | /// In this case, our function main invokes foo. We have 3 segments: main |
110 | /// [offset 0x00 to 0x20], foo() [offset 0x00 to 0x80], and main() [offset |
111 | /// 0x24 to 0x40]. We also have the instruction ids from the corresponding |
112 | /// linear instruction trace for each segment. |
113 | /// |
/// But what if we started tracing from the middle of foo? Then we'd have
/// an incomplete trace:
116 | /// |
117 | /// foo() [offset 0x30 to 0x80] [traced instruction ids 1 to 10] |
118 | /// main() [offset 0x24 to 0x40] [traced instruction ids 11 to 20] |
119 | /// |
120 | /// Notice that we changed the instruction ids because this is a new trace. |
121 | /// Here, in order to have a somewhat complete tree with good traversal |
122 | /// capabilities, we can create an UntracedPrefixSegment to signal the portion of |
123 | /// main() that we didn't trace. We don't know if this segment was in fact |
124 | /// multiple segments with many function calls. We'll never know. The |
125 | /// resulting tree looks like the following: |
126 | /// |
127 | /// main() [untraced] |
128 | /// foo() [offset 0x30 to 0x80] [traced instruction ids 1 to 10] |
129 | /// main() [offset 0x24 to 0x40] [traced instruction ids 11 to 20] |
130 | /// |
131 | /// And in pseudo-code: |
132 | /// |
133 | /// FunctionCall [ |
134 | /// UntracedPrefixSegment { |
135 | /// symbol: main() |
136 | /// nestedCall: FunctionCall [ # this untraced segment has a nested |
137 | /// call |
138 | /// TracedSegment { |
139 | /// symbol: foo() |
140 | /// fromInstructionId: 1 |
141 | /// toInstructionId: 10 |
142 | /// nestedCall: none # this doesn't have a nested call |
143 | /// } |
144 | /// } |
145 | /// ], |
146 | /// TracedSegment { |
147 | /// symbol: main() |
148 | /// fromInstructionId: 11 |
149 | /// toInstructionId: 20 |
150 | /// nestedCall: none # this also doesn't have a nested call |
151 | /// } |
152 | /// ] |
153 | /// |
154 | /// We can see the nested structure and how instructions are represented as |
155 | /// segments. |
156 | /// |
157 | /// |
158 | /// Returns: |
159 | /// Code doesn't always behave intuitively. Some interesting functions |
160 | /// might modify the stack and thus change the behavior of common |
161 | /// instructions like CALL and RET. We try to identify these cases, and |
/// the result is that the return edge from a segment might connect with a
/// function call very high in the stack. For example, you might have
164 | /// |
165 | /// main() |
166 | /// foo() |
167 | /// bar() |
/// # here bar modifies the stack and pops foo() from it. Then it
/// finishes with a RET (return)
170 | /// main() # we came back directly to main() |
171 | /// |
172 | /// I have observed some trampolines doing this, as well as some std |
173 | /// functions (like ostream functions). So consumers should be aware of |
174 | /// this. |
175 | /// |
176 | /// There are all sorts of "abnormal" behaviors you can see in code, and |
177 | /// whenever we fail at identifying what's going on, we prefer to create a |
178 | /// new tree. |
179 | /// |
180 | /// Function call forest: |
181 | /// A single tree would suffice if a trace didn't contain errors nor |
182 | /// abnormal behaviors that made our algorithms fail. Sadly these |
183 | /// anomalies exist and we prefer not to use too many heuristics and |
184 | /// probably end up lying to the user. So we create a new tree from the |
185 | /// point we can't continue using the previous tree. This results in |
/// having a forest instead of a single tree. This is probably the best we
/// can do if consumers want to use this data to perform performance
/// analysis or reverse debugging.
189 | /// |
190 | /// Non-functions: |
/// Not everything in a program is a function. There are blocks of
/// instructions that are simply labeled, or even regions without symbol
/// information for which we don't know what they are. We treat all of
/// them as functions for simplicity.
195 | /// |
196 | /// Errors: |
197 | /// Whenever an error is found, a new tree with a single segment is |
198 | /// created. All consecutive errors after the original one are then |
199 | /// appended to this segment. As a note, something that GDB does is to use |
200 | /// some heuristics to merge trees that were interrupted by errors. We are |
201 | /// leaving that out of scope until a feature like that one is really |
202 | /// needed. |
203 | |
/// Forward declaration, so that \a FunctionCallUP can be named inside the
/// definition of \a FunctionCall itself.
class FunctionCall;
/// Owning pointer to a \a FunctionCall.
using FunctionCallUP = std::unique_ptr<FunctionCall>;
207 | |
208 | class FunctionCall { |
209 | public: |
210 | class TracedSegment { |
211 | public: |
212 | /// \param[in] cursor_sp |
213 | /// A cursor pointing to the beginning of the segment. |
214 | /// |
215 | /// \param[in] symbol_info |
216 | /// The symbol information of the first instruction of the segment. |
217 | /// |
218 | /// \param[in] call |
219 | /// The FunctionCall object that owns this segment. |
220 | TracedSegment(const lldb::TraceCursorSP &cursor_sp, |
221 | const SymbolInfo &symbol_info, FunctionCall &owning_call) |
222 | : m_first_insn_id(cursor_sp->GetId()), |
223 | m_last_insn_id(cursor_sp->GetId()), |
224 | m_first_symbol_info(symbol_info), m_last_symbol_info(symbol_info), |
225 | m_owning_call(owning_call) {} |
226 | |
227 | /// \return |
228 | /// The chronologically first instruction ID in this segment. |
229 | lldb::user_id_t GetFirstInstructionID() const; |
230 | /// \return |
231 | /// The chronologically last instruction ID in this segment. |
232 | lldb::user_id_t GetLastInstructionID() const; |
233 | |
234 | /// \return |
235 | /// The symbol information of the chronologically first instruction ID |
236 | /// in this segment. |
237 | const SymbolInfo &GetFirstInstructionSymbolInfo() const; |
238 | |
239 | /// \return |
240 | /// The symbol information of the chronologically last instruction ID in |
241 | /// this segment. |
242 | const SymbolInfo &GetLastInstructionSymbolInfo() const; |
243 | |
244 | /// \return |
245 | /// Get the call that owns this segment. |
246 | const FunctionCall &GetOwningCall() const; |
247 | |
248 | /// Append a new instruction to this segment. |
249 | /// |
250 | /// \param[in] cursor_sp |
251 | /// A cursor pointing to the new instruction. |
252 | /// |
253 | /// \param[in] symbol_info |
254 | /// The symbol information of the new instruction. |
255 | void AppendInsn(const lldb::TraceCursorSP &cursor_sp, |
256 | const SymbolInfo &symbol_info); |
257 | |
258 | /// Create a nested call at the end of this segment. |
259 | /// |
260 | /// \param[in] cursor_sp |
261 | /// A cursor pointing to the first instruction of the nested call. |
262 | /// |
263 | /// \param[in] symbol_info |
264 | /// The symbol information of the first instruction of the nested call. |
265 | FunctionCall &CreateNestedCall(const lldb::TraceCursorSP &cursor_sp, |
266 | const SymbolInfo &symbol_info); |
267 | |
268 | /// Executed the given callback if there's a nested call at the end of |
269 | /// this segment. |
270 | void IfNestedCall(std::function<void(const FunctionCall &function_call)> |
271 | callback) const; |
272 | |
273 | private: |
274 | TracedSegment(const TracedSegment &) = delete; |
275 | TracedSegment &operator=(TracedSegment const &); |
276 | |
277 | /// Delimiting instruction IDs taken chronologically. |
278 | /// \{ |
279 | lldb::user_id_t m_first_insn_id; |
280 | lldb::user_id_t m_last_insn_id; |
281 | /// \} |
282 | /// An optional nested call starting at the end of this segment. |
283 | FunctionCallUP m_nested_call; |
284 | /// The symbol information of the delimiting instructions |
285 | /// \{ |
286 | SymbolInfo m_first_symbol_info; |
287 | SymbolInfo m_last_symbol_info; |
288 | /// \} |
289 | FunctionCall &m_owning_call; |
290 | }; |
291 | |
292 | class UntracedPrefixSegment { |
293 | public: |
294 | /// Note: Untraced segments can only exist if have also seen a traced |
295 | /// segment of the same function call. Thus, we can use those traced |
296 | /// segments if we want symbol information and such. |
297 | |
298 | UntracedPrefixSegment(FunctionCallUP &&nested_call) |
299 | : m_nested_call(std::move(nested_call)) {} |
300 | |
301 | const FunctionCall &GetNestedCall() const; |
302 | |
303 | private: |
304 | UntracedPrefixSegment(const UntracedPrefixSegment &) = delete; |
305 | UntracedPrefixSegment &operator=(UntracedPrefixSegment const &); |
306 | FunctionCallUP m_nested_call; |
307 | }; |
308 | |
309 | /// Create a new function call given an instruction. This will also create a |
310 | /// segment for that instruction. |
311 | /// |
312 | /// \param[in] cursor_sp |
313 | /// A cursor pointing to the first instruction of that function call. |
314 | /// |
315 | /// \param[in] symbol_info |
316 | /// The symbol information of that first instruction. |
317 | FunctionCall(const lldb::TraceCursorSP &cursor_sp, |
318 | const SymbolInfo &symbol_info); |
319 | |
320 | /// Append a new traced segment to this function call. |
321 | /// |
322 | /// \param[in] cursor_sp |
323 | /// A cursor pointing to the first instruction of the new segment. |
324 | /// |
325 | /// \param[in] symbol_info |
326 | /// The symbol information of that first instruction. |
327 | void AppendSegment(const lldb::TraceCursorSP &cursor_sp, |
328 | const SymbolInfo &symbol_info); |
329 | |
330 | /// \return |
331 | /// The symbol info of some traced instruction of this call. |
332 | const SymbolInfo &GetSymbolInfo() const; |
333 | |
334 | /// \return |
335 | /// \b true if and only if the instructions in this function call are |
336 | /// trace errors, in which case this function call is a fake one. |
337 | bool IsError() const; |
338 | |
339 | /// \return |
340 | /// The list of traced segments of this call. |
341 | const std::deque<TracedSegment> &GetTracedSegments() const; |
342 | |
343 | /// \return |
344 | /// A non-const reference to the most-recent traced segment. |
345 | TracedSegment &GetLastTracedSegment(); |
346 | |
347 | /// Create an untraced segment for this call that jumps to the provided |
348 | /// nested call. |
349 | void SetUntracedPrefixSegment(FunctionCallUP &&nested_call); |
350 | |
351 | /// \return |
352 | /// A optional to the untraced prefix segment of this call. |
353 | const std::optional<UntracedPrefixSegment> & |
354 | GetUntracedPrefixSegment() const; |
355 | |
356 | /// \return |
357 | /// A pointer to the parent call. It may be \b nullptr. |
358 | FunctionCall *GetParentCall() const; |
359 | |
360 | void SetParentCall(FunctionCall &parent_call); |
361 | |
362 | private: |
363 | /// An optional untraced segment that precedes all the traced segments. |
364 | std::optional<UntracedPrefixSegment> m_untraced_prefix_segment; |
365 | /// The traced segments in order. We used a deque to prevent moving these |
366 | /// objects when appending to the list, which would happen with vector. |
367 | std::deque<TracedSegment> m_traced_segments; |
368 | /// The parent call, which might be null. Useful for reconstructing |
369 | /// callstacks. |
370 | FunctionCall *m_parent_call = nullptr; |
371 | /// Whether this call represents a list of consecutive errors. |
372 | bool m_is_error; |
373 | }; |
374 | |
/// Interface used to abstract away the format in which the instruction
/// information will be dumped.
///
/// Implementations must provide \a TraceItem and \a FunctionCallForest;
/// \a NoMoreData has an empty default implementation.
class OutputWriter {
public:
  /// Virtual destructor, as writers are meant to be used through this
  /// interface.
  virtual ~OutputWriter() = default;

  /// Notify this writer that the cursor ran out of data. Does nothing by
  /// default.
  virtual void NoMoreData() {}

  /// Dump a trace item (instruction, error or event).
  ///
  /// \param[in] item
  ///     The trace item to dump.
  virtual void TraceItem(const TraceItem &item) = 0;

  /// Dump a function call forest.
  ///
  /// \param[in] forest
  ///     The trees of function calls to dump.
  virtual void
  FunctionCallForest(const std::vector<FunctionCallUP> &forest) = 0;
};
391 | |
/// Create an instruction dumper for the cursor.
///
/// \param[in] cursor_sp
///     The cursor whose instructions will be dumped.
///
/// \param[in] s
///     The stream where to dump the instructions to.
///
/// \param[in] options
///     Additional options for configuring the dumping.
TraceDumper(lldb::TraceCursorSP cursor_sp, Stream &s,
            const TraceDumperOptions &options);
404 | |
/// Dump \a count instructions of the thread trace starting at the current
/// cursor position.
///
/// This effectively moves the cursor to the next unvisited position, so that
/// a subsequent call to this method continues where it left off.
///
/// \param[in] count
///     The number of instructions to print.
///
/// \return
///     The instruction id of the last traversed instruction, or \b
///     std::nullopt if no instructions were visited.
std::optional<lldb::user_id_t> DumpInstructions(size_t count);
418 | |
/// Dump all function calls, traversing them forwards chronologically and
/// hierarchically.
void DumpFunctionCalls();
421 | |
private:
  /// Create a trace item for the current position without symbol information.
  ///
  /// NOTE(review): the name looks like a typo of "CreateRawTraceItem";
  /// renaming it requires updating the out-of-line definition and its callers.
  TraceItem CreatRawTraceItem();

  /// The cursor whose instructions are dumped.
  lldb::TraceCursorSP m_cursor_sp;
  /// The options that configure the dumping.
  TraceDumperOptions m_options;
  /// The writer that abstracts away the output format.
  std::unique_ptr<OutputWriter> m_writer_up;
429 | }; |
430 | |
431 | } // namespace lldb_private |
432 | |
433 | #endif // LLDB_TARGET_TRACE_INSTRUCTION_DUMPER_H |
434 | |