TraceDumper.h source code [lldb/include/lldb/Target/TraceDumper.h]

1	//===-- TraceDumper.h -------------------------------------------- C++ --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "lldb/Symbol/SymbolContext.h"
10	#include "lldb/Target/TraceCursor.h"
11	#include <optional>
12
13	#ifndef LLDB_TARGET_TRACE_INSTRUCTION_DUMPER_H
14	#define LLDB_TARGET_TRACE_INSTRUCTION_DUMPER_H
15
16	namespace lldb_private {
17
/// Configuration used by \a TraceDumper for traversing and dumping
/// instructions. Every flag defaults to \b false and every optional value
/// defaults to empty.
struct TraceDumperOptions {
  /// If \b true, the cursor will be iterated forwards starting from the
  /// oldest instruction. Otherwise, the iteration starts from the most
  /// recent instruction.
  bool forwards = false;

  /// Dump only instruction addresses without disassembly nor symbol
  /// information.
  bool raw = false;

  /// Dump in JSON format.
  bool json = false;

  /// When dumping in JSON format, pretty print the output.
  bool pretty_print_json = false;

  /// For each trace item, print the corresponding timestamp in nanoseconds if
  /// available.
  bool show_timestamps = false;

  /// Dump the events that happened between instructions.
  bool show_events = false;

  /// Dump events and none of the instructions.
  bool only_events = false;

  /// For each instruction, print the instruction kind.
  bool show_control_flow_kind = false;

  /// Optional custom id to start traversing from.
  std::optional<uint64_t> id;

  /// Optional number of instructions to skip from the starting position
  /// of the cursor.
  std::optional<size_t> skip;
};
47
48	/// Class used to dump the instructions of a \a TraceCursor using its current
49	/// state and granularity.
50	class TraceDumper {
51	public:
52	/// Helper struct that holds symbol, disassembly and address information of an
53	/// instruction.
54	struct SymbolInfo {
55	SymbolContext sc;
56	Address address;
57	lldb::DisassemblerSP disassembler;
58	lldb::InstructionSP instruction;
59	lldb_private::ExecutionContext exe_ctx;
60	};
61
62	/// Helper struct that holds all the information we know about a trace item
63	struct TraceItem {
64	lldb::user_id_t id;
65	lldb::addr_t load_address;
66	std::optional<double> timestamp;
67	std::optional<uint64_t> hw_clock;
68	std::optional<std::string> sync_point_metadata;
69	std::optional<llvm::StringRef> error;
70	std::optional<lldb::TraceEvent> event;
71	std::optional<SymbolInfo> symbol_info;
72	std::optional<SymbolInfo> prev_symbol_info;
73	std::optional<lldb::cpu_id_t> cpu_id;
74	};
75
76	/// An object representing a traced function call.
77	///
78	/// A function call is represented using segments and subcalls.
79	///
80	/// TracedSegment:
81	/// A traced segment is a maximal list of consecutive traced instructions
82	/// that belong to the same function call. A traced segment will end in
83	/// three possible ways:
/// - With a call to a function deeper in the callstack. In this case,
///   most of the time this nested call will return and resume with the
///   next segment of this segment's owning function call. More on this
///   later.
88	/// - Abruptly due to end of trace. In this case, we weren't able to trace
89	/// the end of this function call.
90	/// - Simply a return higher in the callstack.
91	///
/// In terms of implementation details, a segment can be represented with
/// the beginning and ending instruction IDs from the instruction trace.
94	///
95	/// UntracedPrefixSegment:
96	/// It might happen that we didn't trace the beginning of a function and we
97	/// saw it for the first time as part of a return. As a way to signal these
98	/// cases, we have a placeholder UntracedPrefixSegment class that completes the
99	/// callgraph.
100	///
101	/// Example:
102	/// We might have this piece of execution:
103	///
///   main() [offset 0x00 to 0x20] [traced instruction ids 1 to 4]
///     foo() [offset 0x00 to 0x80] [traced instruction ids 5 to 20]
///       # main invoked foo
///   main() [offset 0x24 to 0x40] [traced instruction ids 21 to 30]
108	///
109	/// In this case, our function main invokes foo. We have 3 segments: main
110	/// [offset 0x00 to 0x20], foo() [offset 0x00 to 0x80], and main() [offset
111	/// 0x24 to 0x40]. We also have the instruction ids from the corresponding
112	/// linear instruction trace for each segment.
113	///
/// But what if we started tracing from the middle of foo? Then we'd have
/// an incomplete trace:
///
///     foo() [offset 0x30 to 0x80] [traced instruction ids 1 to 10]
///   main() [offset 0x24 to 0x40] [traced instruction ids 11 to 20]
119	///
120	/// Notice that we changed the instruction ids because this is a new trace.
121	/// Here, in order to have a somewhat complete tree with good traversal
122	/// capabilities, we can create an UntracedPrefixSegment to signal the portion of
123	/// main() that we didn't trace. We don't know if this segment was in fact
124	/// multiple segments with many function calls. We'll never know. The
125	/// resulting tree looks like the following:
126	///
///   main() [untraced]
///     foo() [offset 0x30 to 0x80] [traced instruction ids 1 to 10]
///   main() [offset 0x24 to 0x40] [traced instruction ids 11 to 20]
130	///
131	/// And in pseudo-code:
132	///
///   FunctionCall [
///     UntracedPrefixSegment {
///       symbol: main()
///       nestedCall: FunctionCall [ # this untraced segment has a nested call
///         TracedSegment {
///           symbol: foo()
///           fromInstructionId: 1
///           toInstructionId: 10
///           nestedCall: none # this doesn't have a nested call
///         }
///       ]
///     },
///     TracedSegment {
///       symbol: main()
///       fromInstructionId: 11
///       toInstructionId: 20
///       nestedCall: none # this also doesn't have a nested call
///     }
///   ]
153	///
154	/// We can see the nested structure and how instructions are represented as
155	/// segments.
156	///
157	///
158	/// Returns:
159	/// Code doesn't always behave intuitively. Some interesting functions
160	/// might modify the stack and thus change the behavior of common
161	/// instructions like CALL and RET. We try to identify these cases, and
162	/// the result is that the return edge from a segment might connect with a
/// function call very high in the stack. For example, you might have
164	///
///   main()
///     foo()
///       bar()
///       # here bar modifies the stack and pops foo() from it. Then it
///       # finishes with a RET (return)
///   main() # we came back directly to main()
171	///
172	/// I have observed some trampolines doing this, as well as some std
173	/// functions (like ostream functions). So consumers should be aware of
174	/// this.
175	///
176	/// There are all sorts of "abnormal" behaviors you can see in code, and
177	/// whenever we fail at identifying what's going on, we prefer to create a
178	/// new tree.
179	///
180	/// Function call forest:
181	/// A single tree would suffice if a trace didn't contain errors nor
182	/// abnormal behaviors that made our algorithms fail. Sadly these
183	/// anomalies exist and we prefer not to use too many heuristics and
184	/// probably end up lying to the user. So we create a new tree from the
185	/// point we can't continue using the previous tree. This results in
186	/// having a forest instead of a single tree. This is probably the best we
/// can do if consumers want to use this data to perform performance
188	/// analysis or reverse debugging.
189	///
190	/// Non-functions:
191	/// Not everything in a program is a function. There are blocks of
192	/// instructions that are simply labeled or even regions without symbol
/// information that we don't know what they are. We treat all of them as
194	/// functions for simplicity.
195	///
196	/// Errors:
197	/// Whenever an error is found, a new tree with a single segment is
198	/// created. All consecutive errors after the original one are then
199	/// appended to this segment. As a note, something that GDB does is to use
200	/// some heuristics to merge trees that were interrupted by errors. We are
201	/// leaving that out of scope until a feature like that one is really
202	/// needed.
203
204	/// Forward declaration
205	class FunctionCall;
206	using FunctionCallUP = std::unique_ptr<FunctionCall>;
207
208	class FunctionCall {
209	public:
210	class TracedSegment {
211	public:
212	/// \param[in] cursor_sp
213	/// A cursor pointing to the beginning of the segment.
214	///
215	/// \param[in] symbol_info
216	/// The symbol information of the first instruction of the segment.
217	///
218	/// \param[in] call
219	/// The FunctionCall object that owns this segment.
220	TracedSegment(const lldb::TraceCursorSP &cursor_sp,
221	const SymbolInfo &symbol_info, FunctionCall &owning_call)
222	: m_first_insn_id(cursor_sp ->GetId()),
223	m_last_insn_id(cursor_sp ->GetId()),
224	m_first_symbol_info (symbol_info), m_last_symbol_info (symbol_info),
225	m_owning_call(owning_call) {}
226
227	/// \return
228	/// The chronologically first instruction ID in this segment.
229	lldb::user_id_t GetFirstInstructionID() const;
230	/// \return
231	/// The chronologically last instruction ID in this segment.
232	lldb::user_id_t GetLastInstructionID() const;
233
234	/// \return
235	/// The symbol information of the chronologically first instruction ID
236	/// in this segment.
237	const SymbolInfo &GetFirstInstructionSymbolInfo() const;
238
239	/// \return
240	/// The symbol information of the chronologically last instruction ID in
241	/// this segment.
242	const SymbolInfo &GetLastInstructionSymbolInfo() const;
243
244	/// \return
245	/// Get the call that owns this segment.
246	const FunctionCall &GetOwningCall() const;
247
248	/// Append a new instruction to this segment.
249	///
250	/// \param[in] cursor_sp
251	/// A cursor pointing to the new instruction.
252	///
253	/// \param[in] symbol_info
254	/// The symbol information of the new instruction.
255	void AppendInsn(const lldb::TraceCursorSP &cursor_sp,
256	const SymbolInfo &symbol_info);
257
258	/// Create a nested call at the end of this segment.
259	///
260	/// \param[in] cursor_sp
261	/// A cursor pointing to the first instruction of the nested call.
262	///
263	/// \param[in] symbol_info
264	/// The symbol information of the first instruction of the nested call.
265	FunctionCall &CreateNestedCall(const lldb::TraceCursorSP &cursor_sp,
266	const SymbolInfo &symbol_info);
267
268	/// Executed the given callback if there's a nested call at the end of
269	/// this segment.
270	void IfNestedCall(std::function<void(const FunctionCall &function_call)>
271	callback) const;
272
273	private:
274	TracedSegment(const TracedSegment &) = delete;
275	TracedSegment &operator=(TracedSegment const &);
276
277	/// Delimiting instruction IDs taken chronologically.
278	/// \{
279	lldb::user_id_t m_first_insn_id;
280	lldb::user_id_t m_last_insn_id;
281	/// \}
282	/// An optional nested call starting at the end of this segment.
283	FunctionCallUP m_nested_call;
284	/// The symbol information of the delimiting instructions
285	/// \{
286	SymbolInfo m_first_symbol_info;
287	SymbolInfo m_last_symbol_info;
288	/// \}
289	FunctionCall &m_owning_call;
290	};
291
292	class UntracedPrefixSegment {
293	public:
294	/// Note: Untraced segments can only exist if have also seen a traced
295	/// segment of the same function call. Thus, we can use those traced
296	/// segments if we want symbol information and such.
297
298	UntracedPrefixSegment(FunctionCallUP &&nested_call)
299	: m_nested_call (std::move(nested_call)) {}
300
301	const FunctionCall &GetNestedCall() const;
302
303	private:
304	UntracedPrefixSegment(const UntracedPrefixSegment &) = delete;
305	UntracedPrefixSegment &operator=(UntracedPrefixSegment const &);
306	FunctionCallUP m_nested_call;
307	};
308
309	/// Create a new function call given an instruction. This will also create a
310	/// segment for that instruction.
311	///
312	/// \param[in] cursor_sp
313	/// A cursor pointing to the first instruction of that function call.
314	///
315	/// \param[in] symbol_info
316	/// The symbol information of that first instruction.
317	FunctionCall(const lldb::TraceCursorSP &cursor_sp,
318	const SymbolInfo &symbol_info);
319
320	/// Append a new traced segment to this function call.
321	///
322	/// \param[in] cursor_sp
323	/// A cursor pointing to the first instruction of the new segment.
324	///
325	/// \param[in] symbol_info
326	/// The symbol information of that first instruction.
327	void AppendSegment(const lldb::TraceCursorSP &cursor_sp,
328	const SymbolInfo &symbol_info);
329
330	/// \return
331	/// The symbol info of some traced instruction of this call.
332	const SymbolInfo &GetSymbolInfo() const;
333
334	/// \return
335	/// \b true if and only if the instructions in this function call are
336	/// trace errors, in which case this function call is a fake one.
337	bool IsError() const;
338
339	/// \return
340	/// The list of traced segments of this call.
341	const std::deque<TracedSegment> &GetTracedSegments() const;
342
343	/// \return
344	/// A non-const reference to the most-recent traced segment.
345	TracedSegment &GetLastTracedSegment();
346
347	/// Create an untraced segment for this call that jumps to the provided
348	/// nested call.
349	void SetUntracedPrefixSegment(FunctionCallUP &&nested_call);
350
351	/// \return
352	/// A optional to the untraced prefix segment of this call.
353	const std::optional<UntracedPrefixSegment> &
354	GetUntracedPrefixSegment() const;
355
356	/// \return
357	/// A pointer to the parent call. It may be \b nullptr.
358	FunctionCall GetParentCall() const*;
359
360	void SetParentCall(FunctionCall &parent_call);
361
362	private:
363	/// An optional untraced segment that precedes all the traced segments.
364	std::optional<UntracedPrefixSegment> m_untraced_prefix_segment;
365	/// The traced segments in order. We used a deque to prevent moving these
366	/// objects when appending to the list, which would happen with vector.
367	std::deque<TracedSegment> m_traced_segments;
368	/// The parent call, which might be null. Useful for reconstructing
369	/// callstacks.
370	FunctionCall m_parent_call = nullptr*;
371	/// Whether this call represents a list of consecutive errors.
372	bool m_is_error;
373	};
374
375	/// Interface used to abstract away the format in which the instruction
376	/// information will be dumped.
377	class OutputWriter {
378	public:
379	virtual ~OutputWriter() = default;
380
381	/// Notify this writer that the cursor ran out of data.
382	virtual void NoMoreData() {}
383
384	/// Dump a trace item (instruction, error or event).
385	virtual void TraceItem(const TraceItem &item) = `0`;
386
387	/// Dump a function call forest.
388	virtual void
389	FunctionCallForest(const std::vector<FunctionCallUP> &forest) = `0`;
390	};
391
392	/// Create a instruction dumper for the cursor.
393	///
394	/// \param[in] cursor
395	/// The cursor whose instructions will be dumped.
396	///
397	/// \param[in] s
398	/// The stream where to dump the instructions to.
399	///
400	/// \param[in] options
401	/// Additional options for configuring the dumping.
402	TraceDumper(lldb::TraceCursorSP cursor_sp, Stream &s,
403	const TraceDumperOptions &options);
404
405	/// Dump \a count instructions of the thread trace starting at the current
406	/// cursor position.
407	///
408	/// This effectively moves the cursor to the next unvisited position, so that
409	/// a subsequent call to this method continues where it left off.
410	///
411	/// \param[in] count
412	/// The number of instructions to print.
413	///
414	/// \return
415	/// The instruction id of the last traversed instruction, or \b
416	/// std::nullopt if no instructions were visited.
417	std::optional<lldb::user_id_t> DumpInstructions(size_t count);
418
419	/// Dump all function calls forwards chronologically and hierarchically
420	void DumpFunctionCalls();
421
422	private:
423	/// Create a trace item for the current position without symbol information.
424	TraceItem CreatRawTraceItem();
425
426	lldb::TraceCursorSP m_cursor_sp;
427	TraceDumperOptions m_options;
428	std::unique_ptr<OutputWriter> m_writer_up;
429	};
430
431	} // namespace lldb_private
432
433	#endif // LLDB_TARGET_TRACE_INSTRUCTION_DUMPER_H
434

source code of lldb/include/lldb/Target/TraceDumper.h