1 | //===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements hierarchical time profiler. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "llvm/Support/TimeProfiler.h" |
14 | #include "llvm/ADT/STLFunctionalExtras.h" |
15 | #include "llvm/ADT/StringMap.h" |
16 | #include "llvm/Support/JSON.h" |
17 | #include "llvm/Support/Path.h" |
18 | #include "llvm/Support/Process.h" |
19 | #include "llvm/Support/Threading.h" |
20 | #include <algorithm> |
21 | #include <cassert> |
22 | #include <chrono> |
23 | #include <mutex> |
24 | #include <string> |
25 | #include <vector> |
26 | |
27 | using namespace llvm; |
28 | |
29 | namespace { |
30 | |
31 | using std::chrono::duration; |
32 | using std::chrono::duration_cast; |
33 | using std::chrono::microseconds; |
34 | using std::chrono::steady_clock; |
35 | using std::chrono::system_clock; |
36 | using std::chrono::time_point; |
37 | using std::chrono::time_point_cast; |
38 | |
39 | struct TimeTraceProfilerInstances { |
40 | std::mutex Lock; |
41 | std::vector<TimeTraceProfiler *> List; |
42 | }; |
43 | |
44 | TimeTraceProfilerInstances &getTimeTraceProfilerInstances() { |
45 | static TimeTraceProfilerInstances Instances; |
46 | return Instances; |
47 | } |
48 | |
49 | } // anonymous namespace |
50 | |
51 | // Per Thread instance |
52 | static LLVM_THREAD_LOCAL TimeTraceProfiler *TimeTraceProfilerInstance = nullptr; |
53 | |
54 | TimeTraceProfiler *llvm::getTimeTraceProfilerInstance() { |
55 | return TimeTraceProfilerInstance; |
56 | } |
57 | |
58 | namespace { |
59 | |
60 | using ClockType = steady_clock; |
61 | using TimePointType = time_point<ClockType>; |
62 | using DurationType = duration<ClockType::rep, ClockType::period>; |
63 | using CountAndDurationType = std::pair<size_t, DurationType>; |
64 | using NameAndCountAndDurationType = |
65 | std::pair<std::string, CountAndDurationType>; |
66 | |
67 | /// Represents an open or completed time section entry to be captured. |
68 | struct TimeTraceProfilerEntry { |
69 | const TimePointType Start; |
70 | TimePointType End; |
71 | const std::string Name; |
72 | const std::string Detail; |
73 | |
74 | TimeTraceProfilerEntry(TimePointType &&S, TimePointType &&E, std::string &&N, |
75 | std::string &&Dt) |
76 | : Start(std::move(S)), End(std::move(E)), Name(std::move(N)), |
77 | Detail(std::move(Dt)) {} |
78 | |
79 | // Calculate timings for FlameGraph. Cast time points to microsecond precision |
80 | // rather than casting duration. This avoids truncation issues causing inner |
81 | // scopes overruning outer scopes. |
82 | ClockType::rep getFlameGraphStartUs(TimePointType StartTime) const { |
83 | return (time_point_cast<microseconds>(t: Start) - |
84 | time_point_cast<microseconds>(t: StartTime)) |
85 | .count(); |
86 | } |
87 | |
88 | ClockType::rep getFlameGraphDurUs() const { |
89 | return (time_point_cast<microseconds>(t: End) - |
90 | time_point_cast<microseconds>(t: Start)) |
91 | .count(); |
92 | } |
93 | }; |
94 | |
95 | } // anonymous namespace |
96 | |
97 | struct llvm::TimeTraceProfiler { |
98 | TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "" ) |
99 | : BeginningOfTime(system_clock::now()), StartTime(ClockType::now()), |
100 | ProcName(ProcName), Pid(sys::Process::getProcessId()), |
101 | Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity) { |
102 | llvm::get_thread_name(Name&: ThreadName); |
103 | } |
104 | |
105 | void begin(std::string Name, llvm::function_ref<std::string()> Detail) { |
106 | Stack.emplace_back(Args: ClockType::now(), Args: TimePointType(), Args: std::move(Name), |
107 | Args: Detail()); |
108 | } |
109 | |
110 | void end() { |
111 | assert(!Stack.empty() && "Must call begin() first" ); |
112 | TimeTraceProfilerEntry &E = Stack.back(); |
113 | E.End = ClockType::now(); |
114 | |
115 | // Check that end times monotonically increase. |
116 | assert((Entries.empty() || |
117 | (E.getFlameGraphStartUs(StartTime) + E.getFlameGraphDurUs() >= |
118 | Entries.back().getFlameGraphStartUs(StartTime) + |
119 | Entries.back().getFlameGraphDurUs())) && |
120 | "TimeProfiler scope ended earlier than previous scope" ); |
121 | |
122 | // Calculate duration at full precision for overall counts. |
123 | DurationType Duration = E.End - E.Start; |
124 | |
125 | // Only include sections longer or equal to TimeTraceGranularity msec. |
126 | if (duration_cast<microseconds>(d: Duration).count() >= TimeTraceGranularity) |
127 | Entries.emplace_back(Args&: E); |
128 | |
129 | // Track total time taken by each "name", but only the topmost levels of |
130 | // them; e.g. if there's a template instantiation that instantiates other |
131 | // templates from within, we only want to add the topmost one. "topmost" |
132 | // happens to be the ones that don't have any currently open entries above |
133 | // itself. |
134 | if (llvm::none_of(Range: llvm::drop_begin(RangeOrContainer: llvm::reverse(C&: Stack)), |
135 | P: [&](const TimeTraceProfilerEntry &Val) { |
136 | return Val.Name == E.Name; |
137 | })) { |
138 | auto &CountAndTotal = CountAndTotalPerName[E.Name]; |
139 | CountAndTotal.first++; |
140 | CountAndTotal.second += Duration; |
141 | } |
142 | |
143 | Stack.pop_back(); |
144 | } |
145 | |
146 | // Write events from this TimeTraceProfilerInstance and |
147 | // ThreadTimeTraceProfilerInstances. |
148 | void write(raw_pwrite_stream &OS) { |
149 | // Acquire Mutex as reading ThreadTimeTraceProfilerInstances. |
150 | auto &Instances = getTimeTraceProfilerInstances(); |
151 | std::lock_guard<std::mutex> Lock(Instances.Lock); |
152 | assert(Stack.empty() && |
153 | "All profiler sections should be ended when calling write" ); |
154 | assert(llvm::all_of(Instances.List, |
155 | [](const auto &TTP) { return TTP->Stack.empty(); }) && |
156 | "All profiler sections should be ended when calling write" ); |
157 | |
158 | json::OStream J(OS); |
159 | J.objectBegin(); |
160 | J.attributeBegin(Key: "traceEvents" ); |
161 | J.arrayBegin(); |
162 | |
163 | // Emit all events for the main flame graph. |
164 | auto writeEvent = [&](const auto &E, uint64_t Tid) { |
165 | auto StartUs = E.getFlameGraphStartUs(StartTime); |
166 | auto DurUs = E.getFlameGraphDurUs(); |
167 | |
168 | J.object(Contents: [&] { |
169 | J.attribute(Key: "pid" , Contents: Pid); |
170 | J.attribute(Key: "tid" , Contents: int64_t(Tid)); |
171 | J.attribute(Key: "ph" , Contents: "X" ); |
172 | J.attribute(Key: "ts" , Contents: StartUs); |
173 | J.attribute(Key: "dur" , Contents: DurUs); |
174 | J.attribute(Key: "name" , Contents: E.Name); |
175 | if (!E.Detail.empty()) { |
176 | J.attributeObject(Key: "args" , Contents: [&] { J.attribute(Key: "detail" , Contents: E.Detail); }); |
177 | } |
178 | }); |
179 | }; |
180 | for (const TimeTraceProfilerEntry &E : Entries) |
181 | writeEvent(E, this->Tid); |
182 | for (const TimeTraceProfiler *TTP : Instances.List) |
183 | for (const TimeTraceProfilerEntry &E : TTP->Entries) |
184 | writeEvent(E, TTP->Tid); |
185 | |
186 | // Emit totals by section name as additional "thread" events, sorted from |
187 | // longest one. |
188 | // Find highest used thread id. |
189 | uint64_t MaxTid = this->Tid; |
190 | for (const TimeTraceProfiler *TTP : Instances.List) |
191 | MaxTid = std::max(a: MaxTid, b: TTP->Tid); |
192 | |
193 | // Combine all CountAndTotalPerName from threads into one. |
194 | StringMap<CountAndDurationType> AllCountAndTotalPerName; |
195 | auto combineStat = [&](const auto &Stat) { |
196 | StringRef Key = Stat.getKey(); |
197 | auto Value = Stat.getValue(); |
198 | auto &CountAndTotal = AllCountAndTotalPerName[Key]; |
199 | CountAndTotal.first += Value.first; |
200 | CountAndTotal.second += Value.second; |
201 | }; |
202 | for (const auto &Stat : CountAndTotalPerName) |
203 | combineStat(Stat); |
204 | for (const TimeTraceProfiler *TTP : Instances.List) |
205 | for (const auto &Stat : TTP->CountAndTotalPerName) |
206 | combineStat(Stat); |
207 | |
208 | std::vector<NameAndCountAndDurationType> SortedTotals; |
209 | SortedTotals.reserve(n: AllCountAndTotalPerName.size()); |
210 | for (const auto &Total : AllCountAndTotalPerName) |
211 | SortedTotals.emplace_back(args: std::string(Total.getKey()), args: Total.getValue()); |
212 | |
213 | llvm::sort(C&: SortedTotals, Comp: [](const NameAndCountAndDurationType &A, |
214 | const NameAndCountAndDurationType &B) { |
215 | return A.second.second > B.second.second; |
216 | }); |
217 | |
218 | // Report totals on separate threads of tracing file. |
219 | uint64_t TotalTid = MaxTid + 1; |
220 | for (const NameAndCountAndDurationType &Total : SortedTotals) { |
221 | auto DurUs = duration_cast<microseconds>(d: Total.second.second).count(); |
222 | auto Count = AllCountAndTotalPerName[Total.first].first; |
223 | |
224 | J.object(Contents: [&] { |
225 | J.attribute(Key: "pid" , Contents: Pid); |
226 | J.attribute(Key: "tid" , Contents: int64_t(TotalTid)); |
227 | J.attribute(Key: "ph" , Contents: "X" ); |
228 | J.attribute(Key: "ts" , Contents: 0); |
229 | J.attribute(Key: "dur" , Contents: DurUs); |
230 | J.attribute(Key: "name" , Contents: "Total " + Total.first); |
231 | J.attributeObject(Key: "args" , Contents: [&] { |
232 | J.attribute(Key: "count" , Contents: int64_t(Count)); |
233 | J.attribute(Key: "avg ms" , Contents: int64_t(DurUs / Count / 1000)); |
234 | }); |
235 | }); |
236 | |
237 | ++TotalTid; |
238 | } |
239 | |
240 | auto writeMetadataEvent = [&](const char *Name, uint64_t Tid, |
241 | StringRef arg) { |
242 | J.object(Contents: [&] { |
243 | J.attribute(Key: "cat" , Contents: "" ); |
244 | J.attribute(Key: "pid" , Contents: Pid); |
245 | J.attribute(Key: "tid" , Contents: int64_t(Tid)); |
246 | J.attribute(Key: "ts" , Contents: 0); |
247 | J.attribute(Key: "ph" , Contents: "M" ); |
248 | J.attribute(Key: "name" , Contents: Name); |
249 | J.attributeObject(Key: "args" , Contents: [&] { J.attribute(Key: "name" , Contents: arg); }); |
250 | }); |
251 | }; |
252 | |
253 | writeMetadataEvent("process_name" , Tid, ProcName); |
254 | writeMetadataEvent("thread_name" , Tid, ThreadName); |
255 | for (const TimeTraceProfiler *TTP : Instances.List) |
256 | writeMetadataEvent("thread_name" , TTP->Tid, TTP->ThreadName); |
257 | |
258 | J.arrayEnd(); |
259 | J.attributeEnd(); |
260 | |
261 | // Emit the absolute time when this TimeProfiler started. |
262 | // This can be used to combine the profiling data from |
263 | // multiple processes and preserve actual time intervals. |
264 | J.attribute(Key: "beginningOfTime" , |
265 | Contents: time_point_cast<microseconds>(t: BeginningOfTime) |
266 | .time_since_epoch() |
267 | .count()); |
268 | |
269 | J.objectEnd(); |
270 | } |
271 | |
272 | SmallVector<TimeTraceProfilerEntry, 16> Stack; |
273 | SmallVector<TimeTraceProfilerEntry, 128> Entries; |
274 | StringMap<CountAndDurationType> CountAndTotalPerName; |
275 | // System clock time when the session was begun. |
276 | const time_point<system_clock> BeginningOfTime; |
277 | // Profiling clock time when the session was begun. |
278 | const TimePointType StartTime; |
279 | const std::string ProcName; |
280 | const sys::Process::Pid Pid; |
281 | SmallString<0> ThreadName; |
282 | const uint64_t Tid; |
283 | |
284 | // Minimum time granularity (in microseconds) |
285 | const unsigned TimeTraceGranularity; |
286 | }; |
287 | |
288 | void llvm::timeTraceProfilerInitialize(unsigned TimeTraceGranularity, |
289 | StringRef ProcName) { |
290 | assert(TimeTraceProfilerInstance == nullptr && |
291 | "Profiler should not be initialized" ); |
292 | TimeTraceProfilerInstance = new TimeTraceProfiler( |
293 | TimeTraceGranularity, llvm::sys::path::filename(path: ProcName)); |
294 | } |
295 | |
296 | // Removes all TimeTraceProfilerInstances. |
297 | // Called from main thread. |
298 | void llvm::timeTraceProfilerCleanup() { |
299 | delete TimeTraceProfilerInstance; |
300 | TimeTraceProfilerInstance = nullptr; |
301 | |
302 | auto &Instances = getTimeTraceProfilerInstances(); |
303 | std::lock_guard<std::mutex> Lock(Instances.Lock); |
304 | for (auto *TTP : Instances.List) |
305 | delete TTP; |
306 | Instances.List.clear(); |
307 | } |
308 | |
309 | // Finish TimeTraceProfilerInstance on a worker thread. |
310 | // This doesn't remove the instance, just moves the pointer to global vector. |
311 | void llvm::timeTraceProfilerFinishThread() { |
312 | auto &Instances = getTimeTraceProfilerInstances(); |
313 | std::lock_guard<std::mutex> Lock(Instances.Lock); |
314 | Instances.List.push_back(x: TimeTraceProfilerInstance); |
315 | TimeTraceProfilerInstance = nullptr; |
316 | } |
317 | |
318 | void llvm::timeTraceProfilerWrite(raw_pwrite_stream &OS) { |
319 | assert(TimeTraceProfilerInstance != nullptr && |
320 | "Profiler object can't be null" ); |
321 | TimeTraceProfilerInstance->write(OS); |
322 | } |
323 | |
324 | Error llvm::timeTraceProfilerWrite(StringRef PreferredFileName, |
325 | StringRef FallbackFileName) { |
326 | assert(TimeTraceProfilerInstance != nullptr && |
327 | "Profiler object can't be null" ); |
328 | |
329 | std::string Path = PreferredFileName.str(); |
330 | if (Path.empty()) { |
331 | Path = FallbackFileName == "-" ? "out" : FallbackFileName.str(); |
332 | Path += ".time-trace" ; |
333 | } |
334 | |
335 | std::error_code EC; |
336 | raw_fd_ostream OS(Path, EC, sys::fs::OF_TextWithCRLF); |
337 | if (EC) |
338 | return createStringError(EC, S: "Could not open " + Path); |
339 | |
340 | timeTraceProfilerWrite(OS); |
341 | return Error::success(); |
342 | } |
343 | |
344 | void llvm::timeTraceProfilerBegin(StringRef Name, StringRef Detail) { |
345 | if (TimeTraceProfilerInstance != nullptr) |
346 | TimeTraceProfilerInstance->begin(Name: std::string(Name), |
347 | Detail: [&]() { return std::string(Detail); }); |
348 | } |
349 | |
350 | void llvm::timeTraceProfilerBegin(StringRef Name, |
351 | llvm::function_ref<std::string()> Detail) { |
352 | if (TimeTraceProfilerInstance != nullptr) |
353 | TimeTraceProfilerInstance->begin(Name: std::string(Name), Detail); |
354 | } |
355 | |
356 | void llvm::timeTraceProfilerEnd() { |
357 | if (TimeTraceProfilerInstance != nullptr) |
358 | TimeTraceProfilerInstance->end(); |
359 | } |
360 | |