1//===- Timing.cpp - Execution time measurement facilities -----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Facilities to measure and provide statistics on execution time.
10//
11//===----------------------------------------------------------------------===//
12
13#include "mlir/Support/Timing.h"
14#include "mlir/Support/ThreadLocalCache.h"
15#include "llvm/ADT/MapVector.h"
16#include "llvm/ADT/Statistic.h"
17#include "llvm/ADT/StringMap.h"
18#include "llvm/ADT/StringSet.h"
19#include "llvm/Support/Allocator.h"
20#include "llvm/Support/CommandLine.h"
21#include "llvm/Support/Format.h"
22#include "llvm/Support/FormatVariadic.h"
23#include "llvm/Support/ManagedStatic.h"
24#include "llvm/Support/RWMutex.h"
25#include "llvm/Support/Threading.h"
26#include "llvm/Support/raw_ostream.h"
27
28#include <atomic>
29#include <chrono>
30#include <optional>
31
32using namespace mlir;
33using namespace detail;
34using DisplayMode = DefaultTimingManager::DisplayMode;
35using OutputFormat = DefaultTimingManager::OutputFormat;
36
37constexpr llvm::StringLiteral kTimingDescription =
38 "... Execution time report ...";
39
40//===----------------------------------------------------------------------===//
41// TimingManager
42//===----------------------------------------------------------------------===//
43
44namespace mlir {
45namespace detail {
46/// Private implementation details of the `TimingManager`.
47class TimingManagerImpl {
48public:
49 // Identifier allocator, map, and mutex for thread safety.
50 llvm::BumpPtrAllocator identifierAllocator;
51 llvm::StringSet<llvm::BumpPtrAllocator &> identifiers;
52 llvm::sys::SmartRWMutex<true> identifierMutex;
53
54 /// A thread local cache of identifiers to reduce lock contention.
55 ThreadLocalCache<llvm::StringMap<llvm::StringMapEntry<std::nullopt_t> *>>
56 localIdentifierCache;
57
58 TimingManagerImpl() : identifiers(identifierAllocator) {}
59};
60} // namespace detail
61} // namespace mlir
62
63TimingManager::TimingManager() : impl(std::make_unique<TimingManagerImpl>()) {}
64
65TimingManager::~TimingManager() = default;
66
67/// Get the root timer of this timing manager.
68Timer TimingManager::getRootTimer() {
69 auto rt = rootTimer();
70 return rt ? Timer(*this, *rt) : Timer();
71}
72
73/// Get the root timer of this timing manager wrapped in a `TimingScope`.
74TimingScope TimingManager::getRootScope() {
75 return TimingScope(getRootTimer());
76}
77
78//===----------------------------------------------------------------------===//
79// Identifier uniquing
80//===----------------------------------------------------------------------===//
81
82/// Return an identifier for the specified string.
83TimingIdentifier TimingIdentifier::get(StringRef str, TimingManager &tm) {
84 // Check for an existing instance in the local cache.
85 auto &impl = *tm.impl;
86 auto *&localEntry = (*impl.localIdentifierCache)[str];
87 if (localEntry)
88 return TimingIdentifier(localEntry);
89
90 // Check for an existing identifier in read-only mode.
91 {
92 llvm::sys::SmartScopedReader<true> contextLock(impl.identifierMutex);
93 auto it = impl.identifiers.find(Key: str);
94 if (it != impl.identifiers.end()) {
95 localEntry = &*it;
96 return TimingIdentifier(localEntry);
97 }
98 }
99
100 // Acquire a writer-lock so that we can safely create the new instance.
101 llvm::sys::SmartScopedWriter<true> contextLock(impl.identifierMutex);
102 auto it = impl.identifiers.insert(key: str).first;
103 localEntry = &*it;
104 return TimingIdentifier(localEntry);
105}
106
107//===----------------------------------------------------------------------===//
108// Helpers for time record printing
109//===----------------------------------------------------------------------===//
110
111namespace {
112
113class OutputTextStrategy : public OutputStrategy {
114public:
115 OutputTextStrategy(raw_ostream &os) : OutputStrategy(os) {}
116
117 void printHeader(const TimeRecord &total) override {
118 // Figure out how many spaces to description name.
119 unsigned padding = (80 - kTimingDescription.size()) / 2;
120 os << "===" << std::string(73, '-') << "===\n";
121 os.indent(NumSpaces: padding) << kTimingDescription << '\n';
122 os << "===" << std::string(73, '-') << "===\n";
123
124 // Print the total time followed by the section headers.
125 os << llvm::format(Fmt: " Total Execution Time: %.4f seconds\n\n", Vals: total.wall);
126 if (total.user != total.wall)
127 os << " ----User Time----";
128 os << " ----Wall Time---- ----Name----\n";
129 }
130
131 void printFooter() override { os.flush(); }
132
133 void printTime(const TimeRecord &time, const TimeRecord &total) override {
134 if (total.user != total.wall) {
135 os << llvm::format(Fmt: " %8.4f (%5.1f%%)", Vals: time.user,
136 Vals: 100.0 * time.user / total.user);
137 }
138 os << llvm::format(Fmt: " %8.4f (%5.1f%%) ", Vals: time.wall,
139 Vals: 100.0 * time.wall / total.wall);
140 }
141
142 void printListEntry(StringRef name, const TimeRecord &time,
143 const TimeRecord &total, bool lastEntry) override {
144 printTime(time, total);
145 os << name << "\n";
146 }
147
148 void printTreeEntry(unsigned indent, StringRef name, const TimeRecord &time,
149 const TimeRecord &total) override {
150 printTime(time, total);
151 os.indent(NumSpaces: indent) << name << "\n";
152 }
153
154 void printTreeEntryEnd(unsigned indent, bool lastEntry) override {}
155};
156
157class OutputJsonStrategy : public OutputStrategy {
158public:
159 OutputJsonStrategy(raw_ostream &os) : OutputStrategy(os) {}
160
161 void printHeader(const TimeRecord &total) override { os << "[" << "\n"; }
162
163 void printFooter() override {
164 os << "]" << "\n";
165 os.flush();
166 }
167
168 void printTime(const TimeRecord &time, const TimeRecord &total) override {
169 if (total.user != total.wall) {
170 os << "\"user\": {";
171 os << "\"duration\": " << llvm::format(Fmt: "%8.4f", Vals: time.user) << ", ";
172 os << "\"percentage\": "
173 << llvm::format(Fmt: "%5.1f", Vals: 100.0 * time.user / total.user);
174 os << "}, ";
175 }
176 os << "\"wall\": {";
177 os << "\"duration\": " << llvm::format(Fmt: "%8.4f", Vals: time.wall) << ", ";
178 os << "\"percentage\": "
179 << llvm::format(Fmt: "%5.1f", Vals: 100.0 * time.wall / total.wall);
180 os << "}";
181 }
182
183 void printListEntry(StringRef name, const TimeRecord &time,
184 const TimeRecord &total, bool lastEntry) override {
185 os << "{";
186 printTime(time, total);
187 os << ", \"name\": " << "\"" << name << "\"";
188 os << "}";
189 if (!lastEntry)
190 os << ",";
191 os << "\n";
192 }
193
194 void printTreeEntry(unsigned indent, StringRef name, const TimeRecord &time,
195 const TimeRecord &total) override {
196 os.indent(NumSpaces: indent) << "{";
197 printTime(time, total);
198 os << ", \"name\": " << "\"" << name << "\"";
199 os << ", \"passes\": [" << "\n";
200 }
201
202 void printTreeEntryEnd(unsigned indent, bool lastEntry) override {
203 os.indent(NumSpaces: indent) << "{}]";
204 os << "}";
205 if (!lastEntry)
206 os << ",";
207 os << "\n";
208 }
209};
210
211} // namespace
212
213//===----------------------------------------------------------------------===//
214// Timer Implementation for DefaultTimingManager
215//===----------------------------------------------------------------------===//
216
217namespace {
218
219/// A timer used to sample execution time.
220///
221/// Separately tracks wall time and user time to account for parallel threads of
222/// execution. Timers are intended to be started and stopped multiple times.
223/// Each start and stop will add to the timer's wall and user time.
224class TimerImpl {
225public:
226 using ChildrenMap = llvm::MapVector<const void *, std::unique_ptr<TimerImpl>>;
227 using AsyncChildrenMap = llvm::DenseMap<uint64_t, ChildrenMap>;
228
229 TimerImpl(std::string &&name, std::unique_ptr<OutputStrategy> &output)
230 : threadId(llvm::get_threadid()), name(name), output(output) {}
231
232 /// Start the timer.
233 void start() { startTime = std::chrono::steady_clock::now(); }
234
235 /// Stop the timer.
236 void stop() {
237 auto newTime = std::chrono::steady_clock::now() - startTime;
238 wallTime += newTime;
239 userTime += newTime;
240 }
241
242 /// Create a child timer nested within this one. Multiple calls to this
243 /// function with the same unique identifier `id` will return the same child
244 /// timer.
245 ///
246 /// This function can be called from other threads, as long as this timer
247 /// outlives any uses of the child timer on the other thread.
248 TimerImpl *nest(const void *id, function_ref<std::string()> nameBuilder) {
249 auto tid = llvm::get_threadid();
250 if (tid == threadId)
251 return nestTail(child&: children[id], nameBuilder);
252 std::unique_lock<std::mutex> lock(asyncMutex);
253 return nestTail(child&: asyncChildren[tid][id], nameBuilder);
254 }
255
256 /// Tail-called from `nest()`.
257 TimerImpl *nestTail(std::unique_ptr<TimerImpl> &child,
258 function_ref<std::string()> nameBuilder) {
259 if (!child)
260 child = std::make_unique<TimerImpl>(args: nameBuilder(), args&: output);
261 return child.get();
262 }
263
264 /// Finalize this timer and all its children.
265 ///
266 /// If this timer has async children, which happens if `nest()` was called
267 /// from another thread, this function merges the async childr timers into the
268 /// main list of child timers.
269 ///
270 /// Caution: Call this function only after all nested timers running on other
271 /// threads no longer need their timers!
272 void finalize() {
273 addAsyncUserTime();
274 mergeAsyncChildren();
275 }
276
277 /// Add the user time of all async children to this timer's user time. This is
278 /// necessary since the user time already contains all regular child timers,
279 /// but not the asynchronous ones (by the nesting nature of the timers).
280 std::chrono::nanoseconds addAsyncUserTime() {
281 auto added = std::chrono::nanoseconds(0);
282 for (auto &child : children)
283 added += child.second->addAsyncUserTime();
284 for (auto &thread : asyncChildren) {
285 for (auto &child : thread.second) {
286 child.second->addAsyncUserTime();
287 added += child.second->userTime;
288 }
289 }
290 userTime += added;
291 return added;
292 }
293
294 /// Ensure that this timer and recursively all its children have their async
295 /// children folded into the main map of children.
296 void mergeAsyncChildren() {
297 for (auto &child : children)
298 child.second->mergeAsyncChildren();
299 mergeChildren(other: std::move(asyncChildren));
300 assert(asyncChildren.empty());
301 }
302
303 /// Merge multiple child timers into this timer.
304 ///
305 /// Children in `other` are added as children to this timer, or, if this timer
306 /// already contains a child with the corresponding unique identifier, are
307 /// merged into the existing child.
308 void mergeChildren(ChildrenMap &&other) {
309 if (children.empty()) {
310 children = std::move(other);
311 for (auto &child : children)
312 child.second->mergeAsyncChildren();
313 } else {
314 for (auto &child : other)
315 mergeChild(id: child.first, other: std::move(child.second));
316 other.clear();
317 }
318 }
319
320 /// See above.
321 void mergeChildren(AsyncChildrenMap &&other) {
322 for (auto &thread : other) {
323 mergeChildren(other: std::move(thread.second));
324 assert(thread.second.empty());
325 }
326 other.clear();
327 }
328
329 /// Merge a child timer into this timer for a given unique identifier.
330 ///
331 /// Moves all child and async child timers of `other` into this timer's child
332 /// for the given unique identifier.
333 void mergeChild(const void *id, std::unique_ptr<TimerImpl> &&other) {
334 auto &into = children[id];
335 if (!into) {
336 into = std::move(other);
337 into->mergeAsyncChildren();
338 } else {
339 into->wallTime = std::max(a: into->wallTime, b: other->wallTime);
340 into->userTime += other->userTime;
341 into->mergeChildren(other: std::move(other->children));
342 into->mergeChildren(other: std::move(other->asyncChildren));
343 other.reset();
344 }
345 }
346
347 /// Dump a human-readable tree representation of the timer and its children.
348 /// This is useful for debugging the timing mechanisms and structure of the
349 /// timers.
350 void dump(raw_ostream &os, unsigned indent = 0, unsigned markThreadId = 0) {
351 auto time = getTimeRecord();
352 os << std::string(indent * 2, ' ') << name << " [" << threadId << "]"
353 << llvm::format(Fmt: " %7.4f / %7.4f", Vals: time.user, Vals: time.wall);
354 if (threadId != markThreadId && markThreadId != 0)
355 os << " (*)";
356 os << "\n";
357 for (auto &child : children)
358 child.second->dump(os, indent: indent + 1, markThreadId: threadId);
359 for (auto &thread : asyncChildren)
360 for (auto &child : thread.second)
361 child.second->dump(os, indent: indent + 1, markThreadId: threadId);
362 }
363
364 /// Returns the time for this timer in seconds.
365 TimeRecord getTimeRecord() {
366 return TimeRecord(
367 std::chrono::duration_cast<std::chrono::duration<double>>(d: wallTime)
368 .count(),
369 std::chrono::duration_cast<std::chrono::duration<double>>(d: userTime)
370 .count());
371 }
372
373 /// Print the timing result in list mode.
374 void printAsList(TimeRecord total) {
375 // Flatten the leaf timers in the tree and merge them by name.
376 llvm::StringMap<TimeRecord> mergedTimers;
377 std::function<void(TimerImpl *)> addTimer = [&](TimerImpl *timer) {
378 mergedTimers[timer->name] += timer->getTimeRecord();
379 for (auto &children : timer->children)
380 addTimer(children.second.get());
381 };
382 addTimer(this);
383
384 // Sort the timing information by wall time.
385 std::vector<std::pair<StringRef, TimeRecord>> timerNameAndTime;
386 for (auto &it : mergedTimers)
387 timerNameAndTime.emplace_back(args: it.first(), args&: it.second);
388 llvm::array_pod_sort(Start: timerNameAndTime.begin(), End: timerNameAndTime.end(),
389 Compare: [](const std::pair<StringRef, TimeRecord> *lhs,
390 const std::pair<StringRef, TimeRecord> *rhs) {
391 return llvm::array_pod_sort_comparator<double>(
392 P1: &rhs->second.wall, P2: &lhs->second.wall);
393 });
394
395 // Print the timing information sequentially.
396 for (auto &timeData : timerNameAndTime)
397 output->printListEntry(name: timeData.first, time: timeData.second, total);
398 }
399
400 /// Print the timing result in tree mode.
401 void printAsTree(TimeRecord total, unsigned indent = 0) {
402 unsigned childIndent = indent;
403 if (!hidden) {
404 output->printTreeEntry(indent, name, time: getTimeRecord(), total);
405 childIndent += 2;
406 }
407 for (auto &child : children) {
408 child.second->printAsTree(total, indent: childIndent);
409 }
410 if (!hidden) {
411 output->printTreeEntryEnd(indent);
412 }
413 }
414
415 /// Print the current timing information.
416 void print(DisplayMode displayMode) {
417 // Print the banner.
418 auto total = getTimeRecord();
419 output->printHeader(total);
420
421 // Defer to a specialized printer for each display mode.
422 switch (displayMode) {
423 case DisplayMode::List:
424 printAsList(total);
425 break;
426 case DisplayMode::Tree:
427 printAsTree(total);
428 break;
429 }
430
431 // Print the top-level time not accounted for by child timers, and the
432 // total.
433 auto rest = total;
434 for (auto &child : children)
435 rest -= child.second->getTimeRecord();
436 output->printListEntry(name: "Rest", time: rest, total);
437 output->printListEntry(name: "Total", time: total, total, /*lastEntry=*/true);
438 output->printFooter();
439 }
440
441 /// The last time instant at which the timer was started.
442 std::chrono::time_point<std::chrono::steady_clock> startTime;
443
444 /// Accumulated wall time. If multiple threads of execution are merged into
445 /// this timer, the wall time will hold the maximum wall time of each thread
446 /// of execution.
447 std::chrono::nanoseconds wallTime = std::chrono::nanoseconds(0);
448
449 /// Accumulated user time. If multiple threads of execution are merged into
450 /// this timer, each thread's user time is added here.
451 std::chrono::nanoseconds userTime = std::chrono::nanoseconds(0);
452
453 /// The thread on which this timer is running.
454 uint64_t threadId;
455
456 /// A descriptive name for this timer.
457 std::string name;
458
459 /// Whether to omit this timer from reports and directly show its children.
460 bool hidden = false;
461
462 /// Child timers on the same thread the timer itself. We keep at most one
463 /// timer per unique identifier.
464 ChildrenMap children;
465
466 /// Child timers on other threads. We keep at most one timer per unique
467 /// identifier.
468 AsyncChildrenMap asyncChildren;
469
470 /// Mutex for the async children.
471 std::mutex asyncMutex;
472
473 std::unique_ptr<OutputStrategy> &output;
474};
475
476} // namespace
477
478//===----------------------------------------------------------------------===//
479// DefaultTimingManager
480//===----------------------------------------------------------------------===//
481
482namespace mlir {
483namespace detail {
484
485/// Implementation details of the `DefaultTimingManager`.
486class DefaultTimingManagerImpl {
487public:
488 /// Whether we should do our work or not.
489 bool enabled = false;
490
491 /// The configured display mode.
492 DisplayMode displayMode = DisplayMode::Tree;
493
494 /// The root timer.
495 std::unique_ptr<TimerImpl> rootTimer;
496};
497
498} // namespace detail
499} // namespace mlir
500
501DefaultTimingManager::DefaultTimingManager()
502 : impl(std::make_unique<DefaultTimingManagerImpl>()),
503 out(std::make_unique<OutputTextStrategy>(args&: llvm::errs())) {
504 clear(); // initializes the root timer
505}
506
507DefaultTimingManager::~DefaultTimingManager() { print(); }
508
509/// Enable or disable execution time sampling.
510void DefaultTimingManager::setEnabled(bool enabled) { impl->enabled = enabled; }
511
512/// Return whether execution time sampling is enabled.
513bool DefaultTimingManager::isEnabled() const { return impl->enabled; }
514
515/// Change the display mode.
516void DefaultTimingManager::setDisplayMode(DisplayMode displayMode) {
517 impl->displayMode = displayMode;
518}
519
520/// Return the current display mode;
521DefaultTimingManager::DisplayMode DefaultTimingManager::getDisplayMode() const {
522 return impl->displayMode;
523}
524
525/// Change the stream where the output will be printed to.
526void DefaultTimingManager::setOutput(std::unique_ptr<OutputStrategy> output) {
527 out = std::move(output);
528}
529
530/// Print and clear the timing results.
531void DefaultTimingManager::print() {
532 if (impl->enabled) {
533 impl->rootTimer->finalize();
534 impl->rootTimer->print(displayMode: impl->displayMode);
535 }
536 clear();
537}
538
539/// Clear the timing results.
540void DefaultTimingManager::clear() {
541 impl->rootTimer = std::make_unique<TimerImpl>(args: "root", args&: out);
542 impl->rootTimer->hidden = true;
543}
544
545/// Debug print the timer data structures to an output stream.
546void DefaultTimingManager::dumpTimers(raw_ostream &os) {
547 impl->rootTimer->dump(os);
548}
549
550/// Debug print the timers as a list.
551void DefaultTimingManager::dumpAsList(raw_ostream &os) {
552 impl->rootTimer->finalize();
553 impl->rootTimer->print(displayMode: DisplayMode::List);
554}
555
556/// Debug print the timers as a tree.
557void DefaultTimingManager::dumpAsTree(raw_ostream &os) {
558 impl->rootTimer->finalize();
559 impl->rootTimer->print(displayMode: DisplayMode::Tree);
560}
561
562std::optional<void *> DefaultTimingManager::rootTimer() {
563 if (impl->enabled)
564 return impl->rootTimer.get();
565 return std::nullopt;
566}
567
568void DefaultTimingManager::startTimer(void *handle) {
569 static_cast<TimerImpl *>(handle)->start();
570}
571
572void DefaultTimingManager::stopTimer(void *handle) {
573 static_cast<TimerImpl *>(handle)->stop();
574}
575
576void *DefaultTimingManager::nestTimer(void *handle, const void *id,
577 function_ref<std::string()> nameBuilder) {
578 return static_cast<TimerImpl *>(handle)->nest(id, nameBuilder);
579}
580
581void DefaultTimingManager::hideTimer(void *handle) {
582 static_cast<TimerImpl *>(handle)->hidden = true;
583}
584
585//===----------------------------------------------------------------------===//
586// DefaultTimingManager Command Line Options
587//===----------------------------------------------------------------------===//
588
589namespace {
590struct DefaultTimingManagerOptions {
591 llvm::cl::opt<bool> timing{"mlir-timing",
592 llvm::cl::desc("Display execution times"),
593 llvm::cl::init(Val: false)};
594 llvm::cl::opt<DisplayMode> displayMode{
595 "mlir-timing-display", llvm::cl::desc("Display method for timing data"),
596 llvm::cl::init(Val: DisplayMode::Tree),
597 llvm::cl::values(
598 clEnumValN(DisplayMode::List, "list",
599 "display the results in a list sorted by total time"),
600 clEnumValN(DisplayMode::Tree, "tree",
601 "display the results ina with a nested tree view"))};
602 llvm::cl::opt<OutputFormat> outputFormat{
603 "mlir-output-format", llvm::cl::desc("Output format for timing data"),
604 llvm::cl::init(Val: OutputFormat::Text),
605 llvm::cl::values(clEnumValN(OutputFormat::Text, "text",
606 "display the results in text format"),
607 clEnumValN(OutputFormat::Json, "json",
608 "display the results in JSON format"))};
609};
610} // namespace
611
612static llvm::ManagedStatic<DefaultTimingManagerOptions> options;
613
614void mlir::registerDefaultTimingManagerCLOptions() {
615 // Make sure that the options struct has been constructed.
616 *options;
617}
618
619void mlir::applyDefaultTimingManagerCLOptions(DefaultTimingManager &tm) {
620 if (!options.isConstructed())
621 return;
622 tm.setEnabled(options->timing);
623 tm.setDisplayMode(options->displayMode);
624
625 std::unique_ptr<OutputStrategy> printer;
626 if (options->outputFormat == OutputFormat::Text)
627 printer = std::make_unique<OutputTextStrategy>(args&: llvm::errs());
628 else if (options->outputFormat == OutputFormat::Json)
629 printer = std::make_unique<OutputJsonStrategy>(args&: llvm::errs());
630 tm.setOutput(std::move(printer));
631}
632

source code of mlir/lib/Support/Timing.cpp