1 | //===-- PerfJITEventListener.cpp - Tell Linux's perf about JITted code ----===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines a JITEventListener object that tells perf about JITted |
10 | // functions, including source line information. |
11 | // |
12 | // Documentation for perf jit integration is available at: |
13 | // https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jitdump-specification.txt |
14 | // https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/tools/perf/Documentation/jit-interface.txt |
15 | // |
16 | //===----------------------------------------------------------------------===// |
17 | |
18 | #include "llvm/ADT/Twine.h" |
19 | #include "llvm/Config/config.h" |
20 | #include "llvm/DebugInfo/DWARF/DWARFContext.h" |
21 | #include "llvm/ExecutionEngine/JITEventListener.h" |
22 | #include "llvm/Object/ObjectFile.h" |
23 | #include "llvm/Object/SymbolSize.h" |
24 | #include "llvm/Support/Debug.h" |
25 | #include "llvm/Support/Errno.h" |
26 | #include "llvm/Support/FileSystem.h" |
27 | #include "llvm/Support/MemoryBuffer.h" |
28 | #include "llvm/Support/Mutex.h" |
29 | #include "llvm/Support/Path.h" |
30 | #include "llvm/Support/Process.h" |
31 | #include "llvm/Support/Threading.h" |
32 | #include "llvm/Support/raw_ostream.h" |
33 | #include <mutex> |
34 | |
35 | #include <sys/mman.h> // mmap() |
36 | #include <time.h> // clock_gettime(), time(), localtime_r() */ |
37 | #include <unistd.h> // for read(), close() |
38 | |
39 | using namespace llvm; |
40 | using namespace llvm::object; |
41 | typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind; |
42 | |
43 | namespace { |
44 | |
45 | // language identifier (XXX: should we generate something better from debug |
46 | // info?) |
47 | #define JIT_LANG "llvm-IR" |
48 | #define LLVM_PERF_JIT_MAGIC \ |
49 | ((uint32_t)'J' << 24 | (uint32_t)'i' << 16 | (uint32_t)'T' << 8 | \ |
50 | (uint32_t)'D') |
51 | #define LLVM_PERF_JIT_VERSION 1 |
52 | |
53 | // bit 0: set if the jitdump file is using an architecture-specific timestamp |
54 | // clock source |
55 | #define JITDUMP_FLAGS_ARCH_TIMESTAMP (1ULL << 0) |
56 | |
57 | struct LLVMPerfJitHeader; |
58 | |
59 | class PerfJITEventListener : public JITEventListener { |
60 | public: |
61 | PerfJITEventListener(); |
62 | ~PerfJITEventListener() { |
63 | if (MarkerAddr) |
64 | CloseMarker(); |
65 | } |
66 | |
67 | void notifyObjectLoaded(ObjectKey K, const ObjectFile &Obj, |
68 | const RuntimeDyld::LoadedObjectInfo &L) override; |
69 | void notifyFreeingObject(ObjectKey K) override; |
70 | |
71 | private: |
72 | bool InitDebuggingDir(); |
73 | bool OpenMarker(); |
74 | void CloseMarker(); |
75 | static bool FillMachine(LLVMPerfJitHeader &hdr); |
76 | |
77 | void NotifyCode(Expected<llvm::StringRef> &Symbol, uint64_t CodeAddr, |
78 | uint64_t CodeSize); |
79 | void NotifyDebug(uint64_t CodeAddr, DILineInfoTable Lines); |
80 | |
81 | // cache lookups |
82 | sys::Process::Pid Pid; |
83 | |
84 | // base directory for output data |
85 | std::string JitPath; |
86 | |
87 | // output data stream, closed via Dumpstream |
88 | int DumpFd = -1; |
89 | |
90 | // output data stream |
91 | std::unique_ptr<raw_fd_ostream> Dumpstream; |
92 | |
93 | // prevent concurrent dumps from messing up the output file |
94 | sys::Mutex Mutex; |
95 | |
96 | // perf mmap marker |
97 | void *MarkerAddr = NULL; |
98 | |
99 | // perf support ready |
100 | bool SuccessfullyInitialized = false; |
101 | |
102 | // identifier for functions, primarily to identify when moving them around |
103 | uint64_t CodeGeneration = 1; |
104 | }; |
105 | |
106 | // The following are POD struct definitions from the perf jit specification |
107 | |
108 | enum LLVMPerfJitRecordType { |
109 | JIT_CODE_LOAD = 0, |
110 | JIT_CODE_MOVE = 1, // not emitted, code isn't moved |
111 | JIT_CODE_DEBUG_INFO = 2, |
112 | JIT_CODE_CLOSE = 3, // not emitted, unnecessary |
113 | JIT_CODE_UNWINDING_INFO = 4, // not emitted |
114 | |
115 | JIT_CODE_MAX |
116 | }; |
117 | |
118 | struct { |
119 | uint32_t ; // characters "JiTD" |
120 | uint32_t ; // header version |
121 | uint32_t ; // total size of header |
122 | uint32_t ; // elf mach target |
123 | uint32_t ; // reserved |
124 | uint32_t ; |
125 | uint64_t ; // timestamp |
126 | uint64_t ; // flags |
127 | }; |
128 | |
129 | // record prefix (mandatory in each record) |
130 | struct LLVMPerfJitRecordPrefix { |
131 | uint32_t Id; // record type identifier |
132 | uint32_t TotalSize; |
133 | uint64_t Timestamp; |
134 | }; |
135 | |
136 | struct LLVMPerfJitRecordCodeLoad { |
137 | LLVMPerfJitRecordPrefix Prefix; |
138 | |
139 | uint32_t Pid; |
140 | uint32_t Tid; |
141 | uint64_t Vma; |
142 | uint64_t CodeAddr; |
143 | uint64_t CodeSize; |
144 | uint64_t CodeIndex; |
145 | }; |
146 | |
147 | struct LLVMPerfJitDebugEntry { |
148 | uint64_t Addr; |
149 | int Lineno; // source line number starting at 1 |
150 | int Discrim; // column discriminator, 0 is default |
151 | // followed by null terminated filename, \xff\0 if same as previous entry |
152 | }; |
153 | |
154 | struct LLVMPerfJitRecordDebugInfo { |
155 | LLVMPerfJitRecordPrefix Prefix; |
156 | |
157 | uint64_t CodeAddr; |
158 | uint64_t NrEntry; |
159 | // followed by NrEntry LLVMPerfJitDebugEntry records |
160 | }; |
161 | |
162 | static inline uint64_t timespec_to_ns(const struct timespec *ts) { |
163 | const uint64_t NanoSecPerSec = 1000000000; |
164 | return ((uint64_t)ts->tv_sec * NanoSecPerSec) + ts->tv_nsec; |
165 | } |
166 | |
167 | static inline uint64_t perf_get_timestamp(void) { |
168 | struct timespec ts; |
169 | int ret; |
170 | |
171 | ret = clock_gettime(CLOCK_MONOTONIC, tp: &ts); |
172 | if (ret) |
173 | return 0; |
174 | |
175 | return timespec_to_ns(ts: &ts); |
176 | } |
177 | |
178 | PerfJITEventListener::PerfJITEventListener() |
179 | : Pid(sys::Process::getProcessId()) { |
180 | // check if clock-source is supported |
181 | if (!perf_get_timestamp()) { |
182 | errs() << "kernel does not support CLOCK_MONOTONIC\n" ; |
183 | return; |
184 | } |
185 | |
186 | if (!InitDebuggingDir()) { |
187 | errs() << "could not initialize debugging directory\n" ; |
188 | return; |
189 | } |
190 | |
191 | std::string Filename; |
192 | raw_string_ostream FilenameBuf(Filename); |
193 | FilenameBuf << JitPath << "/jit-" << Pid << ".dump" ; |
194 | |
195 | // Need to open ourselves, because we need to hand the FD to OpenMarker() and |
196 | // raw_fd_ostream doesn't expose the FD. |
197 | using sys::fs::openFileForWrite; |
198 | if (auto EC = |
199 | openFileForReadWrite(Name: FilenameBuf.str(), ResultFD&: DumpFd, |
200 | Disp: sys::fs::CD_CreateNew, Flags: sys::fs::OF_None)) { |
201 | errs() << "could not open JIT dump file " << FilenameBuf.str() << ": " |
202 | << EC.message() << "\n" ; |
203 | return; |
204 | } |
205 | |
206 | Dumpstream = std::make_unique<raw_fd_ostream>(args&: DumpFd, args: true); |
207 | |
208 | LLVMPerfJitHeader = {.Magic: 0, .Version: 0, .TotalSize: 0, .ElfMach: 0, .Pad1: 0, .Pid: 0, .Timestamp: 0, .Flags: 0}; |
209 | if (!FillMachine(hdr&: Header)) |
210 | return; |
211 | |
212 | // signal this process emits JIT information |
213 | if (!OpenMarker()) |
214 | return; |
215 | |
216 | // emit dumpstream header |
217 | Header.Magic = LLVM_PERF_JIT_MAGIC; |
218 | Header.Version = LLVM_PERF_JIT_VERSION; |
219 | Header.TotalSize = sizeof(Header); |
220 | Header.Pid = Pid; |
221 | Header.Timestamp = perf_get_timestamp(); |
222 | Dumpstream->write(Ptr: reinterpret_cast<const char *>(&Header), Size: sizeof(Header)); |
223 | |
224 | // Everything initialized, can do profiling now. |
225 | if (!Dumpstream->has_error()) |
226 | SuccessfullyInitialized = true; |
227 | } |
228 | |
229 | void PerfJITEventListener::notifyObjectLoaded( |
230 | ObjectKey K, const ObjectFile &Obj, |
231 | const RuntimeDyld::LoadedObjectInfo &L) { |
232 | |
233 | if (!SuccessfullyInitialized) |
234 | return; |
235 | |
236 | OwningBinary<ObjectFile> DebugObjOwner = L.getObjectForDebug(Obj); |
237 | const ObjectFile &DebugObj = *DebugObjOwner.getBinary(); |
238 | |
239 | // Get the address of the object image for use as a unique identifier |
240 | std::unique_ptr<DIContext> Context = DWARFContext::create(Obj: DebugObj); |
241 | |
242 | // Use symbol info to iterate over functions in the object. |
243 | for (const std::pair<SymbolRef, uint64_t> &P : computeSymbolSizes(O: DebugObj)) { |
244 | SymbolRef Sym = P.first; |
245 | std::string SourceFileName; |
246 | |
247 | Expected<SymbolRef::Type> SymTypeOrErr = Sym.getType(); |
248 | if (!SymTypeOrErr) { |
249 | // There's not much we can with errors here |
250 | consumeError(Err: SymTypeOrErr.takeError()); |
251 | continue; |
252 | } |
253 | SymbolRef::Type SymType = *SymTypeOrErr; |
254 | if (SymType != SymbolRef::ST_Function) |
255 | continue; |
256 | |
257 | Expected<StringRef> Name = Sym.getName(); |
258 | if (!Name) { |
259 | consumeError(Err: Name.takeError()); |
260 | continue; |
261 | } |
262 | |
263 | Expected<uint64_t> AddrOrErr = Sym.getAddress(); |
264 | if (!AddrOrErr) { |
265 | consumeError(Err: AddrOrErr.takeError()); |
266 | continue; |
267 | } |
268 | uint64_t Size = P.second; |
269 | object::SectionedAddress Address; |
270 | Address.Address = *AddrOrErr; |
271 | |
272 | uint64_t SectionIndex = object::SectionedAddress::UndefSection; |
273 | if (auto SectOrErr = Sym.getSection()) |
274 | if (*SectOrErr != Obj.section_end()) |
275 | SectionIndex = SectOrErr.get()->getIndex(); |
276 | |
277 | // According to spec debugging info has to come before loading the |
278 | // corresponding code load. |
279 | DILineInfoTable Lines = Context->getLineInfoForAddressRange( |
280 | Address: {.Address: *AddrOrErr, .SectionIndex: SectionIndex}, Size, Specifier: FileLineInfoKind::AbsoluteFilePath); |
281 | |
282 | NotifyDebug(CodeAddr: *AddrOrErr, Lines); |
283 | NotifyCode(Symbol&: Name, CodeAddr: *AddrOrErr, CodeSize: Size); |
284 | } |
285 | |
286 | // avoid races with writes |
287 | std::lock_guard<sys::Mutex> Guard(Mutex); |
288 | |
289 | Dumpstream->flush(); |
290 | } |
291 | |
292 | void PerfJITEventListener::notifyFreeingObject(ObjectKey K) { |
293 | // perf currently doesn't have an interface for unloading. But munmap()ing the |
294 | // code section does, so that's ok. |
295 | } |
296 | |
297 | bool PerfJITEventListener::InitDebuggingDir() { |
298 | time_t Time; |
299 | struct tm LocalTime; |
300 | char TimeBuffer[sizeof("YYYYMMDD" )]; |
301 | SmallString<64> Path; |
302 | |
303 | // search for location to dump data to |
304 | if (const char *BaseDir = getenv(name: "JITDUMPDIR" )) |
305 | Path.append(RHS: BaseDir); |
306 | else if (!sys::path::home_directory(result&: Path)) |
307 | Path = "." ; |
308 | |
309 | // create debug directory |
310 | Path += "/.debug/jit/" ; |
311 | if (auto EC = sys::fs::create_directories(path: Path)) { |
312 | errs() << "could not create jit cache directory " << Path << ": " |
313 | << EC.message() << "\n" ; |
314 | return false; |
315 | } |
316 | |
317 | // create unique directory for dump data related to this process |
318 | time(timer: &Time); |
319 | localtime_r(timer: &Time, tp: &LocalTime); |
320 | strftime(s: TimeBuffer, maxsize: sizeof(TimeBuffer), format: "%Y%m%d" , tp: &LocalTime); |
321 | Path += JIT_LANG "-jit-" ; |
322 | Path += TimeBuffer; |
323 | |
324 | SmallString<128> UniqueDebugDir; |
325 | |
326 | using sys::fs::createUniqueDirectory; |
327 | if (auto EC = createUniqueDirectory(Prefix: Path, ResultPath&: UniqueDebugDir)) { |
328 | errs() << "could not create unique jit cache directory " << UniqueDebugDir |
329 | << ": " << EC.message() << "\n" ; |
330 | return false; |
331 | } |
332 | |
333 | JitPath = std::string(UniqueDebugDir.str()); |
334 | |
335 | return true; |
336 | } |
337 | |
338 | bool PerfJITEventListener::OpenMarker() { |
339 | // We mmap the jitdump to create an MMAP RECORD in perf.data file. The mmap |
340 | // is captured either live (perf record running when we mmap) or in deferred |
341 | // mode, via /proc/PID/maps. The MMAP record is used as a marker of a jitdump |
342 | // file for more meta data info about the jitted code. Perf report/annotate |
343 | // detect this special filename and process the jitdump file. |
344 | // |
345 | // Mapping must be PROT_EXEC to ensure it is captured by perf record |
346 | // even when not using -d option. |
347 | MarkerAddr = ::mmap(NULL, len: sys::Process::getPageSizeEstimate(), |
348 | PROT_READ | PROT_EXEC, MAP_PRIVATE, fd: DumpFd, offset: 0); |
349 | |
350 | if (MarkerAddr == MAP_FAILED) { |
351 | errs() << "could not mmap JIT marker\n" ; |
352 | return false; |
353 | } |
354 | return true; |
355 | } |
356 | |
357 | void PerfJITEventListener::CloseMarker() { |
358 | if (!MarkerAddr) |
359 | return; |
360 | |
361 | munmap(addr: MarkerAddr, len: sys::Process::getPageSizeEstimate()); |
362 | MarkerAddr = nullptr; |
363 | } |
364 | |
365 | bool PerfJITEventListener::(LLVMPerfJitHeader &hdr) { |
366 | char id[16]; |
367 | struct { |
368 | uint16_t e_type; |
369 | uint16_t e_machine; |
370 | } info; |
371 | |
372 | size_t RequiredMemory = sizeof(id) + sizeof(info); |
373 | |
374 | ErrorOr<std::unique_ptr<MemoryBuffer>> MB = |
375 | MemoryBuffer::getFileSlice(Filename: "/proc/self/exe" , |
376 | MapSize: RequiredMemory, |
377 | Offset: 0); |
378 | |
379 | // This'll not guarantee that enough data was actually read from the |
380 | // underlying file. Instead the trailing part of the buffer would be |
381 | // zeroed. Given the ELF signature check below that seems ok though, |
382 | // it's unlikely that the file ends just after that, and the |
383 | // consequence would just be that perf wouldn't recognize the |
384 | // signature. |
385 | if (auto EC = MB.getError()) { |
386 | errs() << "could not open /proc/self/exe: " << EC.message() << "\n" ; |
387 | return false; |
388 | } |
389 | |
390 | memcpy(dest: &id, src: (*MB)->getBufferStart(), n: sizeof(id)); |
391 | memcpy(dest: &info, src: (*MB)->getBufferStart() + sizeof(id), n: sizeof(info)); |
392 | |
393 | // check ELF signature |
394 | if (id[0] != 0x7f || id[1] != 'E' || id[2] != 'L' || id[3] != 'F') { |
395 | errs() << "invalid elf signature\n" ; |
396 | return false; |
397 | } |
398 | |
399 | hdr.ElfMach = info.e_machine; |
400 | |
401 | return true; |
402 | } |
403 | |
404 | void PerfJITEventListener::NotifyCode(Expected<llvm::StringRef> &Symbol, |
405 | uint64_t CodeAddr, uint64_t CodeSize) { |
406 | assert(SuccessfullyInitialized); |
407 | |
408 | // 0 length functions can't have samples. |
409 | if (CodeSize == 0) |
410 | return; |
411 | |
412 | LLVMPerfJitRecordCodeLoad rec; |
413 | rec.Prefix.Id = JIT_CODE_LOAD; |
414 | rec.Prefix.TotalSize = sizeof(rec) + // debug record itself |
415 | Symbol->size() + 1 + // symbol name |
416 | CodeSize; // and code |
417 | rec.Prefix.Timestamp = perf_get_timestamp(); |
418 | |
419 | rec.CodeSize = CodeSize; |
420 | rec.Vma = CodeAddr; |
421 | rec.CodeAddr = CodeAddr; |
422 | rec.Pid = Pid; |
423 | rec.Tid = get_threadid(); |
424 | |
425 | // avoid interspersing output |
426 | std::lock_guard<sys::Mutex> Guard(Mutex); |
427 | |
428 | rec.CodeIndex = CodeGeneration++; // under lock! |
429 | |
430 | Dumpstream->write(Ptr: reinterpret_cast<const char *>(&rec), Size: sizeof(rec)); |
431 | Dumpstream->write(Ptr: Symbol->data(), Size: Symbol->size() + 1); |
432 | Dumpstream->write(Ptr: reinterpret_cast<const char *>(CodeAddr), Size: CodeSize); |
433 | } |
434 | |
435 | void PerfJITEventListener::NotifyDebug(uint64_t CodeAddr, |
436 | DILineInfoTable Lines) { |
437 | assert(SuccessfullyInitialized); |
438 | |
439 | // Didn't get useful debug info. |
440 | if (Lines.empty()) |
441 | return; |
442 | |
443 | LLVMPerfJitRecordDebugInfo rec; |
444 | rec.Prefix.Id = JIT_CODE_DEBUG_INFO; |
445 | rec.Prefix.TotalSize = sizeof(rec); // will be increased further |
446 | rec.Prefix.Timestamp = perf_get_timestamp(); |
447 | rec.CodeAddr = CodeAddr; |
448 | rec.NrEntry = Lines.size(); |
449 | |
450 | // compute total size of record (variable due to filenames) |
451 | DILineInfoTable::iterator Begin = Lines.begin(); |
452 | DILineInfoTable::iterator End = Lines.end(); |
453 | for (DILineInfoTable::iterator It = Begin; It != End; ++It) { |
454 | DILineInfo &line = It->second; |
455 | rec.Prefix.TotalSize += sizeof(LLVMPerfJitDebugEntry); |
456 | rec.Prefix.TotalSize += line.FileName.size() + 1; |
457 | } |
458 | |
459 | // The debug_entry describes the source line information. It is defined as |
460 | // follows in order: |
461 | // * uint64_t code_addr: address of function for which the debug information |
462 | // is generated |
463 | // * uint32_t line : source file line number (starting at 1) |
464 | // * uint32_t discrim : column discriminator, 0 is default |
465 | // * char name[n] : source file name in ASCII, including null termination |
466 | |
467 | // avoid interspersing output |
468 | std::lock_guard<sys::Mutex> Guard(Mutex); |
469 | |
470 | Dumpstream->write(Ptr: reinterpret_cast<const char *>(&rec), Size: sizeof(rec)); |
471 | |
472 | for (DILineInfoTable::iterator It = Begin; It != End; ++It) { |
473 | LLVMPerfJitDebugEntry LineInfo; |
474 | DILineInfo &Line = It->second; |
475 | |
476 | LineInfo.Addr = It->first; |
477 | // The function re-created by perf is preceded by a elf |
478 | // header. Need to adjust for that, otherwise the results are |
479 | // wrong. |
480 | LineInfo.Addr += 0x40; |
481 | LineInfo.Lineno = Line.Line; |
482 | LineInfo.Discrim = Line.Discriminator; |
483 | |
484 | Dumpstream->write(Ptr: reinterpret_cast<const char *>(&LineInfo), |
485 | Size: sizeof(LineInfo)); |
486 | Dumpstream->write(Ptr: Line.FileName.c_str(), Size: Line.FileName.size() + 1); |
487 | } |
488 | } |
489 | |
490 | } // end anonymous namespace |
491 | |
492 | namespace llvm { |
493 | JITEventListener *JITEventListener::createPerfJITEventListener() { |
494 | // There should be only a single event listener per process, otherwise perf |
495 | // gets confused. |
496 | static PerfJITEventListener PerfListener; |
497 | return &PerfListener; |
498 | } |
499 | |
500 | } // namespace llvm |
501 | |
502 | LLVMJITEventListenerRef LLVMCreatePerfJITEventListener(void) |
503 | { |
504 | return wrap(JITEventListener::createPerfJITEventListener()); |
505 | } |
506 | |