1//===-- Perf.cpp ----------------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "Perf.h"
10
11#include "Plugins/Process/POSIX/ProcessPOSIXLog.h"
12#include "lldb/Host/linux/Support.h"
13#include "llvm/Support/FormatVariadic.h"
14#include "llvm/Support/MathExtras.h"
15#include "llvm/Support/MemoryBuffer.h"
16#include <linux/version.h>
17#include <sys/ioctl.h>
18#include <sys/mman.h>
19#include <sys/syscall.h>
20#include <unistd.h>
21
22using namespace lldb_private;
23using namespace process_linux;
24using namespace llvm;
25
26Expected<LinuxPerfZeroTscConversion>
27lldb_private::process_linux::LoadPerfTscConversionParameters() {
28#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 12, 0)
29 lldb::pid_t pid = getpid();
30 perf_event_attr attr;
31 memset(s: &attr, c: 0, n: sizeof(attr));
32 attr.size = sizeof(attr);
33 attr.type = PERF_TYPE_SOFTWARE;
34 attr.config = PERF_COUNT_SW_DUMMY;
35
36 Expected<PerfEvent> perf_event = PerfEvent::Init(attr, pid);
37 if (!perf_event)
38 return perf_event.takeError();
39 if (Error mmap_err =
40 perf_event->MmapMetadataAndBuffers(/*num_data_pages=*/0,
41 /*num_aux_pages=*/0,
42 /*data_buffer_write=*/false))
43 return std::move(mmap_err);
44
45 perf_event_mmap_page &mmap_metada = perf_event->GetMetadataPage();
46 if (mmap_metada.cap_user_time && mmap_metada.cap_user_time_zero) {
47 return LinuxPerfZeroTscConversion{
48 .time_mult: mmap_metada.time_mult, .time_shift: mmap_metada.time_shift, .time_zero: {.value: mmap_metada.time_zero}};
49 } else {
50 auto err_cap =
51 !mmap_metada.cap_user_time ? "cap_user_time" : "cap_user_time_zero";
52 std::string err_msg =
53 llvm::formatv(Fmt: "Can't get TSC to real time conversion values. "
54 "perf_event capability '{0}' not supported.",
55 Vals&: err_cap);
56 return llvm::createStringError(EC: llvm::inconvertibleErrorCode(), S: err_msg);
57 }
58#else
59 std::string err_msg = "PERF_COUNT_SW_DUMMY requires Linux 3.12";
60 return llvm::createStringError(llvm::inconvertibleErrorCode(), err_msg);
61#endif
62}
63
64void resource_handle::MmapDeleter::operator()(void *ptr) {
65 if (m_bytes && ptr != nullptr)
66 munmap(addr: ptr, len: m_bytes);
67}
68
69void resource_handle::FileDescriptorDeleter::operator()(long *ptr) {
70 if (ptr == nullptr)
71 return;
72 if (*ptr == -1)
73 return;
74 close(fd: *ptr);
75 std::default_delete<long>()(ptr);
76}
77
78llvm::Expected<PerfEvent> PerfEvent::Init(perf_event_attr &attr,
79 std::optional<lldb::pid_t> pid,
80 std::optional<lldb::cpu_id_t> cpu,
81 std::optional<long> group_fd,
82 unsigned long flags) {
83 errno = 0;
84 long fd = syscall(SYS_perf_event_open, &attr, pid.value_or(u: -1),
85 cpu.value_or(u: -1), group_fd.value_or(u: -1), flags);
86 if (fd == -1) {
87 std::string err_msg =
88 llvm::formatv(Fmt: "perf event syscall failed: {0}", Vals: std::strerror(errno));
89 return llvm::createStringError(EC: llvm::inconvertibleErrorCode(), S: err_msg);
90 }
91 return PerfEvent(fd, !attr.disabled);
92}
93
94llvm::Expected<PerfEvent> PerfEvent::Init(perf_event_attr &attr,
95 std::optional<lldb::pid_t> pid,
96 std::optional<lldb::cpu_id_t> cpu) {
97 return Init(attr, pid, cpu, group_fd: -1, flags: 0);
98}
99
100llvm::Expected<resource_handle::MmapUP>
101PerfEvent::DoMmap(void *addr, size_t length, int prot, int flags,
102 long int offset, llvm::StringRef buffer_name) {
103 errno = 0;
104 auto mmap_result = ::mmap(addr: addr, len: length, prot: prot, flags: flags, fd: GetFd(), offset: offset);
105
106 if (mmap_result == MAP_FAILED) {
107 std::string err_msg =
108 llvm::formatv(Fmt: "perf event mmap allocation failed for {0}: {1}",
109 Vals&: buffer_name, Vals: std::strerror(errno));
110 return createStringError(EC: inconvertibleErrorCode(), S: err_msg);
111 }
112 return resource_handle::MmapUP(mmap_result, length);
113}
114
115llvm::Error PerfEvent::MmapMetadataAndDataBuffer(size_t num_data_pages,
116 bool data_buffer_write) {
117 size_t mmap_size = (num_data_pages + 1) * getpagesize();
118 if (Expected<resource_handle::MmapUP> mmap_metadata_data = DoMmap(
119 addr: nullptr, length: mmap_size, PROT_READ | (data_buffer_write ? PROT_WRITE : 0),
120 MAP_SHARED, offset: 0, buffer_name: "metadata and data buffer")) {
121 m_metadata_data_base = std::move(mmap_metadata_data.get());
122 return Error::success();
123 } else
124 return mmap_metadata_data.takeError();
125}
126
127llvm::Error PerfEvent::MmapAuxBuffer(size_t num_aux_pages) {
128#ifndef PERF_ATTR_SIZE_VER5
129 return createStringError(inconvertibleErrorCode(),
130 "Intel PT Linux perf event not supported");
131#else
132 if (num_aux_pages == 0)
133 return Error::success();
134
135 perf_event_mmap_page &metadata_page = GetMetadataPage();
136
137 metadata_page.aux_offset =
138 metadata_page.data_offset + metadata_page.data_size;
139 metadata_page.aux_size = num_aux_pages * getpagesize();
140
141 if (Expected<resource_handle::MmapUP> mmap_aux =
142 DoMmap(addr: nullptr, length: metadata_page.aux_size, PROT_READ, MAP_SHARED,
143 offset: metadata_page.aux_offset, buffer_name: "aux buffer")) {
144 m_aux_base = std::move(mmap_aux.get());
145 return Error::success();
146 } else
147 return mmap_aux.takeError();
148#endif
149}
150
151llvm::Error PerfEvent::MmapMetadataAndBuffers(size_t num_data_pages,
152 size_t num_aux_pages,
153 bool data_buffer_write) {
154 if (num_data_pages != 0 && !isPowerOf2_64(Value: num_data_pages))
155 return llvm::createStringError(
156 EC: llvm::inconvertibleErrorCode(),
157 S: llvm::formatv(Fmt: "Number of data pages must be a power of 2, got: {0}",
158 Vals&: num_data_pages));
159 if (num_aux_pages != 0 && !isPowerOf2_64(Value: num_aux_pages))
160 return llvm::createStringError(
161 EC: llvm::inconvertibleErrorCode(),
162 S: llvm::formatv(Fmt: "Number of aux pages must be a power of 2, got: {0}",
163 Vals&: num_aux_pages));
164 if (Error err = MmapMetadataAndDataBuffer(num_data_pages, data_buffer_write))
165 return err;
166 if (Error err = MmapAuxBuffer(num_aux_pages))
167 return err;
168 return Error::success();
169}
170
171long PerfEvent::GetFd() const { return *(m_fd.get()); }
172
173perf_event_mmap_page &PerfEvent::GetMetadataPage() const {
174 return *reinterpret_cast<perf_event_mmap_page *>(m_metadata_data_base.get());
175}
176
177ArrayRef<uint8_t> PerfEvent::GetDataBuffer() const {
178#ifndef PERF_ATTR_SIZE_VER5
179 llvm_unreachable("Intel PT Linux perf event not supported");
180#else
181 perf_event_mmap_page &mmap_metadata = GetMetadataPage();
182 return {reinterpret_cast<uint8_t *>(m_metadata_data_base.get()) +
183 mmap_metadata.data_offset,
184 static_cast<size_t>(mmap_metadata.data_size)};
185#endif
186}
187
188ArrayRef<uint8_t> PerfEvent::GetAuxBuffer() const {
189#ifndef PERF_ATTR_SIZE_VER5
190 llvm_unreachable("Intel PT Linux perf event not supported");
191#else
192 perf_event_mmap_page &mmap_metadata = GetMetadataPage();
193 return {reinterpret_cast<uint8_t *>(m_aux_base.get()),
194 static_cast<size_t>(mmap_metadata.aux_size)};
195#endif
196}
197
198Expected<std::vector<uint8_t>> PerfEvent::GetReadOnlyDataBuffer() {
199 // The following code assumes that the protection level of the DATA page
200 // is PROT_READ. If PROT_WRITE is used, then reading would require that
201 // this piece of code updates some pointers. See more about data_tail
202 // in https://man7.org/linux/man-pages/man2/perf_event_open.2.html.
203
204#ifndef PERF_ATTR_SIZE_VER5
205 return createStringError(inconvertibleErrorCode(),
206 "Intel PT Linux perf event not supported");
207#else
208 bool was_enabled = m_enabled;
209 if (Error err = DisableWithIoctl())
210 return std::move(err);
211
212 /**
213 * The data buffer and aux buffer have different implementations
214 * with respect to their definition of head pointer when using PROD_READ only.
215 * In the case of Aux data buffer the head always wraps around the aux buffer
216 * and we don't need to care about it, whereas the data_head keeps
217 * increasing and needs to be wrapped by modulus operator
218 */
219 perf_event_mmap_page &mmap_metadata = GetMetadataPage();
220
221 ArrayRef<uint8_t> data = GetDataBuffer();
222 uint64_t data_head = mmap_metadata.data_head;
223 uint64_t data_size = mmap_metadata.data_size;
224 std::vector<uint8_t> output;
225 output.reserve(n: data.size());
226
227 if (data_head > data_size) {
228 uint64_t actual_data_head = data_head % data_size;
229 // The buffer has wrapped, so we first the oldest chunk of data
230 output.insert(position: output.end(), first: data.begin() + actual_data_head, last: data.end());
231 // And we read the most recent chunk of data
232 output.insert(position: output.end(), first: data.begin(), last: data.begin() + actual_data_head);
233 } else {
234 // There's been no wrapping, so we just read linearly
235 output.insert(position: output.end(), first: data.begin(), last: data.begin() + data_head);
236 }
237
238 if (was_enabled) {
239 if (Error err = EnableWithIoctl())
240 return std::move(err);
241 }
242
243 return output;
244#endif
245}
246
247Expected<std::vector<uint8_t>> PerfEvent::GetReadOnlyAuxBuffer() {
248 // The following code assumes that the protection level of the AUX page
249 // is PROT_READ. If PROT_WRITE is used, then reading would require that
250 // this piece of code updates some pointers. See more about aux_tail
251 // in https://man7.org/linux/man-pages/man2/perf_event_open.2.html.
252
253#ifndef PERF_ATTR_SIZE_VER5
254 return createStringError(inconvertibleErrorCode(),
255 "Intel PT Linux perf event not supported");
256#else
257 bool was_enabled = m_enabled;
258 if (Error err = DisableWithIoctl())
259 return std::move(err);
260
261 perf_event_mmap_page &mmap_metadata = GetMetadataPage();
262
263 ArrayRef<uint8_t> data = GetAuxBuffer();
264 uint64_t aux_head = mmap_metadata.aux_head;
265 std::vector<uint8_t> output;
266 output.reserve(n: data.size());
267
268 /**
269 * When configured as ring buffer, the aux buffer keeps wrapping around
270 * the buffer and its not possible to detect how many times the buffer
271 * wrapped. Initially the buffer is filled with zeros,as shown below
272 * so in order to get complete buffer we first copy firstpartsize, followed
273 * by any left over part from beginning to aux_head
274 *
275 * aux_offset [d,d,d,d,d,d,d,d,0,0,0,0,0,0,0,0,0,0,0] aux_size
276 * aux_head->||<- firstpartsize ->|
277 *
278 * */
279
280 output.insert(position: output.end(), first: data.begin() + aux_head, last: data.end());
281 output.insert(position: output.end(), first: data.begin(), last: data.begin() + aux_head);
282
283 if (was_enabled) {
284 if (Error err = EnableWithIoctl())
285 return std::move(err);
286 }
287
288 return output;
289#endif
290}
291
292Error PerfEvent::DisableWithIoctl() {
293 if (!m_enabled)
294 return Error::success();
295
296 if (ioctl(fd: *m_fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) < 0)
297 return createStringError(EC: inconvertibleErrorCode(),
298 Fmt: "Can't disable perf event. %s",
299 Vals: std::strerror(errno));
300
301 m_enabled = false;
302 return Error::success();
303}
304
305bool PerfEvent::IsEnabled() const { return m_enabled; }
306
307Error PerfEvent::EnableWithIoctl() {
308 if (m_enabled)
309 return Error::success();
310
311 if (ioctl(fd: *m_fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) < 0)
312 return createStringError(EC: inconvertibleErrorCode(),
313 Fmt: "Can't enable perf event. %s",
314 Vals: std::strerror(errno));
315
316 m_enabled = true;
317 return Error::success();
318}
319
320size_t PerfEvent::GetEffectiveDataBufferSize() const {
321#ifndef PERF_ATTR_SIZE_VER5
322 llvm_unreachable("Intel PT Linux perf event not supported");
323#else
324 perf_event_mmap_page &mmap_metadata = GetMetadataPage();
325 if (mmap_metadata.data_head < mmap_metadata.data_size)
326 return mmap_metadata.data_head;
327 else
328 return mmap_metadata.data_size; // The buffer has wrapped.
329#endif
330}
331
332Expected<PerfEvent>
333lldb_private::process_linux::CreateContextSwitchTracePerfEvent(
334 lldb::cpu_id_t cpu_id, const PerfEvent *parent_perf_event) {
335 Log *log = GetLog(mask: POSIXLog::Trace);
336#ifndef PERF_ATTR_SIZE_VER5
337 return createStringError(inconvertibleErrorCode(),
338 "Intel PT Linux perf event not supported");
339#else
340 perf_event_attr attr;
341 memset(s: &attr, c: 0, n: sizeof(attr));
342 attr.size = sizeof(attr);
343 attr.sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_TIME;
344 attr.type = PERF_TYPE_SOFTWARE;
345 attr.context_switch = 1;
346 attr.exclude_kernel = 1;
347 attr.sample_id_all = 1;
348 attr.exclude_hv = 1;
349 attr.disabled = parent_perf_event ? !parent_perf_event->IsEnabled() : false;
350
351 // The given perf configuration will produce context switch records of 32
352 // bytes each. Assuming that every context switch will be emitted twice (one
353 // for context switch ins and another one for context switch outs), and that a
354 // context switch will happen at least every half a millisecond per core, we
355 // need 500 * 32 bytes (~16 KB) for a trace of one second, which is much more
356 // than what a regular intel pt trace can get. Pessimistically we pick as
357 // 32KiB for the size of our context switch trace.
358
359 uint64_t data_buffer_size = 32768;
360 uint64_t data_buffer_numpages = data_buffer_size / getpagesize();
361
362 LLDB_LOG(log, "Will create context switch trace buffer of size {0}",
363 data_buffer_size);
364
365 std::optional<long> group_fd;
366 if (parent_perf_event)
367 group_fd = parent_perf_event->GetFd();
368
369 if (Expected<PerfEvent> perf_event = PerfEvent::Init(
370 attr, /*pid=*/std::nullopt, cpu: cpu_id, group_fd, /*flags=*/0)) {
371 if (Error mmap_err = perf_event->MmapMetadataAndBuffers(
372 num_data_pages: data_buffer_numpages, num_aux_pages: 0, /*data_buffer_write=*/false)) {
373 return std::move(mmap_err);
374 }
375 return perf_event;
376 } else {
377 return perf_event.takeError();
378 }
379#endif
380}
381

source code of lldb/source/Plugins/Process/Linux/Perf.cpp