// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#include <vmlinux.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

#include "profiler.h"
#include "err.h"
#include "bpf_experimental.h"
#include "bpf_compiler.h"

#ifndef NULL
#define NULL 0
#endif

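/*
 * Open-flag and inode-mode constants mirrored from the kernel UAPI headers;
 * the bpftool-generated vmlinux.h carries types and enums but no #define
 * macros, so these must be redefined locally.
 */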
#define O_WRONLY 00000001
#define O_RDWR 00000002
#define O_DIRECTORY 00200000
#define __O_TMPFILE 020000000
#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
#define S_IFMT 00170000
#define S_IFSOCK 0140000
#define S_IFLNK 0120000
#define S_IFREG 0100000
#define S_IFBLK 0060000
#define S_IFDIR 0040000
#define S_IFCHR 0020000
#define S_IFIFO 0010000
#define S_ISUID 0004000
#define S_ISGID 0002000
#define S_ISVTX 0001000
#define S_ISLNK(m) (((m)&S_IFMT) == S_IFLNK)
#define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR)
#define S_ISCHR(m) (((m)&S_IFMT) == S_IFCHR)
#define S_ISBLK(m) (((m)&S_IFMT) == S_IFBLK)
#define S_ISFIFO(m) (((m)&S_IFMT) == S_IFIFO)
#define S_ISSOCK(m) (((m)&S_IFMT) == S_IFSOCK)

#define KILL_DATA_ARRAY_SIZE 8

struct var_kill_data_arr_t {
	struct var_kill_data_t array[KILL_DATA_ARRAY_SIZE];
};

union any_profiler_data_t {
	struct var_exec_data_t var_exec;
	struct var_kill_data_t var_kill;
	struct var_sysctl_data_t var_sysctl;
	struct var_filemod_data_t var_filemod;
	struct var_fork_data_t var_fork;
	struct var_kill_data_arr_t var_kill_data_arr;
};

volatile struct profiler_config_struct bpf_config = {};

#define FETCH_CGROUPS_FROM_BPF (bpf_config.fetch_cgroups_from_bpf)
#define CGROUP_FS_INODE (bpf_config.cgroup_fs_inode)
#define CGROUP_LOGIN_SESSION_INODE \
	(bpf_config.cgroup_login_session_inode)
#define KILL_SIGNALS (bpf_config.kill_signals_mask)
#define STALE_INFO (bpf_config.stale_info_secs)
#define INODE_FILTER (bpf_config.inode_filter)
#define READ_ENVIRON_FROM_EXEC (bpf_config.read_environ_from_exec)
#define ENABLE_CGROUP_V1_RESOLVER (bpf_config.enable_cgroup_v1_resolver)

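/*
 * CO-RE "flavor" definitions: libbpf ignores everything from the triple
 * underscore onward when matching a type against the running kernel's BTF,
 * so kernfs_iattrs___52 and kernfs_node___52 describe the layouts these
 * structs had on older kernels (circa v5.2, hence the suffix): an inline
 * iattr and a union-based node id. Which variant applies is detected at
 * load time via bpf_core_field_exists().
 */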
struct kernfs_iattrs___52 {
	struct iattr ia_iattr;
};

struct kernfs_node___52 {
	union /* kernfs_node_id */ {
		struct {
			u32 ino;
			u32 generation;
		};
		u64 id;
	} id;
};

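/*
 * Scratch space for building events. BPF programs get only 512 bytes of
 * stack, so per-event records (which can run to several KB) are staged in
 * this single-slot per-CPU array instead.
 */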
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, 1);
	__type(key, u32);
	__type(value, union any_profiler_data_t);
} data_heap SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(int));
} events SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, KILL_DATA_ARRAY_SIZE);
	__type(key, u32);
	__type(value, struct var_kill_data_arr_t);
} var_tpid_to_data SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, profiler_bpf_max_function_id);
	__type(key, u32);
	__type(value, struct bpf_func_stats_data);
} bpf_func_stats SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u32);
	__type(value, bool);
	__uint(max_entries, 16);
} allowed_devices SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u64);
	__type(value, bool);
	__uint(max_entries, 1024);
} allowed_file_inodes SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u64);
	__type(value, bool);
	__uint(max_entries, 1024);
} allowed_directory_inodes SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u32);
	__type(value, bool);
	__uint(max_entries, 16);
} disallowed_exec_inodes SEC(".maps");

#ifndef ARRAY_SIZE
#define ARRAY_SIZE(arr) (int)(sizeof(arr) / sizeof(arr[0]))
#endif

static INLINE bool IS_ERR(const void* ptr)
{
	return IS_ERR_VALUE((unsigned long)ptr);
}

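/*
 * bpf_get_current_pid_tgid() packs the thread-group id (the userspace
 * notion of "pid") into the upper 32 bits; the lower 32 bits hold the
 * kernel task pid (the tid).
 */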
static INLINE u32 get_userspace_pid()
{
	return bpf_get_current_pid_tgid() >> 32;
}

static INLINE bool is_init_process(u32 tgid)
{
	return tgid == 1 || tgid == 0;
}

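/*
 * Bounded kernel read: clamps len to max before copying. The explicit
 * len > 1 / len == 1 split looks redundant, but presumably helps the
 * verifier prove the copy size is within bounds.
 */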
static INLINE unsigned long
probe_read_lim(void* dst, void* src, unsigned long len, unsigned long max)
{
	len = len < max ? len : max;
	if (len > 1) {
		if (bpf_probe_read_kernel(dst, len, src))
			return 0;
	} else if (len == 1) {
		if (bpf_probe_read_kernel(dst, 1, src))
			return 0;
	}
	return len;
}

static INLINE int get_var_spid_index(struct var_kill_data_arr_t* arr_struct,
				     int spid)
{
#ifdef UNROLL
	__pragma_loop_unroll
#endif
	for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
		if (arr_struct->array[i].meta.pid == spid)
			return i;
	return -1;
}

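/*
 * Walks the real_parent chain, recording the pid, self_exec_id and
 * start_time of up to MAX_ANCESTORS ancestors; stops at the init (pid 1)
 * or idle (pid 0) task. The extra fields let userspace tell recycled pids
 * apart.
 */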
static INLINE void populate_ancestors(struct task_struct* task,
				      struct ancestors_data_t* ancestors_data)
{
	struct task_struct* parent = task;
	u32 num_ancestors, ppid;

	ancestors_data->num_ancestors = 0;
#ifdef UNROLL
	__pragma_loop_unroll
#endif
	for (num_ancestors = 0; num_ancestors < MAX_ANCESTORS; num_ancestors++) {
		parent = BPF_CORE_READ(parent, real_parent);
		if (parent == NULL)
			break;
		ppid = BPF_CORE_READ(parent, tgid);
		if (is_init_process(ppid))
			break;
		ancestors_data->ancestor_pids[num_ancestors] = ppid;
		ancestors_data->ancestor_exec_ids[num_ancestors] =
			BPF_CORE_READ(parent, self_exec_id);
		ancestors_data->ancestor_start_times[num_ancestors] =
			BPF_CORE_READ(parent, start_time);
		ancestors_data->num_ancestors = num_ancestors;
	}
}

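/*
 * Copies cgroup path components leaf-to-root into payload as consecutive
 * NUL-terminated strings (no separators are inserted; userspace is expected
 * to reassemble the path). *root_pos receives the byte offset at which
 * cgroup_root_node's component was written.
 */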
static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node,
					  struct kernfs_node* cgroup_root_node,
					  void* payload,
					  int* root_pos)
{
	void* payload_start = payload;
	size_t filepart_length;

#ifdef UNROLL
	__pragma_loop_unroll
#endif
	for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) {
		filepart_length =
			bpf_probe_read_kernel_str(payload, MAX_PATH,
						  BPF_CORE_READ(cgroup_node, name));
		if (!cgroup_node)
			return payload;
		if (cgroup_node == cgroup_root_node)
			*root_pos = payload - payload_start;
		if (bpf_cmp_likely(filepart_length, <=, MAX_PATH)) {
			payload += filepart_length;
		}
		cgroup_node = BPF_CORE_READ(cgroup_node, parent);
	}
	return payload;
}

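/*
 * Reads the kernfs node's inode number in a layout-independent way: through
 * the old union-based id when the ___52 flavor matches the kernel's BTF,
 * through the plain u64 id otherwise. barrier_var() is presumably there to
 * keep clang from merging the two branches into code the verifier rejects.
 */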
static ino_t get_inode_from_kernfs(struct kernfs_node* node)
{
	struct kernfs_node___52* node52 = (void*)node;

	if (bpf_core_field_exists(node52->id.ino)) {
		barrier_var(node52);
		return BPF_CORE_READ(node52, id.ino);
	} else {
		barrier_var(node);
		return (u64)BPF_CORE_READ(node, id);
	}
}

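/*
 * CONFIG_CGROUP_PIDS is resolved against the running kernel's config at
 * load time (__kconfig); __weak keeps loading from failing if the symbol is
 * absent. The ___local enum exists only so bpf_core_enum_value() can
 * relocate the real pids_cgrp_id value from kernel BTF; the 123 placeholder
 * is never used.
 */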
extern bool CONFIG_CGROUP_PIDS __kconfig __weak;
enum cgroup_subsys_id___local {
	pids_cgrp_id___local = 123, /* value doesn't matter */
};

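/*
 * Records inode, mtime and name of the task's cgroup and of the cgroup
 * root. By default the cgroup v2 hierarchy (dfl_cgrp) is used; with
 * ENABLE_CGROUP_V1_RESOLVER set, the pids controller's v1 hierarchy is
 * preferred when present.
 */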
static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
					 struct task_struct* task,
					 void* payload)
{
	struct kernfs_node* root_kernfs =
		BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn);
	struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);

#if __has_builtin(__builtin_preserve_enum_value)
	if (ENABLE_CGROUP_V1_RESOLVER && CONFIG_CGROUP_PIDS) {
		int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local,
						  pids_cgrp_id___local);
#ifdef UNROLL
		__pragma_loop_unroll
#endif
		for (int i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
			struct cgroup_subsys_state* subsys =
				BPF_CORE_READ(task, cgroups, subsys[i]);
			if (subsys != NULL) {
				int subsys_id = BPF_CORE_READ(subsys, ss, id);
				if (subsys_id == cgrp_id) {
					proc_kernfs = BPF_CORE_READ(subsys, cgroup, kn);
					root_kernfs = BPF_CORE_READ(subsys, ss, root, kf_root, kn);
					break;
				}
			}
		}
	}
#endif

	cgroup_data->cgroup_root_inode = get_inode_from_kernfs(root_kernfs);
	cgroup_data->cgroup_proc_inode = get_inode_from_kernfs(proc_kernfs);

	if (bpf_core_field_exists(root_kernfs->iattr->ia_mtime)) {
		cgroup_data->cgroup_root_mtime =
			BPF_CORE_READ(root_kernfs, iattr, ia_mtime.tv_nsec);
		cgroup_data->cgroup_proc_mtime =
			BPF_CORE_READ(proc_kernfs, iattr, ia_mtime.tv_nsec);
	} else {
		struct kernfs_iattrs___52* root_iattr =
			(struct kernfs_iattrs___52*)BPF_CORE_READ(root_kernfs, iattr);
		cgroup_data->cgroup_root_mtime =
			BPF_CORE_READ(root_iattr, ia_iattr.ia_mtime.tv_nsec);

		struct kernfs_iattrs___52* proc_iattr =
			(struct kernfs_iattrs___52*)BPF_CORE_READ(proc_kernfs, iattr);
		cgroup_data->cgroup_proc_mtime =
			BPF_CORE_READ(proc_iattr, ia_iattr.ia_mtime.tv_nsec);
	}

	cgroup_data->cgroup_root_length = 0;
	cgroup_data->cgroup_proc_length = 0;
	cgroup_data->cgroup_full_length = 0;

	size_t cgroup_root_length =
		bpf_probe_read_kernel_str(payload, MAX_PATH,
					  BPF_CORE_READ(root_kernfs, name));
	if (bpf_cmp_likely(cgroup_root_length, <=, MAX_PATH)) {
		cgroup_data->cgroup_root_length = cgroup_root_length;
		payload += cgroup_root_length;
	}

	size_t cgroup_proc_length =
		bpf_probe_read_kernel_str(payload, MAX_PATH,
					  BPF_CORE_READ(proc_kernfs, name));
	if (bpf_cmp_likely(cgroup_proc_length, <=, MAX_PATH)) {
		cgroup_data->cgroup_proc_length = cgroup_proc_length;
		payload += cgroup_proc_length;
	}

	if (FETCH_CGROUPS_FROM_BPF) {
		cgroup_data->cgroup_full_path_root_pos = -1;
		void* payload_end_pos = read_full_cgroup_path(proc_kernfs, root_kernfs, payload,
							      &cgroup_data->cgroup_full_path_root_pos);
		cgroup_data->cgroup_full_length = payload_end_pos - payload;
		payload = payload_end_pos;
	}

	return (void*)payload;
}

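/*
 * Fills the fixed metadata common to all event types and appends the task's
 * comm to the variable-length payload, returning the advanced payload
 * cursor.
 */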
static INLINE void* populate_var_metadata(struct var_metadata_t* metadata,
					  struct task_struct* task,
					  u32 pid, void* payload)
{
	u64 uid_gid = bpf_get_current_uid_gid();

	metadata->uid = (u32)uid_gid;
	metadata->gid = uid_gid >> 32;
	metadata->pid = pid;
	metadata->exec_id = BPF_CORE_READ(task, self_exec_id);
	metadata->start_time = BPF_CORE_READ(task, start_time);
	metadata->comm_length = 0;

	size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
	if (bpf_cmp_likely(comm_length, <=, TASK_COMM_LEN)) {
		metadata->comm_length = comm_length;
		payload += comm_length;
	}

	return (void*)payload;
}

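/* Builds a fresh KILL_EVENT record in the per-CPU scratch heap. */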
static INLINE struct var_kill_data_t*
get_var_kill_data(struct pt_regs* ctx, int spid, int tpid, int sig)
{
	int zero = 0;
	struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);

	if (kill_data == NULL)
		return NULL;
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();

	void* payload = populate_var_metadata(&kill_data->meta, task, spid, kill_data->payload);
	payload = populate_cgroup_info(&kill_data->cgroup_data, task, payload);
	size_t payload_length = payload - (void*)kill_data->payload;
	kill_data->payload_length = payload_length;
	populate_ancestors(task, &kill_data->ancestors_info);
	kill_data->meta.type = KILL_EVENT;
	kill_data->kill_target_pid = tpid;
	kill_data->kill_sig = sig;
	kill_data->kill_count = 1;
	kill_data->last_kill_time = bpf_ktime_get_ns();
	return kill_data;
}

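/*
 * One var_kill_data_arr_t is kept per target pid, with one slot per sender
 * (spid). Repeated kills from the same sender within STALE_INFO seconds
 * only bump kill_count; the buffered records are emitted later, when the
 * target exits.
 */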
static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig)
{
	if ((KILL_SIGNALS & (1ULL << sig)) == 0)
		return 0;

	u32 spid = get_userspace_pid();
	struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);

	if (arr_struct == NULL) {
		struct var_kill_data_t* kill_data = get_var_kill_data(ctx, spid, tpid, sig);
		int zero = 0;

		if (kill_data == NULL)
			return 0;
		arr_struct = bpf_map_lookup_elem(&data_heap, &zero);
		if (arr_struct == NULL)
			return 0;
		bpf_probe_read_kernel(&arr_struct->array[0],
				      sizeof(arr_struct->array[0]), kill_data);
	} else {
		int index = get_var_spid_index(arr_struct, spid);

		if (index == -1) {
			struct var_kill_data_t* kill_data =
				get_var_kill_data(ctx, spid, tpid, sig);
			if (kill_data == NULL)
				return 0;
#ifdef UNROLL
			__pragma_loop_unroll
#endif
			for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
				if (arr_struct->array[i].meta.pid == 0) {
					bpf_probe_read_kernel(&arr_struct->array[i],
							      sizeof(arr_struct->array[i]),
							      kill_data);
					bpf_map_update_elem(&var_tpid_to_data, &tpid,
							    arr_struct, 0);

					return 0;
				}
			return 0;
		}

		struct var_kill_data_t* kill_data = &arr_struct->array[index];

		u64 delta_sec =
			(bpf_ktime_get_ns() - kill_data->last_kill_time) / 1000000000;

		if (delta_sec < STALE_INFO) {
			kill_data->kill_count++;
			kill_data->last_kill_time = bpf_ktime_get_ns();
			bpf_probe_read_kernel(&arr_struct->array[index],
					      sizeof(arr_struct->array[index]),
					      kill_data);
		} else {
			struct var_kill_data_t* kill_data =
				get_var_kill_data(ctx, spid, tpid, sig);
			if (kill_data == NULL)
				return 0;
			bpf_probe_read_kernel(&arr_struct->array[index],
					      sizeof(arr_struct->array[index]),
					      kill_data);
		}
	}
	bpf_map_update_elem(&var_tpid_to_data, &tpid, arr_struct, 0);
	return 0;
}

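/*
 * Self-instrumentation: each entry point brackets its body with
 * bpf_stats_enter()/bpf_stats_exit() to accumulate per-program execution
 * counts and elapsed nanoseconds in the bpf_func_stats per-CPU map.
 */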
static INLINE void bpf_stats_enter(struct bpf_func_stats_ctx* bpf_stat_ctx,
				   enum bpf_function_id func_id)
{
	int func_id_key = func_id;

	bpf_stat_ctx->start_time_ns = bpf_ktime_get_ns();
	bpf_stat_ctx->bpf_func_stats_data_val =
		bpf_map_lookup_elem(&bpf_func_stats, &func_id_key);
	if (bpf_stat_ctx->bpf_func_stats_data_val)
		bpf_stat_ctx->bpf_func_stats_data_val->num_executions++;
}

static INLINE void bpf_stats_exit(struct bpf_func_stats_ctx* bpf_stat_ctx)
{
	if (bpf_stat_ctx->bpf_func_stats_data_val)
		bpf_stat_ctx->bpf_func_stats_data_val->time_elapsed_ns +=
			bpf_ktime_get_ns() - bpf_stat_ctx->start_time_ns;
}

static INLINE void
bpf_stats_pre_submit_var_perf_event(struct bpf_func_stats_ctx* bpf_stat_ctx,
				    struct var_metadata_t* meta)
{
	if (bpf_stat_ctx->bpf_func_stats_data_val) {
		bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events++;
		meta->bpf_stats_num_perf_events =
			bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events;
	}
	meta->bpf_stats_start_ktime_ns = bpf_stat_ctx->start_time_ns;
	meta->cpu_id = bpf_get_smp_processor_id();
}

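/*
 * Walks d_parent up to MAX_PATH_DEPTH levels, appending each dentry name
 * leaf-first as a NUL-terminated string; returns the total bytes written.
 * As with cgroup paths, reassembly into a conventional path is left to
 * userspace.
 */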
static INLINE size_t
read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload)
{
	size_t length = 0;
	size_t filepart_length;
	struct dentry* parent_dentry;

#ifdef UNROLL
	__pragma_loop_unroll
#endif
	for (int i = 0; i < MAX_PATH_DEPTH; i++) {
		filepart_length =
			bpf_probe_read_kernel_str(payload, MAX_PATH,
						  BPF_CORE_READ(filp_dentry, d_name.name));
		bpf_nop_mov(filepart_length);
		if (bpf_cmp_unlikely(filepart_length, >, MAX_PATH))
			break;
		payload += filepart_length;
		length += filepart_length;

		parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
		if (filp_dentry == parent_dentry)
			break;
		filp_dentry = parent_dentry;
	}

	return length;
}

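/* True if any ancestor directory's inode is in allowed_directory_inodes. */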
static INLINE bool
is_ancestor_in_allowed_inodes(struct dentry* filp_dentry)
{
	struct dentry* parent_dentry;
#ifdef UNROLL
	__pragma_loop_unroll
#endif
	for (int i = 0; i < MAX_PATH_DEPTH; i++) {
		u64 dir_ino = BPF_CORE_READ(filp_dentry, d_inode, i_ino);
		bool* allowed_dir = bpf_map_lookup_elem(&allowed_directory_inodes, &dir_ino);

		if (allowed_dir != NULL)
			return true;
		parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
		if (filp_dentry == parent_dentry)
			break;
		filp_dentry = parent_dentry;
	}
	return false;
}

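/*
 * Filemod allow-list check: the device must be in allowed_devices, and the
 * file's inode must either be in allowed_file_inodes or sit under an
 * allowed ancestor directory. The device id (always) and file inode (once
 * the device check passes) are reported back through the out-params.
 */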
static INLINE bool is_dentry_allowed_for_filemod(struct dentry* file_dentry,
						 u32* device_id,
						 u64* file_ino)
{
	u32 dev_id = BPF_CORE_READ(file_dentry, d_sb, s_dev);
	*device_id = dev_id;
	bool* allowed_device = bpf_map_lookup_elem(&allowed_devices, &dev_id);

	if (allowed_device == NULL)
		return false;

	u64 ino = BPF_CORE_READ(file_dentry, d_inode, i_ino);
	*file_ino = ino;
	bool* allowed_file = bpf_map_lookup_elem(&allowed_file_inodes, &ino);

	if (allowed_file == NULL)
		if (!is_ancestor_in_allowed_inodes(BPF_CORE_READ(file_dentry, d_parent)))
			return false;
	return true;
}

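/*
 * Entry points. Each program stages its record in the data_heap, fills the
 * common metadata/cgroup (and, where relevant, ancestor) sections, then
 * emits the used prefix of the record through the events perf buffer.
 *
 * kprobe on proc_sys_write: captures sysctl writes, recording the written
 * value and the name of the sysctl file.
 */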
SEC("kprobe/proc_sys_write")
ssize_t BPF_KPROBE(kprobe__proc_sys_write,
		   struct file* filp, const char* buf,
		   size_t count, loff_t* ppos)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_proc_sys_write);

	u32 pid = get_userspace_pid();
	int zero = 0;
	struct var_sysctl_data_t* sysctl_data =
		bpf_map_lookup_elem(&data_heap, &zero);
	if (!sysctl_data)
		goto out;

	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
	sysctl_data->meta.type = SYSCTL_EVENT;
	void* payload = populate_var_metadata(&sysctl_data->meta, task, pid, sysctl_data->payload);
	payload = populate_cgroup_info(&sysctl_data->cgroup_data, task, payload);

	populate_ancestors(task, &sysctl_data->ancestors_info);

	sysctl_data->sysctl_val_length = 0;
	sysctl_data->sysctl_path_length = 0;

	size_t sysctl_val_length = bpf_probe_read_kernel_str(payload,
							     CTL_MAXNAME, buf);
	if (bpf_cmp_likely(sysctl_val_length, <=, CTL_MAXNAME)) {
		sysctl_data->sysctl_val_length = sysctl_val_length;
		payload += sysctl_val_length;
	}

	size_t sysctl_path_length =
		bpf_probe_read_kernel_str(payload, MAX_PATH,
					  BPF_CORE_READ(filp, f_path.dentry,
							d_name.name));
	if (bpf_cmp_likely(sysctl_path_length, <=, MAX_PATH)) {
		sysctl_data->sysctl_path_length = sysctl_path_length;
		payload += sysctl_path_length;
	}

	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &sysctl_data->meta);
	unsigned long data_len = payload - (void*)sysctl_data;
	data_len = data_len > sizeof(struct var_sysctl_data_t)
			   ? sizeof(struct var_sysctl_data_t)
			   : data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, sysctl_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}

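/*
 * kill() syscall tracepoint: only signals selected by the KILL_SIGNALS mask
 * are tracked (see trace_var_sys_kill() above).
 */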
SEC("tracepoint/syscalls/sys_enter_kill")
int tracepoint__syscalls__sys_enter_kill(struct syscall_trace_enter* ctx)
{
	struct bpf_func_stats_ctx stats_ctx;

	bpf_stats_enter(&stats_ctx, profiler_bpf_sys_enter_kill);
	int pid = ctx->args[0];
	int sig = ctx->args[1];
	int ret = trace_var_sys_kill(ctx, pid, sig);
	bpf_stats_exit(&stats_ctx);
	return ret;
}

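/*
 * On process exit, flushes any kill records buffered for this pid: each
 * sender's record is completed with the target's comm and cgroup name, then
 * emitted, and the map entry is deleted.
 */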
SEC("raw_tracepoint/sched_process_exit")
int raw_tracepoint__sched_process_exit(void* ctx)
{
	int zero = 0;
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exit);

	u32 tpid = get_userspace_pid();

	struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);
	struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);

	if (arr_struct == NULL || kill_data == NULL)
		goto out;

	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
	struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);

#ifdef UNROLL
	__pragma_loop_unroll
#endif
	for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) {
		struct var_kill_data_t* past_kill_data = &arr_struct->array[i];

		if (past_kill_data != NULL && past_kill_data->kill_target_pid == (pid_t)tpid) {
			bpf_probe_read_kernel(kill_data, sizeof(*past_kill_data),
					      past_kill_data);
			void* payload = kill_data->payload;
			size_t offset = kill_data->payload_length;
			if (offset >= MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN)
				return 0;
			payload += offset;

			kill_data->kill_target_name_length = 0;
			kill_data->kill_target_cgroup_proc_length = 0;

			size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
			if (bpf_cmp_likely(comm_length, <=, TASK_COMM_LEN)) {
				kill_data->kill_target_name_length = comm_length;
				payload += comm_length;
			}

			size_t cgroup_proc_length =
				bpf_probe_read_kernel_str(payload,
							  KILL_TARGET_LEN,
							  BPF_CORE_READ(proc_kernfs, name));
			if (bpf_cmp_likely(cgroup_proc_length, <=, KILL_TARGET_LEN)) {
				kill_data->kill_target_cgroup_proc_length = cgroup_proc_length;
				payload += cgroup_proc_length;
			}

			bpf_stats_pre_submit_var_perf_event(&stats_ctx, &kill_data->meta);
			unsigned long data_len = (void*)payload - (void*)kill_data;
			data_len = data_len > sizeof(struct var_kill_data_t)
					   ? sizeof(struct var_kill_data_t)
					   : data_len;
			bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, kill_data, data_len);
		}
	}
	bpf_map_delete_elem(&var_tpid_to_data, &tpid);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}

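/*
 * On exec, emits an EXEC_EVENT with the binary path, cmdline and
 * (optionally) environment read from the new mm, unless the binary's inode
 * is disallowed or filtered out.
 */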
SEC("raw_tracepoint/sched_process_exec")
int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exec);

	struct linux_binprm* bprm = (struct linux_binprm*)ctx->args[2];
	u64 inode = BPF_CORE_READ(bprm, file, f_inode, i_ino);

	bool* should_filter_binprm = bpf_map_lookup_elem(&disallowed_exec_inodes, &inode);
	if (should_filter_binprm != NULL)
		goto out;

	int zero = 0;
	struct var_exec_data_t* proc_exec_data = bpf_map_lookup_elem(&data_heap, &zero);
	if (!proc_exec_data)
		goto out;

	if (INODE_FILTER && inode != INODE_FILTER)
		return 0;

	u32 pid = get_userspace_pid();
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();

	proc_exec_data->meta.type = EXEC_EVENT;
	proc_exec_data->bin_path_length = 0;
	proc_exec_data->cmdline_length = 0;
	proc_exec_data->environment_length = 0;
	void* payload = populate_var_metadata(&proc_exec_data->meta, task, pid,
					      proc_exec_data->payload);
	payload = populate_cgroup_info(&proc_exec_data->cgroup_data, task, payload);

	struct task_struct* parent_task = BPF_CORE_READ(task, real_parent);
	proc_exec_data->parent_pid = BPF_CORE_READ(parent_task, tgid);
	proc_exec_data->parent_uid = BPF_CORE_READ(parent_task, real_cred, uid.val);
	proc_exec_data->parent_exec_id = BPF_CORE_READ(parent_task, self_exec_id);
	proc_exec_data->parent_start_time = BPF_CORE_READ(parent_task, start_time);

	const char* filename = BPF_CORE_READ(bprm, filename);
	size_t bin_path_length =
		bpf_probe_read_kernel_str(payload, MAX_FILENAME_LEN, filename);
	if (bpf_cmp_likely(bin_path_length, <=, MAX_FILENAME_LEN)) {
		proc_exec_data->bin_path_length = bin_path_length;
		payload += bin_path_length;
	}

	void* arg_start = (void*)BPF_CORE_READ(task, mm, arg_start);
	void* arg_end = (void*)BPF_CORE_READ(task, mm, arg_end);
	unsigned int cmdline_length = probe_read_lim(payload, arg_start,
						     arg_end - arg_start, MAX_ARGS_LEN);

	if (bpf_cmp_likely(cmdline_length, <=, MAX_ARGS_LEN)) {
		proc_exec_data->cmdline_length = cmdline_length;
		payload += cmdline_length;
	}

	if (READ_ENVIRON_FROM_EXEC) {
		void* env_start = (void*)BPF_CORE_READ(task, mm, env_start);
		void* env_end = (void*)BPF_CORE_READ(task, mm, env_end);
		unsigned long env_len = probe_read_lim(payload, env_start,
						       env_end - env_start, MAX_ENVIRON_LEN);
		if (cmdline_length <= MAX_ENVIRON_LEN) {
			proc_exec_data->environment_length = env_len;
			payload += env_len;
		}
	}

	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &proc_exec_data->meta);
	unsigned long data_len = payload - (void*)proc_exec_data;
	data_len = data_len > sizeof(struct var_exec_data_t)
			   ? sizeof(struct var_exec_data_t)
			   : data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, proc_exec_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}

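/*
 * kretprobe on do_filp_open: reports FMOD_OPEN events for files opened for
 * writing (O_WRONLY/O_RDWR, excluding O_TMPFILE); directories, character
 * and block devices, FIFOs and sockets are skipped, and the dentry must
 * pass the device/inode allow-lists.
 */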
SEC("kretprobe/do_filp_open")
int kprobe_ret__do_filp_open(struct pt_regs* ctx)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_do_filp_open_ret);

	struct file* filp = (struct file*)PT_REGS_RC_CORE(ctx);

	if (filp == NULL || IS_ERR(filp))
		goto out;
	unsigned int flags = BPF_CORE_READ(filp, f_flags);
	if ((flags & (O_RDWR | O_WRONLY)) == 0)
		goto out;
	if ((flags & O_TMPFILE) > 0)
		goto out;
	struct inode* file_inode = BPF_CORE_READ(filp, f_inode);
	umode_t mode = BPF_CORE_READ(file_inode, i_mode);
	if (S_ISDIR(mode) || S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) ||
	    S_ISSOCK(mode))
		goto out;

	struct dentry* filp_dentry = BPF_CORE_READ(filp, f_path.dentry);
	u32 device_id = 0;
	u64 file_ino = 0;
	if (!is_dentry_allowed_for_filemod(filp_dentry, &device_id, &file_ino))
		goto out;

	int zero = 0;
	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
	if (!filemod_data)
		goto out;

	u32 pid = get_userspace_pid();
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();

	filemod_data->meta.type = FILEMOD_EVENT;
	filemod_data->fmod_type = FMOD_OPEN;
	filemod_data->dst_flags = flags;
	filemod_data->src_inode = 0;
	filemod_data->dst_inode = file_ino;
	filemod_data->src_device_id = 0;
	filemod_data->dst_device_id = device_id;
	filemod_data->src_filepath_length = 0;
	filemod_data->dst_filepath_length = 0;

	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
					      filemod_data->payload);
	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);

	size_t len = read_absolute_file_path_from_dentry(filp_dentry, payload);
	if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) {
		payload += len;
		filemod_data->dst_filepath_length = len;
	}
	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
	unsigned long data_len = payload - (void*)filemod_data;
	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}

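/*
 * kprobe on vfs_link: reports FMOD_LINK events when either the source or
 * the new link's dentry passes the filemod allow-list; both absolute paths
 * are appended to the payload. (vfs_symlink below is analogous, with the
 * symlink target string as the source path.)
 */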
SEC("kprobe/vfs_link")
int BPF_KPROBE(kprobe__vfs_link,
	       struct dentry* old_dentry, struct mnt_idmap *idmap,
	       struct inode* dir, struct dentry* new_dentry,
	       struct inode** delegated_inode)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_link);

	u32 src_device_id = 0;
	u64 src_file_ino = 0;
	u32 dst_device_id = 0;
	u64 dst_file_ino = 0;
	if (!is_dentry_allowed_for_filemod(old_dentry, &src_device_id, &src_file_ino) &&
	    !is_dentry_allowed_for_filemod(new_dentry, &dst_device_id, &dst_file_ino))
		goto out;

	int zero = 0;
	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
	if (!filemod_data)
		goto out;

	u32 pid = get_userspace_pid();
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();

	filemod_data->meta.type = FILEMOD_EVENT;
	filemod_data->fmod_type = FMOD_LINK;
	filemod_data->dst_flags = 0;
	filemod_data->src_inode = src_file_ino;
	filemod_data->dst_inode = dst_file_ino;
	filemod_data->src_device_id = src_device_id;
	filemod_data->dst_device_id = dst_device_id;
	filemod_data->src_filepath_length = 0;
	filemod_data->dst_filepath_length = 0;

	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
					      filemod_data->payload);
	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);

	size_t len = read_absolute_file_path_from_dentry(old_dentry, payload);
	if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) {
		payload += len;
		filemod_data->src_filepath_length = len;
	}

	len = read_absolute_file_path_from_dentry(new_dentry, payload);
	if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) {
		payload += len;
		filemod_data->dst_filepath_length = len;
	}

	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
	unsigned long data_len = payload - (void*)filemod_data;
	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}

SEC("kprobe/vfs_symlink")
int BPF_KPROBE(kprobe__vfs_symlink, struct inode* dir, struct dentry* dentry,
	       const char* oldname)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_symlink);

	u32 dst_device_id = 0;
	u64 dst_file_ino = 0;
	if (!is_dentry_allowed_for_filemod(dentry, &dst_device_id, &dst_file_ino))
		goto out;

	int zero = 0;
	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
	if (!filemod_data)
		goto out;

	u32 pid = get_userspace_pid();
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();

	filemod_data->meta.type = FILEMOD_EVENT;
	filemod_data->fmod_type = FMOD_SYMLINK;
	filemod_data->dst_flags = 0;
	filemod_data->src_inode = 0;
	filemod_data->dst_inode = dst_file_ino;
	filemod_data->src_device_id = 0;
	filemod_data->dst_device_id = dst_device_id;
	filemod_data->src_filepath_length = 0;
	filemod_data->dst_filepath_length = 0;

	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
					      filemod_data->payload);
	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);

	size_t len = bpf_probe_read_kernel_str(payload, MAX_FILEPATH_LENGTH,
					       oldname);
	if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) {
		payload += len;
		filemod_data->src_filepath_length = len;
	}
	len = read_absolute_file_path_from_dentry(dentry, payload);
	if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) {
		payload += len;
		filemod_data->dst_filepath_length = len;
	}
	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
	unsigned long data_len = payload - (void*)filemod_data;
	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}

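/*
 * On fork, emits a FORK_EVENT linking the child's metadata to the parent's
 * pid/exec_id/start_time.
 */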
SEC("raw_tracepoint/sched_process_fork")
int raw_tracepoint__sched_process_fork(struct bpf_raw_tracepoint_args* ctx)
{
	struct bpf_func_stats_ctx stats_ctx;
	bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_fork);

	int zero = 0;
	struct var_fork_data_t* fork_data = bpf_map_lookup_elem(&data_heap, &zero);
	if (!fork_data)
		goto out;

	struct task_struct* parent = (struct task_struct*)ctx->args[0];
	struct task_struct* child = (struct task_struct*)ctx->args[1];
	fork_data->meta.type = FORK_EVENT;

	void* payload = populate_var_metadata(&fork_data->meta, child,
					      BPF_CORE_READ(child, pid), fork_data->payload);
	fork_data->parent_pid = BPF_CORE_READ(parent, pid);
	fork_data->parent_exec_id = BPF_CORE_READ(parent, self_exec_id);
	fork_data->parent_start_time = BPF_CORE_READ(parent, start_time);
	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &fork_data->meta);

	unsigned long data_len = payload - (void*)fork_data;
	data_len = data_len > sizeof(*fork_data) ? sizeof(*fork_data) : data_len;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, fork_data, data_len);
out:
	bpf_stats_exit(&stats_ctx);
	return 0;
}
char _license[] SEC("license") = "GPL";

/* Source: linux/tools/testing/selftests/bpf/progs/profiler.inc.h */