// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#include <vmlinux.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

#include "profiler.h"
#include "err.h"
#include "bpf_experimental.h"
#include "bpf_compiler.h"

#ifndef NULL
#define NULL 0
#endif

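/*
 * vmlinux.h carries type definitions only, not preprocessor macros, so the
 * UAPI open-flag and inode-mode constants are re-declared locally.
 */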
#define O_WRONLY 00000001
#define O_RDWR 00000002
#define O_DIRECTORY 00200000
#define __O_TMPFILE 020000000
#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
#define S_IFMT 00170000
#define S_IFSOCK 0140000
#define S_IFLNK 0120000
#define S_IFREG 0100000
#define S_IFBLK 0060000
#define S_IFDIR 0040000
#define S_IFCHR 0020000
#define S_IFIFO 0010000
#define S_ISUID 0004000
#define S_ISGID 0002000
#define S_ISVTX 0001000
#define S_ISLNK(m) (((m) & S_IFMT) == S_IFLNK)
#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
#define S_ISCHR(m) (((m) & S_IFMT) == S_IFCHR)
#define S_ISBLK(m) (((m) & S_IFMT) == S_IFBLK)
#define S_ISFIFO(m) (((m) & S_IFMT) == S_IFIFO)
#define S_ISSOCK(m) (((m) & S_IFMT) == S_IFSOCK)

#define KILL_DATA_ARRAY_SIZE 8

struct var_kill_data_arr_t {
	struct var_kill_data_t array[KILL_DATA_ARRAY_SIZE];
};

union any_profiler_data_t {
	struct var_exec_data_t var_exec;
	struct var_kill_data_t var_kill;
	struct var_sysctl_data_t var_sysctl;
	struct var_filemod_data_t var_filemod;
	struct var_fork_data_t var_fork;
	struct var_kill_data_arr_t var_kill_data_arr;
};

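/*
 * Runtime configuration, written by userspace before the programs are
 * attached; volatile keeps the compiler from constant-folding the
 * zero-initialized fields.
 */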
volatile struct profiler_config_struct bpf_config = {};

#define FETCH_CGROUPS_FROM_BPF (bpf_config.fetch_cgroups_from_bpf)
#define CGROUP_FS_INODE (bpf_config.cgroup_fs_inode)
#define CGROUP_LOGIN_SESSION_INODE \
	(bpf_config.cgroup_login_session_inode)
#define KILL_SIGNALS (bpf_config.kill_signals_mask)
#define STALE_INFO (bpf_config.stale_info_secs)
#define INODE_FILTER (bpf_config.inode_filter)
#define READ_ENVIRON_FROM_EXEC (bpf_config.read_environ_from_exec)
#define ENABLE_CGROUP_V1_RESOLVER (bpf_config.enable_cgroup_v1_resolver)

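/*
 * CO-RE "flavors": libbpf strips the triple-underscore suffix (___52) when
 * matching these against kernel BTF, so they describe the kernfs layout of
 * older kernels (around v5.2), where the node id was a {ino, generation}/u64
 * union.
 */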
struct kernfs_iattrs___52 {
	struct iattr ia_iattr;
};

struct kernfs_node___52 {
	union /* kernfs_node_id */ {
		struct {
			u32 ino;
			u32 generation;
		};
		u64 id;
	} id;
};

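/*
 * Single-slot per-CPU scratch heap: event records are far larger than the
 * 512-byte BPF stack, so they are staged here before being emitted.
 */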
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, 1);
	__type(key, u32);
	__type(value, union any_profiler_data_t);
} data_heap SEC(".maps");

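/* Perf event array through which all event records reach userspace. */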
struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(int));
} events SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, KILL_DATA_ARRAY_SIZE);
	__type(key, u32);
	__type(value, struct var_kill_data_arr_t);
} var_tpid_to_data SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, profiler_bpf_max_function_id);
	__type(key, u32);
	__type(value, struct bpf_func_stats_data);
} bpf_func_stats SEC(".maps");

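/*
 * Allow/deny lists populated from userspace: which devices, file inodes and
 * directory inodes are of interest for file-modification events, and which
 * binaries the exec hook should ignore.
 */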
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u32);
	__type(value, bool);
	__uint(max_entries, 16);
} allowed_devices SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u64);
	__type(value, bool);
	__uint(max_entries, 1024);
} allowed_file_inodes SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u64);
	__type(value, bool);
	__uint(max_entries, 1024);
} allowed_directory_inodes SEC(".maps");

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__type(key, u32);
	__type(value, bool);
	__uint(max_entries, 16);
} disallowed_exec_inodes SEC(".maps");

#ifndef ARRAY_SIZE
#define ARRAY_SIZE(arr) (int)(sizeof(arr) / sizeof(arr[0]))
#endif

static INLINE bool IS_ERR(const void* ptr)
{
	return IS_ERR_VALUE((unsigned long)ptr);
}

static INLINE u32 get_userspace_pid()
{
	return bpf_get_current_pid_tgid() >> 32;
}

static INLINE bool is_init_process(u32 tgid)
{
	return tgid == 1 || tgid == 0;
}

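/*
 * Bounded kernel read: clamps len to max, splitting the len > 1 and len == 1
 * cases so the verifier can track the access bounds.
 */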
static INLINE unsigned long
probe_read_lim(void* dst, void* src, unsigned long len, unsigned long max)
{
	len = len < max ? len : max;
	if (len > 1) {
		if (bpf_probe_read_kernel(dst, len, src))
			return 0;
	} else if (len == 1) {
		if (bpf_probe_read_kernel(dst, 1, src))
			return 0;
	}
	return len;
}

static INLINE int get_var_spid_index(struct var_kill_data_arr_t* arr_struct,
				     int spid)
{
#ifdef UNROLL
	__pragma_loop_unroll
#endif
	for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
		if (arr_struct->array[i].meta.pid == spid)
			return i;
	return -1;
}

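/*
 * Walks the real_parent chain, recording up to MAX_ANCESTORS ancestor pids,
 * exec ids and start times; stops at the idle or init task.
 */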
static INLINE void populate_ancestors(struct task_struct* task,
				      struct ancestors_data_t* ancestors_data)
{
	struct task_struct* parent = task;
	u32 num_ancestors, ppid;

	ancestors_data->num_ancestors = 0;
#ifdef UNROLL
	__pragma_loop_unroll
#endif
	for (num_ancestors = 0; num_ancestors < MAX_ANCESTORS; num_ancestors++) {
		parent = BPF_CORE_READ(parent, real_parent);
		if (parent == NULL)
			break;
		ppid = BPF_CORE_READ(parent, tgid);
		if (is_init_process(ppid))
			break;
		ancestors_data->ancestor_pids[num_ancestors] = ppid;
		ancestors_data->ancestor_exec_ids[num_ancestors] =
			BPF_CORE_READ(parent, self_exec_id);
		ancestors_data->ancestor_start_times[num_ancestors] =
			BPF_CORE_READ(parent, start_time);
		ancestors_data->num_ancestors = num_ancestors;
	}
}

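/*
 * Concatenates kernfs node names from cgroup_node up to the hierarchy root,
 * recording where cgroup_root_node's name lands so userspace can split the
 * full path at that offset.
 */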
static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node,
					  struct kernfs_node* cgroup_root_node,
					  void* payload,
					  int* root_pos)
{
	void* payload_start = payload;
	size_t filepart_length;

#ifdef UNROLL
	__pragma_loop_unroll
#endif
	for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) {
		filepart_length =
			bpf_probe_read_kernel_str(payload, MAX_PATH,
						  BPF_CORE_READ(cgroup_node, name));
		if (!cgroup_node)
			return payload;
		if (cgroup_node == cgroup_root_node)
			*root_pos = payload - payload_start;
		if (bpf_cmp_likely(filepart_length, <=, MAX_PATH)) {
			payload += filepart_length;
		}
		cgroup_node = BPF_CORE_READ(cgroup_node, parent);
	}
	return payload;
}

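/*
 * Reads the kernfs inode number using whichever id layout the running kernel
 * has, selected at load time via bpf_core_field_exists(); barrier_var()
 * keeps the compiler from collapsing the two accesses into code the
 * verifier would reject.
 */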
static ino_t get_inode_from_kernfs(struct kernfs_node* node)
{
	struct kernfs_node___52* node52 = (void*)node;

	if (bpf_core_field_exists(node52->id.ino)) {
		barrier_var(node52);
		return BPF_CORE_READ(node52, id.ino);
	} else {
		barrier_var(node);
		return (u64)BPF_CORE_READ(node, id);
	}
}

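/*
 * CONFIG_CGROUP_PIDS is resolved against the running kernel's config at load
 * time; the local enum value is a placeholder that CO-RE relocates to the
 * kernel's actual pids_cgrp_id.
 */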
extern bool CONFIG_CGROUP_PIDS __kconfig __weak;
enum cgroup_subsys_id___local {
	pids_cgrp_id___local = 123, /* value doesn't matter */
};

static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
					 struct task_struct* task,
					 void* payload)
{
	struct kernfs_node* root_kernfs =
		BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn);
	struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);

#if __has_builtin(__builtin_preserve_enum_value)
	if (ENABLE_CGROUP_V1_RESOLVER && CONFIG_CGROUP_PIDS) {
		int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local,
						  pids_cgrp_id___local);
#ifdef UNROLL
		__pragma_loop_unroll
#endif
		for (int i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
			struct cgroup_subsys_state* subsys =
				BPF_CORE_READ(task, cgroups, subsys[i]);
			if (subsys != NULL) {
				int subsys_id = BPF_CORE_READ(subsys, ss, id);
				if (subsys_id == cgrp_id) {
					proc_kernfs = BPF_CORE_READ(subsys, cgroup, kn);
					root_kernfs = BPF_CORE_READ(subsys, ss, root, kf_root, kn);
					break;
				}
			}
		}
	}
#endif

	cgroup_data->cgroup_root_inode = get_inode_from_kernfs(root_kernfs);
	cgroup_data->cgroup_proc_inode = get_inode_from_kernfs(proc_kernfs);

	if (bpf_core_field_exists(root_kernfs->iattr->ia_mtime)) {
		cgroup_data->cgroup_root_mtime =
			BPF_CORE_READ(root_kernfs, iattr, ia_mtime.tv_nsec);
		cgroup_data->cgroup_proc_mtime =
			BPF_CORE_READ(proc_kernfs, iattr, ia_mtime.tv_nsec);
	} else {
		struct kernfs_iattrs___52* root_iattr =
			(struct kernfs_iattrs___52*)BPF_CORE_READ(root_kernfs, iattr);
		cgroup_data->cgroup_root_mtime =
			BPF_CORE_READ(root_iattr, ia_iattr.ia_mtime.tv_nsec);

		struct kernfs_iattrs___52* proc_iattr =
			(struct kernfs_iattrs___52*)BPF_CORE_READ(proc_kernfs, iattr);
		cgroup_data->cgroup_proc_mtime =
			BPF_CORE_READ(proc_iattr, ia_iattr.ia_mtime.tv_nsec);
	}

	cgroup_data->cgroup_root_length = 0;
	cgroup_data->cgroup_proc_length = 0;
	cgroup_data->cgroup_full_length = 0;

	size_t cgroup_root_length =
		bpf_probe_read_kernel_str(payload, MAX_PATH,
					  BPF_CORE_READ(root_kernfs, name));
	if (bpf_cmp_likely(cgroup_root_length, <=, MAX_PATH)) {
		cgroup_data->cgroup_root_length = cgroup_root_length;
		payload += cgroup_root_length;
	}

	size_t cgroup_proc_length =
		bpf_probe_read_kernel_str(payload, MAX_PATH,
					  BPF_CORE_READ(proc_kernfs, name));
	if (bpf_cmp_likely(cgroup_proc_length, <=, MAX_PATH)) {
		cgroup_data->cgroup_proc_length = cgroup_proc_length;
		payload += cgroup_proc_length;
	}

	if (FETCH_CGROUPS_FROM_BPF) {
		cgroup_data->cgroup_full_path_root_pos = -1;
		void* payload_end_pos = read_full_cgroup_path(proc_kernfs, root_kernfs, payload,
							      &cgroup_data->cgroup_full_path_root_pos);
		cgroup_data->cgroup_full_length = payload_end_pos - payload;
		payload = payload_end_pos;
	}

	return (void*)payload;
}

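/*
 * Fills the common event metadata (uid/gid, pid, exec id, start time, comm)
 * and returns the advanced payload cursor.
 */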
static INLINE void* populate_var_metadata(struct var_metadata_t* metadata,
					  struct task_struct* task,
					  u32 pid, void* payload)
{
	u64 uid_gid = bpf_get_current_uid_gid();

	metadata->uid = (u32)uid_gid;
	metadata->gid = uid_gid >> 32;
	metadata->pid = pid;
	metadata->exec_id = BPF_CORE_READ(task, self_exec_id);
	metadata->start_time = BPF_CORE_READ(task, start_time);
	metadata->comm_length = 0;

	size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
	if (bpf_cmp_likely(comm_length, <=, TASK_COMM_LEN)) {
		metadata->comm_length = comm_length;
		payload += comm_length;
	}

	return (void*)payload;
}

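/*
 * Builds a fresh kill-event record in the per-CPU scratch heap for the
 * (signaling pid, target pid, signal) triple.
 */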
static INLINE struct var_kill_data_t*
get_var_kill_data(struct pt_regs* ctx, int spid, int tpid, int sig)
{
	int zero = 0;
	struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);

	if (kill_data == NULL)
		return NULL;
	struct task_struct* task = (struct task_struct*)bpf_get_current_task();

	void* payload = populate_var_metadata(&kill_data->meta, task, spid, kill_data->payload);
	payload = populate_cgroup_info(&kill_data->cgroup_data, task, payload);
	size_t payload_length = payload - (void*)kill_data->payload;
	kill_data->payload_length = payload_length;
	populate_ancestors(task, &kill_data->ancestors_info);
	kill_data->meta.type = KILL_EVENT;
	kill_data->kill_target_pid = tpid;
	kill_data->kill_sig = sig;
	kill_data->kill_count = 1;
	kill_data->last_kill_time = bpf_ktime_get_ns();
	return kill_data;
}

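/*
 * Aggregates kill events per target pid: a repeat signal from the same
 * sender within STALE_INFO seconds only bumps kill_count on the cached
 * record instead of building a new one.
 */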
static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig)
{
	if ((KILL_SIGNALS & (1ULL << sig)) == 0)
		return 0;

	u32 spid = get_userspace_pid();
	struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);

	if (arr_struct == NULL) {
		struct var_kill_data_t* kill_data = get_var_kill_data(ctx, spid, tpid, sig);
		int zero = 0;

		if (kill_data == NULL)
			return 0;
		arr_struct = bpf_map_lookup_elem(&data_heap, &zero);
		if (arr_struct == NULL)
			return 0;
		bpf_probe_read_kernel(&arr_struct->array[0],
				      sizeof(arr_struct->array[0]), kill_data);
	} else {
		int index = get_var_spid_index(arr_struct, spid);

		if (index == -1) {
			struct var_kill_data_t* kill_data =
				get_var_kill_data(ctx, spid, tpid, sig);
			if (kill_data == NULL)
				return 0;
#ifdef UNROLL
			__pragma_loop_unroll
#endif
			for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
				if (arr_struct->array[i].meta.pid == 0) {
					bpf_probe_read_kernel(&arr_struct->array[i],
							      sizeof(arr_struct->array[i]),
							      kill_data);
					bpf_map_update_elem(&var_tpid_to_data, &tpid,
							    arr_struct, 0);

					return 0;
				}
			return 0;
		}

		struct var_kill_data_t* kill_data = &arr_struct->array[index];

		u64 delta_sec =
			(bpf_ktime_get_ns() - kill_data->last_kill_time) / 1000000000;

		if (delta_sec < STALE_INFO) {
			kill_data->kill_count++;
			kill_data->last_kill_time = bpf_ktime_get_ns();
			bpf_probe_read_kernel(&arr_struct->array[index],
					      sizeof(arr_struct->array[index]),
					      kill_data);
		} else {
			struct var_kill_data_t* kill_data =
				get_var_kill_data(ctx, spid, tpid, sig);
			if (kill_data == NULL)
				return 0;
			bpf_probe_read_kernel(&arr_struct->array[index],
					      sizeof(arr_struct->array[index]),
					      kill_data);
		}
	}
	bpf_map_update_elem(&var_tpid_to_data, &tpid, arr_struct, 0);
	return 0;
}

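/*
 * Lightweight self-profiling: each program records its executions, emitted
 * perf events and elapsed nanoseconds in the bpf_func_stats map.
 */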
static INLINE void bpf_stats_enter(struct bpf_func_stats_ctx* bpf_stat_ctx,
				   enum bpf_function_id func_id)
{
	int func_id_key = func_id;

	bpf_stat_ctx->start_time_ns = bpf_ktime_get_ns();
	bpf_stat_ctx->bpf_func_stats_data_val =
		bpf_map_lookup_elem(&bpf_func_stats, &func_id_key);
	if (bpf_stat_ctx->bpf_func_stats_data_val)
		bpf_stat_ctx->bpf_func_stats_data_val->num_executions++;
}

static INLINE void bpf_stats_exit(struct bpf_func_stats_ctx* bpf_stat_ctx)
{
	if (bpf_stat_ctx->bpf_func_stats_data_val)
		bpf_stat_ctx->bpf_func_stats_data_val->time_elapsed_ns +=
			bpf_ktime_get_ns() - bpf_stat_ctx->start_time_ns;
}

static INLINE void
bpf_stats_pre_submit_var_perf_event(struct bpf_func_stats_ctx* bpf_stat_ctx,
				    struct var_metadata_t* meta)
{
	if (bpf_stat_ctx->bpf_func_stats_data_val) {
		bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events++;
		meta->bpf_stats_num_perf_events =
			bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events;
	}
	meta->bpf_stats_start_ktime_ns = bpf_stat_ctx->start_time_ns;
	meta->cpu_id = bpf_get_smp_processor_id();
}

static INLINE size_t
read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload)
{
	size_t length = 0;
	size_t filepart_length;
	struct dentry* parent_dentry;

#ifdef UNROLL
	__pragma_loop_unroll
#endif
	for (int i = 0; i < MAX_PATH_DEPTH; i++) {
		filepart_length =
			bpf_probe_read_kernel_str(payload, MAX_PATH,
						  BPF_CORE_READ(filp_dentry, d_name.name));
		bpf_nop_mov(filepart_length);
		if (bpf_cmp_unlikely(filepart_length, >, MAX_PATH))
			break;
		payload += filepart_length;
		length += filepart_length;

		parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
		if (filp_dentry == parent_dentry)
			break;
		filp_dentry = parent_dentry;
	}

	return length;
}

static INLINE bool
is_ancestor_in_allowed_inodes(struct dentry* filp_dentry)
{
	struct dentry* parent_dentry;
#ifdef UNROLL
	__pragma_loop_unroll
#endif
	for (int i = 0; i < MAX_PATH_DEPTH; i++) {
		u64 dir_ino = BPF_CORE_READ(filp_dentry, d_inode, i_ino);
		bool* allowed_dir = bpf_map_lookup_elem(&allowed_directory_inodes, &dir_ino);

		if (allowed_dir != NULL)
			return true;
		parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
		if (filp_dentry == parent_dentry)
			break;
		filp_dentry = parent_dentry;
	}
	return false;
}

static INLINE bool is_dentry_allowed_for_filemod(struct dentry* file_dentry,
						 u32* device_id,
						 u64* file_ino)
{
	u32 dev_id = BPF_CORE_READ(file_dentry, d_sb, s_dev);
	*device_id = dev_id;
	bool* allowed_device = bpf_map_lookup_elem(&allowed_devices, &dev_id);

	if (allowed_device == NULL)
		return false;

	u64 ino = BPF_CORE_READ(file_dentry, d_inode, i_ino);
	*file_ino = ino;
	bool* allowed_file = bpf_map_lookup_elem(&allowed_file_inodes, &ino);

	if (allowed_file == NULL)
		if (!is_ancestor_in_allowed_inodes(BPF_CORE_READ(file_dentry, d_parent)))
			return false;
	return true;
}

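/*
 * sysctl-write hook: captures process metadata, cgroup info and ancestry,
 * plus the written value and the sysctl file name, then emits a
 * SYSCTL_EVENT.
 */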
549 | SEC("kprobe/proc_sys_write" ) |
550 | ssize_t BPF_KPROBE(kprobe__proc_sys_write, |
551 | struct file* filp, const char* buf, |
552 | size_t count, loff_t* ppos) |
553 | { |
554 | struct bpf_func_stats_ctx stats_ctx; |
555 | bpf_stats_enter(&stats_ctx, profiler_bpf_proc_sys_write); |
556 | |
557 | u32 pid = get_userspace_pid(); |
558 | int zero = 0; |
559 | struct var_sysctl_data_t* sysctl_data = |
560 | bpf_map_lookup_elem(&data_heap, &zero); |
561 | if (!sysctl_data) |
562 | goto out; |
563 | |
564 | struct task_struct* task = (struct task_struct*)bpf_get_current_task(); |
565 | sysctl_data->meta.type = SYSCTL_EVENT; |
566 | void* payload = populate_var_metadata(&sysctl_data->meta, task, pid, sysctl_data->payload); |
567 | payload = populate_cgroup_info(&sysctl_data->cgroup_data, task, payload); |
568 | |
569 | populate_ancestors(task, &sysctl_data->ancestors_info); |
570 | |
571 | sysctl_data->sysctl_val_length = 0; |
572 | sysctl_data->sysctl_path_length = 0; |
573 | |
574 | size_t sysctl_val_length = bpf_probe_read_kernel_str(payload, |
575 | CTL_MAXNAME, buf); |
576 | if (bpf_cmp_likely(sysctl_val_length, <=, CTL_MAXNAME)) { |
577 | sysctl_data->sysctl_val_length = sysctl_val_length; |
578 | payload += sysctl_val_length; |
579 | } |
580 | |
581 | size_t sysctl_path_length = |
582 | bpf_probe_read_kernel_str(payload, MAX_PATH, |
583 | BPF_CORE_READ(filp, f_path.dentry, |
584 | d_name.name)); |
585 | if (bpf_cmp_likely(sysctl_path_length, <=, MAX_PATH)) { |
586 | sysctl_data->sysctl_path_length = sysctl_path_length; |
587 | payload += sysctl_path_length; |
588 | } |
589 | |
590 | bpf_stats_pre_submit_var_perf_event(&stats_ctx, &sysctl_data->meta); |
591 | unsigned long data_len = payload - (void*)sysctl_data; |
592 | data_len = data_len > sizeof(struct var_sysctl_data_t) |
593 | ? sizeof(struct var_sysctl_data_t) |
594 | : data_len; |
595 | bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, sysctl_data, data_len); |
596 | out: |
597 | bpf_stats_exit(&stats_ctx); |
598 | return 0; |
599 | } |
600 | |
601 | SEC("tracepoint/syscalls/sys_enter_kill" ) |
602 | int tracepoint__syscalls__sys_enter_kill(struct syscall_trace_enter* ctx) |
603 | { |
604 | struct bpf_func_stats_ctx stats_ctx; |
605 | |
606 | bpf_stats_enter(&stats_ctx, profiler_bpf_sys_enter_kill); |
607 | int pid = ctx->args[0]; |
608 | int sig = ctx->args[1]; |
609 | int ret = trace_var_sys_kill(ctx, pid, sig); |
610 | bpf_stats_exit(&stats_ctx); |
611 | return ret; |
612 | }; |
613 | |
614 | SEC("raw_tracepoint/sched_process_exit" ) |
615 | int raw_tracepoint__sched_process_exit(void* ctx) |
616 | { |
617 | int zero = 0; |
618 | struct bpf_func_stats_ctx stats_ctx; |
619 | bpf_stats_enter(bpf_stat_ctx: &stats_ctx, func_id: profiler_bpf_sched_process_exit); |
620 | |
621 | u32 tpid = get_userspace_pid(); |
622 | |
623 | struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid); |
624 | struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero); |
625 | |
626 | if (arr_struct == NULL || kill_data == NULL) |
627 | goto out; |
628 | |
629 | struct task_struct* task = (struct task_struct*)bpf_get_current_task(); |
630 | struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn); |
631 | |
632 | #ifdef UNROLL |
633 | __pragma_loop_unroll |
634 | #endif |
635 | for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) { |
636 | struct var_kill_data_t* past_kill_data = &arr_struct->array[i]; |
637 | |
638 | if (past_kill_data != NULL && past_kill_data->kill_target_pid == (pid_t)tpid) { |
639 | bpf_probe_read_kernel(kill_data, sizeof(*past_kill_data), |
640 | past_kill_data); |
641 | void* payload = kill_data->payload; |
642 | size_t offset = kill_data->payload_length; |
643 | if (offset >= MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN) |
644 | return 0; |
645 | payload += offset; |
646 | |
647 | kill_data->kill_target_name_length = 0; |
648 | kill_data->kill_target_cgroup_proc_length = 0; |
649 | |
650 | size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm); |
651 | if (bpf_cmp_likely(comm_length, <=, TASK_COMM_LEN)) { |
652 | kill_data->kill_target_name_length = comm_length; |
653 | payload += comm_length; |
654 | } |
655 | |
656 | size_t cgroup_proc_length = |
657 | bpf_probe_read_kernel_str(payload, |
658 | KILL_TARGET_LEN, |
659 | BPF_CORE_READ(proc_kernfs, name)); |
660 | if (bpf_cmp_likely(cgroup_proc_length, <=, KILL_TARGET_LEN)) { |
661 | kill_data->kill_target_cgroup_proc_length = cgroup_proc_length; |
662 | payload += cgroup_proc_length; |
663 | } |
664 | |
665 | bpf_stats_pre_submit_var_perf_event(bpf_stat_ctx: &stats_ctx, meta: &kill_data->meta); |
666 | unsigned long data_len = (void*)payload - (void*)kill_data; |
667 | data_len = data_len > sizeof(struct var_kill_data_t) |
668 | ? sizeof(struct var_kill_data_t) |
669 | : data_len; |
670 | bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, kill_data, data_len); |
671 | } |
672 | } |
673 | bpf_map_delete_elem(&var_tpid_to_data, &tpid); |
674 | out: |
675 | bpf_stats_exit(bpf_stat_ctx: &stats_ctx); |
676 | return 0; |
677 | } |
678 | |
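/*
 * exec hook: emits an EXEC_EVENT carrying the binary path, cmdline and
 * (optionally) the environment, unless the binary's inode is disallowed or
 * filtered out by INODE_FILTER.
 */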
679 | SEC("raw_tracepoint/sched_process_exec" ) |
680 | int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx) |
681 | { |
682 | struct bpf_func_stats_ctx stats_ctx; |
683 | bpf_stats_enter(bpf_stat_ctx: &stats_ctx, func_id: profiler_bpf_sched_process_exec); |
684 | |
685 | struct linux_binprm* bprm = (struct linux_binprm*)ctx->args[2]; |
686 | u64 inode = BPF_CORE_READ(bprm, file, f_inode, i_ino); |
687 | |
688 | bool* should_filter_binprm = bpf_map_lookup_elem(&disallowed_exec_inodes, &inode); |
689 | if (should_filter_binprm != NULL) |
690 | goto out; |
691 | |
692 | int zero = 0; |
693 | struct var_exec_data_t* proc_exec_data = bpf_map_lookup_elem(&data_heap, &zero); |
694 | if (!proc_exec_data) |
695 | goto out; |
696 | |
697 | if (INODE_FILTER && inode != INODE_FILTER) |
698 | return 0; |
699 | |
700 | u32 pid = get_userspace_pid(); |
701 | struct task_struct* task = (struct task_struct*)bpf_get_current_task(); |
702 | |
703 | proc_exec_data->meta.type = EXEC_EVENT; |
704 | proc_exec_data->bin_path_length = 0; |
705 | proc_exec_data->cmdline_length = 0; |
706 | proc_exec_data->environment_length = 0; |
707 | void* payload = populate_var_metadata(&proc_exec_data->meta, task, pid, |
708 | proc_exec_data->payload); |
709 | payload = populate_cgroup_info(cgroup_data: &proc_exec_data->cgroup_data, task, payload); |
710 | |
711 | struct task_struct* parent_task = BPF_CORE_READ(task, real_parent); |
712 | proc_exec_data->parent_pid = BPF_CORE_READ(parent_task, tgid); |
713 | proc_exec_data->parent_uid = BPF_CORE_READ(parent_task, real_cred, uid.val); |
714 | proc_exec_data->parent_exec_id = BPF_CORE_READ(parent_task, self_exec_id); |
715 | proc_exec_data->parent_start_time = BPF_CORE_READ(parent_task, start_time); |
716 | |
717 | const char* filename = BPF_CORE_READ(bprm, filename); |
718 | size_t bin_path_length = |
719 | bpf_probe_read_kernel_str(payload, MAX_FILENAME_LEN, filename); |
720 | if (bpf_cmp_likely(bin_path_length, <=, MAX_FILENAME_LEN)) { |
721 | proc_exec_data->bin_path_length = bin_path_length; |
722 | payload += bin_path_length; |
723 | } |
724 | |
725 | void* arg_start = (void*)BPF_CORE_READ(task, mm, arg_start); |
726 | void* arg_end = (void*)BPF_CORE_READ(task, mm, arg_end); |
727 | unsigned int cmdline_length = probe_read_lim(dst: payload, src: arg_start, |
728 | len: arg_end - arg_start, MAX_ARGS_LEN); |
729 | |
730 | if (bpf_cmp_likely(cmdline_length, <=, MAX_ARGS_LEN)) { |
731 | proc_exec_data->cmdline_length = cmdline_length; |
732 | payload += cmdline_length; |
733 | } |
734 | |
735 | if (READ_ENVIRON_FROM_EXEC) { |
736 | void* env_start = (void*)BPF_CORE_READ(task, mm, env_start); |
737 | void* env_end = (void*)BPF_CORE_READ(task, mm, env_end); |
738 | unsigned long env_len = probe_read_lim(dst: payload, src: env_start, |
739 | len: env_end - env_start, MAX_ENVIRON_LEN); |
740 | if (cmdline_length <= MAX_ENVIRON_LEN) { |
741 | proc_exec_data->environment_length = env_len; |
742 | payload += env_len; |
743 | } |
744 | } |
745 | |
746 | bpf_stats_pre_submit_var_perf_event(bpf_stat_ctx: &stats_ctx, meta: &proc_exec_data->meta); |
747 | unsigned long data_len = payload - (void*)proc_exec_data; |
748 | data_len = data_len > sizeof(struct var_exec_data_t) |
749 | ? sizeof(struct var_exec_data_t) |
750 | : data_len; |
751 | bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, proc_exec_data, data_len); |
752 | out: |
753 | bpf_stats_exit(bpf_stat_ctx: &stats_ctx); |
754 | return 0; |
755 | } |
756 | |
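/*
 * Return probe on do_filp_open: reports FMOD_OPEN events for writable opens
 * of regular files on allowed devices/inodes, skipping O_TMPFILE and
 * non-regular file types.
 */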
757 | SEC("kretprobe/do_filp_open" ) |
758 | int kprobe_ret__do_filp_open(struct pt_regs* ctx) |
759 | { |
760 | struct bpf_func_stats_ctx stats_ctx; |
761 | bpf_stats_enter(bpf_stat_ctx: &stats_ctx, func_id: profiler_bpf_do_filp_open_ret); |
762 | |
763 | struct file* filp = (struct file*)PT_REGS_RC_CORE(ctx); |
764 | |
765 | if (filp == NULL || IS_ERR(filp)) |
766 | goto out; |
767 | unsigned int flags = BPF_CORE_READ(filp, f_flags); |
768 | if ((flags & (O_RDWR | O_WRONLY)) == 0) |
769 | goto out; |
770 | if ((flags & O_TMPFILE) > 0) |
771 | goto out; |
772 | struct inode* file_inode = BPF_CORE_READ(filp, f_inode); |
773 | umode_t mode = BPF_CORE_READ(file_inode, i_mode); |
774 | if (S_ISDIR(mode) || S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) || |
775 | S_ISSOCK(mode)) |
776 | goto out; |
777 | |
778 | struct dentry* filp_dentry = BPF_CORE_READ(filp, f_path.dentry); |
779 | u32 device_id = 0; |
780 | u64 file_ino = 0; |
781 | if (!is_dentry_allowed_for_filemod(filp_dentry, &device_id, &file_ino)) |
782 | goto out; |
783 | |
784 | int zero = 0; |
785 | struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero); |
786 | if (!filemod_data) |
787 | goto out; |
788 | |
789 | u32 pid = get_userspace_pid(); |
790 | struct task_struct* task = (struct task_struct*)bpf_get_current_task(); |
791 | |
792 | filemod_data->meta.type = FILEMOD_EVENT; |
793 | filemod_data->fmod_type = FMOD_OPEN; |
794 | filemod_data->dst_flags = flags; |
795 | filemod_data->src_inode = 0; |
796 | filemod_data->dst_inode = file_ino; |
797 | filemod_data->src_device_id = 0; |
798 | filemod_data->dst_device_id = device_id; |
799 | filemod_data->src_filepath_length = 0; |
800 | filemod_data->dst_filepath_length = 0; |
801 | |
802 | void* payload = populate_var_metadata(&filemod_data->meta, task, pid, |
803 | filemod_data->payload); |
804 | payload = populate_cgroup_info(cgroup_data: &filemod_data->cgroup_data, task, payload); |
805 | |
806 | size_t len = read_absolute_file_path_from_dentry(filp_dentry, payload); |
807 | if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) { |
808 | payload += len; |
809 | filemod_data->dst_filepath_length = len; |
810 | } |
811 | bpf_stats_pre_submit_var_perf_event(bpf_stat_ctx: &stats_ctx, meta: &filemod_data->meta); |
812 | unsigned long data_len = payload - (void*)filemod_data; |
813 | data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len; |
814 | bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len); |
815 | out: |
816 | bpf_stats_exit(bpf_stat_ctx: &stats_ctx); |
817 | return 0; |
818 | } |
819 | |
820 | SEC("kprobe/vfs_link" ) |
821 | int BPF_KPROBE(kprobe__vfs_link, |
822 | struct dentry* old_dentry, struct mnt_idmap *idmap, |
823 | struct inode* dir, struct dentry* new_dentry, |
824 | struct inode** delegated_inode) |
825 | { |
826 | struct bpf_func_stats_ctx stats_ctx; |
827 | bpf_stats_enter(bpf_stat_ctx: &stats_ctx, func_id: profiler_bpf_vfs_link); |
828 | |
829 | u32 src_device_id = 0; |
830 | u64 src_file_ino = 0; |
831 | u32 dst_device_id = 0; |
832 | u64 dst_file_ino = 0; |
833 | if (!is_dentry_allowed_for_filemod(old_dentry, &src_device_id, &src_file_ino) && |
834 | !is_dentry_allowed_for_filemod(new_dentry, &dst_device_id, &dst_file_ino)) |
835 | goto out; |
836 | |
837 | int zero = 0; |
838 | struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero); |
839 | if (!filemod_data) |
840 | goto out; |
841 | |
842 | u32 pid = get_userspace_pid(); |
843 | struct task_struct* task = (struct task_struct*)bpf_get_current_task(); |
844 | |
845 | filemod_data->meta.type = FILEMOD_EVENT; |
846 | filemod_data->fmod_type = FMOD_LINK; |
847 | filemod_data->dst_flags = 0; |
848 | filemod_data->src_inode = src_file_ino; |
849 | filemod_data->dst_inode = dst_file_ino; |
850 | filemod_data->src_device_id = src_device_id; |
851 | filemod_data->dst_device_id = dst_device_id; |
852 | filemod_data->src_filepath_length = 0; |
853 | filemod_data->dst_filepath_length = 0; |
854 | |
855 | void* payload = populate_var_metadata(&filemod_data->meta, task, pid, |
856 | filemod_data->payload); |
857 | payload = populate_cgroup_info(cgroup_data: &filemod_data->cgroup_data, task, payload); |
858 | |
859 | size_t len = read_absolute_file_path_from_dentry(old_dentry, payload); |
860 | if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) { |
861 | payload += len; |
862 | filemod_data->src_filepath_length = len; |
863 | } |
864 | |
865 | len = read_absolute_file_path_from_dentry(new_dentry, payload); |
866 | if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) { |
867 | payload += len; |
868 | filemod_data->dst_filepath_length = len; |
869 | } |
870 | |
871 | bpf_stats_pre_submit_var_perf_event(bpf_stat_ctx: &stats_ctx, meta: &filemod_data->meta); |
872 | unsigned long data_len = payload - (void*)filemod_data; |
873 | data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len; |
874 | bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len); |
875 | out: |
876 | bpf_stats_exit(bpf_stat_ctx: &stats_ctx); |
877 | return 0; |
878 | } |
879 | |
880 | SEC("kprobe/vfs_symlink" ) |
881 | int BPF_KPROBE(kprobe__vfs_symlink, struct inode* dir, struct dentry* dentry, |
882 | const char* oldname) |
883 | { |
884 | struct bpf_func_stats_ctx stats_ctx; |
885 | bpf_stats_enter(bpf_stat_ctx: &stats_ctx, func_id: profiler_bpf_vfs_symlink); |
886 | |
887 | u32 dst_device_id = 0; |
888 | u64 dst_file_ino = 0; |
889 | if (!is_dentry_allowed_for_filemod(dentry, &dst_device_id, &dst_file_ino)) |
890 | goto out; |
891 | |
892 | int zero = 0; |
893 | struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero); |
894 | if (!filemod_data) |
895 | goto out; |
896 | |
897 | u32 pid = get_userspace_pid(); |
898 | struct task_struct* task = (struct task_struct*)bpf_get_current_task(); |
899 | |
900 | filemod_data->meta.type = FILEMOD_EVENT; |
901 | filemod_data->fmod_type = FMOD_SYMLINK; |
902 | filemod_data->dst_flags = 0; |
903 | filemod_data->src_inode = 0; |
904 | filemod_data->dst_inode = dst_file_ino; |
905 | filemod_data->src_device_id = 0; |
906 | filemod_data->dst_device_id = dst_device_id; |
907 | filemod_data->src_filepath_length = 0; |
908 | filemod_data->dst_filepath_length = 0; |
909 | |
910 | void* payload = populate_var_metadata(&filemod_data->meta, task, pid, |
911 | filemod_data->payload); |
912 | payload = populate_cgroup_info(cgroup_data: &filemod_data->cgroup_data, task, payload); |
913 | |
914 | size_t len = bpf_probe_read_kernel_str(payload, MAX_FILEPATH_LENGTH, |
915 | oldname); |
916 | if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) { |
917 | payload += len; |
918 | filemod_data->src_filepath_length = len; |
919 | } |
920 | len = read_absolute_file_path_from_dentry(dentry, payload); |
921 | if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) { |
922 | payload += len; |
923 | filemod_data->dst_filepath_length = len; |
924 | } |
925 | bpf_stats_pre_submit_var_perf_event(bpf_stat_ctx: &stats_ctx, meta: &filemod_data->meta); |
926 | unsigned long data_len = payload - (void*)filemod_data; |
927 | data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len; |
928 | bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len); |
929 | out: |
930 | bpf_stats_exit(bpf_stat_ctx: &stats_ctx); |
931 | return 0; |
932 | } |
933 | |
934 | SEC("raw_tracepoint/sched_process_fork" ) |
935 | int raw_tracepoint__sched_process_fork(struct bpf_raw_tracepoint_args* ctx) |
936 | { |
937 | struct bpf_func_stats_ctx stats_ctx; |
938 | bpf_stats_enter(bpf_stat_ctx: &stats_ctx, func_id: profiler_bpf_sched_process_fork); |
939 | |
940 | int zero = 0; |
941 | struct var_fork_data_t* fork_data = bpf_map_lookup_elem(&data_heap, &zero); |
942 | if (!fork_data) |
943 | goto out; |
944 | |
945 | struct task_struct* parent = (struct task_struct*)ctx->args[0]; |
946 | struct task_struct* child = (struct task_struct*)ctx->args[1]; |
947 | fork_data->meta.type = FORK_EVENT; |
948 | |
949 | void* payload = populate_var_metadata(&fork_data->meta, child, |
950 | BPF_CORE_READ(child, pid), fork_data->payload); |
951 | fork_data->parent_pid = BPF_CORE_READ(parent, pid); |
952 | fork_data->parent_exec_id = BPF_CORE_READ(parent, self_exec_id); |
953 | fork_data->parent_start_time = BPF_CORE_READ(parent, start_time); |
954 | bpf_stats_pre_submit_var_perf_event(bpf_stat_ctx: &stats_ctx, meta: &fork_data->meta); |
955 | |
956 | unsigned long data_len = payload - (void*)fork_data; |
957 | data_len = data_len > sizeof(*fork_data) ? sizeof(*fork_data) : data_len; |
958 | bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, fork_data, data_len); |
959 | out: |
960 | bpf_stats_exit(bpf_stat_ctx: &stats_ctx); |
961 | return 0; |
962 | } |
963 | char _license[] SEC("license" ) = "GPL" ; |
964 | |