builtin-lock.c source code [linux/tools/perf/builtin-lock.c]

1	// SPDX-License-Identifier: GPL-2.0
2	#include <errno.h>
3	#include <inttypes.h>
4	#include "builtin.h"
5	#include "perf.h"
6
7	#include "util/evlist.h" // for struct evsel_str_handler
8	#include "util/evsel.h"
9	#include "util/symbol.h"
10	#include "util/thread.h"
11	#include "util/header.h"
12	#include "util/target.h"
13	#include "util/cgroup.h"
14	#include "util/callchain.h"
15	#include "util/lock-contention.h"
16	#include "util/bpf_skel/lock_data.h"
17
18	#include <subcmd/pager.h>
19	#include <subcmd/parse-options.h>
20	#include "util/trace-event.h"
21	#include "util/tracepoint.h"
22
23	#include "util/debug.h"
24	#include "util/session.h"
25	#include "util/tool.h"
26	#include "util/data.h"
27	#include "util/string2.h"
28	#include "util/map.h"
29	#include "util/util.h"
30
31	#include <stdio.h>
32	#include <sys/types.h>
33	#include <sys/prctl.h>
34	#include <semaphore.h>
35	#include <math.h>
36	#include <limits.h>
37	#include <ctype.h>
38
39	#include <linux/list.h>
40	#include <linux/hash.h>
41	#include <linux/kernel.h>
42	#include <linux/zalloc.h>
43	#include <linux/err.h>
44	#include <linux/stringify.h>
45
46	static struct perf_session *session;
47	static struct target target;
48
/* based on kernel/lockdep.c */
#define LOCKHASH_BITS		12
#define LOCKHASH_SIZE		(1UL << LOCKHASH_BITS)

/*
 * Bucket array of the lock_stat hash table; lockhashentry() indexes it
 * with a LOCKHASH_BITS-wide hash, so it needs LOCKHASH_SIZE buckets.
 */
static struct hlist_head *lockhash_table;

#define __lockhashfn(key)	hash_long((unsigned long)key, LOCKHASH_BITS)
#define lockhashentry(key)	(lockhash_table + __lockhashfn((key)))
57
58	static struct rb_root thread_stats;
59
60	static bool combine_locks;
61	static bool show_thread_stats;
62	static bool show_lock_addrs;
63	static bool show_lock_owner;
64	static bool show_lock_cgroups;
65	static bool use_bpf;
66	static unsigned long bpf_map_entries = MAX_ENTRIES;
67	static int max_stack_depth = CONTENTION_STACK_DEPTH;
68	static int stack_skip = CONTENTION_STACK_SKIP;
69	static int print_nr_entries = INT_MAX / `2`;
70	static LIST_HEAD(callstack_filters);
71	static const char *output_name = NULL;
72	static FILE *lock_output;
73
74	struct callstack_filter {
75	struct list_head list;
76	char name[];
77	};
78
79	static struct lock_filter filters;
80
81	static enum lock_aggr_mode aggr_mode = LOCK_AGGR_ADDR;
82
83	static bool needs_callstack(void)
84	{
85	return !list_empty(head: &callstack_filters);
86	}
87
88	static struct thread_stat *thread_stat_find(u32 tid)
89	{
90	struct rb_node *node;
91	struct thread_stat *st;
92
93	node = thread_stats.rb_node;
94	while (node) {
95	st = container_of(node, struct thread_stat, rb);
96	if (st->tid == tid)
97	return st;
98	else if (tid < st->tid)
99	node = node->rb_left;
100	else
101	node = node->rb_right;
102	}
103
104	return NULL;
105	}
106
107	static void thread_stat_insert(struct thread_stat *new)
108	{
109	struct rb_node **rb = &thread_stats.rb_node;
110	struct rb_node *parent = NULL;
111	struct thread_stat *p;
112
113	while (*rb) {
114	p = container_of(rb, struct* thread_stat, rb);
115	parent = *rb;
116
117	if (new->tid < p->tid)
118	rb = &(*rb)->rb_left;
119	else if (new->tid > p->tid)
120	rb = &(*rb)->rb_right;
121	else
122	BUG_ON("inserting invalid thread_stat\n");
123	}
124
125	rb_link_node(node: &new->rb, parent, rb_link: rb);
126	rb_insert_color(&new->rb, &thread_stats);
127	}
128
129	static struct thread_stat *thread_stat_findnew_after_first(u32 tid)
130	{
131	struct thread_stat *st;
132
133	st = thread_stat_find(tid);
134	if (st)
135	return st;
136
137	st = zalloc(sizeof(struct thread_stat));
138	if (!st) {
139	pr_err("memory allocation failed\n");
140	return NULL;
141	}
142
143	st->tid = tid;
144	INIT_LIST_HEAD(list: &st->seq_list);
145
146	thread_stat_insert(new: st);
147
148	return st;
149	}
150
151	static struct thread_stat *thread_stat_findnew_first(u32 tid);
152	static struct thread_stat (thread_stat_findnew)(u32 tid) =
153	thread_stat_findnew_first;
154
155	static struct thread_stat *thread_stat_findnew_first(u32 tid)
156	{
157	struct thread_stat *st;
158
159	st = zalloc(sizeof(struct thread_stat));
160	if (!st) {
161	pr_err("memory allocation failed\n");
162	return NULL;
163	}
164	st->tid = tid;
165	INIT_LIST_HEAD(list: &st->seq_list);
166
167	rb_link_node(node: &st->rb, NULL, rb_link: &thread_stats.rb_node);
168	rb_insert_color(&st->rb, &thread_stats);
169
170	thread_stat_findnew = thread_stat_findnew_after_first;
171	return st;
172	}
173
174	/ build simple key function one is bigger than two /
175	#define SINGLE_KEY(member) \
176	static int lock_stat_key_ ## member(struct lock_stat *one, \
177	struct lock_stat *two) \
178	{ \
179	return one->member > two->member; \
180	}
181
182	SINGLE_KEY(nr_acquired)
183	SINGLE_KEY(nr_contended)
184	SINGLE_KEY(avg_wait_time)
185	SINGLE_KEY(wait_time_total)
186	SINGLE_KEY(wait_time_max)
187
188	static int lock_stat_key_wait_time_min(struct lock_stat *one,
189	struct lock_stat *two)
190	{
191	u64 s1 = one->wait_time_min;
192	u64 s2 = two->wait_time_min;
193	if (s1 == ULLONG_MAX)
194	s1 = `0`;
195	if (s2 == ULLONG_MAX)
196	s2 = `0`;
197	return s1 > s2;
198	}
199
200	struct lock_key {
201	/*
202	* name: the value for specify by user
203	* this should be simpler than raw name of member
204	* e.g. nr_acquired -> acquired, wait_time_total -> wait_total
205	*/
206	const char *name;
207	/ header: the string printed on the header line /
208	const char *header;
209	/ len: the printing width of the field /
210	int len;
211	/ key: a pointer to function to compare two lock stats for sorting /
212	int (key)(struct* lock_stat, struct* lock_stat*);
213	/ print: a pointer to function to print a given lock stats /
214	void (print)(struct* lock_key, struct* lock_stat*);
215	/ list: list entry to link this /
216	struct list_head list;
217	};
218
219	static void lock_stat_key_print_time(unsigned long long nsec, int len)
220	{
221	static const struct {
222	float base;
223	const char *unit;
224	} table[] = {
225	{ `1e9` * `3600`, "h " },
226	{ `1e9` * `60`, "m " },
227	{ `1e9`, "s " },
228	{ `1e6`, "ms" },
229	{ `1e3`, "us" },
230	{ `0`, NULL },
231	};
232
233	/ for CSV output /
234	if (len == `0`) {
235	fprintf(lock_output, "%llu", nsec);
236	return;
237	}
238
239	for (int i = `0`; table[i].unit; i++) {
240	if (nsec < table[i].base)
241	continue;
242
243	fprintf(lock_output, "%*.2f %s", len - `3`, nsec / table[i].base, table[i].unit);
244	return;
245	}
246
247	fprintf(lock_output, "%*llu %s", len - `3`, nsec, "ns");
248	}
249
250	#define PRINT_KEY(member) \
251	static void lock_stat_key_print_ ## member(struct lock_key *key, \
252	struct lock_stat *ls) \
253	{ \
254	fprintf(lock_output, "%*llu", key->len, (unsigned long long)ls->member);\
255	}
256
257	#define PRINT_TIME(member) \
258	static void lock_stat_key_print_ ## member(struct lock_key *key, \
259	struct lock_stat *ls) \
260	{ \
261	lock_stat_key_print_time((unsigned long long)ls->member, key->len); \
262	}
263
264	PRINT_KEY(nr_acquired)
265	PRINT_KEY(nr_contended)
266	PRINT_TIME(avg_wait_time)
267	PRINT_TIME(wait_time_total)
268	PRINT_TIME(wait_time_max)
269
270	static void lock_stat_key_print_wait_time_min(struct lock_key *key,
271	struct lock_stat *ls)
272	{
273	u64 wait_time = ls->wait_time_min;
274
275	if (wait_time == ULLONG_MAX)
276	wait_time = `0`;
277
278	lock_stat_key_print_time(nsec: wait_time, len: key->len);
279	}
280
281
282	static const char *sort_key = "acquired";
283
284	static int (compare)(struct* lock_stat , struct* lock_stat *);
285
286	static struct rb_root sorted; / place to store intermediate data /
287	static struct rb_root result; / place to store sorted data /
288
289	static LIST_HEAD(lock_keys);
290	static const char *output_fields;
291
292	#define DEF_KEY_LOCK(name, header, fn_suffix, len) \
293	{ #name, header, len, lock_stat_key_ ## fn_suffix, lock_stat_key_print_ ## fn_suffix, {} }
294	static struct lock_key report_keys[] = {
295	DEF_KEY_LOCK(acquired, "acquired", nr_acquired, `10`),
296	DEF_KEY_LOCK(contended, "contended", nr_contended, `10`),
297	DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, `12`),
298	DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, `12`),
299	DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, `12`),
300	DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, `12`),
301
302	/ extra comparisons much complicated should be here /
303	{ }
304	};
305
306	static struct lock_key contention_keys[] = {
307	DEF_KEY_LOCK(contended, "contended", nr_contended, `10`),
308	DEF_KEY_LOCK(wait_total, "total wait", wait_time_total, `12`),
309	DEF_KEY_LOCK(wait_max, "max wait", wait_time_max, `12`),
310	DEF_KEY_LOCK(wait_min, "min wait", wait_time_min, `12`),
311	DEF_KEY_LOCK(avg_wait, "avg wait", avg_wait_time, `12`),
312
313	/ extra comparisons much complicated should be here /
314	{ }
315	};
316
317	static int select_key(bool contention)
318	{
319	int i;
320	struct lock_key *keys = report_keys;
321
322	if (contention)
323	keys = contention_keys;
324
325	for (i = `0`; keys[i].name; i++) {
326	if (!strcmp(keys[i].name, sort_key)) {
327	compare = keys[i].key;
328
329	/ selected key should be in the output fields /
330	if (list_empty(head: &keys[i].list))
331	list_add_tail(new: &keys[i].list, head: &lock_keys);
332
333	return `0`;
334	}
335	}
336
337	pr_err("Unknown compare key: %s\n", sort_key);
338	return -`1`;
339	}
340
341	static int add_output_field(bool contention, char *name)
342	{
343	int i;
344	struct lock_key *keys = report_keys;
345
346	if (contention)
347	keys = contention_keys;
348
349	for (i = `0`; keys[i].name; i++) {
350	if (strcmp(keys[i].name, name))
351	continue;
352
353	/ prevent double link /
354	if (list_empty(head: &keys[i].list))
355	list_add_tail(new: &keys[i].list, head: &lock_keys);
356
357	return `0`;
358	}
359
360	pr_err("Unknown output field: %s\n", name);
361	return -`1`;
362	}
363
364	static int setup_output_field(bool contention, const char *str)
365	{
366	char tok, tmp, *orig;
367	int i, ret = `0`;
368	struct lock_key *keys = report_keys;
369
370	if (contention)
371	keys = contention_keys;
372
373	/ no output field given: use all of them /
374	if (str == NULL) {
375	for (i = `0`; keys[i].name; i++)
376	list_add_tail(new: &keys[i].list, head: &lock_keys);
377	return `0`;
378	}
379
380	for (i = `0`; keys[i].name; i++)
381	INIT_LIST_HEAD(list: &keys[i].list);
382
383	orig = tmp = strdup(str);
384	if (orig == NULL)
385	return -ENOMEM;
386
387	while ((tok = strsep(&tmp, ",")) != NULL){
388	ret = add_output_field(contention, name: tok);
389	if (ret < `0`)
390	break;
391	}
392	free(orig);
393
394	return ret;
395	}
396
397	static void combine_lock_stats(struct lock_stat *st)
398	{
399	struct rb_node **rb = &sorted.rb_node;
400	struct rb_node *parent = NULL;
401	struct lock_stat *p;
402	int ret;
403
404	while (*rb) {
405	p = container_of(rb, struct* lock_stat, rb);
406	parent = *rb;
407
408	if (st->name && p->name)
409	ret = strcmp(st->name, p->name);
410	else
411	ret = !!st->name - !!p->name;
412
413	if (ret == `0`) {
414	p->nr_acquired += st->nr_acquired;
415	p->nr_contended += st->nr_contended;
416	p->wait_time_total += st->wait_time_total;
417
418	if (p->nr_contended)
419	p->avg_wait_time = p->wait_time_total / p->nr_contended;
420
421	if (p->wait_time_min > st->wait_time_min)
422	p->wait_time_min = st->wait_time_min;
423	if (p->wait_time_max < st->wait_time_max)
424	p->wait_time_max = st->wait_time_max;
425
426	p->broken \|= st->broken;
427	st->combined = `1`;
428	return;
429	}
430
431	if (ret < `0`)
432	rb = &(*rb)->rb_left;
433	else
434	rb = &(*rb)->rb_right;
435	}
436
437	rb_link_node(node: &st->rb, parent, rb_link: rb);
438	rb_insert_color(&st->rb, &sorted);
439	}
440
441	static void insert_to_result(struct lock_stat *st,
442	int (bigger)(struct* lock_stat , struct* lock_stat *))
443	{
444	struct rb_node **rb = &result.rb_node;
445	struct rb_node *parent = NULL;
446	struct lock_stat *p;
447
448	if (combine_locks && st->combined)
449	return;
450
451	while (*rb) {
452	p = container_of(rb, struct* lock_stat, rb);
453	parent = *rb;
454
455	if (bigger(st, p))
456	rb = &(*rb)->rb_left;
457	else
458	rb = &(*rb)->rb_right;
459	}
460
461	rb_link_node(node: &st->rb, parent, rb_link: rb);
462	rb_insert_color(&st->rb, &result);
463	}
464
465	/ returns left most element of result, and erase it /
466	static struct lock_stat pop_from_result(void*)
467	{
468	struct rb_node *node = result.rb_node;
469
470	if (!node)
471	return NULL;
472
473	while (node->rb_left)
474	node = node->rb_left;
475
476	rb_erase(node, &result);
477	return container_of(node, struct lock_stat, rb);
478	}
479
480	struct lock_stat *lock_stat_find(u64 addr)
481	{
482	struct hlist_head *entry = lockhashentry(addr);
483	struct lock_stat *ret;
484
485	hlist_for_each_entry(ret, entry, hash_entry) {
486	if (ret->addr == addr)
487	return ret;
488	}
489	return NULL;
490	}
491
492	struct lock_stat lock_stat_findnew(u64 addr, const* char name, int* flags)
493	{
494	struct hlist_head *entry = lockhashentry(addr);
495	struct lock_stat ret, new;
496
497	hlist_for_each_entry(ret, entry, hash_entry) {
498	if (ret->addr == addr)
499	return ret;
500	}
501
502	new = zalloc(sizeof(struct lock_stat));
503	if (!new)
504	goto alloc_failed;
505
506	new->addr = addr;
507	new->name = strdup(name);
508	if (!new->name) {
509	free(new);
510	goto alloc_failed;
511	}
512
513	new->flags = flags;
514	new->wait_time_min = ULLONG_MAX;
515
516	hlist_add_head(n: &new->hash_entry, h: entry);
517	return new;
518
519	alloc_failed:
520	pr_err("memory allocation failed\n");
521	return NULL;
522	}
523
524	bool match_callstack_filter(struct machine machine, u64 callstack)
525	{
526	struct map *kmap;
527	struct symbol *sym;
528	u64 ip;
529	const char *arch = perf_env__arch(env: machine->env);
530
531	if (list_empty(head: &callstack_filters))
532	return true;
533
534	for (int i = `0`; i < max_stack_depth; i++) {
535	struct callstack_filter *filter;
536
537	/*
538	* In powerpc, the callchain saved by kernel always includes
539	* first three entries as the NIP (next instruction pointer),
540	* LR (link register), and the contents of LR save area in the
541	* second stack frame. In certain scenarios its possible to have
542	* invalid kernel instruction addresses in either LR or the second
543	* stack frame's LR. In that case, kernel will store that address as
544	* zero.
545	*
546	* The below check will continue to look into callstack,
547	* incase first or second callstack index entry has 0
548	* address for powerpc.
549	*/
550	if (!callstack \|\| (!callstack[i] && (strcmp(arch, "powerpc") \|\|
551	(i != `1` && i != `2`))))
552	break;
553
554	ip = callstack[i];
555	sym = machine__find_kernel_symbol(machine, addr: ip, mapp: &kmap);
556	if (sym == NULL)
557	continue;
558
559	list_for_each_entry(filter, &callstack_filters, list) {
560	if (strstr(sym->name, filter->name))
561	return true;
562	}
563	}
564	return false;
565	}
566
/*
 * Per-event callbacks.  A NULL member means the event is ignored by the
 * evsel__process_lock_*() dispatchers.
 */
struct trace_lock_handler {
	/* it's used on CONFIG_LOCKDEP */
	int (*acquire_event)(struct evsel *evsel,
			     struct perf_sample *sample);

	/* it's used on CONFIG_LOCKDEP && CONFIG_LOCK_STAT */
	int (*acquired_event)(struct evsel *evsel,
			      struct perf_sample *sample);

	/* it's used on CONFIG_LOCKDEP && CONFIG_LOCK_STAT */
	int (*contended_event)(struct evsel *evsel,
			       struct perf_sample *sample);

	/* it's used on CONFIG_LOCKDEP */
	int (*release_event)(struct evsel *evsel,
			     struct perf_sample *sample);

	/* it's used when CONFIG_LOCKDEP is off */
	int (*contention_begin_event)(struct evsel *evsel,
				      struct perf_sample *sample);

	/* it's used when CONFIG_LOCKDEP is off */
	int (*contention_end_event)(struct evsel *evsel,
				    struct perf_sample *sample);
};
592
593	static struct lock_seq_stat get_seq(struct* thread_stat *ts, u64 addr)
594	{
595	struct lock_seq_stat *seq;
596
597	list_for_each_entry(seq, &ts->seq_list, list) {
598	if (seq->addr == addr)
599	return seq;
600	}
601
602	seq = zalloc(sizeof(struct lock_seq_stat));
603	if (!seq) {
604	pr_err("memory allocation failed\n");
605	return NULL;
606	}
607	seq->state = SEQ_STATE_UNINITIALIZED;
608	seq->addr = addr;
609
610	list_add(new: &seq->list, head: &ts->seq_list);
611	return seq;
612	}
613
/*
 * Index into bad_hist[]: which kind of event exposed a broken lock
 * sequence.  BROKEN_MAX is the number of kinds, not a kind itself.
 */
enum broken_state {
	BROKEN_ACQUIRE = 0,
	BROKEN_ACQUIRED = 1,
	BROKEN_CONTENDED = 2,
	BROKEN_RELEASE = 3,
	BROKEN_MAX = 4,
};

/* Per-kind count of locks first marked broken by that kind of event. */
static int bad_hist[BROKEN_MAX];
623
/* Bits tested in the "flags" field of a lock acquire event. */
enum acquire_flags {
	TRY_LOCK = 1,
	READ_LOCK = 2,
};
628
629	static int get_key_by_aggr_mode_simple(u64 *key, u64 addr, u32 tid)
630	{
631	switch (aggr_mode) {
632	case LOCK_AGGR_ADDR:
633	*key = addr;
634	break;
635	case LOCK_AGGR_TASK:
636	*key = tid;
637	break;
638	case LOCK_AGGR_CALLER:
639	case LOCK_AGGR_CGROUP:
640	default:
641	pr_err("Invalid aggregation mode: %d\n", aggr_mode);
642	return -EINVAL;
643	}
644	return `0`;
645	}
646
647	static u64 callchain_id(struct evsel evsel, struct* perf_sample *sample);
648
649	static int get_key_by_aggr_mode(u64 key, u64 addr, struct* evsel *evsel,
650	struct perf_sample *sample)
651	{
652	if (aggr_mode == LOCK_AGGR_CALLER) {
653	*key = callchain_id(evsel, sample);
654	return `0`;
655	}
656	return get_key_by_aggr_mode_simple(key, addr, tid: sample->tid);
657	}
658
659	static int report_lock_acquire_event(struct evsel *evsel,
660	struct perf_sample *sample)
661	{
662	struct lock_stat *ls;
663	struct thread_stat *ts;
664	struct lock_seq_stat *seq;
665	const char *name = evsel__strval(evsel, sample, "name");
666	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
667	int flag = evsel__intval(evsel, sample, "flags");
668	u64 key;
669	int ret;
670
671	ret = get_key_by_aggr_mode_simple(key: &key, addr, tid: sample->tid);
672	if (ret < `0`)
673	return ret;
674
675	ls = lock_stat_findnew(addr: key, name, flags: `0`);
676	if (!ls)
677	return -ENOMEM;
678
679	ts = thread_stat_findnew(sample->tid);
680	if (!ts)
681	return -ENOMEM;
682
683	seq = get_seq(ts, addr);
684	if (!seq)
685	return -ENOMEM;
686
687	switch (seq->state) {
688	case SEQ_STATE_UNINITIALIZED:
689	case SEQ_STATE_RELEASED:
690	if (!flag) {
691	seq->state = SEQ_STATE_ACQUIRING;
692	} else {
693	if (flag & TRY_LOCK)
694	ls->nr_trylock++;
695	if (flag & READ_LOCK)
696	ls->nr_readlock++;
697	seq->state = SEQ_STATE_READ_ACQUIRED;
698	seq->read_count = `1`;
699	ls->nr_acquired++;
700	}
701	break;
702	case SEQ_STATE_READ_ACQUIRED:
703	if (flag & READ_LOCK) {
704	seq->read_count++;
705	ls->nr_acquired++;
706	goto end;
707	} else {
708	goto broken;
709	}
710	break;
711	case SEQ_STATE_ACQUIRED:
712	case SEQ_STATE_ACQUIRING:
713	case SEQ_STATE_CONTENDED:
714	broken:
715	/ broken lock sequence /
716	if (!ls->broken) {
717	ls->broken = `1`;
718	bad_hist[BROKEN_ACQUIRE]++;
719	}
720	list_del_init(entry: &seq->list);
721	free(seq);
722	goto end;
723	default:
724	BUG_ON("Unknown state of lock sequence found!\n");
725	break;
726	}
727
728	ls->nr_acquire++;
729	seq->prev_event_time = sample->time;
730	end:
731	return `0`;
732	}
733
734	static int report_lock_acquired_event(struct evsel *evsel,
735	struct perf_sample *sample)
736	{
737	struct lock_stat *ls;
738	struct thread_stat *ts;
739	struct lock_seq_stat *seq;
740	u64 contended_term;
741	const char *name = evsel__strval(evsel, sample, "name");
742	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
743	u64 key;
744	int ret;
745
746	ret = get_key_by_aggr_mode_simple(key: &key, addr, tid: sample->tid);
747	if (ret < `0`)
748	return ret;
749
750	ls = lock_stat_findnew(addr: key, name, flags: `0`);
751	if (!ls)
752	return -ENOMEM;
753
754	ts = thread_stat_findnew(sample->tid);
755	if (!ts)
756	return -ENOMEM;
757
758	seq = get_seq(ts, addr);
759	if (!seq)
760	return -ENOMEM;
761
762	switch (seq->state) {
763	case SEQ_STATE_UNINITIALIZED:
764	/ orphan event, do nothing /
765	return `0`;
766	case SEQ_STATE_ACQUIRING:
767	break;
768	case SEQ_STATE_CONTENDED:
769	contended_term = sample->time - seq->prev_event_time;
770	ls->wait_time_total += contended_term;
771	if (contended_term < ls->wait_time_min)
772	ls->wait_time_min = contended_term;
773	if (ls->wait_time_max < contended_term)
774	ls->wait_time_max = contended_term;
775	break;
776	case SEQ_STATE_RELEASED:
777	case SEQ_STATE_ACQUIRED:
778	case SEQ_STATE_READ_ACQUIRED:
779	/ broken lock sequence /
780	if (!ls->broken) {
781	ls->broken = `1`;
782	bad_hist[BROKEN_ACQUIRED]++;
783	}
784	list_del_init(entry: &seq->list);
785	free(seq);
786	goto end;
787	default:
788	BUG_ON("Unknown state of lock sequence found!\n");
789	break;
790	}
791
792	seq->state = SEQ_STATE_ACQUIRED;
793	ls->nr_acquired++;
794	ls->avg_wait_time = ls->nr_contended ? ls->wait_time_total/ls->nr_contended : `0`;
795	seq->prev_event_time = sample->time;
796	end:
797	return `0`;
798	}
799
800	static int report_lock_contended_event(struct evsel *evsel,
801	struct perf_sample *sample)
802	{
803	struct lock_stat *ls;
804	struct thread_stat *ts;
805	struct lock_seq_stat *seq;
806	const char *name = evsel__strval(evsel, sample, "name");
807	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
808	u64 key;
809	int ret;
810
811	ret = get_key_by_aggr_mode_simple(key: &key, addr, tid: sample->tid);
812	if (ret < `0`)
813	return ret;
814
815	ls = lock_stat_findnew(addr: key, name, flags: `0`);
816	if (!ls)
817	return -ENOMEM;
818
819	ts = thread_stat_findnew(sample->tid);
820	if (!ts)
821	return -ENOMEM;
822
823	seq = get_seq(ts, addr);
824	if (!seq)
825	return -ENOMEM;
826
827	switch (seq->state) {
828	case SEQ_STATE_UNINITIALIZED:
829	/ orphan event, do nothing /
830	return `0`;
831	case SEQ_STATE_ACQUIRING:
832	break;
833	case SEQ_STATE_RELEASED:
834	case SEQ_STATE_ACQUIRED:
835	case SEQ_STATE_READ_ACQUIRED:
836	case SEQ_STATE_CONTENDED:
837	/ broken lock sequence /
838	if (!ls->broken) {
839	ls->broken = `1`;
840	bad_hist[BROKEN_CONTENDED]++;
841	}
842	list_del_init(entry: &seq->list);
843	free(seq);
844	goto end;
845	default:
846	BUG_ON("Unknown state of lock sequence found!\n");
847	break;
848	}
849
850	seq->state = SEQ_STATE_CONTENDED;
851	ls->nr_contended++;
852	ls->avg_wait_time = ls->wait_time_total/ls->nr_contended;
853	seq->prev_event_time = sample->time;
854	end:
855	return `0`;
856	}
857
858	static int report_lock_release_event(struct evsel *evsel,
859	struct perf_sample *sample)
860	{
861	struct lock_stat *ls;
862	struct thread_stat *ts;
863	struct lock_seq_stat *seq;
864	const char *name = evsel__strval(evsel, sample, "name");
865	u64 addr = evsel__intval(evsel, sample, "lockdep_addr");
866	u64 key;
867	int ret;
868
869	ret = get_key_by_aggr_mode_simple(key: &key, addr, tid: sample->tid);
870	if (ret < `0`)
871	return ret;
872
873	ls = lock_stat_findnew(addr: key, name, flags: `0`);
874	if (!ls)
875	return -ENOMEM;
876
877	ts = thread_stat_findnew(sample->tid);
878	if (!ts)
879	return -ENOMEM;
880
881	seq = get_seq(ts, addr);
882	if (!seq)
883	return -ENOMEM;
884
885	switch (seq->state) {
886	case SEQ_STATE_UNINITIALIZED:
887	goto end;
888	case SEQ_STATE_ACQUIRED:
889	break;
890	case SEQ_STATE_READ_ACQUIRED:
891	seq->read_count--;
892	BUG_ON(seq->read_count < `0`);
893	if (seq->read_count) {
894	ls->nr_release++;
895	goto end;
896	}
897	break;
898	case SEQ_STATE_ACQUIRING:
899	case SEQ_STATE_CONTENDED:
900	case SEQ_STATE_RELEASED:
901	/ broken lock sequence /
902	if (!ls->broken) {
903	ls->broken = `1`;
904	bad_hist[BROKEN_RELEASE]++;
905	}
906	goto free_seq;
907	default:
908	BUG_ON("Unknown state of lock sequence found!\n");
909	break;
910	}
911
912	ls->nr_release++;
913	free_seq:
914	list_del_init(entry: &seq->list);
915	free(seq);
916	end:
917	return `0`;
918	}
919
920	static int get_symbol_name_offset(struct map map, struct* symbol *sym, u64 ip,
921	char buf, int* size)
922	{
923	u64 offset;
924
925	if (map == NULL \|\| sym == NULL) {
926	buf[`0`] = `'\0'`;
927	return `0`;
928	}
929
930	offset = map__map_ip(map, ip) - sym->start;
931
932	if (offset)
933	return scnprintf(buf, size, fmt: "%s+%#lx", sym->name, offset);
934	else
935	return strlcpy(p: buf, q: sym->name, size);
936	}
937	static int lock_contention_caller(struct evsel evsel, struct* perf_sample *sample,
938	char buf, int* size)
939	{
940	struct thread *thread;
941	struct callchain_cursor *cursor;
942	struct machine *machine = &session->machines.host;
943	struct symbol *sym;
944	int skip = `0`;
945	int ret;
946
947	/ lock names will be replaced to task name later /
948	if (show_thread_stats)
949	return -`1`;
950
951	thread = machine__findnew_thread(machine, pid: -`1`, tid: sample->pid);
952	if (thread == NULL)
953	return -`1`;
954
955	cursor = get_tls_callchain_cursor();
956
957	/ use caller function name from the callchain /
958	ret = thread__resolve_callchain(thread, cursor, evsel, sample,
959	NULL, NULL, max_stack: max_stack_depth);
960	if (ret != `0`) {
961	thread__put(thread);
962	return -`1`;
963	}
964
965	callchain_cursor_commit(cursor);
966	thread__put(thread);
967
968	while (true) {
969	struct callchain_cursor_node *node;
970
971	node = callchain_cursor_current(cursor);
972	if (node == NULL)
973	break;
974
975	/ skip first few entries - for lock functions /
976	if (++skip <= stack_skip)
977	goto next;
978
979	sym = node->ms.sym;
980	if (sym && !machine__is_lock_function(machine, addr: node->ip)) {
981	get_symbol_name_offset(map: node->ms.map, sym, ip: node->ip,
982	buf, size);
983	return `0`;
984	}
985
986	next:
987	callchain_cursor_advance(cursor);
988	}
989	return -`1`;
990	}
991
992	static u64 callchain_id(struct evsel evsel, struct* perf_sample *sample)
993	{
994	struct callchain_cursor *cursor;
995	struct machine *machine = &session->machines.host;
996	struct thread *thread;
997	u64 hash = `0`;
998	int skip = `0`;
999	int ret;
1000
1001	thread = machine__findnew_thread(machine, pid: -`1`, tid: sample->pid);
1002	if (thread == NULL)
1003	return -`1`;
1004
1005	cursor = get_tls_callchain_cursor();
1006	/ use caller function name from the callchain /
1007	ret = thread__resolve_callchain(thread, cursor, evsel, sample,
1008	NULL, NULL, max_stack: max_stack_depth);
1009	thread__put(thread);
1010
1011	if (ret != `0`)
1012	return -`1`;
1013
1014	callchain_cursor_commit(cursor);
1015
1016	while (true) {
1017	struct callchain_cursor_node *node;
1018
1019	node = callchain_cursor_current(cursor);
1020	if (node == NULL)
1021	break;
1022
1023	/ skip first few entries - for lock functions /
1024	if (++skip <= stack_skip)
1025	goto next;
1026
1027	if (node->ms.sym && machine__is_lock_function(machine, addr: node->ip))
1028	goto next;
1029
1030	hash ^= hash_long((unsigned long)node->ip, `64`);
1031
1032	next:
1033	callchain_cursor_advance(cursor);
1034	}
1035	return hash;
1036	}
1037
1038	static u64 get_callstack(struct* perf_sample sample, int* max_stack)
1039	{
1040	u64 *callstack;
1041	u64 i;
1042	int c;
1043
1044	callstack = calloc(max_stack, sizeof(*callstack));
1045	if (callstack == NULL)
1046	return NULL;
1047
1048	for (i = `0`, c = `0`; i < sample->callchain->nr && c < max_stack; i++) {
1049	u64 ip = sample->callchain->ips[i];
1050
1051	if (ip >= PERF_CONTEXT_MAX)
1052	continue;
1053
1054	callstack[c++] = ip;
1055	}
1056	return callstack;
1057	}
1058
1059	static int report_lock_contention_begin_event(struct evsel *evsel,
1060	struct perf_sample *sample)
1061	{
1062	struct lock_stat *ls;
1063	struct thread_stat *ts;
1064	struct lock_seq_stat *seq;
1065	u64 addr = evsel__intval(evsel, sample, "lock_addr");
1066	unsigned int flags = evsel__intval(evsel, sample, "flags");
1067	u64 key;
1068	int i, ret;
1069	static bool kmap_loaded;
1070	struct machine *machine = &session->machines.host;
1071	struct map *kmap;
1072	struct symbol *sym;
1073
1074	ret = get_key_by_aggr_mode(key: &key, addr, evsel, sample);
1075	if (ret < `0`)
1076	return ret;
1077
1078	if (!kmap_loaded) {
1079	unsigned long *addrs;
1080
1081	/ make sure it loads the kernel map to find lock symbols /
1082	map__load(map: machine__kernel_map(machine));
1083	kmap_loaded = true;
1084
1085	/ convert (kernel) symbols to addresses /
1086	for (i = `0`; i < filters.nr_syms; i++) {
1087	sym = machine__find_kernel_symbol_by_name(machine,
1088	name: filters.syms[i],
1089	mapp: &kmap);
1090	if (sym == NULL) {
1091	pr_warning("ignore unknown symbol: %s\n",
1092	filters.syms[i]);
1093	continue;
1094	}
1095
1096	addrs = realloc(filters.addrs,
1097	(filters.nr_addrs + `1`) * sizeof(*addrs));
1098	if (addrs == NULL) {
1099	pr_warning("memory allocation failure\n");
1100	return -ENOMEM;
1101	}
1102
1103	addrs[filters.nr_addrs++] = map__unmap_ip(map: kmap, ip: sym->start);
1104	filters.addrs = addrs;
1105	}
1106	}
1107
1108	ls = lock_stat_find(addr: key);
1109	if (!ls) {
1110	char buf[`128`];
1111	const char *name = "";
1112
1113	switch (aggr_mode) {
1114	case LOCK_AGGR_ADDR:
1115	sym = machine__find_kernel_symbol(machine, addr: key, mapp: &kmap);
1116	if (sym)
1117	name = sym->name;
1118	break;
1119	case LOCK_AGGR_CALLER:
1120	name = buf;
1121	if (lock_contention_caller(evsel, sample, buf, size: sizeof(buf)) < `0`)
1122	name = "Unknown";
1123	break;
1124	case LOCK_AGGR_CGROUP:
1125	case LOCK_AGGR_TASK:
1126	default:
1127	break;
1128	}
1129
1130	ls = lock_stat_findnew(addr: key, name, flags);
1131	if (!ls)
1132	return -ENOMEM;
1133	}
1134
1135	if (filters.nr_types) {
1136	bool found = false;
1137
1138	for (i = `0`; i < filters.nr_types; i++) {
1139	if (flags == filters.types[i]) {
1140	found = true;
1141	break;
1142	}
1143	}
1144
1145	if (!found)
1146	return `0`;
1147	}
1148
1149	if (filters.nr_addrs) {
1150	bool found = false;
1151
1152	for (i = `0`; i < filters.nr_addrs; i++) {
1153	if (addr == filters.addrs[i]) {
1154	found = true;
1155	break;
1156	}
1157	}
1158
1159	if (!found)
1160	return `0`;
1161	}
1162
1163	if (needs_callstack()) {
1164	u64 *callstack = get_callstack(sample, max_stack: max_stack_depth);
1165	if (callstack == NULL)
1166	return -ENOMEM;
1167
1168	if (!match_callstack_filter(machine, callstack)) {
1169	free(callstack);
1170	return `0`;
1171	}
1172
1173	if (ls->callstack == NULL)
1174	ls->callstack = callstack;
1175	else
1176	free(callstack);
1177	}
1178
1179	ts = thread_stat_findnew(sample->tid);
1180	if (!ts)
1181	return -ENOMEM;
1182
1183	seq = get_seq(ts, addr);
1184	if (!seq)
1185	return -ENOMEM;
1186
1187	switch (seq->state) {
1188	case SEQ_STATE_UNINITIALIZED:
1189	case SEQ_STATE_ACQUIRED:
1190	break;
1191	case SEQ_STATE_CONTENDED:
1192	/*
1193	* It can have nested contention begin with mutex spinning,
1194	* then we would use the original contention begin event and
1195	* ignore the second one.
1196	*/
1197	goto end;
1198	case SEQ_STATE_ACQUIRING:
1199	case SEQ_STATE_READ_ACQUIRED:
1200	case SEQ_STATE_RELEASED:
1201	/ broken lock sequence /
1202	if (!ls->broken) {
1203	ls->broken = `1`;
1204	bad_hist[BROKEN_CONTENDED]++;
1205	}
1206	list_del_init(entry: &seq->list);
1207	free(seq);
1208	goto end;
1209	default:
1210	BUG_ON("Unknown state of lock sequence found!\n");
1211	break;
1212	}
1213
1214	if (seq->state != SEQ_STATE_CONTENDED) {
1215	seq->state = SEQ_STATE_CONTENDED;
1216	seq->prev_event_time = sample->time;
1217	ls->nr_contended++;
1218	}
1219	end:
1220	return `0`;
1221	}
1222
1223	static int report_lock_contention_end_event(struct evsel *evsel,
1224	struct perf_sample *sample)
1225	{
1226	struct lock_stat *ls;
1227	struct thread_stat *ts;
1228	struct lock_seq_stat *seq;
1229	u64 contended_term;
1230	u64 addr = evsel__intval(evsel, sample, "lock_addr");
1231	u64 key;
1232	int ret;
1233
1234	ret = get_key_by_aggr_mode(key: &key, addr, evsel, sample);
1235	if (ret < `0`)
1236	return ret;
1237
1238	ls = lock_stat_find(addr: key);
1239	if (!ls)
1240	return `0`;
1241
1242	ts = thread_stat_find(tid: sample->tid);
1243	if (!ts)
1244	return `0`;
1245
1246	seq = get_seq(ts, addr);
1247	if (!seq)
1248	return -ENOMEM;
1249
1250	switch (seq->state) {
1251	case SEQ_STATE_UNINITIALIZED:
1252	goto end;
1253	case SEQ_STATE_CONTENDED:
1254	contended_term = sample->time - seq->prev_event_time;
1255	ls->wait_time_total += contended_term;
1256	if (contended_term < ls->wait_time_min)
1257	ls->wait_time_min = contended_term;
1258	if (ls->wait_time_max < contended_term)
1259	ls->wait_time_max = contended_term;
1260	break;
1261	case SEQ_STATE_ACQUIRING:
1262	case SEQ_STATE_ACQUIRED:
1263	case SEQ_STATE_READ_ACQUIRED:
1264	case SEQ_STATE_RELEASED:
1265	/ broken lock sequence /
1266	if (!ls->broken) {
1267	ls->broken = `1`;
1268	bad_hist[BROKEN_ACQUIRED]++;
1269	}
1270	list_del_init(entry: &seq->list);
1271	free(seq);
1272	goto end;
1273	default:
1274	BUG_ON("Unknown state of lock sequence found!\n");
1275	break;
1276	}
1277
1278	seq->state = SEQ_STATE_ACQUIRED;
1279	ls->nr_acquired++;
1280	ls->avg_wait_time = ls->wait_time_total/ls->nr_acquired;
1281	end:
1282	return `0`;
1283	}
1284
1285	/ lock oriented handlers /
1286	/ TODO: handlers for CPU oriented, thread oriented /
1287	static struct trace_lock_handler report_lock_ops = {
1288	.acquire_event = report_lock_acquire_event,
1289	.acquired_event = report_lock_acquired_event,
1290	.contended_event = report_lock_contended_event,
1291	.release_event = report_lock_release_event,
1292	.contention_begin_event = report_lock_contention_begin_event,
1293	.contention_end_event = report_lock_contention_end_event,
1294	};
1295
1296	static struct trace_lock_handler contention_lock_ops = {
1297	.contention_begin_event = report_lock_contention_begin_event,
1298	.contention_end_event = report_lock_contention_end_event,
1299	};
1300
1301
1302	static struct trace_lock_handler *trace_handler;
1303
1304	static int evsel__process_lock_acquire(struct evsel evsel, struct* perf_sample *sample)
1305	{
1306	if (trace_handler->acquire_event)
1307	return trace_handler->acquire_event(evsel, sample);
1308	return `0`;
1309	}
1310
1311	static int evsel__process_lock_acquired(struct evsel evsel, struct* perf_sample *sample)
1312	{
1313	if (trace_handler->acquired_event)
1314	return trace_handler->acquired_event(evsel, sample);
1315	return `0`;
1316	}
1317
1318	static int evsel__process_lock_contended(struct evsel evsel, struct* perf_sample *sample)
1319	{
1320	if (trace_handler->contended_event)
1321	return trace_handler->contended_event(evsel, sample);
1322	return `0`;
1323	}
1324
1325	static int evsel__process_lock_release(struct evsel evsel, struct* perf_sample *sample)
1326	{
1327	if (trace_handler->release_event)
1328	return trace_handler->release_event(evsel, sample);
1329	return `0`;
1330	}
1331
1332	static int evsel__process_contention_begin(struct evsel evsel, struct* perf_sample *sample)
1333	{
1334	if (trace_handler->contention_begin_event)
1335	return trace_handler->contention_begin_event(evsel, sample);
1336	return `0`;
1337	}
1338
1339	static int evsel__process_contention_end(struct evsel evsel, struct* perf_sample *sample)
1340	{
1341	if (trace_handler->contention_end_event)
1342	return trace_handler->contention_end_event(evsel, sample);
1343	return `0`;
1344	}
1345
1346	static void print_bad_events(int bad, int total)
1347	{
1348	/ Output for debug, this have to be removed /
1349	int i;
1350	int broken = `0`;
1351	const char *name[`4`] =
1352	{ "acquire", "acquired", "contended", "release" };
1353
1354	for (i = `0`; i < BROKEN_MAX; i++)
1355	broken += bad_hist[i];
1356
1357	if (quiet \|\| total == `0` \|\| (broken == `0` && verbose <= `0`))
1358	return;
1359
1360	fprintf(lock_output, "\n=== output for debug ===\n\n");
1361	fprintf(lock_output, "bad: %d, total: %d\n", bad, total);
1362	fprintf(lock_output, "bad rate: %.2f %%\n", (double)bad / (double)total * `100`);
1363	fprintf(lock_output, "histogram of events caused bad sequence\n");
1364	for (i = `0`; i < BROKEN_MAX; i++)
1365	fprintf(lock_output, " %10s: %d\n", name[i], bad_hist[i]);
1366	}
1367
1368	/ TODO: various way to print, coloring, nano or milli sec /
1369	static void print_result(void)
1370	{
1371	struct lock_stat *st;
1372	struct lock_key *key;
1373	char cut_name[`20`];
1374	int bad, total, printed;
1375
1376	if (!quiet) {
1377	fprintf(lock_output, "%20s ", "Name");
1378	list_for_each_entry(key, &lock_keys, list)
1379	fprintf(lock_output, "%*s ", key->len, key->header);
1380	fprintf(lock_output, "\n\n");
1381	}
1382
1383	bad = total = printed = `0`;
1384	while ((st = pop_from_result())) {
1385	total++;
1386	if (st->broken)
1387	bad++;
1388	if (!st->nr_acquired)
1389	continue;
1390
1391	bzero(cut_name, `20`);
1392
1393	if (strlen(st->name) < `20`) {
1394	/ output raw name /
1395	const char *name = st->name;
1396
1397	if (show_thread_stats) {
1398	struct thread *t;
1399
1400	/ st->addr contains tid of thread /
1401	t = perf_session__findnew(session, pid: st->addr);
1402	name = thread__comm_str(thread: t);
1403	}
1404
1405	fprintf(lock_output, "%20s ", name);
1406	} else {
1407	strncpy(p: cut_name, q: st->name, size: `16`);
1408	cut_name[`16`] = `'.'`;
1409	cut_name[`17`] = `'.'`;
1410	cut_name[`18`] = `'.'`;
1411	cut_name[`19`] = `'\0'`;
1412	/ cut off name for saving output style /
1413	fprintf(lock_output, "%20s ", cut_name);
1414	}
1415
1416	list_for_each_entry(key, &lock_keys, list) {
1417	key->print(key, st);
1418	fprintf(lock_output, " ");
1419	}
1420	fprintf(lock_output, "\n");
1421
1422	if (++printed >= print_nr_entries)
1423	break;
1424	}
1425
1426	print_bad_events(bad, total);
1427	}
1428
/* Selectors read by dump_info(): thread table or lock address map. */
static bool info_threads;
static bool info_map;
1430
1431	static void dump_threads(void)
1432	{
1433	struct thread_stat *st;
1434	struct rb_node *node;
1435	struct thread *t;
1436
1437	fprintf(lock_output, "%10s: comm\n", "Thread ID");
1438
1439	node = rb_first(&thread_stats);
1440	while (node) {
1441	st = container_of(node, struct thread_stat, rb);
1442	t = perf_session__findnew(session, pid: st->tid);
1443	fprintf(lock_output, "%10d: %s\n", st->tid, thread__comm_str(thread: t));
1444	node = rb_next(node);
1445	thread__put(thread: t);
1446	}
1447	}
1448
1449	static int compare_maps(struct lock_stat a, struct* lock_stat *b)
1450	{
1451	int ret;
1452
1453	if (a->name && b->name)
1454	ret = strcmp(a->name, b->name);
1455	else
1456	ret = !!a->name - !!b->name;
1457
1458	if (!ret)
1459	return a->addr < b->addr;
1460	else
1461	return ret < `0`;
1462	}
1463
1464	static void dump_map(void)
1465	{
1466	unsigned int i;
1467	struct lock_stat *st;
1468
1469	fprintf(lock_output, "Address of instance: name of class\n");
1470	for (i = `0`; i < LOCKHASH_SIZE; i++) {
1471	hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
1472	insert_to_result(st, bigger: compare_maps);
1473	}
1474	}
1475
1476	while ((st = pop_from_result()))
1477	fprintf(lock_output, " %#llx: %s\n", (unsigned long long)st->addr, st->name);
1478	}
1479
1480	static int dump_info(void)
1481	{
1482	int rc = `0`;
1483
1484	if (info_threads)
1485	dump_threads();
1486	else if (info_map)
1487	dump_map();
1488	else {
1489	rc = -`1`;
1490	pr_err("Unknown type of information\n");
1491	}
1492
1493	return rc;
1494	}
1495
1496	static const struct evsel_str_handler lock_tracepoints[] = {
1497	{ "lock:lock_acquire", evsel__process_lock_acquire, }, / CONFIG_LOCKDEP /
1498	{ "lock:lock_acquired", evsel__process_lock_acquired, }, / CONFIG_LOCKDEP, CONFIG_LOCK_STAT /
1499	{ "lock:lock_contended", evsel__process_lock_contended, }, / CONFIG_LOCKDEP, CONFIG_LOCK_STAT /
1500	{ "lock:lock_release", evsel__process_lock_release, }, / CONFIG_LOCKDEP /
1501	};
1502
1503	static const struct evsel_str_handler contention_tracepoints[] = {
1504	{ "lock:contention_begin", evsel__process_contention_begin, },
1505	{ "lock:contention_end", evsel__process_contention_end, },
1506	};
1507
1508	static int process_event_update(struct perf_tool *tool,
1509	union perf_event *event,
1510	struct evlist **pevlist)
1511	{
1512	int ret;
1513
1514	ret = perf_event__process_event_update(tool, event, pevlist);
1515	if (ret < `0`)
1516	return ret;
1517
1518	/ this can return -EEXIST since we call it for each evsel /
1519	perf_session__set_tracepoints_handlers(session, lock_tracepoints);
1520	perf_session__set_tracepoints_handlers(session, contention_tracepoints);
1521	return `0`;
1522	}
1523
1524	typedef int (tracepoint_handler)(struct* evsel *evsel,
1525	struct perf_sample *sample);
1526
1527	static int process_sample_event(struct perf_tool *tool __maybe_unused,
1528	union perf_event *event,
1529	struct perf_sample *sample,
1530	struct evsel *evsel,
1531	struct machine *machine)
1532	{
1533	int err = `0`;
1534	struct thread *thread = machine__findnew_thread(machine, pid: sample->pid,
1535	tid: sample->tid);
1536
1537	if (thread == NULL) {
1538	pr_debug("problem processing %d event, skipping it.\n",
1539	event->header.type);
1540	return -`1`;
1541	}
1542
1543	if (evsel->handler != NULL) {
1544	tracepoint_handler f = evsel->handler;
1545	err = f(evsel, sample);
1546	}
1547
1548	thread__put(thread);
1549
1550	return err;
1551	}
1552
1553	static void combine_result(void)
1554	{
1555	unsigned int i;
1556	struct lock_stat *st;
1557
1558	if (!combine_locks)
1559	return;
1560
1561	for (i = `0`; i < LOCKHASH_SIZE; i++) {
1562	hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
1563	combine_lock_stats(st);
1564	}
1565	}
1566	}
1567
1568	static void sort_result(void)
1569	{
1570	unsigned int i;
1571	struct lock_stat *st;
1572
1573	for (i = `0`; i < LOCKHASH_SIZE; i++) {
1574	hlist_for_each_entry(st, &lockhash_table[i], hash_entry) {
1575	insert_to_result(st, bigger: compare);
1576	}
1577	}
1578	}
1579
1580	static const struct {
1581	unsigned int flags;
1582	const char *str;
1583	const char *name;
1584	} lock_type_table[] = {
1585	{ `0`, "semaphore", "semaphore" },
1586	{ LCB_F_SPIN, "spinlock", "spinlock" },
1587	{ LCB_F_SPIN \| LCB_F_READ, "rwlock:R", "rwlock" },
1588	{ LCB_F_SPIN \| LCB_F_WRITE, "rwlock:W", "rwlock" },
1589	{ LCB_F_READ, "rwsem:R", "rwsem" },
1590	{ LCB_F_WRITE, "rwsem:W", "rwsem" },
1591	{ LCB_F_RT, "rt-mutex", "rt-mutex" },
1592	{ LCB_F_RT \| LCB_F_READ, "rwlock-rt:R", "rwlock-rt" },
1593	{ LCB_F_RT \| LCB_F_WRITE, "rwlock-rt:W", "rwlock-rt" },
1594	{ LCB_F_PERCPU \| LCB_F_READ, "pcpu-sem:R", "percpu-rwsem" },
1595	{ LCB_F_PERCPU \| LCB_F_WRITE, "pcpu-sem:W", "percpu-rwsem" },
1596	{ LCB_F_MUTEX, "mutex", "mutex" },
1597	{ LCB_F_MUTEX \| LCB_F_SPIN, "mutex", "mutex" },
1598	/ alias for get_type_flag() /
1599	{ LCB_F_MUTEX \| LCB_F_SPIN, "mutex-spin", "mutex" },
1600	};
1601
1602	static const char get_type_str(unsigned* int flags)
1603	{
1604	flags &= LCB_F_MAX_FLAGS - `1`;
1605
1606	for (unsigned int i = `0`; i < ARRAY_SIZE(lock_type_table); i++) {
1607	if (lock_type_table[i].flags == flags)
1608	return lock_type_table[i].str;
1609	}
1610	return "unknown";
1611	}
1612
1613	static const char get_type_name(unsigned* int flags)
1614	{
1615	flags &= LCB_F_MAX_FLAGS - `1`;
1616
1617	for (unsigned int i = `0`; i < ARRAY_SIZE(lock_type_table); i++) {
1618	if (lock_type_table[i].flags == flags)
1619	return lock_type_table[i].name;
1620	}
1621	return "unknown";
1622	}
1623
1624	static unsigned int get_type_flag(const char *str)
1625	{
1626	for (unsigned int i = `0`; i < ARRAY_SIZE(lock_type_table); i++) {
1627	if (!strcmp(lock_type_table[i].name, str))
1628	return lock_type_table[i].flags;
1629	}
1630	for (unsigned int i = `0`; i < ARRAY_SIZE(lock_type_table); i++) {
1631	if (!strcmp(lock_type_table[i].str, str))
1632	return lock_type_table[i].flags;
1633	}
1634	return UINT_MAX;
1635	}
1636
1637	static void lock_filter_finish(void)
1638	{
1639	zfree(&filters.types);
1640	filters.nr_types = `0`;
1641
1642	zfree(&filters.addrs);
1643	filters.nr_addrs = `0`;
1644
1645	for (int i = `0`; i < filters.nr_syms; i++)
1646	free(filters.syms[i]);
1647
1648	zfree(&filters.syms);
1649	filters.nr_syms = `0`;
1650
1651	zfree(&filters.cgrps);
1652	filters.nr_cgrps = `0`;
1653	}
1654
/* Contention results are sorted exactly like report results. */
static void sort_contention_result(void)
{
	sort_result();
}
1659
1660	static void print_header_stdio(void)
1661	{
1662	struct lock_key *key;
1663
1664	list_for_each_entry(key, &lock_keys, list)
1665	fprintf(lock_output, "%*s ", key->len, key->header);
1666
1667	switch (aggr_mode) {
1668	case LOCK_AGGR_TASK:
1669	fprintf(lock_output, " %10s %s\n\n", "pid",
1670	show_lock_owner ? "owner" : "comm");
1671	break;
1672	case LOCK_AGGR_CALLER:
1673	fprintf(lock_output, " %10s %s\n\n", "type", "caller");
1674	break;
1675	case LOCK_AGGR_ADDR:
1676	fprintf(lock_output, " %16s %s\n\n", "address", "symbol");
1677	break;
1678	case LOCK_AGGR_CGROUP:
1679	fprintf(lock_output, " %s\n\n", "cgroup");
1680	break;
1681	default:
1682	break;
1683	}
1684	}
1685
1686	static void print_header_csv(const char *sep)
1687	{
1688	struct lock_key *key;
1689
1690	fprintf(lock_output, "# output: ");
1691	list_for_each_entry(key, &lock_keys, list)
1692	fprintf(lock_output, "%s%s ", key->header, sep);
1693
1694	switch (aggr_mode) {
1695	case LOCK_AGGR_TASK:
1696	fprintf(lock_output, "%s%s %s\n", "pid", sep,
1697	show_lock_owner ? "owner" : "comm");
1698	break;
1699	case LOCK_AGGR_CALLER:
1700	fprintf(lock_output, "%s%s %s", "type", sep, "caller");
1701	if (verbose > `0`)
1702	fprintf(lock_output, "%s %s", sep, "stacktrace");
1703	fprintf(lock_output, "\n");
1704	break;
1705	case LOCK_AGGR_ADDR:
1706	fprintf(lock_output, "%s%s %s%s %s\n", "address", sep, "symbol", sep, "type");
1707	break;
1708	case LOCK_AGGR_CGROUP:
1709	fprintf(lock_output, "%s\n", "cgroup");
1710	break;
1711	default:
1712	break;
1713	}
1714	}
1715
1716	static void print_header(void)
1717	{
1718	if (!quiet) {
1719	if (symbol_conf.field_sep)
1720	print_header_csv(sep: symbol_conf.field_sep);
1721	else
1722	print_header_stdio();
1723	}
1724	}
1725
1726	static void print_lock_stat_stdio(struct lock_contention con, struct* lock_stat *st)
1727	{
1728	struct lock_key *key;
1729	struct thread *t;
1730	int pid;
1731
1732	list_for_each_entry(key, &lock_keys, list) {
1733	key->print(key, st);
1734	fprintf(lock_output, " ");
1735	}
1736
1737	switch (aggr_mode) {
1738	case LOCK_AGGR_CALLER:
1739	fprintf(lock_output, " %10s %s\n", get_type_str(flags: st->flags), st->name);
1740	break;
1741	case LOCK_AGGR_TASK:
1742	pid = st->addr;
1743	t = perf_session__findnew(session, pid);
1744	fprintf(lock_output, " %10d %s\n",
1745	pid, pid == -`1` ? "Unknown" : thread__comm_str(thread: t));
1746	break;
1747	case LOCK_AGGR_ADDR:
1748	fprintf(lock_output, " %016llx %s (%s)\n", (unsigned long long)st->addr,
1749	st->name, get_type_name(flags: st->flags));
1750	break;
1751	case LOCK_AGGR_CGROUP:
1752	fprintf(lock_output, " %s\n", st->name);
1753	break;
1754	default:
1755	break;
1756	}
1757
1758	if (aggr_mode == LOCK_AGGR_CALLER && verbose > `0`) {
1759	struct map *kmap;
1760	struct symbol *sym;
1761	char buf[`128`];
1762	u64 ip;
1763
1764	for (int i = `0`; i < max_stack_depth; i++) {
1765	if (!st->callstack \|\| !st->callstack[i])
1766	break;
1767
1768	ip = st->callstack[i];
1769	sym = machine__find_kernel_symbol(machine: con->machine, addr: ip, mapp: &kmap);
1770	get_symbol_name_offset(map: kmap, sym, ip, buf, size: sizeof(buf));
1771	fprintf(lock_output, "\t\t\t%#lx %s\n", (unsigned long)ip, buf);
1772	}
1773	}
1774	}
1775
1776	static void print_lock_stat_csv(struct lock_contention con, struct* lock_stat *st,
1777	const char *sep)
1778	{
1779	struct lock_key *key;
1780	struct thread *t;
1781	int pid;
1782
1783	list_for_each_entry(key, &lock_keys, list) {
1784	key->print(key, st);
1785	fprintf(lock_output, "%s ", sep);
1786	}
1787
1788	switch (aggr_mode) {
1789	case LOCK_AGGR_CALLER:
1790	fprintf(lock_output, "%s%s %s", get_type_str(flags: st->flags), sep, st->name);
1791	if (verbose <= `0`)
1792	fprintf(lock_output, "\n");
1793	break;
1794	case LOCK_AGGR_TASK:
1795	pid = st->addr;
1796	t = perf_session__findnew(session, pid);
1797	fprintf(lock_output, "%d%s %s\n", pid, sep,
1798	pid == -`1` ? "Unknown" : thread__comm_str(thread: t));
1799	break;
1800	case LOCK_AGGR_ADDR:
1801	fprintf(lock_output, "%llx%s %s%s %s\n", (unsigned long long)st->addr, sep,
1802	st->name, sep, get_type_name(flags: st->flags));
1803	break;
1804	case LOCK_AGGR_CGROUP:
1805	fprintf(lock_output, "%s\n",st->name);
1806	break;
1807	default:
1808	break;
1809	}
1810
1811	if (aggr_mode == LOCK_AGGR_CALLER && verbose > `0`) {
1812	struct map *kmap;
1813	struct symbol *sym;
1814	char buf[`128`];
1815	u64 ip;
1816
1817	for (int i = `0`; i < max_stack_depth; i++) {
1818	if (!st->callstack \|\| !st->callstack[i])
1819	break;
1820
1821	ip = st->callstack[i];
1822	sym = machine__find_kernel_symbol(machine: con->machine, addr: ip, mapp: &kmap);
1823	get_symbol_name_offset(map: kmap, sym, ip, buf, size: sizeof(buf));
1824	fprintf(lock_output, "%s %#lx %s", i ? ":" : sep, (unsigned long) ip, buf);
1825	}
1826	fprintf(lock_output, "\n");
1827	}
1828	}
1829
1830	static void print_lock_stat(struct lock_contention con, struct* lock_stat *st)
1831	{
1832	if (symbol_conf.field_sep)
1833	print_lock_stat_csv(con, st, sep: symbol_conf.field_sep);
1834	else
1835	print_lock_stat_stdio(con, st);
1836	}
1837
1838	static void print_footer_stdio(int total, int bad, struct lock_contention_fails *fails)
1839	{
1840	/ Output for debug, this have to be removed /
1841	int broken = fails->task + fails->stack + fails->time + fails->data;
1842
1843	if (!use_bpf)
1844	print_bad_events(bad, total);
1845
1846	if (quiet \|\| total == `0` \|\| (broken == `0` && verbose <= `0`))
1847	return;
1848
1849	total += broken;
1850	fprintf(lock_output, "\n=== output for debug ===\n\n");
1851	fprintf(lock_output, "bad: %d, total: %d\n", broken, total);
1852	fprintf(lock_output, "bad rate: %.2f %%\n", `100.0` * broken / total);
1853
1854	fprintf(lock_output, "histogram of failure reasons\n");
1855	fprintf(lock_output, " %10s: %d\n", "task", fails->task);
1856	fprintf(lock_output, " %10s: %d\n", "stack", fails->stack);
1857	fprintf(lock_output, " %10s: %d\n", "time", fails->time);
1858	fprintf(lock_output, " %10s: %d\n", "data", fails->data);
1859	}
1860
1861	static void print_footer_csv(int total, int bad, struct lock_contention_fails *fails,
1862	const char *sep)
1863	{
1864	/ Output for debug, this have to be removed /
1865	if (use_bpf)
1866	bad = fails->task + fails->stack + fails->time + fails->data;
1867
1868	if (quiet \|\| total == `0` \|\| (bad == `0` && verbose <= `0`))
1869	return;
1870
1871	total += bad;
1872	fprintf(lock_output, "# debug: total=%d%s bad=%d", total, sep, bad);
1873
1874	if (use_bpf) {
1875	fprintf(lock_output, "%s bad_%s=%d", sep, "task", fails->task);
1876	fprintf(lock_output, "%s bad_%s=%d", sep, "stack", fails->stack);
1877	fprintf(lock_output, "%s bad_%s=%d", sep, "time", fails->time);
1878	fprintf(lock_output, "%s bad_%s=%d", sep, "data", fails->data);
1879	} else {
1880	int i;
1881	const char *name[`4`] = { "acquire", "acquired", "contended", "release" };
1882
1883	for (i = `0`; i < BROKEN_MAX; i++)
1884	fprintf(lock_output, "%s bad_%s=%d", sep, name[i], bad_hist[i]);
1885	}
1886	fprintf(lock_output, "\n");
1887	}
1888
1889	static void print_footer(int total, int bad, struct lock_contention_fails *fails)
1890	{
1891	if (symbol_conf.field_sep)
1892	print_footer_csv(total, bad, fails, sep: symbol_conf.field_sep);
1893	else
1894	print_footer_stdio(total, bad, fails);
1895	}
1896
1897	static void print_contention_result(struct lock_contention *con)
1898	{
1899	struct lock_stat *st;
1900	int bad, total, printed;
1901
1902	if (!quiet)
1903	print_header();
1904
1905	bad = total = printed = `0`;
1906
1907	while ((st = pop_from_result())) {
1908	total += use_bpf ? st->nr_contended : `1`;
1909	if (st->broken)
1910	bad++;
1911
1912	if (!st->wait_time_total)
1913	continue;
1914
1915	print_lock_stat(con, st);
1916
1917	if (++printed >= print_nr_entries)
1918	break;
1919	}
1920
1921	if (print_nr_entries) {
1922	/ update the total/bad stats /
1923	while ((st = pop_from_result())) {
1924	total += use_bpf ? st->nr_contended : `1`;
1925	if (st->broken)
1926	bad++;
1927	}
1928	}
1929	/ some entries are collected but hidden by the callstack filter /
1930	total += con->nr_filtered;
1931
1932	print_footer(total, bad, fails: &con->fails);
1933	}
1934
1935	static bool force;
1936
1937	static int __cmd_report(bool display_info)
1938	{
1939	int err = -EINVAL;
1940	struct perf_tool eops = {
1941	.attr = perf_event__process_attr,
1942	.event_update = process_event_update,
1943	.sample = process_sample_event,
1944	.comm = perf_event__process_comm,
1945	.mmap = perf_event__process_mmap,
1946	.namespaces = perf_event__process_namespaces,
1947	.tracing_data = perf_event__process_tracing_data,
1948	.ordered_events = true,
1949	};
1950	struct perf_data data = {
1951	.path = input_name,
1952	.mode = PERF_DATA_MODE_READ,
1953	.force = force,
1954	};
1955
1956	session = perf_session__new(data: &data, tool: &eops);
1957	if (IS_ERR(ptr: session)) {
1958	pr_err("Initializing perf session failed\n");
1959	return PTR_ERR(ptr: session);
1960	}
1961
1962	symbol_conf.allow_aliases = true;
1963	symbol__init(env: &session->header.env);
1964
1965	if (!data.is_pipe) {
1966	if (!perf_session__has_traces(session, msg: "lock record"))
1967	goto out_delete;
1968
1969	if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) {
1970	pr_err("Initializing perf session tracepoint handlers failed\n");
1971	goto out_delete;
1972	}
1973
1974	if (perf_session__set_tracepoints_handlers(session, contention_tracepoints)) {
1975	pr_err("Initializing perf session tracepoint handlers failed\n");
1976	goto out_delete;
1977	}
1978	}
1979
1980	if (setup_output_field(contention: false, str: output_fields))
1981	goto out_delete;
1982
1983	if (select_key(contention: false))
1984	goto out_delete;
1985
1986	if (show_thread_stats)
1987	aggr_mode = LOCK_AGGR_TASK;
1988
1989	err = perf_session__process_events(session);
1990	if (err)
1991	goto out_delete;
1992
1993	setup_pager();
1994	if (display_info) / used for info subcommand /
1995	err = dump_info();
1996	else {
1997	combine_result();
1998	sort_result();
1999	print_result();
2000	}
2001
2002	out_delete:
2003	perf_session__delete(session);
2004	return err;
2005	}
2006
2007	static void sighandler(int sig __maybe_unused)
2008	{
2009	}
2010
2011	static int check_lock_contention_options(const struct option *options,
2012	const char * const *usage)
2013
2014	{
2015	if (show_thread_stats && show_lock_addrs) {
2016	pr_err("Cannot use thread and addr mode together\n");
2017	parse_options_usage(usage, options, "threads", `0`);
2018	parse_options_usage(NULL, options, "lock-addr", `0`);
2019	return -`1`;
2020	}
2021
2022	if (show_lock_owner && !use_bpf) {
2023	pr_err("Lock owners are available only with BPF\n");
2024	parse_options_usage(usage, options, "lock-owner", `0`);
2025	parse_options_usage(NULL, options, "use-bpf", `0`);
2026	return -`1`;
2027	}
2028
2029	if (show_lock_owner && show_lock_addrs) {
2030	pr_err("Cannot use owner and addr mode together\n");
2031	parse_options_usage(usage, options, "lock-owner", `0`);
2032	parse_options_usage(NULL, options, "lock-addr", `0`);
2033	return -`1`;
2034	}
2035
2036	if (show_lock_cgroups && !use_bpf) {
2037	pr_err("Cgroups are available only with BPF\n");
2038	parse_options_usage(usage, options, "lock-cgroup", `0`);
2039	parse_options_usage(NULL, options, "use-bpf", `0`);
2040	return -`1`;
2041	}
2042
2043	if (show_lock_cgroups && show_lock_addrs) {
2044	pr_err("Cannot use cgroup and addr mode together\n");
2045	parse_options_usage(usage, options, "lock-cgroup", `0`);
2046	parse_options_usage(NULL, options, "lock-addr", `0`);
2047	return -`1`;
2048	}
2049
2050	if (show_lock_cgroups && show_thread_stats) {
2051	pr_err("Cannot use cgroup and thread mode together\n");
2052	parse_options_usage(usage, options, "lock-cgroup", `0`);
2053	parse_options_usage(NULL, options, "threads", `0`);
2054	return -`1`;
2055	}
2056
2057	if (symbol_conf.field_sep) {
2058	if (strstr(symbol_conf.field_sep, ":") \|\| / part of type flags /
2059	strstr(symbol_conf.field_sep, "+") \|\| / part of caller offset /
2060	strstr(symbol_conf.field_sep, ".")) { / can be in a symbol name /
2061	pr_err("Cannot use the separator that is already used\n");
2062	parse_options_usage(usage, options, "x", `1`);
2063	return -`1`;
2064	}
2065	}
2066
2067	if (show_lock_owner)
2068	show_thread_stats = true;
2069
2070	return `0`;
2071	}
2072
2073	static int __cmd_contention(int argc, const char **argv)
2074	{
2075	int err = -EINVAL;
2076	struct perf_tool eops = {
2077	.attr = perf_event__process_attr,
2078	.event_update = process_event_update,
2079	.sample = process_sample_event,
2080	.comm = perf_event__process_comm,
2081	.mmap = perf_event__process_mmap,
2082	.tracing_data = perf_event__process_tracing_data,
2083	.ordered_events = true,
2084	};
2085	struct perf_data data = {
2086	.path = input_name,
2087	.mode = PERF_DATA_MODE_READ,
2088	.force = force,
2089	};
2090	struct lock_contention con = {
2091	.target = &target,
2092	.map_nr_entries = bpf_map_entries,
2093	.max_stack = max_stack_depth,
2094	.stack_skip = stack_skip,
2095	.filters = &filters,
2096	.save_callstack = needs_callstack(),
2097	.owner = show_lock_owner,
2098	.cgroups = RB_ROOT,
2099	};
2100
2101	lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
2102	if (!lockhash_table)
2103	return -ENOMEM;
2104
2105	con.result = &lockhash_table[`0`];
2106
2107	session = perf_session__new(data: use_bpf ? NULL : &data, tool: &eops);
2108	if (IS_ERR(ptr: session)) {
2109	pr_err("Initializing perf session failed\n");
2110	err = PTR_ERR(ptr: session);
2111	session = NULL;
2112	goto out_delete;
2113	}
2114
2115	con.machine = &session->machines.host;
2116
2117	con.aggr_mode = aggr_mode = show_thread_stats ? LOCK_AGGR_TASK :
2118	show_lock_addrs ? LOCK_AGGR_ADDR :
2119	show_lock_cgroups ? LOCK_AGGR_CGROUP : LOCK_AGGR_CALLER;
2120
2121	if (con.aggr_mode == LOCK_AGGR_CALLER)
2122	con.save_callstack = true;
2123
2124	symbol_conf.allow_aliases = true;
2125	symbol__init(env: &session->header.env);
2126
2127	if (use_bpf) {
2128	err = target__validate(target: &target);
2129	if (err) {
2130	char errbuf[`512`];
2131
2132	target__strerror(target: &target, errnum: err, buf: errbuf, buflen: `512`);
2133	pr_err("%s\n", errbuf);
2134	goto out_delete;
2135	}
2136
2137	signal(SIGINT, sighandler);
2138	signal(SIGCHLD, sighandler);
2139	signal(SIGTERM, sighandler);
2140
2141	con.evlist = evlist__new();
2142	if (con.evlist == NULL) {
2143	err = -ENOMEM;
2144	goto out_delete;
2145	}
2146
2147	err = evlist__create_maps(evlist: con.evlist, target: &target);
2148	if (err < `0`)
2149	goto out_delete;
2150
2151	if (argc) {
2152	err = evlist__prepare_workload(evlist: con.evlist, target: &target,
2153	argv, pipe_output: false, NULL);
2154	if (err < `0`)
2155	goto out_delete;
2156	}
2157
2158	if (lock_contention_prepare(con: &con) < `0`) {
2159	pr_err("lock contention BPF setup failed\n");
2160	goto out_delete;
2161	}
2162	} else if (!data.is_pipe) {
2163	if (!perf_session__has_traces(session, msg: "lock record"))
2164	goto out_delete;
2165
2166	if (!evlist__find_evsel_by_str(evlist: session->evlist,
2167	str: "lock:contention_begin")) {
2168	pr_err("lock contention evsel not found\n");
2169	goto out_delete;
2170	}
2171
2172	if (perf_session__set_tracepoints_handlers(session,
2173	contention_tracepoints)) {
2174	pr_err("Initializing perf session tracepoint handlers failed\n");
2175	goto out_delete;
2176	}
2177	}
2178
2179	if (setup_output_field(contention: true, str: output_fields))
2180	goto out_delete;
2181
2182	if (select_key(contention: true))
2183	goto out_delete;
2184
2185	if (symbol_conf.field_sep) {
2186	int i;
2187	struct lock_key *keys = contention_keys;
2188
2189	/ do not align output in CSV format /
2190	for (i = `0`; keys[i].name; i++)
2191	keys[i].len = `0`;
2192	}
2193
2194	if (use_bpf) {
2195	lock_contention_start();
2196	if (argc)
2197	evlist__start_workload(evlist: con.evlist);
2198
2199	/ wait for signal /
2200	pause();
2201
2202	lock_contention_stop();
2203	lock_contention_read(con: &con);
2204	} else {
2205	err = perf_session__process_events(session);
2206	if (err)
2207	goto out_delete;
2208	}
2209
2210	setup_pager();
2211
2212	sort_contention_result();
2213	print_contention_result(con: &con);
2214
2215	out_delete:
2216	lock_filter_finish();
2217	evlist__delete(evlist: con.evlist);
2218	lock_contention_finish(con: &con);
2219	perf_session__delete(session);
2220	zfree(&lockhash_table);
2221	return err;
2222	}
2223
2224
2225	static int __cmd_record(int argc, const char **argv)
2226	{
2227	const char *record_args[] = {
2228	"record", "-R", "-m", "1024", "-c", "1", "--synth", "task",
2229	};
2230	const char *callgraph_args[] = {
2231	"--call-graph", "fp," __stringify(CONTENTION_STACK_DEPTH),
2232	};
2233	unsigned int rec_argc, i, j, ret;
2234	unsigned int nr_tracepoints;
2235	unsigned int nr_callgraph_args = `0`;
2236	const char **rec_argv;
2237	bool has_lock_stat = true;
2238
2239	for (i = `0`; i < ARRAY_SIZE(lock_tracepoints); i++) {
2240	if (!is_valid_tracepoint(event_string: lock_tracepoints[i].name)) {
2241	pr_debug("tracepoint %s is not enabled. "
2242	"Are CONFIG_LOCKDEP and CONFIG_LOCK_STAT enabled?\n",
2243	lock_tracepoints[i].name);
2244	has_lock_stat = false;
2245	break;
2246	}
2247	}
2248
2249	if (has_lock_stat)
2250	goto setup_args;
2251
2252	for (i = `0`; i < ARRAY_SIZE(contention_tracepoints); i++) {
2253	if (!is_valid_tracepoint(event_string: contention_tracepoints[i].name)) {
2254	pr_err("tracepoint %s is not enabled.\n",
2255	contention_tracepoints[i].name);
2256	return `1`;
2257	}
2258	}
2259
2260	nr_callgraph_args = ARRAY_SIZE(callgraph_args);
2261
2262	setup_args:
2263	rec_argc = ARRAY_SIZE(record_args) + nr_callgraph_args + argc - `1`;
2264
2265	if (has_lock_stat)
2266	nr_tracepoints = ARRAY_SIZE(lock_tracepoints);
2267	else
2268	nr_tracepoints = ARRAY_SIZE(contention_tracepoints);
2269
2270	/ factor of 2 is for -e in front of each tracepoint /
2271	rec_argc += `2` * nr_tracepoints;
2272
2273	rec_argv = calloc(rec_argc + `1`, sizeof(char *));
2274	if (!rec_argv)
2275	return -ENOMEM;
2276
2277	for (i = `0`; i < ARRAY_SIZE(record_args); i++)
2278	rec_argv[i] = strdup(record_args[i]);
2279
2280	for (j = `0`; j < nr_tracepoints; j++) {
2281	const char *ev_name;
2282
2283	if (has_lock_stat)
2284	ev_name = strdup(lock_tracepoints[j].name);
2285	else
2286	ev_name = strdup(contention_tracepoints[j].name);
2287
2288	if (!ev_name)
2289	return -ENOMEM;
2290
2291	rec_argv[i++] = "-e";
2292	rec_argv[i++] = ev_name;
2293	}
2294
2295	for (j = `0`; j < nr_callgraph_args; j++, i++)
2296	rec_argv[i] = callgraph_args[j];
2297
2298	for (j = `1`; j < (unsigned int)argc; j++, i++)
2299	rec_argv[i] = argv[j];
2300
2301	BUG_ON(i != rec_argc);
2302
2303	ret = cmd_record(argc: i, argv: rec_argv);
2304	free(rec_argv);
2305	return ret;
2306	}
2307
2308	static int parse_map_entry(const struct option opt, const* char *str,
2309	int unset __maybe_unused)
2310	{
2311	unsigned long len = (unsigned* long *)opt->value;
2312	unsigned long val;
2313	char *endptr;
2314
2315	errno = `0`;
2316	val = strtoul(str, &endptr, `0`);
2317	if (*endptr != `'\0'` \|\| errno != `0`) {
2318	pr_err("invalid BPF map length: %s\n", str);
2319	return -`1`;
2320	}
2321
2322	*len = val;
2323	return `0`;
2324	}
2325
2326	static int parse_max_stack(const struct option opt, const* char *str,
2327	int unset __maybe_unused)
2328	{
2329	unsigned long len = (unsigned* long *)opt->value;
2330	long val;
2331	char *endptr;
2332
2333	errno = `0`;
2334	val = strtol(str, &endptr, `0`);
2335	if (*endptr != `'\0'` \|\| errno != `0`) {
2336	pr_err("invalid max stack depth: %s\n", str);
2337	return -`1`;
2338	}
2339
2340	if (val < `0` \|\| val > sysctl__max_stack()) {
2341	pr_err("invalid max stack depth: %ld\n", val);
2342	return -`1`;
2343	}
2344
2345	*len = val;
2346	return `0`;
2347	}
2348
2349	static bool add_lock_type(unsigned int flags)
2350	{
2351	unsigned int *tmp;
2352
2353	tmp = realloc(filters.types, (filters.nr_types + `1`) * sizeof(*filters.types));
2354	if (tmp == NULL)
2355	return false;
2356
2357	tmp[filters.nr_types++] = flags;
2358	filters.types = tmp;
2359	return true;
2360	}
2361
2362	static int parse_lock_type(const struct option opt __maybe_unused, const* char *str,
2363	int unset __maybe_unused)
2364	{
2365	char s, tmp, *tok;
2366	int ret = `0`;
2367
2368	s = strdup(str);
2369	if (s == NULL)
2370	return -`1`;
2371
2372	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
2373	unsigned int flags = get_type_flag(str: tok);
2374
2375	if (flags == -`1U`) {
2376	pr_err("Unknown lock flags: %s\n", tok);
2377	ret = -`1`;
2378	break;
2379	}
2380
2381	if (!add_lock_type(flags)) {
2382	ret = -`1`;
2383	break;
2384	}
2385	}
2386
2387	free(s);
2388	return ret;
2389	}
2390
2391	static bool add_lock_addr(unsigned long addr)
2392	{
2393	unsigned long *tmp;
2394
2395	tmp = realloc(filters.addrs, (filters.nr_addrs + `1`) * sizeof(*filters.addrs));
2396	if (tmp == NULL) {
2397	pr_err("Memory allocation failure\n");
2398	return false;
2399	}
2400
2401	tmp[filters.nr_addrs++] = addr;
2402	filters.addrs = tmp;
2403	return true;
2404	}
2405
2406	static bool add_lock_sym(char *name)
2407	{
2408	char **tmp;
2409	char *sym = strdup(name);
2410
2411	if (sym == NULL) {
2412	pr_err("Memory allocation failure\n");
2413	return false;
2414	}
2415
2416	tmp = realloc(filters.syms, (filters.nr_syms + `1`) * sizeof(*filters.syms));
2417	if (tmp == NULL) {
2418	pr_err("Memory allocation failure\n");
2419	free(sym);
2420	return false;
2421	}
2422
2423	tmp[filters.nr_syms++] = sym;
2424	filters.syms = tmp;
2425	return true;
2426	}
2427
2428	static int parse_lock_addr(const struct option opt __maybe_unused, const* char *str,
2429	int unset __maybe_unused)
2430	{
2431	char s, tmp, *tok;
2432	int ret = `0`;
2433	u64 addr;
2434
2435	s = strdup(str);
2436	if (s == NULL)
2437	return -`1`;
2438
2439	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
2440	char *end;
2441
2442	addr = strtoul(tok, &end, `16`);
2443	if (*end == `'\0'`) {
2444	if (!add_lock_addr(addr)) {
2445	ret = -`1`;
2446	break;
2447	}
2448	continue;
2449	}
2450
2451	/*
2452	* At this moment, we don't have kernel symbols. Save the symbols
2453	* in a separate list and resolve them to addresses later.
2454	*/
2455	if (!add_lock_sym(name: tok)) {
2456	ret = -`1`;
2457	break;
2458	}
2459	}
2460
2461	free(s);
2462	return ret;
2463	}
2464
2465	static int parse_call_stack(const struct option opt __maybe_unused, const* char *str,
2466	int unset __maybe_unused)
2467	{
2468	char s, tmp, *tok;
2469	int ret = `0`;
2470
2471	s = strdup(str);
2472	if (s == NULL)
2473	return -`1`;
2474
2475	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
2476	struct callstack_filter *entry;
2477
2478	entry = malloc(sizeof(*entry) + strlen(tok) + `1`);
2479	if (entry == NULL) {
2480	pr_err("Memory allocation failure\n");
2481	free(s);
2482	return -`1`;
2483	}
2484
2485	strcpy(p: entry->name, q: tok);
2486	list_add_tail(new: &entry->list, head: &callstack_filters);
2487	}
2488
2489	free(s);
2490	return ret;
2491	}
2492
2493	static int parse_output(const struct option opt __maybe_unused, const* char *str,
2494	int unset __maybe_unused)
2495	{
2496	const char *name = (const* char **)opt->value;
2497
2498	if (str == NULL)
2499	return -`1`;
2500
2501	lock_output = fopen(str, "w");
2502	if (lock_output == NULL) {
2503	pr_err("Cannot open %s\n", str);
2504	return -`1`;
2505	}
2506
2507	*name = str;
2508	return `0`;
2509	}
2510
2511	static bool add_lock_cgroup(char *name)
2512	{
2513	u64 *tmp;
2514	struct cgroup *cgrp;
2515
2516	cgrp = cgroup__new(name, /do_open=/false);
2517	if (cgrp == NULL) {
2518	pr_err("Failed to create cgroup: %s\n", name);
2519	return false;
2520	}
2521
2522	if (read_cgroup_id(cgrp) < `0`) {
2523	pr_err("Failed to read cgroup id for %s\n", name);
2524	cgroup__put(cgroup: cgrp);
2525	return false;
2526	}
2527
2528	tmp = realloc(filters.cgrps, (filters.nr_cgrps + `1`) * sizeof(*filters.cgrps));
2529	if (tmp == NULL) {
2530	pr_err("Memory allocation failure\n");
2531	return false;
2532	}
2533
2534	tmp[filters.nr_cgrps++] = cgrp->id;
2535	filters.cgrps = tmp;
2536	cgroup__put(cgroup: cgrp);
2537	return true;
2538	}
2539
2540	static int parse_cgroup_filter(const struct option opt __maybe_unused, const* char *str,
2541	int unset __maybe_unused)
2542	{
2543	char s, tmp, *tok;
2544	int ret = `0`;
2545
2546	s = strdup(str);
2547	if (s == NULL)
2548	return -`1`;
2549
2550	for (tok = strtok_r(s, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) {
2551	if (!add_lock_cgroup(name: tok)) {
2552	ret = -`1`;
2553	break;
2554	}
2555	}
2556
2557	free(s);
2558	return ret;
2559	}
2560
2561	int cmd_lock(int argc, const char **argv)
2562	{
2563	const struct option lock_options[] = {
2564	OPT_STRING(`'i'`, "input", &input_name, "file", "input file name"),
2565	OPT_CALLBACK(`0`, "output", &output_name, "file", "output file name", parse_output),
2566	OPT_INCR(`'v'`, "verbose", &verbose, "be more verbose (show symbol address, etc)"),
2567	OPT_BOOLEAN(`'D'`, "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"),
2568	OPT_BOOLEAN(`'f'`, "force", &force, "don't complain, do it"),
2569	OPT_STRING(`0`, "vmlinux", &symbol_conf.vmlinux_name,
2570	"file", "vmlinux pathname"),
2571	OPT_STRING(`0`, "kallsyms", &symbol_conf.kallsyms_name,
2572	"file", "kallsyms pathname"),
2573	OPT_BOOLEAN(`'q'`, "quiet", &quiet, "Do not show any warnings or messages"),
2574	OPT_END()
2575	};
2576
2577	const struct option info_options[] = {
2578	OPT_BOOLEAN(`'t'`, "threads", &info_threads,
2579	"dump thread list in perf.data"),
2580	OPT_BOOLEAN(`'m'`, "map", &info_map,
2581	"map of lock instances (address:name table)"),
2582	OPT_PARENT(lock_options)
2583	};
2584
2585	const struct option report_options[] = {
2586	OPT_STRING(`'k'`, "key", &sort_key, "acquired",
2587	"key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
2588	OPT_STRING(`'F'`, "field", &output_fields, NULL,
2589	"output fields (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
2590	/ TODO: type /
2591	OPT_BOOLEAN(`'c'`, "combine-locks", &combine_locks,
2592	"combine locks in the same class"),
2593	OPT_BOOLEAN(`'t'`, "threads", &show_thread_stats,
2594	"show per-thread lock stats"),
2595	OPT_INTEGER(`'E'`, "entries", &print_nr_entries, "display this many functions"),
2596	OPT_PARENT(lock_options)
2597	};
2598
2599	struct option contention_options[] = {
2600	OPT_STRING(`'k'`, "key", &sort_key, "wait_total",
2601	"key for sorting (contended / wait_total / wait_max / wait_min / avg_wait)"),
2602	OPT_STRING(`'F'`, "field", &output_fields, "contended,wait_total,wait_max,avg_wait",
2603	"output fields (contended / wait_total / wait_max / wait_min / avg_wait)"),
2604	OPT_BOOLEAN(`'t'`, "threads", &show_thread_stats,
2605	"show per-thread lock stats"),
2606	OPT_BOOLEAN(`'b'`, "use-bpf", &use_bpf, "use BPF program to collect lock contention stats"),
2607	OPT_BOOLEAN(`'a'`, "all-cpus", &target.system_wide,
2608	"System-wide collection from all CPUs"),
2609	OPT_STRING(`'C'`, "cpu", &target.cpu_list, "cpu",
2610	"List of cpus to monitor"),
2611	OPT_STRING(`'p'`, "pid", &target.pid, "pid",
2612	"Trace on existing process id"),
2613	OPT_STRING(`0`, "tid", &target.tid, "tid",
2614	"Trace on existing thread id (exclusive to --pid)"),
2615	OPT_CALLBACK(`'M'`, "map-nr-entries", &bpf_map_entries, "num",
2616	"Max number of BPF map entries", parse_map_entry),
2617	OPT_CALLBACK(`0`, "max-stack", &max_stack_depth, "num",
2618	"Set the maximum stack depth when collecting lock contention, "
2619	"Default: " __stringify(CONTENTION_STACK_DEPTH), parse_max_stack),
2620	OPT_INTEGER(`0`, "stack-skip", &stack_skip,
2621	"Set the number of stack depth to skip when finding a lock caller, "
2622	"Default: " __stringify(CONTENTION_STACK_SKIP)),
2623	OPT_INTEGER(`'E'`, "entries", &print_nr_entries, "display this many functions"),
2624	OPT_BOOLEAN(`'l'`, "lock-addr", &show_lock_addrs, "show lock stats by address"),
2625	OPT_CALLBACK(`'Y'`, "type-filter", NULL, "FLAGS",
2626	"Filter specific type of locks", parse_lock_type),
2627	OPT_CALLBACK(`'L'`, "lock-filter", NULL, "ADDRS/NAMES",
2628	"Filter specific address/symbol of locks", parse_lock_addr),
2629	OPT_CALLBACK(`'S'`, "callstack-filter", NULL, "NAMES",
2630	"Filter specific function in the callstack", parse_call_stack),
2631	OPT_BOOLEAN(`'o'`, "lock-owner", &show_lock_owner, "show lock owners instead of waiters"),
2632	OPT_STRING_NOEMPTY(`'x'`, "field-separator", &symbol_conf.field_sep, "separator",
2633	"print result in CSV format with custom separator"),
2634	OPT_BOOLEAN(`0`, "lock-cgroup", &show_lock_cgroups, "show lock stats by cgroup"),
2635	OPT_CALLBACK(`'G'`, "cgroup-filter", NULL, "CGROUPS",
2636	"Filter specific cgroups", parse_cgroup_filter),
2637	OPT_PARENT(lock_options)
2638	};
2639
2640	const char * const info_usage[] = {
2641	"perf lock info [<options>]",
2642	NULL
2643	};
2644	const char *const lock_subcommands[] = { "record", "report", "script",
2645	"info", "contention", NULL };
2646	const char *lock_usage[] = {
2647	NULL,
2648	NULL
2649	};
2650	const char * const report_usage[] = {
2651	"perf lock report [<options>]",
2652	NULL
2653	};
2654	const char * const contention_usage[] = {
2655	"perf lock contention [<options>]",
2656	NULL
2657	};
2658	unsigned int i;
2659	int rc = `0`;
2660
2661	lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
2662	if (!lockhash_table)
2663	return -ENOMEM;
2664
2665	for (i = `0`; i < LOCKHASH_SIZE; i++)
2666	INIT_HLIST_HEAD(lockhash_table + i);
2667
2668	lock_output = stderr;
2669	argc = parse_options_subcommand(argc, argv, lock_options, lock_subcommands,
2670	lock_usage, PARSE_OPT_STOP_AT_NON_OPTION);
2671	if (!argc)
2672	usage_with_options(lock_usage, lock_options);
2673
2674	if (strlen(argv[`0`]) > `2` && strstarts(str: "record", prefix: argv[`0`])) {
2675	return __cmd_record(argc, argv);
2676	} else if (strlen(argv[`0`]) > `2` && strstarts(str: "report", prefix: argv[`0`])) {
2677	trace_handler = &report_lock_ops;
2678	if (argc) {
2679	argc = parse_options(argc, argv,
2680	report_options, report_usage, `0`);
2681	if (argc)
2682	usage_with_options(report_usage, report_options);
2683	}
2684	rc = __cmd_report(display_info: false);
2685	} else if (!strcmp(argv[`0`], "script")) {
2686	/ Aliased to 'perf script' /
2687	rc = cmd_script(argc, argv);
2688	} else if (!strcmp(argv[`0`], "info")) {
2689	if (argc) {
2690	argc = parse_options(argc, argv,
2691	info_options, info_usage, `0`);
2692	if (argc)
2693	usage_with_options(info_usage, info_options);
2694	}
2695	/ recycling report_lock_ops /
2696	trace_handler = &report_lock_ops;
2697	rc = __cmd_report(display_info: true);
2698	} else if (strlen(argv[`0`]) > `2` && strstarts(str: "contention", prefix: argv[`0`])) {
2699	trace_handler = &contention_lock_ops;
2700	sort_key = "wait_total";
2701	output_fields = "contended,wait_total,wait_max,avg_wait";
2702
2703	#ifndef HAVE_BPF_SKEL
2704	set_option_nobuild(contention_options, `'b'`, "use-bpf",
2705	"no BUILD_BPF_SKEL=1", false);
2706	#endif
2707	if (argc) {
2708	argc = parse_options(argc, argv, contention_options,
2709	contention_usage, `0`);
2710	}
2711
2712	if (check_lock_contention_options(options: contention_options,
2713	usage: contention_usage) < `0`)
2714	return -`1`;
2715
2716	rc = __cmd_contention(argc, argv);
2717	} else {
2718	usage_with_options(lock_usage, lock_options);
2719	}
2720
2721	zfree(&lockhash_table);
2722	return rc;
2723	}
2724

source code of linux/tools/perf/builtin-lock.c