1 | /* |
2 | * Netburst Performance Events (P4, old Xeon) |
3 | * |
4 | * Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov <gorcunov@openvz.org> |
5 | * Copyright (C) 2010 Intel Corporation, Lin Ming <ming.m.lin@intel.com> |
6 | * |
7 | * For licencing details see kernel-base/COPYING |
8 | */ |
9 | |
10 | #include <linux/perf_event.h> |
11 | |
12 | #include <asm/perf_event_p4.h> |
13 | #include <asm/hardirq.h> |
14 | #include <asm/apic.h> |
15 | |
16 | #include "../perf_event.h" |
17 | |
18 | #define P4_CNTR_LIMIT 3 /* counter candidates per HT thread, second dimension of cntr[][] */ |
19 | /* |
20 | * Resources one P4 event may use. Array indices: 0,1 - HT threads, used with HT enabled cpu |
21 | */ |
22 | struct p4_event_bind { |
23 | unsigned int opcode; /* Event code and ESCR selector */ |
24 | unsigned int escr_msr[2]; /* ESCR MSR for this event, one entry per HT thread */ |
25 | unsigned int escr_emask; /* valid ESCR EventMask bits */ |
26 | unsigned int shared; /* event is shared across threads */ |
27 | signed char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset) per HT thread, -1 on absence */ |
28 | }; |
29 | |
30 | struct p4_pebs_bind { /* register values selected by one P4_PEBS_METRIC__* index */ |
31 | unsigned int metric_pebs; /* metric bits; P4_GEN_PEBS_BIND also ORs in P4_PEBS_ENABLE_UOP_TAG */ |
32 | unsigned int metric_vert; /* presumably the MSR_P4_PEBS_MATRIX_VERT value -- confirm at the write site */ |
33 | }; |
34 | |
/*
 * Build one array slot of struct p4_pebs_bind, indexed by
 * P4_PEBS_METRIC__<name>.
 *
 * @name: metric suffix, pasted onto P4_PEBS_METRIC__ to form the index
 * @pebs: bits for .metric_pebs; P4_PEBS_ENABLE_UOP_TAG is always set
 *        in addition to them
 * @vert: value for .metric_vert
 *
 * @pebs and @vert are parenthesized so that an expression argument
 * (a conditional, for instance) cannot be re-associated with the
 * surrounding '|' operator.
 */
#define P4_GEN_PEBS_BIND(name, pebs, vert)			\
	[P4_PEBS_METRIC__##name] = {				\
		.metric_pebs = (pebs) | P4_PEBS_ENABLE_UOP_TAG,	\
		.metric_vert = (vert),				\
	}
41 | |
42 | /* |
43 | * PEBS metric table indexed by P4_PEBS_METRIC__*; P4_PEBS_ENABLE_UOP_TAG is always set here |
44 | * |
45 | * it's needed for mapping the P4_PEBS_CONFIG_METRIC_MASK bits of an |
46 | * event configuration to the values which are to be |
47 | * written into MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT |
48 | * registers |
49 | */ |
50 | static struct p4_pebs_bind p4_pebs_bind_map[] = { /* P4_GEN_PEBS_BIND(metric name, pebs bits, vert bits) */ |
51 | P4_GEN_PEBS_BIND(1stl_cache_load_miss_retired, 0x0000001, 0x0000001), |
52 | P4_GEN_PEBS_BIND(2ndl_cache_load_miss_retired, 0x0000002, 0x0000001), |
53 | P4_GEN_PEBS_BIND(dtlb_load_miss_retired, 0x0000004, 0x0000001), |
54 | P4_GEN_PEBS_BIND(dtlb_store_miss_retired, 0x0000004, 0x0000002), |
55 | P4_GEN_PEBS_BIND(dtlb_all_miss_retired, 0x0000004, 0x0000003), |
56 | P4_GEN_PEBS_BIND(tagged_mispred_branch, 0x0018000, 0x0000010), |
57 | P4_GEN_PEBS_BIND(mob_load_replay_retired, 0x0000200, 0x0000001), |
58 | P4_GEN_PEBS_BIND(split_load_retired, 0x0000400, 0x0000001), |
59 | P4_GEN_PEBS_BIND(split_store_retired, 0x0000400, 0x0000002), |
60 | }; |
61 | |
62 | /* |
63 | * Per-event resource bindings, indexed by P4_EVENT_* code. |
64 | * Note that we don't use CCCR1 here; P4_EVENT_BSQ_ALLOCATION |
65 | * is an exception, but we simply have no workaround for it. |
66 | * |
67 | * Consider each binding as the resources a particular event |
68 | * may borrow. It lists the valid EventMask bits only; the EventMask |
69 | * value itself, Tags and friends are left to the caller. |
70 | */ |
71 | static struct p4_event_bind p4_event_bind_map[] = { |
72 | [P4_EVENT_TC_DELIVER_MODE] = { |
73 | .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE), |
74 | .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, |
75 | .escr_emask = |
76 | P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD) | |
77 | P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DB) | |
78 | P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DI) | |
79 | P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BD) | |
80 | P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BB) | |
81 | P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, BI) | |
82 | P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, ID), |
83 | .shared = 1, /* with HT active, raw use of a shared event must pass perf_allow_cpu() */ |
84 | .cntr = { {4, 5, -1}, {6, 7, -1} }, /* counter candidates per HT thread, -1 = unused slot */ |
85 | }, |
86 | [P4_EVENT_BPU_FETCH_REQUEST] = { |
87 | .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST), |
88 | .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 }, |
89 | .escr_emask = |
90 | P4_ESCR_EMASK_BIT(P4_EVENT_BPU_FETCH_REQUEST, TCMISS), |
91 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
92 | }, |
93 | [P4_EVENT_ITLB_REFERENCE] = { |
94 | .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE), |
95 | .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 }, |
96 | .escr_emask = |
97 | P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT) | |
98 | P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, MISS) | |
99 | P4_ESCR_EMASK_BIT(P4_EVENT_ITLB_REFERENCE, HIT_UK), |
100 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
101 | }, |
102 | [P4_EVENT_MEMORY_CANCEL] = { |
103 | .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL), |
104 | .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, |
105 | .escr_emask = |
106 | P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL) | |
107 | P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_CANCEL, 64K_CONF), |
108 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
109 | }, |
110 | [P4_EVENT_MEMORY_COMPLETE] = { |
111 | .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE), |
112 | .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, |
113 | .escr_emask = |
114 | P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, LSC) | |
115 | P4_ESCR_EMASK_BIT(P4_EVENT_MEMORY_COMPLETE, SSC), |
116 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
117 | }, |
118 | [P4_EVENT_LOAD_PORT_REPLAY] = { |
119 | .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY), |
120 | .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 }, |
121 | .escr_emask = |
122 | P4_ESCR_EMASK_BIT(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD), |
123 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
124 | }, |
125 | [P4_EVENT_STORE_PORT_REPLAY] = { |
126 | .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY), |
127 | .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, |
128 | .escr_emask = |
129 | P4_ESCR_EMASK_BIT(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST), |
130 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
131 | }, |
132 | [P4_EVENT_MOB_LOAD_REPLAY] = { |
133 | .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY), |
134 | .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 }, |
135 | .escr_emask = |
136 | P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STA) | |
137 | P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, NO_STD) | |
138 | P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA) | |
139 | P4_ESCR_EMASK_BIT(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR), |
140 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
141 | }, |
142 | [P4_EVENT_PAGE_WALK_TYPE] = { |
143 | .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE), |
144 | .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 }, |
145 | .escr_emask = |
146 | P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, DTMISS) | |
147 | P4_ESCR_EMASK_BIT(P4_EVENT_PAGE_WALK_TYPE, ITMISS), |
148 | .shared = 1, |
149 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
150 | }, |
151 | [P4_EVENT_BSQ_CACHE_REFERENCE] = { |
152 | .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE), |
153 | .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, |
154 | .escr_emask = |
155 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) | |
156 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) | |
157 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) | |
158 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) | |
159 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) | |
160 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM) | |
161 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) | |
162 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) | |
163 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS), |
164 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
165 | }, |
166 | [P4_EVENT_IOQ_ALLOCATION] = { |
167 | .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION), |
168 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, |
169 | .escr_emask = |
170 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, DEFAULT) | |
171 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_READ) | |
172 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE) | |
173 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_UC) | |
174 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WC) | |
175 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WT) | |
176 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WP) | |
177 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, MEM_WB) | |
178 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OWN) | |
179 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, OTHER) | |
180 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ALLOCATION, PREFETCH), |
181 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
182 | }, |
183 | [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR: both threads use MSR_P4_FSB_ESCR1 */ |
184 | .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES), |
185 | .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 }, |
186 | .escr_emask = |
187 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT) | |
188 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ) | |
189 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE) | |
190 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC) | |
191 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC) | |
192 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT) | |
193 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP) | |
194 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB) | |
195 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN) | |
196 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER) | |
197 | P4_ESCR_EMASK_BIT(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH), |
198 | .cntr = { {2, -1, -1}, {3, -1, -1} }, |
199 | }, |
200 | [P4_EVENT_FSB_DATA_ACTIVITY] = { |
201 | .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY), |
202 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, |
203 | .escr_emask = |
204 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) | |
205 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN) | |
206 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER) | |
207 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV) | |
208 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN) | |
209 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER), |
210 | .shared = 1, |
211 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
212 | }, |
213 | [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR (MSR_P4_BSU_ESCR0 for both threads), broken CCCR1 */ |
214 | .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION), |
215 | .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 }, |
216 | .escr_emask = |
217 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0) | |
218 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1) | |
219 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0) | |
220 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1) | |
221 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE) | |
222 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE) | |
223 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE) | |
224 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE) | |
225 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE) | |
226 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE) | |
227 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0) | |
228 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1) | |
229 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2), |
230 | .cntr = { {0, -1, -1}, {1, -1, -1} }, |
231 | }, |
232 | [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR: both threads use MSR_P4_BSU_ESCR1 */ |
233 | .opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES), |
234 | .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 }, |
235 | .escr_emask = |
236 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0) | |
237 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1) | |
238 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0) | |
239 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1) | |
240 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE) | |
241 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE) | |
242 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE) | |
243 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE) | |
244 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE) | |
245 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE) | |
246 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0) | |
247 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1) | |
248 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2), |
249 | .cntr = { {2, -1, -1}, {3, -1, -1} }, |
250 | }, |
251 | [P4_EVENT_SSE_INPUT_ASSIST] = { |
252 | .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST), |
253 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, |
254 | .escr_emask = |
255 | P4_ESCR_EMASK_BIT(P4_EVENT_SSE_INPUT_ASSIST, ALL), |
256 | .shared = 1, |
257 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
258 | }, |
259 | [P4_EVENT_PACKED_SP_UOP] = { |
260 | .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP), |
261 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, |
262 | .escr_emask = |
263 | P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_SP_UOP, ALL), |
264 | .shared = 1, |
265 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
266 | }, |
267 | [P4_EVENT_PACKED_DP_UOP] = { |
268 | .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP), |
269 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, |
270 | .escr_emask = |
271 | P4_ESCR_EMASK_BIT(P4_EVENT_PACKED_DP_UOP, ALL), |
272 | .shared = 1, |
273 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
274 | }, |
275 | [P4_EVENT_SCALAR_SP_UOP] = { |
276 | .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP), |
277 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, |
278 | .escr_emask = |
279 | P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_SP_UOP, ALL), |
280 | .shared = 1, |
281 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
282 | }, |
283 | [P4_EVENT_SCALAR_DP_UOP] = { |
284 | .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP), |
285 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, |
286 | .escr_emask = |
287 | P4_ESCR_EMASK_BIT(P4_EVENT_SCALAR_DP_UOP, ALL), |
288 | .shared = 1, |
289 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
290 | }, |
291 | [P4_EVENT_64BIT_MMX_UOP] = { |
292 | .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP), |
293 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, |
294 | .escr_emask = |
295 | P4_ESCR_EMASK_BIT(P4_EVENT_64BIT_MMX_UOP, ALL), |
296 | .shared = 1, |
297 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
298 | }, |
299 | [P4_EVENT_128BIT_MMX_UOP] = { |
300 | .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP), |
301 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, |
302 | .escr_emask = |
303 | P4_ESCR_EMASK_BIT(P4_EVENT_128BIT_MMX_UOP, ALL), |
304 | .shared = 1, |
305 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
306 | }, |
307 | [P4_EVENT_X87_FP_UOP] = { |
308 | .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP), |
309 | .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, |
310 | .escr_emask = |
311 | P4_ESCR_EMASK_BIT(P4_EVENT_X87_FP_UOP, ALL), |
312 | .shared = 1, |
313 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
314 | }, |
315 | [P4_EVENT_TC_MISC] = { |
316 | .opcode = P4_OPCODE(P4_EVENT_TC_MISC), |
317 | .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, |
318 | .escr_emask = |
319 | P4_ESCR_EMASK_BIT(P4_EVENT_TC_MISC, FLUSH), |
320 | .cntr = { {4, 5, -1}, {6, 7, -1} }, |
321 | }, |
322 | [P4_EVENT_GLOBAL_POWER_EVENTS] = { |
323 | .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS), |
324 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, |
325 | .escr_emask = |
326 | P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING), |
327 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
328 | }, |
329 | [P4_EVENT_TC_MS_XFER] = { |
330 | .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER), |
331 | .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, |
332 | .escr_emask = |
333 | P4_ESCR_EMASK_BIT(P4_EVENT_TC_MS_XFER, CISC), |
334 | .cntr = { {4, 5, -1}, {6, 7, -1} }, |
335 | }, |
336 | [P4_EVENT_UOP_QUEUE_WRITES] = { |
337 | .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES), |
338 | .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, |
339 | .escr_emask = |
340 | P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD) | |
341 | P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER) | |
342 | P4_ESCR_EMASK_BIT(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM), |
343 | .cntr = { {4, 5, -1}, {6, 7, -1} }, |
344 | }, |
345 | [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = { |
346 | .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE), |
347 | .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 }, /* NOTE(review): ESCR0 for both threads, unlike P4_EVENT_RETIRED_BRANCH_TYPE -- confirm intended */ |
348 | .escr_emask = |
349 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL) | |
350 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL) | |
351 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN) | |
352 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT), |
353 | .cntr = { {4, 5, -1}, {6, 7, -1} }, |
354 | }, |
355 | [P4_EVENT_RETIRED_BRANCH_TYPE] = { |
356 | .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE), |
357 | .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 }, |
358 | .escr_emask = |
359 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) | |
360 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) | |
361 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) | |
362 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT), |
363 | .cntr = { {4, 5, -1}, {6, 7, -1} }, |
364 | }, |
365 | [P4_EVENT_RESOURCE_STALL] = { |
366 | .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL), |
367 | .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 }, |
368 | .escr_emask = |
369 | P4_ESCR_EMASK_BIT(P4_EVENT_RESOURCE_STALL, SBFULL), |
370 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
371 | }, |
372 | [P4_EVENT_WC_BUFFER] = { |
373 | .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER), |
374 | .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, |
375 | .escr_emask = |
376 | P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_EVICTS) | |
377 | P4_ESCR_EMASK_BIT(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS), |
378 | .shared = 1, |
379 | .cntr = { {8, 9, -1}, {10, 11, -1} }, |
380 | }, |
381 | [P4_EVENT_B2B_CYCLES] = { |
382 | .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES), |
383 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, |
384 | .escr_emask = 0, /* no EventMask bits are listed as valid for this event */ |
385 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
386 | }, |
387 | [P4_EVENT_BNR] = { |
388 | .opcode = P4_OPCODE(P4_EVENT_BNR), |
389 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, |
390 | .escr_emask = 0, |
391 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
392 | }, |
393 | [P4_EVENT_SNOOP] = { |
394 | .opcode = P4_OPCODE(P4_EVENT_SNOOP), |
395 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, |
396 | .escr_emask = 0, |
397 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
398 | }, |
399 | [P4_EVENT_RESPONSE] = { |
400 | .opcode = P4_OPCODE(P4_EVENT_RESPONSE), |
401 | .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, |
402 | .escr_emask = 0, |
403 | .cntr = { {0, -1, -1}, {2, -1, -1} }, |
404 | }, |
405 | [P4_EVENT_FRONT_END_EVENT] = { |
406 | .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT), |
407 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, |
408 | .escr_emask = |
409 | P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, NBOGUS) | |
410 | P4_ESCR_EMASK_BIT(P4_EVENT_FRONT_END_EVENT, BOGUS), |
411 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
412 | }, |
413 | [P4_EVENT_EXECUTION_EVENT] = { |
414 | .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT), |
415 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, |
416 | .escr_emask = |
417 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0) | |
418 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1) | |
419 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2) | |
420 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3) | |
421 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) | |
422 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) | |
423 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) | |
424 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3), |
425 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
426 | }, |
427 | [P4_EVENT_REPLAY_EVENT] = { |
428 | .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT), |
429 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, |
430 | .escr_emask = |
431 | P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, NBOGUS) | |
432 | P4_ESCR_EMASK_BIT(P4_EVENT_REPLAY_EVENT, BOGUS), |
433 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
434 | }, |
435 | [P4_EVENT_INSTR_RETIRED] = { |
436 | .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED), |
437 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, |
438 | .escr_emask = |
439 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) | |
440 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSTAG) | |
441 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG) | |
442 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSTAG), |
443 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
444 | }, |
445 | [P4_EVENT_UOPS_RETIRED] = { |
446 | .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED), |
447 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, |
448 | .escr_emask = |
449 | P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, NBOGUS) | |
450 | P4_ESCR_EMASK_BIT(P4_EVENT_UOPS_RETIRED, BOGUS), |
451 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
452 | }, |
453 | [P4_EVENT_UOP_TYPE] = { |
454 | .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE), |
455 | .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 }, |
456 | .escr_emask = |
457 | P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGLOADS) | |
458 | P4_ESCR_EMASK_BIT(P4_EVENT_UOP_TYPE, TAGSTORES), |
459 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
460 | }, |
461 | [P4_EVENT_BRANCH_RETIRED] = { |
462 | .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED), |
463 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, |
464 | .escr_emask = |
465 | P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNP) | |
466 | P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMNM) | |
467 | P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTP) | |
468 | P4_ESCR_EMASK_BIT(P4_EVENT_BRANCH_RETIRED, MMTM), |
469 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
470 | }, |
471 | [P4_EVENT_MISPRED_BRANCH_RETIRED] = { |
472 | .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED), |
473 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, |
474 | .escr_emask = |
475 | P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS), |
476 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
477 | }, |
478 | [P4_EVENT_X87_ASSIST] = { |
479 | .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST), |
480 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, |
481 | .escr_emask = |
482 | P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSU) | |
483 | P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, FPSO) | |
484 | P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAO) | |
485 | P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, POAU) | |
486 | P4_ESCR_EMASK_BIT(P4_EVENT_X87_ASSIST, PREA), |
487 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
488 | }, |
489 | [P4_EVENT_MACHINE_CLEAR] = { |
490 | .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR), |
491 | .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, |
492 | .escr_emask = |
493 | P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, CLEAR) | |
494 | P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, MOCLEAR) | |
495 | P4_ESCR_EMASK_BIT(P4_EVENT_MACHINE_CLEAR, SMCLEAR), |
496 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
497 | }, |
498 | [P4_EVENT_INSTR_COMPLETED] = { |
499 | .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED), |
500 | .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, |
501 | .escr_emask = |
502 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, NBOGUS) | |
503 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_COMPLETED, BOGUS), |
504 | .cntr = { {12, 13, 16}, {14, 15, 17} }, |
505 | }, |
506 | }; |
507 | |
/*
 * Compose the config value for one hw-cache table slot.
 *
 * @event:  P4_EVENT_* identifier; it is handed to P4_ESCR_EVENT(),
 *          P4_ESCR_EMASK_BIT() and P4_OPCODE() as is, so it must be
 *          a plain identifier
 * @bit:    EventMask bit name passed to P4_ESCR_EMASK_BIT()
 * @metric: value OR-ed into the CCCR part next to the ESCR selector
 *
 * The ESCR part carries the event code plus one EventMask bit, the
 * CCCR part carries @metric plus the ESCR selector derived from the
 * event opcode.
 *
 * @metric and the whole expansion are parenthesized so that the
 * result is a single operand wherever the macro is used.
 */
#define P4_GEN_CACHE_EVENT(event, bit, metric)				  \
	(p4_config_pack_escr(P4_ESCR_EVENT(event)			| \
			     P4_ESCR_EMASK_BIT(event, bit))		| \
	 p4_config_pack_cccr((metric)					| \
			     P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event)))))
513 | |
514 | static __initconst const u64 p4_hw_cache_event_ids /* event config per [cache][operation][result] */ |
515 | [PERF_COUNT_HW_CACHE_MAX] |
516 | [PERF_COUNT_HW_CACHE_OP_MAX] |
517 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = |
518 | { |
519 | [ C(L1D ) ] = { |
520 | [ C(OP_READ) ] = { |
521 | [ C(RESULT_ACCESS) ] = 0x0, /* NOTE(review): 0x0 presumably means "no event for this combination" -- confirm with the table's consumer */ |
522 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, /* replay event with a PEBS metric as selector */ |
523 | P4_PEBS_METRIC__1stl_cache_load_miss_retired), |
524 | }, |
525 | }, |
526 | [ C(LL ) ] = { |
527 | [ C(OP_READ) ] = { |
528 | [ C(RESULT_ACCESS) ] = 0x0, |
529 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, |
530 | P4_PEBS_METRIC__2ndl_cache_load_miss_retired), |
531 | }, |
532 | }, |
533 | [ C(DTLB) ] = { |
534 | [ C(OP_READ) ] = { |
535 | [ C(RESULT_ACCESS) ] = 0x0, |
536 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, |
537 | P4_PEBS_METRIC__dtlb_load_miss_retired), |
538 | }, |
539 | [ C(OP_WRITE) ] = { |
540 | [ C(RESULT_ACCESS) ] = 0x0, |
541 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, |
542 | P4_PEBS_METRIC__dtlb_store_miss_retired), |
543 | }, |
544 | }, |
545 | [ C(ITLB) ] = { |
546 | [ C(OP_READ) ] = { |
547 | [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT, /* plain event, no PEBS metric */ |
548 | P4_PEBS_METRIC__none), |
549 | [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS, |
550 | P4_PEBS_METRIC__none), |
551 | }, |
552 | [ C(OP_WRITE) ] = { |
553 | [ C(RESULT_ACCESS) ] = -1, /* NOTE(review): -1 presumably marks the combination as invalid -- confirm with the table's consumer */ |
554 | [ C(RESULT_MISS) ] = -1, |
555 | }, |
556 | [ C(OP_PREFETCH) ] = { |
557 | [ C(RESULT_ACCESS) ] = -1, |
558 | [ C(RESULT_MISS) ] = -1, |
559 | }, |
560 | }, |
561 | [ C(NODE) ] = { |
562 | [ C(OP_READ) ] = { |
563 | [ C(RESULT_ACCESS) ] = -1, |
564 | [ C(RESULT_MISS) ] = -1, |
565 | }, |
566 | [ C(OP_WRITE) ] = { |
567 | [ C(RESULT_ACCESS) ] = -1, |
568 | [ C(RESULT_MISS) ] = -1, |
569 | }, |
570 | [ C(OP_PREFETCH) ] = { |
571 | [ C(RESULT_ACCESS) ] = -1, |
572 | [ C(RESULT_MISS) ] = -1, |
573 | }, |
574 | }, |
575 | }; |
576 | |
577 | /* |
578 | * Because Netburst is quite restricted in how many identical |
579 | * events may run simultaneously, we introduce event aliases, |
580 | * i.e. different events which have the same functionality but |
581 | * utilize non-intersecting resources (ESCR/CCCR/counter registers). |
582 | * |
583 | * This allows us to relax the restrictions a bit and run two or more |
584 | * identical events together. |
585 | * |
586 | * Never set any custom internal bits such as P4_CONFIG_HT, |
587 | * P4_CONFIG_ALIASABLE or bits for P4_PEBS_METRIC in the entries below: |
588 | * they are either kept up to date automatically or not applicable at all. |
589 | */ |
590 | static struct p4_event_alias { |
591 | u64 original; /* config bits of the event as normally requested */ |
592 | u64 alternative; /* config bits of the substitute event; lookup works in both directions */ |
593 | } p4_event_aliases[] = { |
594 | { |
595 | /* |
596 | * Non-halted cycles can be substituted with non-sleeping cycles (see |
597 | * Intel SDM Vol3b for details). We need this alias to be able |
598 | * to run the nmi-watchdog and 'perf top' (or any other user space tool |
599 | * which is interested in running PERF_COUNT_HW_CPU_CYCLES) |
600 | * simultaneously. |
601 | */ |
602 | .original = |
603 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) | |
604 | P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)), |
605 | .alternative = |
606 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_EXECUTION_EVENT) | |
607 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0)| |
608 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1)| |
609 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2)| |
610 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3)| |
611 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) | |
612 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) | |
613 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) | |
614 | P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3))| |
615 | p4_config_pack_cccr(P4_CCCR_THRESHOLD(15) | P4_CCCR_COMPLEMENT | |
616 | P4_CCCR_COMPARE), |
617 | }, |
618 | }; |
619 | |
620 | static u64 p4_get_alias_event(u64 config) |
621 | { |
622 | u64 config_match; |
623 | int i; |
624 | |
625 | /* |
626 | * Only event with special mark is allowed, |
627 | * we're to be sure it didn't come as malformed |
628 | * RAW event. |
629 | */ |
630 | if (!(config & P4_CONFIG_ALIASABLE)) |
631 | return 0; |
632 | |
633 | config_match = config & P4_CONFIG_EVENT_ALIAS_MASK; |
634 | |
635 | for (i = 0; i < ARRAY_SIZE(p4_event_aliases); i++) { |
636 | if (config_match == p4_event_aliases[i].original) { |
637 | config_match = p4_event_aliases[i].alternative; |
638 | break; |
639 | } else if (config_match == p4_event_aliases[i].alternative) { |
640 | config_match = p4_event_aliases[i].original; |
641 | break; |
642 | } |
643 | } |
644 | |
645 | if (i >= ARRAY_SIZE(p4_event_aliases)) |
646 | return 0; |
647 | |
648 | return config_match | (config & P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS); |
649 | } |
650 | |
651 | static u64 p4_general_events[PERF_COUNT_HW_MAX] = { /* PERF_COUNT_HW_* id -> config; p4_pmu_event_map() adds the ESCR selector */ |
652 | /* non-halted CPU clocks; marked P4_CONFIG_ALIASABLE so p4_get_alias_event() may substitute it */ |
653 | [PERF_COUNT_HW_CPU_CYCLES] = |
654 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) | |
655 | P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)) | |
656 | P4_CONFIG_ALIASABLE, |
657 | |
658 | /* |
659 | * retired instructions |
660 | * for the sake of simplicity we don't use the FSB tagging |
661 | */ |
662 | [PERF_COUNT_HW_INSTRUCTIONS] = |
663 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_INSTR_RETIRED) | |
664 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) | |
665 | P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG)), |
666 | |
667 | /* cache hits: all RD_2ndL_HIT* and RD_3rdL_HIT* mask bits */ |
668 | [PERF_COUNT_HW_CACHE_REFERENCES] = |
669 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) | |
670 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) | |
671 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) | |
672 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) | |
673 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) | |
674 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) | |
675 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM)), |
676 | |
677 | /* cache misses: the RD_2ndL_MISS, RD_3rdL_MISS and WR_2ndL_MISS mask bits */ |
678 | [PERF_COUNT_HW_CACHE_MISSES] = |
679 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) | |
680 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) | |
681 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) | |
682 | P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS)), |
683 | |
684 | /* branch instructions retired, all four branch types */ |
685 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = |
686 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_RETIRED_BRANCH_TYPE) | |
687 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) | |
688 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) | |
689 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) | |
690 | P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT)), |
691 | |
692 | /* mispredicted branches retired */ |
693 | [PERF_COUNT_HW_BRANCH_MISSES] = |
694 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_MISPRED_BRANCH_RETIRED) | |
695 | P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS)), |
696 | |
697 | /* bus ready clocks (cpu is driving #DRDY_DRV\#DRDY_OWN); the only entry that also sets CCCR bits */ |
698 | [PERF_COUNT_HW_BUS_CYCLES] = |
699 | p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_FSB_DATA_ACTIVITY) | |
700 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) | |
701 | P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN)) | |
702 | p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE), |
703 | }; |
704 | |
705 | static struct p4_event_bind *p4_config_get_bind(u64 config) |
706 | { |
707 | unsigned int evnt = p4_config_unpack_event(config); |
708 | struct p4_event_bind *bind = NULL; |
709 | |
710 | if (evnt < ARRAY_SIZE(p4_event_bind_map)) |
711 | bind = &p4_event_bind_map[evnt]; |
712 | |
713 | return bind; |
714 | } |
715 | |
716 | static u64 p4_pmu_event_map(int hw_event) |
717 | { |
718 | struct p4_event_bind *bind; |
719 | unsigned int esel; |
720 | u64 config; |
721 | |
722 | config = p4_general_events[hw_event]; |
723 | bind = p4_config_get_bind(config); |
724 | esel = P4_OPCODE_ESEL(bind->opcode); |
725 | config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel)); |
726 | |
727 | return config; |
728 | } |
729 | |
730 | /* check cpu model specifics */ |
731 | static bool p4_event_match_cpu_model(unsigned int event_idx) |
732 | { |
733 | /* INSTR_COMPLETED event only exist for model 3, 4, 6 (Prescott) */ |
734 | if (event_idx == P4_EVENT_INSTR_COMPLETED) { |
735 | if (boot_cpu_data.x86_model != 3 && |
736 | boot_cpu_data.x86_model != 4 && |
737 | boot_cpu_data.x86_model != 6) |
738 | return false; |
739 | } |
740 | |
741 | /* |
742 | * For info |
743 | * - IQ_ESCR0, IQ_ESCR1 only for models 1 and 2 |
744 | */ |
745 | |
746 | return true; |
747 | } |
748 | |
749 | static int p4_validate_raw_event(struct perf_event *event) |
750 | { |
751 | unsigned int v, emask; |
752 | |
753 | /* User data may have out-of-bound event index */ |
754 | v = p4_config_unpack_event(event->attr.config); |
755 | if (v >= ARRAY_SIZE(p4_event_bind_map)) |
756 | return -EINVAL; |
757 | |
758 | /* It may be unsupported: */ |
759 | if (!p4_event_match_cpu_model(event_idx: v)) |
760 | return -EINVAL; |
761 | |
762 | /* |
763 | * NOTE: P4_CCCR_THREAD_ANY has not the same meaning as |
764 | * in Architectural Performance Monitoring, it means not |
765 | * on _which_ logical cpu to count but rather _when_, ie it |
766 | * depends on logical cpu state -- count event if one cpu active, |
767 | * none, both or any, so we just allow user to pass any value |
768 | * desired. |
769 | * |
770 | * In turn we always set Tx_OS/Tx_USR bits bound to logical |
771 | * cpu without their propagation to another cpu |
772 | */ |
773 | |
774 | /* |
775 | * if an event is shared across the logical threads |
776 | * the user needs special permissions to be able to use it |
777 | */ |
778 | if (p4_ht_active() && p4_event_bind_map[v].shared) { |
779 | v = perf_allow_cpu(attr: &event->attr); |
780 | if (v) |
781 | return v; |
782 | } |
783 | |
784 | /* ESCR EventMask bits may be invalid */ |
785 | emask = p4_config_unpack_escr(event->attr.config) & P4_ESCR_EVENTMASK_MASK; |
786 | if (emask & ~p4_event_bind_map[v].escr_emask) |
787 | return -EINVAL; |
788 | |
789 | /* |
790 | * it may have some invalid PEBS bits |
791 | */ |
792 | if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) |
793 | return -EINVAL; |
794 | |
795 | v = p4_config_unpack_metric(event->attr.config); |
796 | if (v >= ARRAY_SIZE(p4_pebs_bind_map)) |
797 | return -EINVAL; |
798 | |
799 | return 0; |
800 | } |
801 | |
802 | static int p4_hw_config(struct perf_event *event) |
803 | { |
804 | int cpu = get_cpu(); |
805 | int rc = 0; |
806 | u32 escr, cccr; |
807 | |
808 | /* |
809 | * the reason we use cpu that early is that: if we get scheduled |
810 | * first time on the same cpu -- we will not need swap thread |
811 | * specific flags in config (and will save some cpu cycles) |
812 | */ |
813 | |
814 | cccr = p4_default_cccr_conf(cpu); |
815 | escr = p4_default_escr_conf(cpu, exclude_os: event->attr.exclude_kernel, |
816 | exclude_usr: event->attr.exclude_user); |
817 | event->hw.config = p4_config_pack_escr(escr) | |
818 | p4_config_pack_cccr(cccr); |
819 | |
820 | if (p4_ht_active() && p4_ht_thread(cpu)) |
821 | event->hw.config = p4_set_ht_bit(config: event->hw.config); |
822 | |
823 | if (event->attr.type == PERF_TYPE_RAW) { |
824 | struct p4_event_bind *bind; |
825 | unsigned int esel; |
826 | /* |
827 | * Clear bits we reserve to be managed by kernel itself |
828 | * and never allowed from a user space |
829 | */ |
830 | event->attr.config &= P4_CONFIG_MASK; |
831 | |
832 | rc = p4_validate_raw_event(event); |
833 | if (rc) |
834 | goto out; |
835 | |
836 | /* |
837 | * Note that for RAW events we allow user to use P4_CCCR_RESERVED |
838 | * bits since we keep additional info here (for cache events and etc) |
839 | */ |
840 | event->hw.config |= event->attr.config; |
841 | bind = p4_config_get_bind(config: event->attr.config); |
842 | if (!bind) { |
843 | rc = -EINVAL; |
844 | goto out; |
845 | } |
846 | esel = P4_OPCODE_ESEL(bind->opcode); |
847 | event->hw.config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel)); |
848 | } |
849 | |
850 | rc = x86_setup_perfctr(event); |
851 | out: |
852 | put_cpu(); |
853 | return rc; |
854 | } |
855 | |
856 | static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) |
857 | { |
858 | u64 v; |
859 | |
860 | /* an official way for overflow indication */ |
861 | rdmsrl(hwc->config_base, v); |
862 | if (v & P4_CCCR_OVF) { |
863 | wrmsrl(msr: hwc->config_base, val: v & ~P4_CCCR_OVF); |
864 | return 1; |
865 | } |
866 | |
867 | /* |
868 | * In some circumstances the overflow might issue an NMI but did |
869 | * not set P4_CCCR_OVF bit. Because a counter holds a negative value |
870 | * we simply check for high bit being set, if it's cleared it means |
871 | * the counter has reached zero value and continued counting before |
872 | * real NMI signal was received: |
873 | */ |
874 | rdmsrl(hwc->event_base, v); |
875 | if (!(v & ARCH_P4_UNFLAGGED_BIT)) |
876 | return 1; |
877 | |
878 | return 0; |
879 | } |
880 | |
/*
 * Intentionally a no-op for now: the PEBS metric MSRs are never cleared,
 * see the FIXME below.
 */
static void p4_pmu_disable_pebs(void)
{
	/*
	 * FIXME
	 *
	 * It's still allowed that two threads setup same cache
	 * events so we can't simply clear metrics until we know
	 * no one is depending on us, so we need a kind of counter
	 * for "ReplayEvent" users.
	 *
	 * What is more complex -- RAW events: if a user (for some
	 * reason) passes some cache event metric with an improper
	 * event opcode -- it's fine from the hardware point of view
	 * but complete nonsense from the "meaning" of such an action.
	 *
	 * So at the moment let's leave metrics turned on forever -- it's
	 * ok for now but needs to be revisited!
	 *
	 * The writes that would disable the metrics are:
	 *
	 * (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, 0);
	 * (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, 0);
	 */
}
903 | |
904 | static inline void p4_pmu_disable_event(struct perf_event *event) |
905 | { |
906 | struct hw_perf_event *hwc = &event->hw; |
907 | |
908 | /* |
909 | * If event gets disabled while counter is in overflowed |
910 | * state we need to clear P4_CCCR_OVF, otherwise interrupt get |
911 | * asserted again and again |
912 | */ |
913 | (void)wrmsrl_safe(msr: hwc->config_base, |
914 | p4_config_unpack_cccr(hwc->config) & ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED); |
915 | } |
916 | |
917 | static void p4_pmu_disable_all(void) |
918 | { |
919 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
920 | int idx; |
921 | |
922 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
923 | struct perf_event *event = cpuc->events[idx]; |
924 | if (!test_bit(idx, cpuc->active_mask)) |
925 | continue; |
926 | p4_pmu_disable_event(event); |
927 | } |
928 | |
929 | p4_pmu_disable_pebs(); |
930 | } |
931 | |
932 | /* configuration must be valid */ |
933 | static void p4_pmu_enable_pebs(u64 config) |
934 | { |
935 | struct p4_pebs_bind *bind; |
936 | unsigned int idx; |
937 | |
938 | BUILD_BUG_ON(P4_PEBS_METRIC__max > P4_PEBS_CONFIG_METRIC_MASK); |
939 | |
940 | idx = p4_config_unpack_metric(config); |
941 | if (idx == P4_PEBS_METRIC__none) |
942 | return; |
943 | |
944 | bind = &p4_pebs_bind_map[idx]; |
945 | |
946 | (void)wrmsrl_safe(MSR_IA32_PEBS_ENABLE, val: (u64)bind->metric_pebs); |
947 | (void)wrmsrl_safe(MSR_P4_PEBS_MATRIX_VERT, val: (u64)bind->metric_vert); |
948 | } |
949 | |
950 | static void __p4_pmu_enable_event(struct perf_event *event) |
951 | { |
952 | struct hw_perf_event *hwc = &event->hw; |
953 | int thread = p4_ht_config_thread(config: hwc->config); |
954 | u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config)); |
955 | unsigned int idx = p4_config_unpack_event(hwc->config); |
956 | struct p4_event_bind *bind; |
957 | u64 escr_addr, cccr; |
958 | |
959 | bind = &p4_event_bind_map[idx]; |
960 | escr_addr = bind->escr_msr[thread]; |
961 | |
962 | /* |
963 | * - we dont support cascaded counters yet |
964 | * - and counter 1 is broken (erratum) |
965 | */ |
966 | WARN_ON_ONCE(p4_is_event_cascaded(hwc->config)); |
967 | WARN_ON_ONCE(hwc->idx == 1); |
968 | |
969 | /* we need a real Event value */ |
970 | escr_conf &= ~P4_ESCR_EVENT_MASK; |
971 | escr_conf |= P4_ESCR_EVENT(P4_OPCODE_EVNT(bind->opcode)); |
972 | |
973 | cccr = p4_config_unpack_cccr(hwc->config); |
974 | |
975 | /* |
976 | * it could be Cache event so we need to write metrics |
977 | * into additional MSRs |
978 | */ |
979 | p4_pmu_enable_pebs(config: hwc->config); |
980 | |
981 | (void)wrmsrl_safe(msr: escr_addr, val: escr_conf); |
982 | (void)wrmsrl_safe(msr: hwc->config_base, |
983 | val: (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE); |
984 | } |
985 | |
986 | static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(X86_PMC_IDX_MAX)], p4_running); |
987 | |
988 | static void p4_pmu_enable_event(struct perf_event *event) |
989 | { |
990 | int idx = event->hw.idx; |
991 | |
992 | __set_bit(idx, per_cpu(p4_running, smp_processor_id())); |
993 | __p4_pmu_enable_event(event); |
994 | } |
995 | |
996 | static void p4_pmu_enable_all(int added) |
997 | { |
998 | struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); |
999 | int idx; |
1000 | |
1001 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
1002 | struct perf_event *event = cpuc->events[idx]; |
1003 | if (!test_bit(idx, cpuc->active_mask)) |
1004 | continue; |
1005 | __p4_pmu_enable_event(event); |
1006 | } |
1007 | } |
1008 | |
1009 | static int p4_pmu_set_period(struct perf_event *event) |
1010 | { |
1011 | struct hw_perf_event *hwc = &event->hw; |
1012 | s64 left = this_cpu_read(pmc_prev_left[hwc->idx]); |
1013 | int ret; |
1014 | |
1015 | ret = x86_perf_event_set_period(event); |
1016 | |
1017 | if (hwc->event_base) { |
1018 | /* |
1019 | * This handles erratum N15 in intel doc 249199-029, |
1020 | * the counter may not be updated correctly on write |
1021 | * so we need a second write operation to do the trick |
1022 | * (the official workaround didn't work) |
1023 | * |
1024 | * the former idea is taken from OProfile code |
1025 | */ |
1026 | wrmsrl(msr: hwc->event_base, val: (u64)(-left) & x86_pmu.cntval_mask); |
1027 | } |
1028 | |
1029 | return ret; |
1030 | } |
1031 | |
1032 | static int p4_pmu_handle_irq(struct pt_regs *regs) |
1033 | { |
1034 | struct perf_sample_data data; |
1035 | struct cpu_hw_events *cpuc; |
1036 | struct perf_event *event; |
1037 | struct hw_perf_event *hwc; |
1038 | int idx, handled = 0; |
1039 | u64 val; |
1040 | |
1041 | cpuc = this_cpu_ptr(&cpu_hw_events); |
1042 | |
1043 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
1044 | int overflow; |
1045 | |
1046 | if (!test_bit(idx, cpuc->active_mask)) { |
1047 | /* catch in-flight IRQs */ |
1048 | if (__test_and_clear_bit(idx, per_cpu(p4_running, smp_processor_id()))) |
1049 | handled++; |
1050 | continue; |
1051 | } |
1052 | |
1053 | event = cpuc->events[idx]; |
1054 | hwc = &event->hw; |
1055 | |
1056 | WARN_ON_ONCE(hwc->idx != idx); |
1057 | |
1058 | /* it might be unflagged overflow */ |
1059 | overflow = p4_pmu_clear_cccr_ovf(hwc); |
1060 | |
1061 | val = x86_perf_event_update(event); |
1062 | if (!overflow && (val & (1ULL << (x86_pmu.cntval_bits - 1)))) |
1063 | continue; |
1064 | |
1065 | handled += overflow; |
1066 | |
1067 | /* event overflow for sure */ |
1068 | perf_sample_data_init(data: &data, addr: 0, period: hwc->last_period); |
1069 | |
1070 | if (!static_call(x86_pmu_set_period)(event)) |
1071 | continue; |
1072 | |
1073 | |
1074 | if (perf_event_overflow(event, data: &data, regs)) |
1075 | x86_pmu_stop(event, flags: 0); |
1076 | } |
1077 | |
1078 | if (handled) |
1079 | inc_irq_stat(apic_perf_irqs); |
1080 | |
1081 | /* |
1082 | * When dealing with the unmasking of the LVTPC on P4 perf hw, it has |
1083 | * been observed that the OVF bit flag has to be cleared first _before_ |
1084 | * the LVTPC can be unmasked. |
1085 | * |
1086 | * The reason is the NMI line will continue to be asserted while the OVF |
1087 | * bit is set. This causes a second NMI to generate if the LVTPC is |
1088 | * unmasked before the OVF bit is cleared, leading to unknown NMI |
1089 | * messages. |
1090 | */ |
1091 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
1092 | |
1093 | return handled; |
1094 | } |
1095 | |
1096 | /* |
1097 | * swap thread specific fields according to a thread |
1098 | * we are going to run on |
1099 | */ |
1100 | static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu) |
1101 | { |
1102 | u32 escr, cccr; |
1103 | |
1104 | /* |
1105 | * we either lucky and continue on same cpu or no HT support |
1106 | */ |
1107 | if (!p4_should_swap_ts(config: hwc->config, cpu)) |
1108 | return; |
1109 | |
1110 | /* |
1111 | * the event is migrated from an another logical |
1112 | * cpu, so we need to swap thread specific flags |
1113 | */ |
1114 | |
1115 | escr = p4_config_unpack_escr(hwc->config); |
1116 | cccr = p4_config_unpack_cccr(hwc->config); |
1117 | |
1118 | if (p4_ht_thread(cpu)) { |
1119 | cccr &= ~P4_CCCR_OVF_PMI_T0; |
1120 | cccr |= P4_CCCR_OVF_PMI_T1; |
1121 | if (escr & P4_ESCR_T0_OS) { |
1122 | escr &= ~P4_ESCR_T0_OS; |
1123 | escr |= P4_ESCR_T1_OS; |
1124 | } |
1125 | if (escr & P4_ESCR_T0_USR) { |
1126 | escr &= ~P4_ESCR_T0_USR; |
1127 | escr |= P4_ESCR_T1_USR; |
1128 | } |
1129 | hwc->config = p4_config_pack_escr(escr); |
1130 | hwc->config |= p4_config_pack_cccr(cccr); |
1131 | hwc->config |= P4_CONFIG_HT; |
1132 | } else { |
1133 | cccr &= ~P4_CCCR_OVF_PMI_T1; |
1134 | cccr |= P4_CCCR_OVF_PMI_T0; |
1135 | if (escr & P4_ESCR_T1_OS) { |
1136 | escr &= ~P4_ESCR_T1_OS; |
1137 | escr |= P4_ESCR_T0_OS; |
1138 | } |
1139 | if (escr & P4_ESCR_T1_USR) { |
1140 | escr &= ~P4_ESCR_T1_USR; |
1141 | escr |= P4_ESCR_T0_USR; |
1142 | } |
1143 | hwc->config = p4_config_pack_escr(escr); |
1144 | hwc->config |= p4_config_pack_cccr(cccr); |
1145 | hwc->config &= ~P4_CONFIG_HT; |
1146 | } |
1147 | } |
1148 | |
/*
 * ESCR address hashing is tricky: ESCR MSRs are not laid out
 * sequentially, but they all start from MSR_P4_BSU_ESCR0 (0x03a0) and
 * their low address byte lies in the range [0xa0,0xe1].
 *
 * So the table below is indexed directly by (address - base), which
 * gives a ~70% filled hashtable.
 *
 * Macro arguments are parenthesized so that expression arguments
 * (e.g. "a | b") are evaluated as a whole.
 */

#define P4_ESCR_MSR_BASE		0x000003a0
#define P4_ESCR_MSR_MAX			0x000003e1
#define P4_ESCR_MSR_TABLE_SIZE		(P4_ESCR_MSR_MAX - P4_ESCR_MSR_BASE + 1)
#define P4_ESCR_MSR_IDX(msr)		((msr) - P4_ESCR_MSR_BASE)
#define P4_ESCR_MSR_TABLE_ENTRY(msr)	[P4_ESCR_MSR_IDX(msr)] = (msr)
1162 | |
/*
 * Table of known ESCR MSR addresses, indexed by P4_ESCR_MSR_IDX(address).
 * Each populated slot stores the address itself; slots without an entry
 * stay zero, which lets a lookup tell a known ESCR from a hole.
 */
static const unsigned int p4_escr_table[P4_ESCR_MSR_TABLE_SIZE] = {
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ALF_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BPU_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_BSU_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR2),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR3),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR4),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_CRU_ESCR5),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_DAC_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FIRM_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FLAME_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_FSB_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IQ_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IS_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_ITLB_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_IX_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MOB_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_MS_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_PMH_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_RAT_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SAAT_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_SSU_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TBPU_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_TC_ESCR1),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR0),
	P4_ESCR_MSR_TABLE_ENTRY(MSR_P4_U2L_ESCR1),
};
1211 | |
1212 | static int p4_get_escr_idx(unsigned int addr) |
1213 | { |
1214 | unsigned int idx = P4_ESCR_MSR_IDX(addr); |
1215 | |
1216 | if (unlikely(idx >= P4_ESCR_MSR_TABLE_SIZE || |
1217 | !p4_escr_table[idx] || |
1218 | p4_escr_table[idx] != addr)) { |
1219 | WARN_ONCE(1, "P4 PMU: Wrong address passed: %x\n" , addr); |
1220 | return -1; |
1221 | } |
1222 | |
1223 | return idx; |
1224 | } |
1225 | |
1226 | static int p4_next_cntr(int thread, unsigned long *used_mask, |
1227 | struct p4_event_bind *bind) |
1228 | { |
1229 | int i, j; |
1230 | |
1231 | for (i = 0; i < P4_CNTR_LIMIT; i++) { |
1232 | j = bind->cntr[thread][i]; |
1233 | if (j != -1 && !test_bit(j, used_mask)) |
1234 | return j; |
1235 | } |
1236 | |
1237 | return -1; |
1238 | } |
1239 | |
1240 | static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) |
1241 | { |
1242 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
1243 | unsigned long escr_mask[BITS_TO_LONGS(P4_ESCR_MSR_TABLE_SIZE)]; |
1244 | int cpu = smp_processor_id(); |
1245 | struct hw_perf_event *hwc; |
1246 | struct p4_event_bind *bind; |
1247 | unsigned int i, thread, num; |
1248 | int cntr_idx, escr_idx; |
1249 | u64 config_alias; |
1250 | int pass; |
1251 | |
1252 | bitmap_zero(dst: used_mask, X86_PMC_IDX_MAX); |
1253 | bitmap_zero(dst: escr_mask, P4_ESCR_MSR_TABLE_SIZE); |
1254 | |
1255 | for (i = 0, num = n; i < n; i++, num--) { |
1256 | |
1257 | hwc = &cpuc->event_list[i]->hw; |
1258 | thread = p4_ht_thread(cpu); |
1259 | pass = 0; |
1260 | |
1261 | again: |
1262 | /* |
1263 | * It's possible to hit a circular lock |
1264 | * between original and alternative events |
1265 | * if both are scheduled already. |
1266 | */ |
1267 | if (pass > 2) |
1268 | goto done; |
1269 | |
1270 | bind = p4_config_get_bind(config: hwc->config); |
1271 | escr_idx = p4_get_escr_idx(addr: bind->escr_msr[thread]); |
1272 | if (unlikely(escr_idx == -1)) |
1273 | goto done; |
1274 | |
1275 | if (hwc->idx != -1 && !p4_should_swap_ts(config: hwc->config, cpu)) { |
1276 | cntr_idx = hwc->idx; |
1277 | if (assign) |
1278 | assign[i] = hwc->idx; |
1279 | goto reserve; |
1280 | } |
1281 | |
1282 | cntr_idx = p4_next_cntr(thread, used_mask, bind); |
1283 | if (cntr_idx == -1 || test_bit(escr_idx, escr_mask)) { |
1284 | /* |
1285 | * Check whether an event alias is still available. |
1286 | */ |
1287 | config_alias = p4_get_alias_event(config: hwc->config); |
1288 | if (!config_alias) |
1289 | goto done; |
1290 | hwc->config = config_alias; |
1291 | pass++; |
1292 | goto again; |
1293 | } |
1294 | /* |
1295 | * Perf does test runs to see if a whole group can be assigned |
1296 | * together successfully. There can be multiple rounds of this. |
1297 | * Unfortunately, p4_pmu_swap_config_ts touches the hwc->config |
1298 | * bits, such that the next round of group assignments will |
1299 | * cause the above p4_should_swap_ts to pass instead of fail. |
1300 | * This leads to counters exclusive to thread0 being used by |
1301 | * thread1. |
1302 | * |
1303 | * Solve this with a cheap hack, reset the idx back to -1 to |
1304 | * force a new lookup (p4_next_cntr) to get the right counter |
1305 | * for the right thread. |
1306 | * |
1307 | * This probably doesn't comply with the general spirit of how |
1308 | * perf wants to work, but P4 is special. :-( |
1309 | */ |
1310 | if (p4_should_swap_ts(config: hwc->config, cpu)) |
1311 | hwc->idx = -1; |
1312 | p4_pmu_swap_config_ts(hwc, cpu); |
1313 | if (assign) |
1314 | assign[i] = cntr_idx; |
1315 | reserve: |
1316 | set_bit(nr: cntr_idx, addr: used_mask); |
1317 | set_bit(nr: escr_idx, addr: escr_mask); |
1318 | } |
1319 | |
1320 | done: |
1321 | return num ? -EINVAL : 0; |
1322 | } |
1323 | |
/*
 * Layout of the 64-bit event config as exposed through the PMU "format"
 * attributes: CCCR in bits 0-31, ESCR in bits 32-62, HT flag in bit 63.
 */
PMU_FORMAT_ATTR(cccr, "config:0-31" );
PMU_FORMAT_ATTR(escr, "config:32-62");
PMU_FORMAT_ATTR(ht,   "config:63"   );

/* NULL-terminated list of the format attributes defined above */
static struct attribute *intel_p4_formats_attr[] = {
	&format_attr_cccr.attr,
	&format_attr_escr.attr,
	&format_attr_ht.attr,
	NULL,
};
1334 | |
/*
 * PMU description for Netburst; p4_pmu_init() copies it into x86_pmu.
 * It wires up the P4 specific callbacks defined in this file and the
 * counter geometry (ARCH_P4_* constants).
 */
static __initconst const struct x86_pmu p4_pmu = {
	.name			= "Netburst P4/Xeon",
	.handle_irq		= p4_pmu_handle_irq,
	.disable_all		= p4_pmu_disable_all,
	.enable_all		= p4_pmu_enable_all,
	.enable			= p4_pmu_enable_event,
	.disable		= p4_pmu_disable_event,

	.set_period		= p4_pmu_set_period,

	.eventsel		= MSR_P4_BPU_CCCR0,
	.perfctr		= MSR_P4_BPU_PERFCTR0,
	.event_map		= p4_pmu_event_map,
	.max_events		= ARRAY_SIZE(p4_general_events),
	.get_event_constraints	= x86_get_event_constraints,
	/*
	 * If HT is disabled we may need to use all
	 * ARCH_P4_MAX_CCCR counters simultaneously,
	 * though leave it restricted at the moment assuming
	 * HT is on
	 */
	.num_counters		= ARCH_P4_MAX_CCCR,
	.apic			= 1,
	.cntval_bits		= ARCH_P4_CNTRVAL_BITS,
	.cntval_mask		= ARCH_P4_CNTRVAL_MASK,
	/* largest period: all bits below the top counter bit set */
	.max_period		= (1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1,
	.hw_config		= p4_hw_config,
	.schedule_events	= p4_pmu_schedule_events,

	.format_attrs		= intel_p4_formats_attr,
};
1366 | |
1367 | __init int p4_pmu_init(void) |
1368 | { |
1369 | unsigned int low, high; |
1370 | int i, reg; |
1371 | |
1372 | /* If we get stripped -- indexing fails */ |
1373 | BUILD_BUG_ON(ARCH_P4_MAX_CCCR > INTEL_PMC_MAX_GENERIC); |
1374 | |
1375 | rdmsr(MSR_IA32_MISC_ENABLE, low, high); |
1376 | if (!(low & (1 << 7))) { |
1377 | pr_cont("unsupported Netburst CPU model %d " , |
1378 | boot_cpu_data.x86_model); |
1379 | return -ENODEV; |
1380 | } |
1381 | |
1382 | memcpy(hw_cache_event_ids, p4_hw_cache_event_ids, |
1383 | sizeof(hw_cache_event_ids)); |
1384 | |
1385 | pr_cont("Netburst events, " ); |
1386 | |
1387 | x86_pmu = p4_pmu; |
1388 | |
1389 | /* |
1390 | * Even though the counters are configured to interrupt a particular |
1391 | * logical processor when an overflow happens, testing has shown that |
1392 | * on kdump kernels (which uses a single cpu), thread1's counter |
1393 | * continues to run and will report an NMI on thread0. Due to the |
1394 | * overflow bug, this leads to a stream of unknown NMIs. |
1395 | * |
1396 | * Solve this by zero'ing out the registers to mimic a reset. |
1397 | */ |
1398 | for (i = 0; i < x86_pmu.num_counters; i++) { |
1399 | reg = x86_pmu_config_addr(index: i); |
1400 | wrmsrl_safe(msr: reg, val: 0ULL); |
1401 | } |
1402 | |
1403 | return 0; |
1404 | } |
1405 | |