1 | // SPDX-License-Identifier: GPL-2.0 |
2 | |
3 | /* |
4 | * CPU accounting code for task groups. |
5 | * |
6 | * Based on the work by Paul Menage (menage@google.com) and Balbir Singh |
7 | * (balbir@in.ibm.com). |
8 | */ |
9 | |
/*
 * Kinds of CPU time reported for an accounting group.
 *
 * CPUACCT_STAT_NSTATS is the number of real entries; it sizes per-stat
 * arrays and bounds loops.  cpuacct_cpuusage_read() additionally accepts
 * it as an index meaning "the sum of usages".
 */
enum cpuacct_stat_index {
	CPUACCT_STAT_USER = 0,	/* time spent executing in user mode */
	CPUACCT_STAT_SYSTEM,	/* time spent executing in kernel mode */

	CPUACCT_STAT_NSTATS,	/* number of entries above */
};
17 | |
18 | static const char * const cpuacct_stat_desc[] = { |
19 | [CPUACCT_STAT_USER] = "user" , |
20 | [CPUACCT_STAT_SYSTEM] = "system" , |
21 | }; |
22 | |
23 | /* track CPU usage of a group of tasks and its child groups */ |
24 | struct cpuacct { |
25 | struct cgroup_subsys_state css; |
26 | /* cpuusage holds pointer to a u64-type object on every CPU */ |
27 | u64 __percpu *cpuusage; |
28 | struct kernel_cpustat __percpu *cpustat; |
29 | }; |
30 | |
31 | static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css) |
32 | { |
33 | return css ? container_of(css, struct cpuacct, css) : NULL; |
34 | } |
35 | |
36 | /* Return CPU accounting group to which this task belongs */ |
37 | static inline struct cpuacct *task_ca(struct task_struct *tsk) |
38 | { |
39 | return css_ca(css: task_css(task: tsk, subsys_id: cpuacct_cgrp_id)); |
40 | } |
41 | |
42 | static inline struct cpuacct *parent_ca(struct cpuacct *ca) |
43 | { |
44 | return css_ca(css: ca->css.parent); |
45 | } |
46 | |
47 | static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage); |
48 | static struct cpuacct root_cpuacct = { |
49 | .cpustat = &kernel_cpustat, |
50 | .cpuusage = &root_cpuacct_cpuusage, |
51 | }; |
52 | |
53 | /* Create a new CPU accounting group */ |
54 | static struct cgroup_subsys_state * |
55 | cpuacct_css_alloc(struct cgroup_subsys_state *parent_css) |
56 | { |
57 | struct cpuacct *ca; |
58 | |
59 | if (!parent_css) |
60 | return &root_cpuacct.css; |
61 | |
62 | ca = kzalloc(size: sizeof(*ca), GFP_KERNEL); |
63 | if (!ca) |
64 | goto out; |
65 | |
66 | ca->cpuusage = alloc_percpu(u64); |
67 | if (!ca->cpuusage) |
68 | goto out_free_ca; |
69 | |
70 | ca->cpustat = alloc_percpu(struct kernel_cpustat); |
71 | if (!ca->cpustat) |
72 | goto out_free_cpuusage; |
73 | |
74 | return &ca->css; |
75 | |
76 | out_free_cpuusage: |
77 | free_percpu(pdata: ca->cpuusage); |
78 | out_free_ca: |
79 | kfree(objp: ca); |
80 | out: |
81 | return ERR_PTR(error: -ENOMEM); |
82 | } |
83 | |
84 | /* Destroy an existing CPU accounting group */ |
85 | static void cpuacct_css_free(struct cgroup_subsys_state *css) |
86 | { |
87 | struct cpuacct *ca = css_ca(css); |
88 | |
89 | free_percpu(pdata: ca->cpustat); |
90 | free_percpu(pdata: ca->cpuusage); |
91 | kfree(objp: ca); |
92 | } |
93 | |
94 | static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu, |
95 | enum cpuacct_stat_index index) |
96 | { |
97 | u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); |
98 | u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat; |
99 | u64 data; |
100 | |
101 | /* |
102 | * We allow index == CPUACCT_STAT_NSTATS here to read |
103 | * the sum of usages. |
104 | */ |
105 | if (WARN_ON_ONCE(index > CPUACCT_STAT_NSTATS)) |
106 | return 0; |
107 | |
108 | #ifndef CONFIG_64BIT |
109 | /* |
110 | * Take rq->lock to make 64-bit read safe on 32-bit platforms. |
111 | */ |
112 | raw_spin_rq_lock_irq(cpu_rq(cpu)); |
113 | #endif |
114 | |
115 | switch (index) { |
116 | case CPUACCT_STAT_USER: |
117 | data = cpustat[CPUTIME_USER] + cpustat[CPUTIME_NICE]; |
118 | break; |
119 | case CPUACCT_STAT_SYSTEM: |
120 | data = cpustat[CPUTIME_SYSTEM] + cpustat[CPUTIME_IRQ] + |
121 | cpustat[CPUTIME_SOFTIRQ]; |
122 | break; |
123 | case CPUACCT_STAT_NSTATS: |
124 | data = *cpuusage; |
125 | break; |
126 | } |
127 | |
128 | #ifndef CONFIG_64BIT |
129 | raw_spin_rq_unlock_irq(cpu_rq(cpu)); |
130 | #endif |
131 | |
132 | return data; |
133 | } |
134 | |
135 | static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu) |
136 | { |
137 | u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); |
138 | u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat; |
139 | |
140 | /* Don't allow to reset global kernel_cpustat */ |
141 | if (ca == &root_cpuacct) |
142 | return; |
143 | |
144 | #ifndef CONFIG_64BIT |
145 | /* |
146 | * Take rq->lock to make 64-bit write safe on 32-bit platforms. |
147 | */ |
148 | raw_spin_rq_lock_irq(cpu_rq(cpu)); |
149 | #endif |
150 | *cpuusage = 0; |
151 | cpustat[CPUTIME_USER] = cpustat[CPUTIME_NICE] = 0; |
152 | cpustat[CPUTIME_SYSTEM] = cpustat[CPUTIME_IRQ] = 0; |
153 | cpustat[CPUTIME_SOFTIRQ] = 0; |
154 | |
155 | #ifndef CONFIG_64BIT |
156 | raw_spin_rq_unlock_irq(cpu_rq(cpu)); |
157 | #endif |
158 | } |
159 | |
160 | /* Return total CPU usage (in nanoseconds) of a group */ |
161 | static u64 __cpuusage_read(struct cgroup_subsys_state *css, |
162 | enum cpuacct_stat_index index) |
163 | { |
164 | struct cpuacct *ca = css_ca(css); |
165 | u64 totalcpuusage = 0; |
166 | int i; |
167 | |
168 | for_each_possible_cpu(i) |
169 | totalcpuusage += cpuacct_cpuusage_read(ca, cpu: i, index); |
170 | |
171 | return totalcpuusage; |
172 | } |
173 | |
174 | static u64 cpuusage_user_read(struct cgroup_subsys_state *css, |
175 | struct cftype *cft) |
176 | { |
177 | return __cpuusage_read(css, index: CPUACCT_STAT_USER); |
178 | } |
179 | |
180 | static u64 cpuusage_sys_read(struct cgroup_subsys_state *css, |
181 | struct cftype *cft) |
182 | { |
183 | return __cpuusage_read(css, index: CPUACCT_STAT_SYSTEM); |
184 | } |
185 | |
186 | static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft) |
187 | { |
188 | return __cpuusage_read(css, index: CPUACCT_STAT_NSTATS); |
189 | } |
190 | |
191 | static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft, |
192 | u64 val) |
193 | { |
194 | struct cpuacct *ca = css_ca(css); |
195 | int cpu; |
196 | |
197 | /* |
198 | * Only allow '0' here to do a reset. |
199 | */ |
200 | if (val) |
201 | return -EINVAL; |
202 | |
203 | for_each_possible_cpu(cpu) |
204 | cpuacct_cpuusage_write(ca, cpu); |
205 | |
206 | return 0; |
207 | } |
208 | |
209 | static int __cpuacct_percpu_seq_show(struct seq_file *m, |
210 | enum cpuacct_stat_index index) |
211 | { |
212 | struct cpuacct *ca = css_ca(css: seq_css(seq: m)); |
213 | u64 percpu; |
214 | int i; |
215 | |
216 | for_each_possible_cpu(i) { |
217 | percpu = cpuacct_cpuusage_read(ca, cpu: i, index); |
218 | seq_printf(m, fmt: "%llu " , (unsigned long long) percpu); |
219 | } |
220 | seq_printf(m, fmt: "\n" ); |
221 | return 0; |
222 | } |
223 | |
224 | static int cpuacct_percpu_user_seq_show(struct seq_file *m, void *V) |
225 | { |
226 | return __cpuacct_percpu_seq_show(m, index: CPUACCT_STAT_USER); |
227 | } |
228 | |
229 | static int cpuacct_percpu_sys_seq_show(struct seq_file *m, void *V) |
230 | { |
231 | return __cpuacct_percpu_seq_show(m, index: CPUACCT_STAT_SYSTEM); |
232 | } |
233 | |
234 | static int cpuacct_percpu_seq_show(struct seq_file *m, void *V) |
235 | { |
236 | return __cpuacct_percpu_seq_show(m, index: CPUACCT_STAT_NSTATS); |
237 | } |
238 | |
239 | static int cpuacct_all_seq_show(struct seq_file *m, void *V) |
240 | { |
241 | struct cpuacct *ca = css_ca(css: seq_css(seq: m)); |
242 | int index; |
243 | int cpu; |
244 | |
245 | seq_puts(m, s: "cpu" ); |
246 | for (index = 0; index < CPUACCT_STAT_NSTATS; index++) |
247 | seq_printf(m, fmt: " %s" , cpuacct_stat_desc[index]); |
248 | seq_puts(m, s: "\n" ); |
249 | |
250 | for_each_possible_cpu(cpu) { |
251 | seq_printf(m, fmt: "%d" , cpu); |
252 | for (index = 0; index < CPUACCT_STAT_NSTATS; index++) |
253 | seq_printf(m, fmt: " %llu" , |
254 | cpuacct_cpuusage_read(ca, cpu, index)); |
255 | seq_puts(m, s: "\n" ); |
256 | } |
257 | return 0; |
258 | } |
259 | |
260 | static int cpuacct_stats_show(struct seq_file *sf, void *v) |
261 | { |
262 | struct cpuacct *ca = css_ca(css: seq_css(seq: sf)); |
263 | struct task_cputime cputime; |
264 | u64 val[CPUACCT_STAT_NSTATS]; |
265 | int cpu; |
266 | int stat; |
267 | |
268 | memset(&cputime, 0, sizeof(cputime)); |
269 | for_each_possible_cpu(cpu) { |
270 | u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat; |
271 | |
272 | cputime.utime += cpustat[CPUTIME_USER]; |
273 | cputime.utime += cpustat[CPUTIME_NICE]; |
274 | cputime.stime += cpustat[CPUTIME_SYSTEM]; |
275 | cputime.stime += cpustat[CPUTIME_IRQ]; |
276 | cputime.stime += cpustat[CPUTIME_SOFTIRQ]; |
277 | |
278 | cputime.sum_exec_runtime += *per_cpu_ptr(ca->cpuusage, cpu); |
279 | } |
280 | |
281 | cputime_adjust(curr: &cputime, prev: &seq_css(seq: sf)->cgroup->prev_cputime, |
282 | ut: &val[CPUACCT_STAT_USER], st: &val[CPUACCT_STAT_SYSTEM]); |
283 | |
284 | for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) { |
285 | seq_printf(m: sf, fmt: "%s %llu\n" , cpuacct_stat_desc[stat], |
286 | nsec_to_clock_t(x: val[stat])); |
287 | } |
288 | |
289 | return 0; |
290 | } |
291 | |
292 | static struct cftype files[] = { |
293 | { |
294 | .name = "usage" , |
295 | .read_u64 = cpuusage_read, |
296 | .write_u64 = cpuusage_write, |
297 | }, |
298 | { |
299 | .name = "usage_user" , |
300 | .read_u64 = cpuusage_user_read, |
301 | }, |
302 | { |
303 | .name = "usage_sys" , |
304 | .read_u64 = cpuusage_sys_read, |
305 | }, |
306 | { |
307 | .name = "usage_percpu" , |
308 | .seq_show = cpuacct_percpu_seq_show, |
309 | }, |
310 | { |
311 | .name = "usage_percpu_user" , |
312 | .seq_show = cpuacct_percpu_user_seq_show, |
313 | }, |
314 | { |
315 | .name = "usage_percpu_sys" , |
316 | .seq_show = cpuacct_percpu_sys_seq_show, |
317 | }, |
318 | { |
319 | .name = "usage_all" , |
320 | .seq_show = cpuacct_all_seq_show, |
321 | }, |
322 | { |
323 | .name = "stat" , |
324 | .seq_show = cpuacct_stats_show, |
325 | }, |
326 | { } /* terminate */ |
327 | }; |
328 | |
329 | /* |
330 | * charge this task's execution time to its accounting group. |
331 | * |
332 | * called with rq->lock held. |
333 | */ |
334 | void cpuacct_charge(struct task_struct *tsk, u64 cputime) |
335 | { |
336 | unsigned int cpu = task_cpu(p: tsk); |
337 | struct cpuacct *ca; |
338 | |
339 | lockdep_assert_rq_held(cpu_rq(cpu)); |
340 | |
341 | for (ca = task_ca(tsk); ca; ca = parent_ca(ca)) |
342 | *per_cpu_ptr(ca->cpuusage, cpu) += cputime; |
343 | } |
344 | |
345 | /* |
346 | * Add user/system time to cpuacct. |
347 | * |
348 | * Note: it's the caller that updates the account of the root cgroup. |
349 | */ |
350 | void cpuacct_account_field(struct task_struct *tsk, int index, u64 val) |
351 | { |
352 | struct cpuacct *ca; |
353 | |
354 | for (ca = task_ca(tsk); ca != &root_cpuacct; ca = parent_ca(ca)) |
355 | __this_cpu_add(ca->cpustat->cpustat[index], val); |
356 | } |
357 | |
358 | struct cgroup_subsys cpuacct_cgrp_subsys = { |
359 | .css_alloc = cpuacct_css_alloc, |
360 | .css_free = cpuacct_css_free, |
361 | .legacy_cftypes = files, |
362 | .early_init = true, |
363 | }; |
364 | |