1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | // |
3 | // VCPU stall detector. |
4 | // Copyright (C) Google, 2022 |
5 | |
6 | #include <linux/cpu.h> |
7 | #include <linux/init.h> |
8 | #include <linux/io.h> |
9 | #include <linux/kernel.h> |
10 | |
11 | #include <linux/device.h> |
12 | #include <linux/interrupt.h> |
13 | #include <linux/module.h> |
14 | #include <linux/nmi.h> |
15 | #include <linux/of.h> |
16 | #include <linux/param.h> |
17 | #include <linux/percpu.h> |
18 | #include <linux/platform_device.h> |
19 | #include <linux/slab.h> |
20 | |
/*
 * Register offsets inside one vCPU's register window. The windows of
 * consecutive vCPUs are VCPU_STALL_REG_LEN bytes apart.
 */
#define VCPU_STALL_REG_STATUS		0x00	/* written 1 to start, 0 to stop */
#define VCPU_STALL_REG_LOAD_CNT		0x04	/* reload value, in clock ticks */
#define VCPU_STALL_REG_CURRENT_CNT	0x08	/* not accessed by this driver */
#define VCPU_STALL_REG_CLOCK_FREQ_HZ	0x0C	/* clock frequency, in Hz */
#define VCPU_STALL_REG_LEN		0x10	/* size of one vCPU window */

/*
 * Defaults used when the device tree gives no usable value, and the
 * (exclusive) upper bounds applied to device-tree supplied values.
 */
#define VCPU_STALL_DEFAULT_CLOCK_HZ	10
#define VCPU_STALL_MAX_CLOCK_HZ		100
#define VCPU_STALL_DEFAULT_TIMEOUT_SEC	8
#define VCPU_STALL_MAX_TIMEOUT_SEC	600
31 | |
32 | struct vcpu_stall_detect_config { |
33 | u32 clock_freq_hz; |
34 | u32 stall_timeout_sec; |
35 | |
36 | void __iomem *membase; |
37 | struct platform_device *dev; |
38 | enum cpuhp_state hp_online; |
39 | }; |
40 | |
41 | struct vcpu_stall_priv { |
42 | struct hrtimer vcpu_hrtimer; |
43 | bool is_initialized; |
44 | }; |
45 | |
46 | /* The vcpu stall configuration structure which applies to all the CPUs */ |
47 | static struct vcpu_stall_detect_config vcpu_stall_config; |
48 | |
49 | #define vcpu_stall_reg_write(vcpu, reg, value) \ |
50 | writel_relaxed((value), \ |
51 | (void __iomem *)(vcpu_stall_config.membase + \ |
52 | (vcpu) * VCPU_STALL_REG_LEN + (reg))) |
53 | |
54 | |
55 | static struct vcpu_stall_priv __percpu *vcpu_stall_detectors; |
56 | |
57 | static enum hrtimer_restart |
58 | vcpu_stall_detect_timer_fn(struct hrtimer *hrtimer) |
59 | { |
60 | u32 ticks, ping_timeout_ms; |
61 | |
62 | /* Reload the stall detector counter register every |
63 | * `ping_timeout_ms` to prevent the virtual device |
64 | * from decrementing it to 0. The virtual device decrements this |
65 | * register at 'clock_freq_hz' frequency. |
66 | */ |
67 | ticks = vcpu_stall_config.clock_freq_hz * |
68 | vcpu_stall_config.stall_timeout_sec; |
69 | vcpu_stall_reg_write(smp_processor_id(), |
70 | VCPU_STALL_REG_LOAD_CNT, ticks); |
71 | |
72 | ping_timeout_ms = vcpu_stall_config.stall_timeout_sec * |
73 | MSEC_PER_SEC / 2; |
74 | hrtimer_forward_now(timer: hrtimer, |
75 | interval: ms_to_ktime(ms: ping_timeout_ms)); |
76 | |
77 | return HRTIMER_RESTART; |
78 | } |
79 | |
80 | static int start_stall_detector_cpu(unsigned int cpu) |
81 | { |
82 | u32 ticks, ping_timeout_ms; |
83 | struct vcpu_stall_priv *vcpu_stall_detector = |
84 | this_cpu_ptr(vcpu_stall_detectors); |
85 | struct hrtimer *vcpu_hrtimer = &vcpu_stall_detector->vcpu_hrtimer; |
86 | |
87 | vcpu_stall_reg_write(cpu, VCPU_STALL_REG_CLOCK_FREQ_HZ, |
88 | vcpu_stall_config.clock_freq_hz); |
89 | |
90 | /* Compute the number of ticks required for the stall detector |
91 | * counter register based on the internal clock frequency and the |
92 | * timeout value given from the device tree. |
93 | */ |
94 | ticks = vcpu_stall_config.clock_freq_hz * |
95 | vcpu_stall_config.stall_timeout_sec; |
96 | vcpu_stall_reg_write(cpu, VCPU_STALL_REG_LOAD_CNT, ticks); |
97 | |
98 | /* Enable the internal clock and start the stall detector */ |
99 | vcpu_stall_reg_write(cpu, VCPU_STALL_REG_STATUS, 1); |
100 | |
101 | /* Pet the stall detector at half of its expiration timeout |
102 | * to prevent spurious resets. |
103 | */ |
104 | ping_timeout_ms = vcpu_stall_config.stall_timeout_sec * |
105 | MSEC_PER_SEC / 2; |
106 | |
107 | hrtimer_init(timer: vcpu_hrtimer, CLOCK_MONOTONIC, mode: HRTIMER_MODE_REL); |
108 | vcpu_hrtimer->function = vcpu_stall_detect_timer_fn; |
109 | vcpu_stall_detector->is_initialized = true; |
110 | |
111 | hrtimer_start(timer: vcpu_hrtimer, tim: ms_to_ktime(ms: ping_timeout_ms), |
112 | mode: HRTIMER_MODE_REL_PINNED); |
113 | |
114 | return 0; |
115 | } |
116 | |
117 | static int stop_stall_detector_cpu(unsigned int cpu) |
118 | { |
119 | struct vcpu_stall_priv *vcpu_stall_detector = |
120 | per_cpu_ptr(vcpu_stall_detectors, cpu); |
121 | |
122 | if (!vcpu_stall_detector->is_initialized) |
123 | return 0; |
124 | |
125 | /* Disable the stall detector for the current CPU */ |
126 | hrtimer_cancel(timer: &vcpu_stall_detector->vcpu_hrtimer); |
127 | vcpu_stall_reg_write(cpu, VCPU_STALL_REG_STATUS, 0); |
128 | vcpu_stall_detector->is_initialized = false; |
129 | |
130 | return 0; |
131 | } |
132 | |
133 | static int vcpu_stall_detect_probe(struct platform_device *pdev) |
134 | { |
135 | int ret; |
136 | struct resource *r; |
137 | void __iomem *membase; |
138 | u32 clock_freq_hz = VCPU_STALL_DEFAULT_CLOCK_HZ; |
139 | u32 stall_timeout_sec = VCPU_STALL_DEFAULT_TIMEOUT_SEC; |
140 | struct device_node *np = pdev->dev.of_node; |
141 | |
142 | vcpu_stall_detectors = devm_alloc_percpu(&pdev->dev, |
143 | typeof(struct vcpu_stall_priv)); |
144 | if (!vcpu_stall_detectors) |
145 | return -ENOMEM; |
146 | |
147 | membase = devm_platform_get_and_ioremap_resource(pdev, index: 0, res: &r); |
148 | if (IS_ERR(ptr: membase)) { |
149 | dev_err(&pdev->dev, "Failed to get memory resource\n" ); |
150 | return PTR_ERR(ptr: membase); |
151 | } |
152 | |
153 | if (!of_property_read_u32(np, propname: "clock-frequency" , out_value: &clock_freq_hz)) { |
154 | if (!(clock_freq_hz > 0 && |
155 | clock_freq_hz < VCPU_STALL_MAX_CLOCK_HZ)) { |
156 | dev_warn(&pdev->dev, "clk out of range\n" ); |
157 | clock_freq_hz = VCPU_STALL_DEFAULT_CLOCK_HZ; |
158 | } |
159 | } |
160 | |
161 | if (!of_property_read_u32(np, propname: "timeout-sec" , out_value: &stall_timeout_sec)) { |
162 | if (!(stall_timeout_sec > 0 && |
163 | stall_timeout_sec < VCPU_STALL_MAX_TIMEOUT_SEC)) { |
164 | dev_warn(&pdev->dev, "stall timeout out of range\n" ); |
165 | stall_timeout_sec = VCPU_STALL_DEFAULT_TIMEOUT_SEC; |
166 | } |
167 | } |
168 | |
169 | vcpu_stall_config = (struct vcpu_stall_detect_config) { |
170 | .membase = membase, |
171 | .clock_freq_hz = clock_freq_hz, |
172 | .stall_timeout_sec = stall_timeout_sec |
173 | }; |
174 | |
175 | ret = cpuhp_setup_state(state: CPUHP_AP_ONLINE_DYN, |
176 | name: "virt/vcpu_stall_detector:online" , |
177 | startup: start_stall_detector_cpu, |
178 | teardown: stop_stall_detector_cpu); |
179 | if (ret < 0) { |
180 | dev_err(&pdev->dev, "failed to install cpu hotplug" ); |
181 | goto err; |
182 | } |
183 | |
184 | vcpu_stall_config.hp_online = ret; |
185 | return 0; |
186 | err: |
187 | return ret; |
188 | } |
189 | |
190 | static void vcpu_stall_detect_remove(struct platform_device *pdev) |
191 | { |
192 | int cpu; |
193 | |
194 | cpuhp_remove_state(state: vcpu_stall_config.hp_online); |
195 | |
196 | for_each_possible_cpu(cpu) |
197 | stop_stall_detector_cpu(cpu); |
198 | } |
199 | |
200 | static const struct of_device_id vcpu_stall_detect_of_match[] = { |
201 | { .compatible = "qemu,vcpu-stall-detector" , }, |
202 | {} |
203 | }; |
204 | |
205 | MODULE_DEVICE_TABLE(of, vcpu_stall_detect_of_match); |
206 | |
207 | static struct platform_driver vcpu_stall_detect_driver = { |
208 | .probe = vcpu_stall_detect_probe, |
209 | .remove_new = vcpu_stall_detect_remove, |
210 | .driver = { |
211 | .name = KBUILD_MODNAME, |
212 | .of_match_table = vcpu_stall_detect_of_match, |
213 | }, |
214 | }; |
215 | |
216 | module_platform_driver(vcpu_stall_detect_driver); |
217 | |
218 | MODULE_LICENSE("GPL" ); |
219 | MODULE_AUTHOR("Sebastian Ene <sebastianene@google.com>" ); |
220 | MODULE_DESCRIPTION("VCPU stall detector" ); |
221 | |