1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Copyright (c) 2009, Microsoft Corporation. |
4 | * |
5 | * Authors: |
6 | * Haiyang Zhang <haiyangz@microsoft.com> |
7 | * Hank Janssen <hjanssen@microsoft.com> |
8 | * K. Y. Srinivasan <kys@microsoft.com> |
9 | */ |
10 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
11 | |
12 | #include <linux/init.h> |
13 | #include <linux/module.h> |
14 | #include <linux/device.h> |
15 | #include <linux/platform_device.h> |
16 | #include <linux/interrupt.h> |
17 | #include <linux/sysctl.h> |
18 | #include <linux/slab.h> |
19 | #include <linux/acpi.h> |
20 | #include <linux/completion.h> |
21 | #include <linux/hyperv.h> |
22 | #include <linux/kernel_stat.h> |
23 | #include <linux/of_address.h> |
24 | #include <linux/clockchips.h> |
25 | #include <linux/cpu.h> |
26 | #include <linux/sched/isolation.h> |
27 | #include <linux/sched/task_stack.h> |
28 | |
29 | #include <linux/delay.h> |
30 | #include <linux/panic_notifier.h> |
31 | #include <linux/ptrace.h> |
32 | #include <linux/screen_info.h> |
33 | #include <linux/efi.h> |
34 | #include <linux/random.h> |
35 | #include <linux/kernel.h> |
36 | #include <linux/syscore_ops.h> |
37 | #include <linux/dma-map-ops.h> |
38 | #include <linux/pci.h> |
39 | #include <clocksource/hyperv_timer.h> |
40 | #include <asm/mshyperv.h> |
41 | #include "hyperv_vmbus.h" |
42 | |
43 | struct vmbus_dynid { |
44 | struct list_head node; |
45 | struct hv_vmbus_device_id id; |
46 | }; |
47 | |
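/* Top-level VMbus root device; set when the VMbus ACPI/platform device is probed. */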
48 | static struct device *hv_dev; |
49 | |
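/* CPU hotplug state handle returned by cpuhp_setup_state(); used for teardown. */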
50 | static int hyperv_cpuhp_online; |
51 | |
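/* Per-cpu cookie required by request_percpu_irq()/free_percpu_irq(). */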
52 | static long __percpu *vmbus_evt; |
53 | |
54 | /* Values parsed from ACPI DSDT */ |
55 | int vmbus_irq; |
56 | int vmbus_interrupt; |
57 | |
58 | /* |
59 | * The panic notifier below is responsible solely for unloading the |
60 | * vmbus connection, which is necessary in a panic event. |
61 | * |
* Note that this notifier has an intricate relation with the Hyper-V
* framebuffer panic notifier: the vmbus connection must still be alive
* for that notifier to succeed, so the two are ordered with respect to
* each other [see hvfb_on_panic()] using the notifiers' priorities.
66 | */ |
67 | static int hv_panic_vmbus_unload(struct notifier_block *nb, unsigned long val, |
68 | void *args) |
69 | { |
vmbus_initiate_unload(true);
71 | return NOTIFY_DONE; |
72 | } |
73 | static struct notifier_block hyperv_panic_vmbus_unload_block = { |
74 | .notifier_call = hv_panic_vmbus_unload, |
75 | .priority = INT_MIN + 1, /* almost the latest one to execute */ |
76 | }; |
77 | |
static const char *fb_mmio_name = "fb_range";
79 | static struct resource *fb_mmio; |
80 | static struct resource *hyperv_mmio; |
81 | static DEFINE_MUTEX(hyperv_mmio_lock); |
82 | |
83 | static int vmbus_exists(void) |
84 | { |
85 | if (hv_dev == NULL) |
86 | return -ENODEV; |
87 | |
88 | return 0; |
89 | } |
90 | |
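/*
* Monitor pages track channels in groups of 32: the monitor ID selects the
* trigger group (monitorid / 32) and the slot within that group
* (monitorid % 32).
*/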
91 | static u8 channel_monitor_group(const struct vmbus_channel *channel) |
92 | { |
93 | return (u8)channel->offermsg.monitorid / 32; |
94 | } |
95 | |
96 | static u8 channel_monitor_offset(const struct vmbus_channel *channel) |
97 | { |
98 | return (u8)channel->offermsg.monitorid % 32; |
99 | } |
100 | |
101 | static u32 channel_pending(const struct vmbus_channel *channel, |
102 | const struct hv_monitor_page *monitor_page) |
103 | { |
104 | u8 monitor_group = channel_monitor_group(channel); |
105 | |
106 | return monitor_page->trigger_group[monitor_group].pending; |
107 | } |
108 | |
109 | static u32 channel_latency(const struct vmbus_channel *channel, |
110 | const struct hv_monitor_page *monitor_page) |
111 | { |
112 | u8 monitor_group = channel_monitor_group(channel); |
113 | u8 monitor_offset = channel_monitor_offset(channel); |
114 | |
115 | return monitor_page->latency[monitor_group][monitor_offset]; |
116 | } |
117 | |
118 | static u32 channel_conn_id(struct vmbus_channel *channel, |
119 | struct hv_monitor_page *monitor_page) |
120 | { |
121 | u8 monitor_group = channel_monitor_group(channel); |
122 | u8 monitor_offset = channel_monitor_offset(channel); |
123 | |
124 | return monitor_page->parameter[monitor_group][monitor_offset].connectionid.u.id; |
125 | } |
126 | |
127 | static ssize_t id_show(struct device *dev, struct device_attribute *dev_attr, |
128 | char *buf) |
129 | { |
130 | struct hv_device *hv_dev = device_to_hv_device(dev); |
131 | |
132 | if (!hv_dev->channel) |
133 | return -ENODEV; |
return sprintf(buf, "%d\n", hv_dev->channel->offermsg.child_relid);
135 | } |
136 | static DEVICE_ATTR_RO(id); |
137 | |
138 | static ssize_t state_show(struct device *dev, struct device_attribute *dev_attr, |
139 | char *buf) |
140 | { |
141 | struct hv_device *hv_dev = device_to_hv_device(dev); |
142 | |
143 | if (!hv_dev->channel) |
144 | return -ENODEV; |
return sprintf(buf, "%d\n", hv_dev->channel->state);
146 | } |
147 | static DEVICE_ATTR_RO(state); |
148 | |
149 | static ssize_t monitor_id_show(struct device *dev, |
150 | struct device_attribute *dev_attr, char *buf) |
151 | { |
152 | struct hv_device *hv_dev = device_to_hv_device(dev); |
153 | |
154 | if (!hv_dev->channel) |
155 | return -ENODEV; |
return sprintf(buf, "%d\n", hv_dev->channel->offermsg.monitorid);
157 | } |
158 | static DEVICE_ATTR_RO(monitor_id); |
159 | |
160 | static ssize_t class_id_show(struct device *dev, |
161 | struct device_attribute *dev_attr, char *buf) |
162 | { |
163 | struct hv_device *hv_dev = device_to_hv_device(dev); |
164 | |
165 | if (!hv_dev->channel) |
166 | return -ENODEV; |
return sprintf(buf, "{%pUl}\n",
168 | &hv_dev->channel->offermsg.offer.if_type); |
169 | } |
170 | static DEVICE_ATTR_RO(class_id); |
171 | |
172 | static ssize_t device_id_show(struct device *dev, |
173 | struct device_attribute *dev_attr, char *buf) |
174 | { |
175 | struct hv_device *hv_dev = device_to_hv_device(dev); |
176 | |
177 | if (!hv_dev->channel) |
178 | return -ENODEV; |
return sprintf(buf, "{%pUl}\n",
180 | &hv_dev->channel->offermsg.offer.if_instance); |
181 | } |
182 | static DEVICE_ATTR_RO(device_id); |
183 | |
184 | static ssize_t modalias_show(struct device *dev, |
185 | struct device_attribute *dev_attr, char *buf) |
186 | { |
187 | struct hv_device *hv_dev = device_to_hv_device(dev); |
188 | |
return sprintf(buf, "vmbus:%*phN\n", UUID_SIZE, &hv_dev->dev_type);
190 | } |
191 | static DEVICE_ATTR_RO(modalias); |
192 | |
193 | #ifdef CONFIG_NUMA |
194 | static ssize_t numa_node_show(struct device *dev, |
195 | struct device_attribute *attr, char *buf) |
196 | { |
197 | struct hv_device *hv_dev = device_to_hv_device(dev); |
198 | |
199 | if (!hv_dev->channel) |
200 | return -ENODEV; |
201 | |
return sprintf(buf, "%d\n", cpu_to_node(hv_dev->channel->target_cpu));
203 | } |
204 | static DEVICE_ATTR_RO(numa_node); |
205 | #endif |
206 | |
207 | static ssize_t server_monitor_pending_show(struct device *dev, |
208 | struct device_attribute *dev_attr, |
209 | char *buf) |
210 | { |
211 | struct hv_device *hv_dev = device_to_hv_device(dev); |
212 | |
213 | if (!hv_dev->channel) |
214 | return -ENODEV; |
return sprintf(buf, "%d\n",
	       channel_pending(hv_dev->channel,
			       vmbus_connection.monitor_pages[0]));
218 | } |
219 | static DEVICE_ATTR_RO(server_monitor_pending); |
220 | |
221 | static ssize_t client_monitor_pending_show(struct device *dev, |
222 | struct device_attribute *dev_attr, |
223 | char *buf) |
224 | { |
225 | struct hv_device *hv_dev = device_to_hv_device(dev); |
226 | |
227 | if (!hv_dev->channel) |
228 | return -ENODEV; |
return sprintf(buf, "%d\n",
	       channel_pending(hv_dev->channel,
			       vmbus_connection.monitor_pages[1]));
232 | } |
233 | static DEVICE_ATTR_RO(client_monitor_pending); |
234 | |
235 | static ssize_t server_monitor_latency_show(struct device *dev, |
236 | struct device_attribute *dev_attr, |
237 | char *buf) |
238 | { |
239 | struct hv_device *hv_dev = device_to_hv_device(dev); |
240 | |
241 | if (!hv_dev->channel) |
242 | return -ENODEV; |
return sprintf(buf, "%d\n",
	       channel_latency(hv_dev->channel,
			       vmbus_connection.monitor_pages[0]));
246 | } |
247 | static DEVICE_ATTR_RO(server_monitor_latency); |
248 | |
249 | static ssize_t client_monitor_latency_show(struct device *dev, |
250 | struct device_attribute *dev_attr, |
251 | char *buf) |
252 | { |
253 | struct hv_device *hv_dev = device_to_hv_device(dev); |
254 | |
255 | if (!hv_dev->channel) |
256 | return -ENODEV; |
return sprintf(buf, "%d\n",
	       channel_latency(hv_dev->channel,
			       vmbus_connection.monitor_pages[1]));
260 | } |
261 | static DEVICE_ATTR_RO(client_monitor_latency); |
262 | |
263 | static ssize_t server_monitor_conn_id_show(struct device *dev, |
264 | struct device_attribute *dev_attr, |
265 | char *buf) |
266 | { |
267 | struct hv_device *hv_dev = device_to_hv_device(dev); |
268 | |
269 | if (!hv_dev->channel) |
270 | return -ENODEV; |
return sprintf(buf, "%d\n",
	       channel_conn_id(hv_dev->channel,
			       vmbus_connection.monitor_pages[0]));
274 | } |
275 | static DEVICE_ATTR_RO(server_monitor_conn_id); |
276 | |
277 | static ssize_t client_monitor_conn_id_show(struct device *dev, |
278 | struct device_attribute *dev_attr, |
279 | char *buf) |
280 | { |
281 | struct hv_device *hv_dev = device_to_hv_device(dev); |
282 | |
283 | if (!hv_dev->channel) |
284 | return -ENODEV; |
return sprintf(buf, "%d\n",
	       channel_conn_id(hv_dev->channel,
			       vmbus_connection.monitor_pages[1]));
288 | } |
289 | static DEVICE_ATTR_RO(client_monitor_conn_id); |
290 | |
291 | static ssize_t out_intr_mask_show(struct device *dev, |
292 | struct device_attribute *dev_attr, char *buf) |
293 | { |
294 | struct hv_device *hv_dev = device_to_hv_device(dev); |
295 | struct hv_ring_buffer_debug_info outbound; |
296 | int ret; |
297 | |
298 | if (!hv_dev->channel) |
299 | return -ENODEV; |
300 | |
ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound,
				  &outbound);
303 | if (ret < 0) |
304 | return ret; |
305 | |
return sprintf(buf, "%d\n", outbound.current_interrupt_mask);
307 | } |
308 | static DEVICE_ATTR_RO(out_intr_mask); |
309 | |
310 | static ssize_t out_read_index_show(struct device *dev, |
311 | struct device_attribute *dev_attr, char *buf) |
312 | { |
313 | struct hv_device *hv_dev = device_to_hv_device(dev); |
314 | struct hv_ring_buffer_debug_info outbound; |
315 | int ret; |
316 | |
317 | if (!hv_dev->channel) |
318 | return -ENODEV; |
319 | |
ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound,
				  &outbound);
322 | if (ret < 0) |
323 | return ret; |
return sprintf(buf, "%d\n", outbound.current_read_index);
325 | } |
326 | static DEVICE_ATTR_RO(out_read_index); |
327 | |
328 | static ssize_t out_write_index_show(struct device *dev, |
329 | struct device_attribute *dev_attr, |
330 | char *buf) |
331 | { |
332 | struct hv_device *hv_dev = device_to_hv_device(dev); |
333 | struct hv_ring_buffer_debug_info outbound; |
334 | int ret; |
335 | |
336 | if (!hv_dev->channel) |
337 | return -ENODEV; |
338 | |
ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound,
				  &outbound);
341 | if (ret < 0) |
342 | return ret; |
return sprintf(buf, "%d\n", outbound.current_write_index);
344 | } |
345 | static DEVICE_ATTR_RO(out_write_index); |
346 | |
347 | static ssize_t out_read_bytes_avail_show(struct device *dev, |
348 | struct device_attribute *dev_attr, |
349 | char *buf) |
350 | { |
351 | struct hv_device *hv_dev = device_to_hv_device(dev); |
352 | struct hv_ring_buffer_debug_info outbound; |
353 | int ret; |
354 | |
355 | if (!hv_dev->channel) |
356 | return -ENODEV; |
357 | |
ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound,
				  &outbound);
360 | if (ret < 0) |
361 | return ret; |
return sprintf(buf, "%d\n", outbound.bytes_avail_toread);
363 | } |
364 | static DEVICE_ATTR_RO(out_read_bytes_avail); |
365 | |
366 | static ssize_t out_write_bytes_avail_show(struct device *dev, |
367 | struct device_attribute *dev_attr, |
368 | char *buf) |
369 | { |
370 | struct hv_device *hv_dev = device_to_hv_device(dev); |
371 | struct hv_ring_buffer_debug_info outbound; |
372 | int ret; |
373 | |
374 | if (!hv_dev->channel) |
375 | return -ENODEV; |
376 | |
ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound,
				  &outbound);
379 | if (ret < 0) |
380 | return ret; |
return sprintf(buf, "%d\n", outbound.bytes_avail_towrite);
382 | } |
383 | static DEVICE_ATTR_RO(out_write_bytes_avail); |
384 | |
385 | static ssize_t in_intr_mask_show(struct device *dev, |
386 | struct device_attribute *dev_attr, char *buf) |
387 | { |
388 | struct hv_device *hv_dev = device_to_hv_device(dev); |
389 | struct hv_ring_buffer_debug_info inbound; |
390 | int ret; |
391 | |
392 | if (!hv_dev->channel) |
393 | return -ENODEV; |
394 | |
ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
396 | if (ret < 0) |
397 | return ret; |
398 | |
return sprintf(buf, "%d\n", inbound.current_interrupt_mask);
400 | } |
401 | static DEVICE_ATTR_RO(in_intr_mask); |
402 | |
403 | static ssize_t in_read_index_show(struct device *dev, |
404 | struct device_attribute *dev_attr, char *buf) |
405 | { |
406 | struct hv_device *hv_dev = device_to_hv_device(dev); |
407 | struct hv_ring_buffer_debug_info inbound; |
408 | int ret; |
409 | |
410 | if (!hv_dev->channel) |
411 | return -ENODEV; |
412 | |
ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
414 | if (ret < 0) |
415 | return ret; |
416 | |
return sprintf(buf, "%d\n", inbound.current_read_index);
418 | } |
419 | static DEVICE_ATTR_RO(in_read_index); |
420 | |
421 | static ssize_t in_write_index_show(struct device *dev, |
422 | struct device_attribute *dev_attr, char *buf) |
423 | { |
424 | struct hv_device *hv_dev = device_to_hv_device(dev); |
425 | struct hv_ring_buffer_debug_info inbound; |
426 | int ret; |
427 | |
428 | if (!hv_dev->channel) |
429 | return -ENODEV; |
430 | |
ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
432 | if (ret < 0) |
433 | return ret; |
434 | |
return sprintf(buf, "%d\n", inbound.current_write_index);
436 | } |
437 | static DEVICE_ATTR_RO(in_write_index); |
438 | |
439 | static ssize_t in_read_bytes_avail_show(struct device *dev, |
440 | struct device_attribute *dev_attr, |
441 | char *buf) |
442 | { |
443 | struct hv_device *hv_dev = device_to_hv_device(dev); |
444 | struct hv_ring_buffer_debug_info inbound; |
445 | int ret; |
446 | |
447 | if (!hv_dev->channel) |
448 | return -ENODEV; |
449 | |
ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
451 | if (ret < 0) |
452 | return ret; |
453 | |
return sprintf(buf, "%d\n", inbound.bytes_avail_toread);
455 | } |
456 | static DEVICE_ATTR_RO(in_read_bytes_avail); |
457 | |
458 | static ssize_t in_write_bytes_avail_show(struct device *dev, |
459 | struct device_attribute *dev_attr, |
460 | char *buf) |
461 | { |
462 | struct hv_device *hv_dev = device_to_hv_device(dev); |
463 | struct hv_ring_buffer_debug_info inbound; |
464 | int ret; |
465 | |
466 | if (!hv_dev->channel) |
467 | return -ENODEV; |
468 | |
ret = hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
470 | if (ret < 0) |
471 | return ret; |
472 | |
return sprintf(buf, "%d\n", inbound.bytes_avail_towrite);
474 | } |
475 | static DEVICE_ATTR_RO(in_write_bytes_avail); |
476 | |
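/*
* Emit one "child_relid:target_cpu" line for the primary channel, followed
* by one line for each sub-channel on its sc_list.
*/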
477 | static ssize_t channel_vp_mapping_show(struct device *dev, |
478 | struct device_attribute *dev_attr, |
479 | char *buf) |
480 | { |
481 | struct hv_device *hv_dev = device_to_hv_device(dev); |
482 | struct vmbus_channel *channel = hv_dev->channel, *cur_sc; |
483 | int buf_size = PAGE_SIZE, n_written, tot_written; |
484 | struct list_head *cur; |
485 | |
486 | if (!channel) |
487 | return -ENODEV; |
488 | |
489 | mutex_lock(&vmbus_connection.channel_mutex); |
490 | |
tot_written = snprintf(buf, buf_size, "%u:%u\n",
492 | channel->offermsg.child_relid, channel->target_cpu); |
493 | |
494 | list_for_each(cur, &channel->sc_list) { |
495 | if (tot_written >= buf_size - 1) |
496 | break; |
497 | |
498 | cur_sc = list_entry(cur, struct vmbus_channel, sc_list); |
n_written = scnprintf(buf + tot_written,
		      buf_size - tot_written,
		      "%u:%u\n",
		      cur_sc->offermsg.child_relid,
		      cur_sc->target_cpu);
504 | tot_written += n_written; |
505 | } |
506 | |
mutex_unlock(&vmbus_connection.channel_mutex);
508 | |
509 | return tot_written; |
510 | } |
511 | static DEVICE_ATTR_RO(channel_vp_mapping); |
512 | |
513 | static ssize_t vendor_show(struct device *dev, |
514 | struct device_attribute *dev_attr, |
515 | char *buf) |
516 | { |
517 | struct hv_device *hv_dev = device_to_hv_device(dev); |
518 | |
return sprintf(buf, "0x%x\n", hv_dev->vendor_id);
520 | } |
521 | static DEVICE_ATTR_RO(vendor); |
522 | |
523 | static ssize_t device_show(struct device *dev, |
524 | struct device_attribute *dev_attr, |
525 | char *buf) |
526 | { |
527 | struct hv_device *hv_dev = device_to_hv_device(dev); |
528 | |
return sprintf(buf, "0x%x\n", hv_dev->device_id);
530 | } |
531 | static DEVICE_ATTR_RO(device); |
532 | |
533 | static ssize_t driver_override_store(struct device *dev, |
534 | struct device_attribute *attr, |
535 | const char *buf, size_t count) |
536 | { |
537 | struct hv_device *hv_dev = device_to_hv_device(dev); |
538 | int ret; |
539 | |
ret = driver_set_override(dev, &hv_dev->driver_override, buf, count);
541 | if (ret) |
542 | return ret; |
543 | |
544 | return count; |
545 | } |
546 | |
547 | static ssize_t driver_override_show(struct device *dev, |
548 | struct device_attribute *attr, char *buf) |
549 | { |
550 | struct hv_device *hv_dev = device_to_hv_device(dev); |
551 | ssize_t len; |
552 | |
553 | device_lock(dev); |
len = snprintf(buf, PAGE_SIZE, "%s\n", hv_dev->driver_override);
555 | device_unlock(dev); |
556 | |
557 | return len; |
558 | } |
559 | static DEVICE_ATTR_RW(driver_override); |
560 | |
561 | /* Set up per device attributes in /sys/bus/vmbus/devices/<bus device> */ |
562 | static struct attribute *vmbus_dev_attrs[] = { |
563 | &dev_attr_id.attr, |
564 | &dev_attr_state.attr, |
565 | &dev_attr_monitor_id.attr, |
566 | &dev_attr_class_id.attr, |
567 | &dev_attr_device_id.attr, |
568 | &dev_attr_modalias.attr, |
569 | #ifdef CONFIG_NUMA |
570 | &dev_attr_numa_node.attr, |
571 | #endif |
572 | &dev_attr_server_monitor_pending.attr, |
573 | &dev_attr_client_monitor_pending.attr, |
574 | &dev_attr_server_monitor_latency.attr, |
575 | &dev_attr_client_monitor_latency.attr, |
576 | &dev_attr_server_monitor_conn_id.attr, |
577 | &dev_attr_client_monitor_conn_id.attr, |
578 | &dev_attr_out_intr_mask.attr, |
579 | &dev_attr_out_read_index.attr, |
580 | &dev_attr_out_write_index.attr, |
581 | &dev_attr_out_read_bytes_avail.attr, |
582 | &dev_attr_out_write_bytes_avail.attr, |
583 | &dev_attr_in_intr_mask.attr, |
584 | &dev_attr_in_read_index.attr, |
585 | &dev_attr_in_write_index.attr, |
586 | &dev_attr_in_read_bytes_avail.attr, |
587 | &dev_attr_in_write_bytes_avail.attr, |
588 | &dev_attr_channel_vp_mapping.attr, |
589 | &dev_attr_vendor.attr, |
590 | &dev_attr_device.attr, |
591 | &dev_attr_driver_override.attr, |
592 | NULL, |
593 | }; |
594 | |
595 | /* |
596 | * Device-level attribute_group callback function. Returns the permission for |
597 | * each attribute, and returns 0 if an attribute is not visible. |
598 | */ |
599 | static umode_t vmbus_dev_attr_is_visible(struct kobject *kobj, |
600 | struct attribute *attr, int idx) |
601 | { |
602 | struct device *dev = kobj_to_dev(kobj); |
603 | const struct hv_device *hv_dev = device_to_hv_device(dev); |
604 | |
605 | /* Hide the monitor attributes if the monitor mechanism is not used. */ |
606 | if (!hv_dev->channel->offermsg.monitor_allocated && |
607 | (attr == &dev_attr_monitor_id.attr || |
608 | attr == &dev_attr_server_monitor_pending.attr || |
609 | attr == &dev_attr_client_monitor_pending.attr || |
610 | attr == &dev_attr_server_monitor_latency.attr || |
611 | attr == &dev_attr_client_monitor_latency.attr || |
612 | attr == &dev_attr_server_monitor_conn_id.attr || |
613 | attr == &dev_attr_client_monitor_conn_id.attr)) |
614 | return 0; |
615 | |
616 | return attr->mode; |
617 | } |
618 | |
619 | static const struct attribute_group vmbus_dev_group = { |
620 | .attrs = vmbus_dev_attrs, |
621 | .is_visible = vmbus_dev_attr_is_visible |
622 | }; |
623 | __ATTRIBUTE_GROUPS(vmbus_dev); |
624 | |
625 | /* Set up the attribute for /sys/bus/vmbus/hibernation */ |
626 | static ssize_t hibernation_show(const struct bus_type *bus, char *buf) |
627 | { |
return sprintf(buf, "%d\n", !!hv_is_hibernation_supported());
629 | } |
630 | |
631 | static BUS_ATTR_RO(hibernation); |
632 | |
633 | static struct attribute *vmbus_bus_attrs[] = { |
634 | &bus_attr_hibernation.attr, |
635 | NULL, |
636 | }; |
637 | static const struct attribute_group vmbus_bus_group = { |
638 | .attrs = vmbus_bus_attrs, |
639 | }; |
640 | __ATTRIBUTE_GROUPS(vmbus_bus); |
641 | |
642 | /* |
643 | * vmbus_uevent - add uevent for our device |
644 | * |
* This routine is invoked when a device is added or removed on the vmbus to
* generate a uevent to udev in userspace. Udev will then look at its rules
* and the uevent generated here to load the appropriate driver.
648 | * |
* The alias string will be of the form vmbus:guid, where guid is the string
* representation of the device guid (each byte of the guid is represented
* with two hex characters).
652 | */ |
653 | static int vmbus_uevent(const struct device *device, struct kobj_uevent_env *env) |
654 | { |
655 | const struct hv_device *dev = device_to_hv_device(device); |
const char *format = "MODALIAS=vmbus:%*phN";
657 | |
658 | return add_uevent_var(env, format, UUID_SIZE, &dev->dev_type); |
659 | } |
660 | |
661 | static const struct hv_vmbus_device_id * |
662 | hv_vmbus_dev_match(const struct hv_vmbus_device_id *id, const guid_t *guid) |
663 | { |
664 | if (id == NULL) |
665 | return NULL; /* empty device table */ |
666 | |
for (; !guid_is_null(&id->guid); id++)
	if (guid_equal(&id->guid, guid))
669 | return id; |
670 | |
671 | return NULL; |
672 | } |
673 | |
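/* Search the driver's dynamic (sysfs-added) GUID list for a match. */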
674 | static const struct hv_vmbus_device_id * |
675 | hv_vmbus_dynid_match(struct hv_driver *drv, const guid_t *guid) |
676 | { |
677 | const struct hv_vmbus_device_id *id = NULL; |
678 | struct vmbus_dynid *dynid; |
679 | |
spin_lock(&drv->dynids.lock);
681 | list_for_each_entry(dynid, &drv->dynids.list, node) { |
if (guid_equal(&dynid->id.guid, guid)) {
683 | id = &dynid->id; |
684 | break; |
685 | } |
686 | } |
spin_unlock(&drv->dynids.lock);
688 | |
689 | return id; |
690 | } |
691 | |
692 | static const struct hv_vmbus_device_id vmbus_device_null; |
693 | |
694 | /* |
695 | * Return a matching hv_vmbus_device_id pointer. |
696 | * If there is no match, return NULL. |
697 | */ |
698 | static const struct hv_vmbus_device_id *hv_vmbus_get_id(struct hv_driver *drv, |
699 | struct hv_device *dev) |
700 | { |
701 | const guid_t *guid = &dev->dev_type; |
702 | const struct hv_vmbus_device_id *id; |
703 | |
704 | /* When driver_override is set, only bind to the matching driver */ |
705 | if (dev->driver_override && strcmp(dev->driver_override, drv->name)) |
706 | return NULL; |
707 | |
708 | /* Look at the dynamic ids first, before the static ones */ |
709 | id = hv_vmbus_dynid_match(drv, guid); |
710 | if (!id) |
id = hv_vmbus_dev_match(drv->id_table, guid);
712 | |
713 | /* driver_override will always match, send a dummy id */ |
714 | if (!id && dev->driver_override) |
715 | id = &vmbus_device_null; |
716 | |
717 | return id; |
718 | } |
719 | |
720 | /* vmbus_add_dynid - add a new device ID to this driver and re-probe devices */ |
721 | static int vmbus_add_dynid(struct hv_driver *drv, guid_t *guid) |
722 | { |
723 | struct vmbus_dynid *dynid; |
724 | |
dynid = kzalloc(sizeof(*dynid), GFP_KERNEL);
726 | if (!dynid) |
727 | return -ENOMEM; |
728 | |
729 | dynid->id.guid = *guid; |
730 | |
spin_lock(&drv->dynids.lock);
list_add_tail(&dynid->node, &drv->dynids.list);
spin_unlock(&drv->dynids.lock);
734 | |
return driver_attach(&drv->driver);
736 | } |
737 | |
738 | static void vmbus_free_dynids(struct hv_driver *drv) |
739 | { |
740 | struct vmbus_dynid *dynid, *n; |
741 | |
spin_lock(&drv->dynids.lock);
list_for_each_entry_safe(dynid, n, &drv->dynids.list, node) {
	list_del(&dynid->node);
	kfree(dynid);
}
spin_unlock(&drv->dynids.lock);
748 | } |
749 | |
750 | /* |
751 | * store_new_id - sysfs frontend to vmbus_add_dynid() |
752 | * |
753 | * Allow GUIDs to be added to an existing driver via sysfs. |
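*
* For example, using hv_netvsc's class GUID (assuming the driver is loaded):
* echo "f8615163-df3e-46c5-913f-f2d2f965ed0e" \
* > /sys/bus/vmbus/drivers/hv_netvsc/new_id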
754 | */ |
755 | static ssize_t new_id_store(struct device_driver *driver, const char *buf, |
756 | size_t count) |
757 | { |
struct hv_driver *drv = drv_to_hv_drv(driver);
759 | guid_t guid; |
760 | ssize_t retval; |
761 | |
retval = guid_parse(buf, &guid);
763 | if (retval) |
764 | return retval; |
765 | |
if (hv_vmbus_dynid_match(drv, &guid))
767 | return -EEXIST; |
768 | |
retval = vmbus_add_dynid(drv, &guid);
770 | if (retval) |
771 | return retval; |
772 | return count; |
773 | } |
774 | static DRIVER_ATTR_WO(new_id); |
775 | |
776 | /* |
* store_remove_id - remove a vmbus device ID from this driver
*
* Removes a dynamic vmbus device ID from this driver.
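*
* For example (assuming the GUID was previously added via new_id):
* echo "f8615163-df3e-46c5-913f-f2d2f965ed0e" \
* > /sys/bus/vmbus/drivers/hv_netvsc/remove_id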
780 | */ |
781 | static ssize_t remove_id_store(struct device_driver *driver, const char *buf, |
782 | size_t count) |
783 | { |
struct hv_driver *drv = drv_to_hv_drv(driver);
785 | struct vmbus_dynid *dynid, *n; |
786 | guid_t guid; |
787 | ssize_t retval; |
788 | |
retval = guid_parse(buf, &guid);
790 | if (retval) |
791 | return retval; |
792 | |
793 | retval = -ENODEV; |
spin_lock(&drv->dynids.lock);
795 | list_for_each_entry_safe(dynid, n, &drv->dynids.list, node) { |
796 | struct hv_vmbus_device_id *id = &dynid->id; |
797 | |
if (guid_equal(&id->guid, &guid)) {
	list_del(&dynid->node);
	kfree(dynid);
801 | retval = count; |
802 | break; |
803 | } |
804 | } |
spin_unlock(&drv->dynids.lock);
806 | |
807 | return retval; |
808 | } |
809 | static DRIVER_ATTR_WO(remove_id); |
810 | |
811 | static struct attribute *vmbus_drv_attrs[] = { |
812 | &driver_attr_new_id.attr, |
813 | &driver_attr_remove_id.attr, |
814 | NULL, |
815 | }; |
816 | ATTRIBUTE_GROUPS(vmbus_drv); |
817 | |
818 | |
819 | /* |
820 | * vmbus_match - Attempt to match the specified device to the specified driver |
821 | */ |
822 | static int vmbus_match(struct device *device, struct device_driver *driver) |
823 | { |
struct hv_driver *drv = drv_to_hv_drv(driver);
825 | struct hv_device *hv_dev = device_to_hv_device(device); |
826 | |
827 | /* The hv_sock driver handles all hv_sock offers. */ |
if (is_hvsock_channel(hv_dev->channel))
829 | return drv->hvsock; |
830 | |
if (hv_vmbus_get_id(drv, hv_dev))
832 | return 1; |
833 | |
834 | return 0; |
835 | } |
836 | |
837 | /* |
838 | * vmbus_probe - Add the new vmbus's child device |
839 | */ |
840 | static int vmbus_probe(struct device *child_device) |
841 | { |
842 | int ret = 0; |
843 | struct hv_driver *drv = |
drv_to_hv_drv(child_device->driver);
845 | struct hv_device *dev = device_to_hv_device(child_device); |
846 | const struct hv_vmbus_device_id *dev_id; |
847 | |
848 | dev_id = hv_vmbus_get_id(drv, dev); |
849 | if (drv->probe) { |
850 | ret = drv->probe(dev, dev_id); |
851 | if (ret != 0) |
852 | pr_err("probe failed for device %s (%d)\n" , |
853 | dev_name(child_device), ret); |
854 | |
855 | } else { |
856 | pr_err("probe not set for driver %s\n" , |
857 | dev_name(child_device)); |
858 | ret = -ENODEV; |
859 | } |
860 | return ret; |
861 | } |
862 | |
863 | /* |
864 | * vmbus_dma_configure -- Configure DMA coherence for VMbus device |
865 | */ |
866 | static int vmbus_dma_configure(struct device *child_device) |
867 | { |
868 | /* |
869 | * On ARM64, propagate the DMA coherence setting from the top level |
870 | * VMbus ACPI device to the child VMbus device being added here. |
871 | * On x86/x64 coherence is assumed and these calls have no effect. |
872 | */ |
hv_setup_dma_ops(child_device,
		 device_get_dma_attr(hv_dev) == DEV_DMA_COHERENT);
875 | return 0; |
876 | } |
877 | |
878 | /* |
879 | * vmbus_remove - Remove a vmbus device |
880 | */ |
881 | static void vmbus_remove(struct device *child_device) |
882 | { |
883 | struct hv_driver *drv; |
884 | struct hv_device *dev = device_to_hv_device(child_device); |
885 | |
886 | if (child_device->driver) { |
drv = drv_to_hv_drv(child_device->driver);
888 | if (drv->remove) |
889 | drv->remove(dev); |
890 | } |
891 | } |
892 | |
893 | /* |
894 | * vmbus_shutdown - Shutdown a vmbus device |
895 | */ |
896 | static void vmbus_shutdown(struct device *child_device) |
897 | { |
898 | struct hv_driver *drv; |
899 | struct hv_device *dev = device_to_hv_device(child_device); |
900 | |
901 | |
902 | /* The device may not be attached yet */ |
903 | if (!child_device->driver) |
904 | return; |
905 | |
drv = drv_to_hv_drv(child_device->driver);
907 | |
908 | if (drv->shutdown) |
909 | drv->shutdown(dev); |
910 | } |
911 | |
912 | #ifdef CONFIG_PM_SLEEP |
913 | /* |
914 | * vmbus_suspend - Suspend a vmbus device |
915 | */ |
916 | static int vmbus_suspend(struct device *child_device) |
917 | { |
918 | struct hv_driver *drv; |
919 | struct hv_device *dev = device_to_hv_device(child_device); |
920 | |
921 | /* The device may not be attached yet */ |
922 | if (!child_device->driver) |
923 | return 0; |
924 | |
drv = drv_to_hv_drv(child_device->driver);
926 | if (!drv->suspend) |
927 | return -EOPNOTSUPP; |
928 | |
929 | return drv->suspend(dev); |
930 | } |
931 | |
932 | /* |
933 | * vmbus_resume - Resume a vmbus device |
934 | */ |
935 | static int vmbus_resume(struct device *child_device) |
936 | { |
937 | struct hv_driver *drv; |
938 | struct hv_device *dev = device_to_hv_device(child_device); |
939 | |
940 | /* The device may not be attached yet */ |
941 | if (!child_device->driver) |
942 | return 0; |
943 | |
drv = drv_to_hv_drv(child_device->driver);
945 | if (!drv->resume) |
946 | return -EOPNOTSUPP; |
947 | |
948 | return drv->resume(dev); |
949 | } |
950 | #else |
951 | #define vmbus_suspend NULL |
952 | #define vmbus_resume NULL |
953 | #endif /* CONFIG_PM_SLEEP */ |
954 | |
955 | /* |
956 | * vmbus_device_release - Final callback release of the vmbus child device |
957 | */ |
958 | static void vmbus_device_release(struct device *device) |
959 | { |
960 | struct hv_device *hv_dev = device_to_hv_device(device); |
961 | struct vmbus_channel *channel = hv_dev->channel; |
962 | |
hv_debug_rm_dev_dir(hv_dev);
964 | |
965 | mutex_lock(&vmbus_connection.channel_mutex); |
966 | hv_process_channel_removal(channel); |
mutex_unlock(&vmbus_connection.channel_mutex);
kfree(hv_dev);
969 | } |
970 | |
971 | /* |
972 | * Note: we must use the "noirq" ops: see the comment before vmbus_bus_pm. |
973 | * |
974 | * suspend_noirq/resume_noirq are set to NULL to support Suspend-to-Idle: we |
975 | * shouldn't suspend the vmbus devices upon Suspend-to-Idle, otherwise there |
976 | * is no way to wake up a Generation-2 VM. |
977 | * |
978 | * The other 4 ops are for hibernation. |
979 | */ |
980 | |
981 | static const struct dev_pm_ops vmbus_pm = { |
982 | .suspend_noirq = NULL, |
983 | .resume_noirq = NULL, |
984 | .freeze_noirq = vmbus_suspend, |
985 | .thaw_noirq = vmbus_resume, |
986 | .poweroff_noirq = vmbus_suspend, |
987 | .restore_noirq = vmbus_resume, |
988 | }; |
989 | |
990 | /* The one and only one */ |
991 | static struct bus_type hv_bus = { |
992 | .name = "vmbus" , |
993 | .match = vmbus_match, |
994 | .shutdown = vmbus_shutdown, |
995 | .remove = vmbus_remove, |
996 | .probe = vmbus_probe, |
997 | .uevent = vmbus_uevent, |
998 | .dma_configure = vmbus_dma_configure, |
999 | .dev_groups = vmbus_dev_groups, |
1000 | .drv_groups = vmbus_drv_groups, |
1001 | .bus_groups = vmbus_bus_groups, |
1002 | .pm = &vmbus_pm, |
1003 | }; |
1004 | |
1005 | struct onmessage_work_context { |
1006 | struct work_struct work; |
1007 | struct { |
struct hv_message_header header;
1009 | u8 payload[]; |
1010 | } msg; |
1011 | }; |
1012 | |
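/* Deferred handler for blocking channel messages queued by vmbus_on_msg_dpc(). */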
1013 | static void vmbus_onmessage_work(struct work_struct *work) |
1014 | { |
1015 | struct onmessage_work_context *ctx; |
1016 | |
1017 | /* Do not process messages if we're in DISCONNECTED state */ |
1018 | if (vmbus_connection.conn_state == DISCONNECTED) |
1019 | return; |
1020 | |
1021 | ctx = container_of(work, struct onmessage_work_context, |
1022 | work); |
vmbus_onmessage((struct vmbus_channel_message_header *)
1024 | &ctx->msg.payload); |
kfree(ctx);
1026 | } |
1027 | |
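/*
* Tasklet handler for the SynIC message page: copy the message out of the
* memory shared with the host, then either run its handler inline or defer
* it to a work queue, depending on the handler type.
*/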
1028 | void vmbus_on_msg_dpc(unsigned long data) |
1029 | { |
1030 | struct hv_per_cpu_context *hv_cpu = (void *)data; |
1031 | void *page_addr = hv_cpu->synic_message_page; |
1032 | struct hv_message msg_copy, *msg = (struct hv_message *)page_addr + |
1033 | VMBUS_MESSAGE_SINT; |
1034 | struct vmbus_channel_message_header *hdr; |
1035 | enum vmbus_channel_message_type msgtype; |
1036 | const struct vmbus_channel_message_table_entry *entry; |
1037 | struct onmessage_work_context *ctx; |
1038 | __u8 payload_size; |
1039 | u32 message_type; |
1040 | |
1041 | /* |
1042 | * 'enum vmbus_channel_message_type' is supposed to always be 'u32' as |
1043 | * it is being used in 'struct vmbus_channel_message_header' definition |
1044 | * which is supposed to match hypervisor ABI. |
1045 | */ |
1046 | BUILD_BUG_ON(sizeof(enum vmbus_channel_message_type) != sizeof(u32)); |
1047 | |
1048 | /* |
1049 | * Since the message is in memory shared with the host, an erroneous or |
1050 | * malicious Hyper-V could modify the message while vmbus_on_msg_dpc() |
1051 | * or individual message handlers are executing; to prevent this, copy |
1052 | * the message into private memory. |
1053 | */ |
1054 | memcpy(&msg_copy, msg, sizeof(struct hv_message)); |
1055 | |
1056 | message_type = msg_copy.header.message_type; |
1057 | if (message_type == HVMSG_NONE) |
1058 | /* no msg */ |
1059 | return; |
1060 | |
1061 | hdr = (struct vmbus_channel_message_header *)msg_copy.u.payload; |
1062 | msgtype = hdr->msgtype; |
1063 | |
1064 | trace_vmbus_on_msg_dpc(hdr); |
1065 | |
1066 | if (msgtype >= CHANNELMSG_COUNT) { |
1067 | WARN_ONCE(1, "unknown msgtype=%d\n" , msgtype); |
1068 | goto msg_handled; |
1069 | } |
1070 | |
1071 | payload_size = msg_copy.header.payload_size; |
1072 | if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT) { |
1073 | WARN_ONCE(1, "payload size is too large (%d)\n" , payload_size); |
1074 | goto msg_handled; |
1075 | } |
1076 | |
1077 | entry = &channel_message_table[msgtype]; |
1078 | |
1079 | if (!entry->message_handler) |
1080 | goto msg_handled; |
1081 | |
1082 | if (payload_size < entry->min_payload_len) { |
1083 | WARN_ONCE(1, "message too short: msgtype=%d len=%d\n" , msgtype, payload_size); |
1084 | goto msg_handled; |
1085 | } |
1086 | |
1087 | if (entry->handler_type == VMHT_BLOCKING) { |
1088 | ctx = kmalloc(struct_size(ctx, msg.payload, payload_size), GFP_ATOMIC); |
1089 | if (ctx == NULL) |
1090 | return; |
1091 | |
1092 | INIT_WORK(&ctx->work, vmbus_onmessage_work); |
1093 | ctx->msg.header = msg_copy.header; |
1094 | memcpy(&ctx->msg.payload, msg_copy.u.payload, payload_size); |
1095 | |
1096 | /* |
1097 | * The host can generate a rescind message while we |
1098 | * may still be handling the original offer. We deal with |
1099 | * this condition by relying on the synchronization provided |
1100 | * by offer_in_progress and by channel_mutex. See also the |
1101 | * inline comments in vmbus_onoffer_rescind(). |
1102 | */ |
1103 | switch (msgtype) { |
1104 | case CHANNELMSG_RESCIND_CHANNELOFFER: |
1105 | /* |
1106 | * If we are handling the rescind message; |
1107 | * schedule the work on the global work queue. |
1108 | * |
1109 | * The OFFER message and the RESCIND message should |
1110 | * not be handled by the same serialized work queue, |
1111 | * because the OFFER handler may call vmbus_open(), |
1112 | * which tries to open the channel by sending an |
1113 | * OPEN_CHANNEL message to the host and waits for |
1114 | * the host's response; however, if the host has |
1115 | * rescinded the channel before it receives the |
1116 | * OPEN_CHANNEL message, the host just silently |
1117 | * ignores the OPEN_CHANNEL message; as a result, |
* the guest's OFFER handler hangs forever if we
1119 | * handle the RESCIND message in the same serialized |
1120 | * work queue: the RESCIND handler can not start to |
1121 | * run before the OFFER handler finishes. |
1122 | */ |
1123 | if (vmbus_connection.ignore_any_offer_msg) |
1124 | break; |
queue_work(vmbus_connection.rescind_work_queue, &ctx->work);
1126 | break; |
1127 | |
1128 | case CHANNELMSG_OFFERCHANNEL: |
1129 | /* |
1130 | * The host sends the offer message of a given channel |
1131 | * before sending the rescind message of the same |
1132 | * channel. These messages are sent to the guest's |
1133 | * connect CPU; the guest then starts processing them |
1134 | * in the tasklet handler on this CPU: |
1135 | * |
1136 | * VMBUS_CONNECT_CPU |
1137 | * |
1138 | * [vmbus_on_msg_dpc()] |
1139 | * atomic_inc() // CHANNELMSG_OFFERCHANNEL |
1140 | * queue_work() |
1141 | * ... |
1142 | * [vmbus_on_msg_dpc()] |
1143 | * schedule_work() // CHANNELMSG_RESCIND_CHANNELOFFER |
1144 | * |
1145 | * We rely on the memory-ordering properties of the |
1146 | * queue_work() and schedule_work() primitives, which |
1147 | * guarantee that the atomic increment will be visible |
1148 | * to the CPUs which will execute the offer & rescind |
1149 | * works by the time these works will start execution. |
1150 | */ |
1151 | if (vmbus_connection.ignore_any_offer_msg) |
1152 | break; |
atomic_inc(&vmbus_connection.offer_in_progress);
1154 | fallthrough; |
1155 | |
1156 | default: |
queue_work(vmbus_connection.work_queue, &ctx->work);
1158 | } |
1159 | } else |
1160 | entry->message_handler(hdr); |
1161 | |
1162 | msg_handled: |
vmbus_signal_eom(msg, message_type);
1164 | } |
1165 | |
1166 | #ifdef CONFIG_PM_SLEEP |
1167 | /* |
1168 | * Fake RESCIND_CHANNEL messages to clean up hv_sock channels by force for |
1169 | * hibernation, because hv_sock connections can not persist across hibernation. |
1170 | */ |
1171 | static void vmbus_force_channel_rescinded(struct vmbus_channel *channel) |
1172 | { |
1173 | struct onmessage_work_context *ctx; |
1174 | struct vmbus_channel_rescind_offer *rescind; |
1175 | |
1176 | WARN_ON(!is_hvsock_channel(channel)); |
1177 | |
1178 | /* |
1179 | * Allocation size is small and the allocation should really not fail, |
1180 | * otherwise the state of the hv_sock connections ends up in limbo. |
1181 | */ |
ctx = kzalloc(sizeof(*ctx) + sizeof(*rescind),
1183 | GFP_KERNEL | __GFP_NOFAIL); |
1184 | |
1185 | /* |
1186 | * So far, these are not really used by Linux. Just set them to the |
1187 | * reasonable values conforming to the definitions of the fields. |
1188 | */ |
1189 | ctx->msg.header.message_type = 1; |
1190 | ctx->msg.header.payload_size = sizeof(*rescind); |
1191 | |
1192 | /* These values are actually used by Linux. */ |
1193 | rescind = (struct vmbus_channel_rescind_offer *)ctx->msg.payload; |
1194 | rescind->header.msgtype = CHANNELMSG_RESCIND_CHANNELOFFER; |
1195 | rescind->child_relid = channel->offermsg.child_relid; |
1196 | |
1197 | INIT_WORK(&ctx->work, vmbus_onmessage_work); |
1198 | |
queue_work(vmbus_connection.work_queue, &ctx->work);
1200 | } |
1201 | #endif /* CONFIG_PM_SLEEP */ |
1202 | |
1203 | /* |
1204 | * Schedule all channels with events pending |
1205 | */ |
1206 | static void vmbus_chan_sched(struct hv_per_cpu_context *hv_cpu) |
1207 | { |
1208 | unsigned long *recv_int_page; |
1209 | u32 maxbits, relid; |
1210 | |
1211 | /* |
1212 | * The event page can be directly checked to get the id of |
1213 | * the channel that has the interrupt pending. |
1214 | */ |
1215 | void *page_addr = hv_cpu->synic_event_page; |
1216 | union hv_synic_event_flags *event |
1217 | = (union hv_synic_event_flags *)page_addr + |
1218 | VMBUS_MESSAGE_SINT; |
1219 | |
1220 | maxbits = HV_EVENT_FLAGS_COUNT; |
1221 | recv_int_page = event->flags; |
1222 | |
1223 | if (unlikely(!recv_int_page)) |
1224 | return; |
1225 | |
1226 | for_each_set_bit(relid, recv_int_page, maxbits) { |
1227 | void (*callback_fn)(void *context); |
1228 | struct vmbus_channel *channel; |
1229 | |
if (!sync_test_and_clear_bit(relid, recv_int_page))
1231 | continue; |
1232 | |
1233 | /* Special case - vmbus channel protocol msg */ |
1234 | if (relid == 0) |
1235 | continue; |
1236 | |
1237 | /* |
1238 | * Pairs with the kfree_rcu() in vmbus_chan_release(). |
1239 | * Guarantees that the channel data structure doesn't |
1240 | * get freed while the channel pointer below is being |
1241 | * dereferenced. |
1242 | */ |
1243 | rcu_read_lock(); |
1244 | |
1245 | /* Find channel based on relid */ |
1246 | channel = relid2channel(relid); |
1247 | if (channel == NULL) |
1248 | goto sched_unlock_rcu; |
1249 | |
1250 | if (channel->rescind) |
1251 | goto sched_unlock_rcu; |
1252 | |
1253 | /* |
1254 | * Make sure that the ring buffer data structure doesn't get |
1255 | * freed while we dereference the ring buffer pointer. Test |
1256 | * for the channel's onchannel_callback being NULL within a |
1257 | * sched_lock critical section. See also the inline comments |
1258 | * in vmbus_reset_channel_cb(). |
1259 | */ |
spin_lock(&channel->sched_lock);
1261 | |
1262 | callback_fn = channel->onchannel_callback; |
1263 | if (unlikely(callback_fn == NULL)) |
1264 | goto sched_unlock; |
1265 | |
1266 | trace_vmbus_chan_sched(channel); |
1267 | |
1268 | ++channel->interrupts; |
1269 | |
1270 | switch (channel->callback_mode) { |
1271 | case HV_CALL_ISR: |
1272 | (*callback_fn)(channel->channel_callback_context); |
1273 | break; |
1274 | |
1275 | case HV_CALL_BATCHED: |
hv_begin_read(&channel->inbound);
1277 | fallthrough; |
1278 | case HV_CALL_DIRECT: |
tasklet_schedule(&channel->callback_event);
1280 | } |
1281 | |
1282 | sched_unlock: |
spin_unlock(&channel->sched_lock);
1284 | sched_unlock_rcu: |
1285 | rcu_read_unlock(); |
1286 | } |
1287 | } |
1288 | |
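/*
* Top-level VMbus interrupt handler: schedule channels with pending events,
* then handle a pending host message or defer it to the message tasklet.
*/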
1289 | static void vmbus_isr(void) |
1290 | { |
1291 | struct hv_per_cpu_context *hv_cpu |
1292 | = this_cpu_ptr(hv_context.cpu_context); |
1293 | void *page_addr; |
1294 | struct hv_message *msg; |
1295 | |
1296 | vmbus_chan_sched(hv_cpu); |
1297 | |
1298 | page_addr = hv_cpu->synic_message_page; |
1299 | msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT; |
1300 | |
1301 | /* Check if there are actual msgs to be processed */ |
1302 | if (msg->header.message_type != HVMSG_NONE) { |
1303 | if (msg->header.message_type == HVMSG_TIMER_EXPIRED) { |
1304 | hv_stimer0_isr(); |
vmbus_signal_eom(msg, HVMSG_TIMER_EXPIRED);
1306 | } else |
tasklet_schedule(&hv_cpu->msg_dpc);
1308 | } |
1309 | |
add_interrupt_randomness(vmbus_interrupt);
1311 | } |
1312 | |
1313 | static irqreturn_t vmbus_percpu_isr(int irq, void *dev_id) |
1314 | { |
1315 | vmbus_isr(); |
1316 | return IRQ_HANDLED; |
1317 | } |
1318 | |
1319 | /* |
* vmbus_bus_init - Main vmbus driver initialization routine.
1321 | * |
1322 | * Here, we |
1323 | * - initialize the vmbus driver context |
1324 | * - invoke the vmbus hv main init routine |
1325 | * - retrieve the channel offers |
1326 | */ |
1327 | static int vmbus_bus_init(void) |
1328 | { |
1329 | int ret; |
1330 | |
1331 | ret = hv_init(); |
1332 | if (ret != 0) { |
1333 | pr_err("Unable to initialize the hypervisor - 0x%x\n" , ret); |
1334 | return ret; |
1335 | } |
1336 | |
ret = bus_register(&hv_bus);
1338 | if (ret) |
1339 | return ret; |
1340 | |
1341 | /* |
1342 | * VMbus interrupts are best modeled as per-cpu interrupts. If |
1343 | * on an architecture with support for per-cpu IRQs (e.g. ARM64), |
1344 | * allocate a per-cpu IRQ using standard Linux kernel functionality. |
1345 | * If not on such an architecture (e.g., x86/x64), then rely on |
1346 | * code in the arch-specific portion of the code tree to connect |
1347 | * the VMbus interrupt handler. |
1348 | */ |
1349 | |
1350 | if (vmbus_irq == -1) { |
hv_setup_vmbus_handler(vmbus_isr);
1352 | } else { |
1353 | vmbus_evt = alloc_percpu(long); |
ret = request_percpu_irq(vmbus_irq, vmbus_percpu_isr,
			 "Hyper-V VMbus", vmbus_evt);
1356 | if (ret) { |
1357 | pr_err("Can't request Hyper-V VMbus IRQ %d, Err %d" , |
1358 | vmbus_irq, ret); |
1359 | free_percpu(pdata: vmbus_evt); |
1360 | goto err_setup; |
1361 | } |
1362 | } |
1363 | |
1364 | ret = hv_synic_alloc(); |
1365 | if (ret) |
1366 | goto err_alloc; |
1367 | |
1368 | /* |
1369 | * Initialize the per-cpu interrupt state and stimer state. |
1370 | * Then connect to the host. |
1371 | */ |
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "hyperv/vmbus:online",
			hv_synic_init, hv_synic_cleanup);
1374 | if (ret < 0) |
1375 | goto err_alloc; |
1376 | hyperv_cpuhp_online = ret; |
1377 | |
1378 | ret = vmbus_connect(); |
1379 | if (ret) |
1380 | goto err_connect; |
1381 | |
1382 | /* |
1383 | * Always register the vmbus unload panic notifier because we |
1384 | * need to shut the VMbus channel connection on panic. |
1385 | */ |
atomic_notifier_chain_register(&panic_notifier_list,
			       &hyperv_panic_vmbus_unload_block);
1388 | |
1389 | vmbus_request_offers(); |
1390 | |
1391 | return 0; |
1392 | |
1393 | err_connect: |
cpuhp_remove_state(hyperv_cpuhp_online);
1395 | err_alloc: |
1396 | hv_synic_free(); |
1397 | if (vmbus_irq == -1) { |
1398 | hv_remove_vmbus_handler(); |
1399 | } else { |
1400 | free_percpu_irq(vmbus_irq, vmbus_evt); |
free_percpu(vmbus_evt);
1402 | } |
1403 | err_setup: |
bus_unregister(&hv_bus);
1405 | return ret; |
1406 | } |
1407 | |
1408 | /** |
1409 | * __vmbus_driver_register() - Register a vmbus's driver |
1410 | * @hv_driver: Pointer to driver structure you want to register |
1411 | * @owner: owner module of the drv |
1412 | * @mod_name: module name string |
1413 | * |
1414 | * Registers the given driver with Linux through the 'driver_register()' call |
1415 | * and sets up the hyper-v vmbus handling for this driver. |
1416 | * It will return the state of the 'driver_register()' call. |
1417 | * |
1418 | */ |
1419 | int __vmbus_driver_register(struct hv_driver *hv_driver, struct module *owner, const char *mod_name) |
1420 | { |
1421 | int ret; |
1422 | |
1423 | pr_info("registering driver %s\n" , hv_driver->name); |
1424 | |
1425 | ret = vmbus_exists(); |
1426 | if (ret < 0) |
1427 | return ret; |
1428 | |
1429 | hv_driver->driver.name = hv_driver->name; |
1430 | hv_driver->driver.owner = owner; |
1431 | hv_driver->driver.mod_name = mod_name; |
1432 | hv_driver->driver.bus = &hv_bus; |
1433 | |
1434 | spin_lock_init(&hv_driver->dynids.lock); |
INIT_LIST_HEAD(&hv_driver->dynids.list);
1436 | |
ret = driver_register(&hv_driver->driver);
1438 | |
1439 | return ret; |
1440 | } |
1441 | EXPORT_SYMBOL_GPL(__vmbus_driver_register); |
1442 | |
1443 | /** |
1444 | * vmbus_driver_unregister() - Unregister a vmbus's driver |
1445 | * @hv_driver: Pointer to driver structure you want to |
1446 | * un-register |
1447 | * |
* Un-register the given driver that was previously registered with a call to
1449 | * vmbus_driver_register() |
1450 | */ |
1451 | void vmbus_driver_unregister(struct hv_driver *hv_driver) |
1452 | { |
1453 | pr_info("unregistering driver %s\n" , hv_driver->name); |
1454 | |
1455 | if (!vmbus_exists()) { |
driver_unregister(&hv_driver->driver);
vmbus_free_dynids(hv_driver);
1458 | } |
1459 | } |
1460 | EXPORT_SYMBOL_GPL(vmbus_driver_unregister); |
1461 | |
1462 | |
1463 | /* |
1464 | * Called when last reference to channel is gone. |
1465 | */ |
1466 | static void vmbus_chan_release(struct kobject *kobj) |
1467 | { |
1468 | struct vmbus_channel *channel |
1469 | = container_of(kobj, struct vmbus_channel, kobj); |
1470 | |
1471 | kfree_rcu(channel, rcu); |
1472 | } |
1473 | |
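/*
* Per-channel sysfs attributes, exposed under each VMbus device's
* "channels/<relid>" subdirectory.
*/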
1474 | struct vmbus_chan_attribute { |
1475 | struct attribute attr; |
1476 | ssize_t (*show)(struct vmbus_channel *chan, char *buf); |
1477 | ssize_t (*store)(struct vmbus_channel *chan, |
1478 | const char *buf, size_t count); |
1479 | }; |
1480 | #define VMBUS_CHAN_ATTR(_name, _mode, _show, _store) \ |
1481 | struct vmbus_chan_attribute chan_attr_##_name \ |
1482 | = __ATTR(_name, _mode, _show, _store) |
1483 | #define VMBUS_CHAN_ATTR_RW(_name) \ |
1484 | struct vmbus_chan_attribute chan_attr_##_name = __ATTR_RW(_name) |
1485 | #define VMBUS_CHAN_ATTR_RO(_name) \ |
1486 | struct vmbus_chan_attribute chan_attr_##_name = __ATTR_RO(_name) |
1487 | #define VMBUS_CHAN_ATTR_WO(_name) \ |
1488 | struct vmbus_chan_attribute chan_attr_##_name = __ATTR_WO(_name) |
1489 | |
1490 | static ssize_t vmbus_chan_attr_show(struct kobject *kobj, |
1491 | struct attribute *attr, char *buf) |
1492 | { |
1493 | const struct vmbus_chan_attribute *attribute |
1494 | = container_of(attr, struct vmbus_chan_attribute, attr); |
1495 | struct vmbus_channel *chan |
1496 | = container_of(kobj, struct vmbus_channel, kobj); |
1497 | |
1498 | if (!attribute->show) |
1499 | return -EIO; |
1500 | |
1501 | return attribute->show(chan, buf); |
1502 | } |
1503 | |
1504 | static ssize_t vmbus_chan_attr_store(struct kobject *kobj, |
1505 | struct attribute *attr, const char *buf, |
1506 | size_t count) |
1507 | { |
1508 | const struct vmbus_chan_attribute *attribute |
1509 | = container_of(attr, struct vmbus_chan_attribute, attr); |
1510 | struct vmbus_channel *chan |
1511 | = container_of(kobj, struct vmbus_channel, kobj); |
1512 | |
1513 | if (!attribute->store) |
1514 | return -EIO; |
1515 | |
1516 | return attribute->store(chan, buf, count); |
1517 | } |
1518 | |
1519 | static const struct sysfs_ops vmbus_chan_sysfs_ops = { |
1520 | .show = vmbus_chan_attr_show, |
1521 | .store = vmbus_chan_attr_store, |
1522 | }; |
1523 | |
1524 | static ssize_t out_mask_show(struct vmbus_channel *channel, char *buf) |
1525 | { |
1526 | struct hv_ring_buffer_info *rbi = &channel->outbound; |
1527 | ssize_t ret; |
1528 | |
1529 | mutex_lock(&rbi->ring_buffer_mutex); |
1530 | if (!rbi->ring_buffer) { |
mutex_unlock(&rbi->ring_buffer_mutex);
1532 | return -EINVAL; |
1533 | } |
1534 | |
ret = sprintf(buf, "%u\n", rbi->ring_buffer->interrupt_mask);
mutex_unlock(&rbi->ring_buffer_mutex);
1537 | return ret; |
1538 | } |
1539 | static VMBUS_CHAN_ATTR_RO(out_mask); |
1540 | |
1541 | static ssize_t in_mask_show(struct vmbus_channel *channel, char *buf) |
1542 | { |
1543 | struct hv_ring_buffer_info *rbi = &channel->inbound; |
1544 | ssize_t ret; |
1545 | |
1546 | mutex_lock(&rbi->ring_buffer_mutex); |
1547 | if (!rbi->ring_buffer) { |
mutex_unlock(&rbi->ring_buffer_mutex);
1549 | return -EINVAL; |
1550 | } |
1551 | |
ret = sprintf(buf, "%u\n", rbi->ring_buffer->interrupt_mask);
mutex_unlock(&rbi->ring_buffer_mutex);
1554 | return ret; |
1555 | } |
1556 | static VMBUS_CHAN_ATTR_RO(in_mask); |
1557 | |
1558 | static ssize_t read_avail_show(struct vmbus_channel *channel, char *buf) |
1559 | { |
1560 | struct hv_ring_buffer_info *rbi = &channel->inbound; |
1561 | ssize_t ret; |
1562 | |
1563 | mutex_lock(&rbi->ring_buffer_mutex); |
1564 | if (!rbi->ring_buffer) { |
mutex_unlock(&rbi->ring_buffer_mutex);
1566 | return -EINVAL; |
1567 | } |
1568 | |
ret = sprintf(buf, "%u\n", hv_get_bytes_to_read(rbi));
mutex_unlock(&rbi->ring_buffer_mutex);
1571 | return ret; |
1572 | } |
1573 | static VMBUS_CHAN_ATTR_RO(read_avail); |
1574 | |
1575 | static ssize_t write_avail_show(struct vmbus_channel *channel, char *buf) |
1576 | { |
1577 | struct hv_ring_buffer_info *rbi = &channel->outbound; |
1578 | ssize_t ret; |
1579 | |
1580 | mutex_lock(&rbi->ring_buffer_mutex); |
1581 | if (!rbi->ring_buffer) { |
mutex_unlock(&rbi->ring_buffer_mutex);
1583 | return -EINVAL; |
1584 | } |
1585 | |
ret = sprintf(buf, "%u\n", hv_get_bytes_to_write(rbi));
mutex_unlock(&rbi->ring_buffer_mutex);
1588 | return ret; |
1589 | } |
1590 | static VMBUS_CHAN_ATTR_RO(write_avail); |
1591 | |
1592 | static ssize_t target_cpu_show(struct vmbus_channel *channel, char *buf) |
1593 | { |
return sprintf(buf, "%u\n", channel->target_cpu);
1595 | } |
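/*
* Retarget the channel's interrupt to another online CPU via sysfs, e.g.
* (with a hypothetical relid of 13):
* echo 3 > /sys/bus/vmbus/devices/<device>/channels/13/cpu
*/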
1596 | static ssize_t target_cpu_store(struct vmbus_channel *channel, |
1597 | const char *buf, size_t count) |
1598 | { |
1599 | u32 target_cpu, origin_cpu; |
1600 | ssize_t ret = count; |
1601 | |
1602 | if (vmbus_proto_version < VERSION_WIN10_V4_1) |
1603 | return -EIO; |
1604 | |
if (sscanf(buf, "%uu", &target_cpu) != 1)
1606 | return -EIO; |
1607 | |
1608 | /* Validate target_cpu for the cpumask_test_cpu() operation below. */ |
1609 | if (target_cpu >= nr_cpumask_bits) |
1610 | return -EINVAL; |
1611 | |
if (!cpumask_test_cpu(target_cpu, housekeeping_cpumask(HK_TYPE_MANAGED_IRQ)))
1613 | return -EINVAL; |
1614 | |
1615 | /* No CPUs should come up or down during this. */ |
1616 | cpus_read_lock(); |
1617 | |
if (!cpu_online(target_cpu)) {
1619 | cpus_read_unlock(); |
1620 | return -EINVAL; |
1621 | } |
1622 | |
1623 | /* |
1624 | * Synchronizes target_cpu_store() and channel closure: |
1625 | * |
1626 | * { Initially: state = CHANNEL_OPENED } |
1627 | * |
1628 | * CPU1 CPU2 |
1629 | * |
1630 | * [target_cpu_store()] [vmbus_disconnect_ring()] |
1631 | * |
1632 | * LOCK channel_mutex LOCK channel_mutex |
1633 | * LOAD r1 = state LOAD r2 = state |
1634 | * IF (r1 == CHANNEL_OPENED) IF (r2 == CHANNEL_OPENED) |
1635 | * SEND MODIFYCHANNEL STORE state = CHANNEL_OPEN |
1636 | * [...] SEND CLOSECHANNEL |
1637 | * UNLOCK channel_mutex UNLOCK channel_mutex |
1638 | * |
1639 | * Forbids: r1 == r2 == CHANNEL_OPENED (i.e., CPU1's LOCK precedes |
1640 | * CPU2's LOCK) && CPU2's SEND precedes CPU1's SEND |
1641 | * |
1642 | * Note. The host processes the channel messages "sequentially", in |
1643 | * the order in which they are received on a per-partition basis. |
1644 | */ |
1645 | mutex_lock(&vmbus_connection.channel_mutex); |
1646 | |
1647 | /* |
1648 | * Hyper-V will ignore MODIFYCHANNEL messages for "non-open" channels; |
1649 | * avoid sending the message and fail here for such channels. |
1650 | */ |
1651 | if (channel->state != CHANNEL_OPENED_STATE) { |
1652 | ret = -EIO; |
1653 | goto cpu_store_unlock; |
1654 | } |
1655 | |
1656 | origin_cpu = channel->target_cpu; |
1657 | if (target_cpu == origin_cpu) |
1658 | goto cpu_store_unlock; |
1659 | |
if (vmbus_send_modifychannel(channel,
			     hv_cpu_number_to_vp_number(target_cpu))) {
1662 | ret = -EIO; |
1663 | goto cpu_store_unlock; |
1664 | } |
1665 | |
1666 | /* |
1667 | * For version before VERSION_WIN10_V5_3, the following warning holds: |
1668 | * |
1669 | * Warning. At this point, there is *no* guarantee that the host will |
1670 | * have successfully processed the vmbus_send_modifychannel() request. |
1671 | * See the header comment of vmbus_send_modifychannel() for more info. |
1672 | * |
1673 | * Lags in the processing of the above vmbus_send_modifychannel() can |
1674 | * result in missed interrupts if the "old" target CPU is taken offline |
1675 | * before Hyper-V starts sending interrupts to the "new" target CPU. |
1676 | * But apart from this offlining scenario, the code tolerates such |
1677 | * lags. It will function correctly even if a channel interrupt comes |
1678 | * in on a CPU that is different from the channel target_cpu value. |
1679 | */ |
1680 | |
1681 | channel->target_cpu = target_cpu; |
1682 | |
1683 | /* See init_vp_index(). */ |
1684 | if (hv_is_perf_channel(channel)) |
		hv_update_allocated_cpus(origin_cpu, target_cpu);
1686 | |
1687 | /* Currently set only for storvsc channels. */ |
1688 | if (channel->change_target_cpu_callback) { |
1689 | (*channel->change_target_cpu_callback)(channel, |
1690 | origin_cpu, target_cpu); |
1691 | } |
1692 | |
1693 | cpu_store_unlock: |
	mutex_unlock(&vmbus_connection.channel_mutex);
1695 | cpus_read_unlock(); |
1696 | return ret; |
1697 | } |
1698 | static VMBUS_CHAN_ATTR(cpu, 0644, target_cpu_show, target_cpu_store); |
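
/*
 * Usage sketch (the device GUID and relid below are illustrative, not taken
 * from this file): a channel's interrupt affinity can be changed at runtime
 * by writing a CPU number to the "cpu" attribute, e.g.
 *
 *	echo 3 > /sys/bus/vmbus/devices/<device GUID>/channels/<relid>/cpu
 *
 * The write fails with -EINVAL if the CPU is offline or excluded from the
 * HK_TYPE_MANAGED_IRQ housekeeping mask, and with -EIO if the host predates
 * VERSION_WIN10_V4_1 or the channel is not in CHANNEL_OPENED_STATE.
 */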
1699 | |
1700 | static ssize_t channel_pending_show(struct vmbus_channel *channel, |
1701 | char *buf) |
1702 | { |
	return sprintf(buf, "%d\n",
		       channel_pending(channel,
				       vmbus_connection.monitor_pages[1]));
1706 | } |
1707 | static VMBUS_CHAN_ATTR(pending, 0444, channel_pending_show, NULL); |
1708 | |
1709 | static ssize_t channel_latency_show(struct vmbus_channel *channel, |
1710 | char *buf) |
1711 | { |
	return sprintf(buf, "%d\n",
		       channel_latency(channel,
				       vmbus_connection.monitor_pages[1]));
1715 | } |
1716 | static VMBUS_CHAN_ATTR(latency, 0444, channel_latency_show, NULL); |
1717 | |
1718 | static ssize_t channel_interrupts_show(struct vmbus_channel *channel, char *buf) |
1719 | { |
	return sprintf(buf, "%llu\n", channel->interrupts);
1721 | } |
1722 | static VMBUS_CHAN_ATTR(interrupts, 0444, channel_interrupts_show, NULL); |
1723 | |
1724 | static ssize_t channel_events_show(struct vmbus_channel *channel, char *buf) |
1725 | { |
	return sprintf(buf, "%llu\n", channel->sig_events);
1727 | } |
1728 | static VMBUS_CHAN_ATTR(events, 0444, channel_events_show, NULL); |
1729 | |
1730 | static ssize_t channel_intr_in_full_show(struct vmbus_channel *channel, |
1731 | char *buf) |
1732 | { |
	return sprintf(buf, "%llu\n",
1734 | (unsigned long long)channel->intr_in_full); |
1735 | } |
1736 | static VMBUS_CHAN_ATTR(intr_in_full, 0444, channel_intr_in_full_show, NULL); |
1737 | |
1738 | static ssize_t channel_intr_out_empty_show(struct vmbus_channel *channel, |
1739 | char *buf) |
1740 | { |
	return sprintf(buf, "%llu\n",
1742 | (unsigned long long)channel->intr_out_empty); |
1743 | } |
1744 | static VMBUS_CHAN_ATTR(intr_out_empty, 0444, channel_intr_out_empty_show, NULL); |
1745 | |
1746 | static ssize_t channel_out_full_first_show(struct vmbus_channel *channel, |
1747 | char *buf) |
1748 | { |
	return sprintf(buf, "%llu\n",
1750 | (unsigned long long)channel->out_full_first); |
1751 | } |
1752 | static VMBUS_CHAN_ATTR(out_full_first, 0444, channel_out_full_first_show, NULL); |
1753 | |
1754 | static ssize_t channel_out_full_total_show(struct vmbus_channel *channel, |
1755 | char *buf) |
1756 | { |
	return sprintf(buf, "%llu\n",
1758 | (unsigned long long)channel->out_full_total); |
1759 | } |
1760 | static VMBUS_CHAN_ATTR(out_full_total, 0444, channel_out_full_total_show, NULL); |
1761 | |
1762 | static ssize_t subchannel_monitor_id_show(struct vmbus_channel *channel, |
1763 | char *buf) |
1764 | { |
	return sprintf(buf, "%u\n", channel->offermsg.monitorid);
1766 | } |
1767 | static VMBUS_CHAN_ATTR(monitor_id, 0444, subchannel_monitor_id_show, NULL); |
1768 | |
1769 | static ssize_t subchannel_id_show(struct vmbus_channel *channel, |
1770 | char *buf) |
1771 | { |
	return sprintf(buf, "%u\n",
1773 | channel->offermsg.offer.sub_channel_index); |
1774 | } |
1775 | static VMBUS_CHAN_ATTR_RO(subchannel_id); |
1776 | |
1777 | static struct attribute *vmbus_chan_attrs[] = { |
1778 | &chan_attr_out_mask.attr, |
1779 | &chan_attr_in_mask.attr, |
1780 | &chan_attr_read_avail.attr, |
1781 | &chan_attr_write_avail.attr, |
1782 | &chan_attr_cpu.attr, |
1783 | &chan_attr_pending.attr, |
1784 | &chan_attr_latency.attr, |
1785 | &chan_attr_interrupts.attr, |
1786 | &chan_attr_events.attr, |
1787 | &chan_attr_intr_in_full.attr, |
1788 | &chan_attr_intr_out_empty.attr, |
1789 | &chan_attr_out_full_first.attr, |
1790 | &chan_attr_out_full_total.attr, |
1791 | &chan_attr_monitor_id.attr, |
1792 | &chan_attr_subchannel_id.attr, |
1793 | NULL |
1794 | }; |
1795 | |
1796 | /* |
1797 | * Channel-level attribute_group callback function. Returns the permission for |
1798 | * each attribute, and returns 0 if an attribute is not visible. |
1799 | */ |
1800 | static umode_t vmbus_chan_attr_is_visible(struct kobject *kobj, |
1801 | struct attribute *attr, int idx) |
1802 | { |
1803 | const struct vmbus_channel *channel = |
1804 | container_of(kobj, struct vmbus_channel, kobj); |
1805 | |
1806 | /* Hide the monitor attributes if the monitor mechanism is not used. */ |
1807 | if (!channel->offermsg.monitor_allocated && |
1808 | (attr == &chan_attr_pending.attr || |
1809 | attr == &chan_attr_latency.attr || |
1810 | attr == &chan_attr_monitor_id.attr)) |
1811 | return 0; |
1812 | |
1813 | return attr->mode; |
1814 | } |
1815 | |
1816 | static struct attribute_group vmbus_chan_group = { |
1817 | .attrs = vmbus_chan_attrs, |
1818 | .is_visible = vmbus_chan_attr_is_visible |
1819 | }; |
1820 | |
1821 | static struct kobj_type vmbus_chan_ktype = { |
1822 | .sysfs_ops = &vmbus_chan_sysfs_ops, |
1823 | .release = vmbus_chan_release, |
1824 | }; |
1825 | |
1826 | /* |
1827 | * vmbus_add_channel_kobj - setup a sub-directory under device/channels |
1828 | */ |
1829 | int vmbus_add_channel_kobj(struct hv_device *dev, struct vmbus_channel *channel) |
1830 | { |
1831 | const struct device *device = &dev->device; |
1832 | struct kobject *kobj = &channel->kobj; |
1833 | u32 relid = channel->offermsg.child_relid; |
1834 | int ret; |
1835 | |
1836 | kobj->kset = dev->channels_kset; |
	ret = kobject_init_and_add(kobj, &vmbus_chan_ktype, NULL,
				   "%u", relid);
1839 | if (ret) { |
1840 | kobject_put(kobj); |
1841 | return ret; |
1842 | } |
1843 | |
	ret = sysfs_create_group(kobj, &vmbus_chan_group);
1845 | |
1846 | if (ret) { |
1847 | /* |
1848 | * The calling functions' error handling paths will cleanup the |
1849 | * empty channel directory. |
1850 | */ |
1851 | kobject_put(kobj); |
		dev_err(device, "Unable to set up channel sysfs files\n");
1853 | return ret; |
1854 | } |
1855 | |
	kobject_uevent(kobj, KOBJ_ADD);
1857 | |
1858 | return 0; |
1859 | } |
1860 | |
1861 | /* |
1862 | * vmbus_remove_channel_attr_group - remove the channel's attribute group |
1863 | */ |
1864 | void vmbus_remove_channel_attr_group(struct vmbus_channel *channel) |
1865 | { |
	sysfs_remove_group(&channel->kobj, &vmbus_chan_group);
1867 | } |
1868 | |
1869 | /* |
1870 | * vmbus_device_create - Creates and registers a new child device |
1871 | * on the vmbus. |
1872 | */ |
1873 | struct hv_device *vmbus_device_create(const guid_t *type, |
1874 | const guid_t *instance, |
1875 | struct vmbus_channel *channel) |
1876 | { |
1877 | struct hv_device *child_device_obj; |
1878 | |
	child_device_obj = kzalloc(sizeof(struct hv_device), GFP_KERNEL);
	if (!child_device_obj) {
		pr_err("Unable to allocate device object for child device\n");
1882 | return NULL; |
1883 | } |
1884 | |
1885 | child_device_obj->channel = channel; |
	guid_copy(&child_device_obj->dev_type, type);
	guid_copy(&child_device_obj->dev_instance, instance);
1888 | child_device_obj->vendor_id = PCI_VENDOR_ID_MICROSOFT; |
1889 | |
1890 | return child_device_obj; |
1891 | } |
1892 | |
1893 | /* |
1894 | * vmbus_device_register - Register the child device |
1895 | */ |
1896 | int vmbus_device_register(struct hv_device *child_device_obj) |
1897 | { |
1898 | struct kobject *kobj = &child_device_obj->device.kobj; |
1899 | int ret; |
1900 | |
	dev_set_name(&child_device_obj->device, "%pUl",
1902 | &child_device_obj->channel->offermsg.offer.if_instance); |
1903 | |
1904 | child_device_obj->device.bus = &hv_bus; |
1905 | child_device_obj->device.parent = hv_dev; |
1906 | child_device_obj->device.release = vmbus_device_release; |
1907 | |
1908 | child_device_obj->device.dma_parms = &child_device_obj->dma_parms; |
1909 | child_device_obj->device.dma_mask = &child_device_obj->dma_mask; |
	dma_set_mask(&child_device_obj->device, DMA_BIT_MASK(64));
1911 | |
1912 | /* |
1913 | * Register with the LDM. This will kick off the driver/device |
1914 | * binding...which will eventually call vmbus_match() and vmbus_probe() |
1915 | */ |
	ret = device_register(&child_device_obj->device);
	if (ret) {
		pr_err("Unable to register child device\n");
		put_device(&child_device_obj->device);
1920 | return ret; |
1921 | } |
1922 | |
	child_device_obj->channels_kset = kset_create_and_add("channels",
							      NULL, kobj);
1925 | if (!child_device_obj->channels_kset) { |
1926 | ret = -ENOMEM; |
1927 | goto err_dev_unregister; |
1928 | } |
1929 | |
	ret = vmbus_add_channel_kobj(child_device_obj,
				     child_device_obj->channel);
	if (ret) {
		pr_err("Unable to register primary channel\n");
		goto err_kset_unregister;
	}
	hv_debug_add_dev_dir(child_device_obj);
1937 | |
1938 | return 0; |
1939 | |
1940 | err_kset_unregister: |
	kset_unregister(child_device_obj->channels_kset);
1942 | |
1943 | err_dev_unregister: |
	device_unregister(&child_device_obj->device);
1945 | return ret; |
1946 | } |
1947 | |
1948 | /* |
1949 | * vmbus_device_unregister - Remove the specified child device |
1950 | * from the vmbus. |
1951 | */ |
1952 | void vmbus_device_unregister(struct hv_device *device_obj) |
1953 | { |
	pr_debug("child device %s unregistered\n",
1955 | dev_name(&device_obj->device)); |
1956 | |
	kset_unregister(device_obj->channels_kset);
1958 | |
1959 | /* |
1960 | * Kick off the process of unregistering the device. |
1961 | * This will call vmbus_remove() and eventually vmbus_device_release() |
1962 | */ |
	device_unregister(&device_obj->device);
1964 | } |
1965 | |
1966 | #ifdef CONFIG_ACPI |
1967 | /* |
1968 | * VMBUS is an acpi enumerated device. Get the information we |
1969 | * need from DSDT. |
1970 | */ |
1971 | static acpi_status vmbus_walk_resources(struct acpi_resource *res, void *ctx) |
1972 | { |
1973 | resource_size_t start = 0; |
1974 | resource_size_t end = 0; |
1975 | struct resource *new_res; |
1976 | struct resource **old_res = &hyperv_mmio; |
1977 | struct resource **prev_res = NULL; |
1978 | struct resource r; |
1979 | |
1980 | switch (res->type) { |
1981 | |
1982 | /* |
1983 | * "Address" descriptors are for bus windows. Ignore |
1984 | * "memory" descriptors, which are for registers on |
1985 | * devices. |
1986 | */ |
1987 | case ACPI_RESOURCE_TYPE_ADDRESS32: |
1988 | start = res->data.address32.address.minimum; |
1989 | end = res->data.address32.address.maximum; |
1990 | break; |
1991 | |
1992 | case ACPI_RESOURCE_TYPE_ADDRESS64: |
1993 | start = res->data.address64.address.minimum; |
1994 | end = res->data.address64.address.maximum; |
1995 | break; |
1996 | |
1997 | /* |
1998 | * The IRQ information is needed only on ARM64, which Hyper-V |
1999 | * sets up in the extended format. IRQ information is present |
2000 | * on x86/x64 in the non-extended format but it is not used by |
2001 | * Linux. So don't bother checking for the non-extended format. |
2002 | */ |
2003 | case ACPI_RESOURCE_TYPE_EXTENDED_IRQ: |
		if (!acpi_dev_resource_interrupt(res, 0, &r)) {
			pr_err("Unable to parse Hyper-V ACPI interrupt\n");
2006 | return AE_ERROR; |
2007 | } |
2008 | /* ARM64 INTID for VMbus */ |
2009 | vmbus_interrupt = res->data.extended_irq.interrupts[0]; |
2010 | /* Linux IRQ number */ |
2011 | vmbus_irq = r.start; |
2012 | return AE_OK; |
2013 | |
2014 | default: |
2015 | /* Unused resource type */ |
2016 | return AE_OK; |
2017 | |
2018 | } |
2019 | /* |
2020 | * Ignore ranges that are below 1MB, as they're not |
2021 | * necessary or useful here. |
2022 | */ |
2023 | if (end < 0x100000) |
2024 | return AE_OK; |
2025 | |
	new_res = kzalloc(sizeof(*new_res), GFP_ATOMIC);
2027 | if (!new_res) |
2028 | return AE_NO_MEMORY; |
2029 | |
2030 | /* If this range overlaps the virtual TPM, truncate it. */ |
2031 | if (end > VTPM_BASE_ADDRESS && start < VTPM_BASE_ADDRESS) |
2032 | end = VTPM_BASE_ADDRESS; |
2033 | |
	new_res->name = "hyperv mmio";
2035 | new_res->flags = IORESOURCE_MEM; |
2036 | new_res->start = start; |
2037 | new_res->end = end; |
2038 | |
2039 | /* |
2040 | * If two ranges are adjacent, merge them. |
2041 | */ |
2042 | do { |
2043 | if (!*old_res) { |
2044 | *old_res = new_res; |
2045 | break; |
2046 | } |
2047 | |
2048 | if (((*old_res)->end + 1) == new_res->start) { |
2049 | (*old_res)->end = new_res->end; |
			kfree(new_res);
2051 | break; |
2052 | } |
2053 | |
2054 | if ((*old_res)->start == new_res->end + 1) { |
2055 | (*old_res)->start = new_res->start; |
			kfree(new_res);
2057 | break; |
2058 | } |
2059 | |
2060 | if ((*old_res)->start > new_res->end) { |
2061 | new_res->sibling = *old_res; |
2062 | if (prev_res) |
2063 | (*prev_res)->sibling = new_res; |
2064 | *old_res = new_res; |
2065 | break; |
2066 | } |
2067 | |
2068 | prev_res = old_res; |
2069 | old_res = &(*old_res)->sibling; |
2070 | |
2071 | } while (1); |
2072 | |
2073 | return AE_OK; |
2074 | } |
2075 | #endif |
2076 | |
2077 | static void vmbus_mmio_remove(void) |
2078 | { |
2079 | struct resource *cur_res; |
2080 | struct resource *next_res; |
2081 | |
2082 | if (hyperv_mmio) { |
2083 | if (fb_mmio) { |
2084 | __release_region(hyperv_mmio, fb_mmio->start, |
					 resource_size(fb_mmio));
2086 | fb_mmio = NULL; |
2087 | } |
2088 | |
2089 | for (cur_res = hyperv_mmio; cur_res; cur_res = next_res) { |
2090 | next_res = cur_res->sibling; |
			kfree(cur_res);
2092 | } |
2093 | } |
2094 | } |
2095 | |
2096 | static void __maybe_unused vmbus_reserve_fb(void) |
2097 | { |
2098 | resource_size_t start = 0, size; |
2099 | struct pci_dev *pdev; |
2100 | |
2101 | if (efi_enabled(EFI_BOOT)) { |
2102 | /* Gen2 VM: get FB base from EFI framebuffer */ |
2103 | if (IS_ENABLED(CONFIG_SYSFB)) { |
2104 | start = screen_info.lfb_base; |
2105 | size = max_t(__u32, screen_info.lfb_size, 0x800000); |
2106 | } |
2107 | } else { |
2108 | /* Gen1 VM: get FB base from PCI */ |
2109 | pdev = pci_get_device(PCI_VENDOR_ID_MICROSOFT, |
2110 | PCI_DEVICE_ID_HYPERV_VIDEO, NULL); |
2111 | if (!pdev) |
2112 | return; |
2113 | |
2114 | if (pdev->resource[0].flags & IORESOURCE_MEM) { |
2115 | start = pci_resource_start(pdev, 0); |
2116 | size = pci_resource_len(pdev, 0); |
2117 | } |
2118 | |
2119 | /* |
2120 | * Release the PCI device so hyperv_drm or hyperv_fb driver can |
2121 | * grab it later. |
2122 | */ |
		pci_dev_put(pdev);
2124 | } |
2125 | |
2126 | if (!start) |
2127 | return; |
2128 | |
2129 | /* |
2130 | * Make a claim for the frame buffer in the resource tree under the |
2131 | * first node, which will be the one below 4GB. The length seems to |
2132 | * be underreported, particularly in a Generation 1 VM. So start out |
2133 | * reserving a larger area and make it smaller until it succeeds. |
2134 | */ |
2135 | for (; !fb_mmio && (size >= 0x100000); size >>= 1) |
		fb_mmio = __request_region(hyperv_mmio, start, size, fb_mmio_name, 0);
2137 | } |
2138 | |
2139 | /** |
2140 | * vmbus_allocate_mmio() - Pick a memory-mapped I/O range. |
 * @new:		If successful, supplies a pointer to the
 *			allocated MMIO space.
2143 | * @device_obj: Identifies the caller |
2144 | * @min: Minimum guest physical address of the |
2145 | * allocation |
2146 | * @max: Maximum guest physical address |
2147 | * @size: Size of the range to be allocated |
2148 | * @align: Alignment of the range to be allocated |
2149 | * @fb_overlap_ok: Whether this allocation can be allowed |
2150 | * to overlap the video frame buffer. |
2151 | * |
2152 | * This function walks the resources granted to VMBus by the |
2153 | * _CRS object in the ACPI namespace underneath the parent |
2154 | * "bridge" whether that's a root PCI bus in the Generation 1 |
2155 | * case or a Module Device in the Generation 2 case. It then |
2156 | * attempts to allocate from the global MMIO pool in a way that |
2157 | * matches the constraints supplied in these parameters and by |
2158 | * that _CRS. |
2159 | * |
2160 | * Return: 0 on success, -errno on failure |
2161 | */ |
2162 | int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, |
2163 | resource_size_t min, resource_size_t max, |
2164 | resource_size_t size, resource_size_t align, |
2165 | bool fb_overlap_ok) |
2166 | { |
2167 | struct resource *iter, *shadow; |
2168 | resource_size_t range_min, range_max, start, end; |
	const char *dev_n = dev_name(&device_obj->device);
2170 | int retval; |
2171 | |
2172 | retval = -ENXIO; |
2173 | mutex_lock(&hyperv_mmio_lock); |
2174 | |
2175 | /* |
2176 | * If overlaps with frame buffers are allowed, then first attempt to |
2177 | * make the allocation from within the reserved region. Because it |
2178 | * is already reserved, no shadow allocation is necessary. |
2179 | */ |
2180 | if (fb_overlap_ok && fb_mmio && !(min > fb_mmio->end) && |
2181 | !(max < fb_mmio->start)) { |
2182 | |
2183 | range_min = fb_mmio->start; |
2184 | range_max = fb_mmio->end; |
2185 | start = (range_min + align - 1) & ~(align - 1); |
2186 | for (; start + size - 1 <= range_max; start += align) { |
2187 | *new = request_mem_region_exclusive(start, size, dev_n); |
2188 | if (*new) { |
2189 | retval = 0; |
2190 | goto exit; |
2191 | } |
2192 | } |
2193 | } |
2194 | |
2195 | for (iter = hyperv_mmio; iter; iter = iter->sibling) { |
2196 | if ((iter->start >= max) || (iter->end <= min)) |
2197 | continue; |
2198 | |
2199 | range_min = iter->start; |
2200 | range_max = iter->end; |
2201 | start = (range_min + align - 1) & ~(align - 1); |
2202 | for (; start + size - 1 <= range_max; start += align) { |
2203 | end = start + size - 1; |
2204 | |
2205 | /* Skip the whole fb_mmio region if not fb_overlap_ok */ |
2206 | if (!fb_overlap_ok && fb_mmio && |
2207 | (((start >= fb_mmio->start) && (start <= fb_mmio->end)) || |
2208 | ((end >= fb_mmio->start) && (end <= fb_mmio->end)))) |
2209 | continue; |
2210 | |
			shadow = __request_region(iter, start, size, NULL,
2212 | IORESOURCE_BUSY); |
2213 | if (!shadow) |
2214 | continue; |
2215 | |
2216 | *new = request_mem_region_exclusive(start, size, dev_n); |
2217 | if (*new) { |
2218 | shadow->name = (char *)*new; |
2219 | retval = 0; |
2220 | goto exit; |
2221 | } |
2222 | |
2223 | __release_region(iter, start, size); |
2224 | } |
2225 | } |
2226 | |
2227 | exit: |
	mutex_unlock(&hyperv_mmio_lock);
2229 | return retval; |
2230 | } |
2231 | EXPORT_SYMBOL_GPL(vmbus_allocate_mmio); |
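
/*
 * Call sketch (values are illustrative, not from a real driver): a caller
 * wanting a 1 MiB, 1 MiB-aligned window anywhere in guest physical address
 * space, with no frame-buffer overlap, could do:
 *
 *	struct resource *res;
 *	int ret = vmbus_allocate_mmio(&res, hdev, 0, -1, 0x100000,
 *				      0x100000, false);
 *	if (ret)
 *		return ret;
 *	...
 *	vmbus_free_mmio(res->start, resource_size(res));
 */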
2232 | |
2233 | /** |
2234 | * vmbus_free_mmio() - Free a memory-mapped I/O range. |
2235 | * @start: Base address of region to release. |
 * @size:	Size of the range to be released.
 *
 * This function releases anything requested by
 * vmbus_allocate_mmio().
2240 | */ |
2241 | void vmbus_free_mmio(resource_size_t start, resource_size_t size) |
2242 | { |
2243 | struct resource *iter; |
2244 | |
2245 | mutex_lock(&hyperv_mmio_lock); |
2246 | for (iter = hyperv_mmio; iter; iter = iter->sibling) { |
2247 | if ((iter->start >= start + size) || (iter->end <= start)) |
2248 | continue; |
2249 | |
2250 | __release_region(iter, start, size); |
2251 | } |
2252 | release_mem_region(start, size); |
	mutex_unlock(&hyperv_mmio_lock);
2254 | |
2255 | } |
2256 | EXPORT_SYMBOL_GPL(vmbus_free_mmio); |
2257 | |
2258 | #ifdef CONFIG_ACPI |
2259 | static int vmbus_acpi_add(struct platform_device *pdev) |
2260 | { |
2261 | acpi_status result; |
2262 | int ret_val = -ENODEV; |
2263 | struct acpi_device *ancestor; |
2264 | struct acpi_device *device = ACPI_COMPANION(&pdev->dev); |
2265 | |
2266 | hv_dev = &device->dev; |
2267 | |
2268 | /* |
2269 | * Older versions of Hyper-V for ARM64 fail to include the _CCA |
2270 | * method on the top level VMbus device in the DSDT. But devices |
2271 | * are hardware coherent in all current Hyper-V use cases, so fix |
2272 | * up the ACPI device to behave as if _CCA is present and indicates |
2273 | * hardware coherence. |
2274 | */ |
2275 | ACPI_COMPANION_SET(&device->dev, device); |
2276 | if (IS_ENABLED(CONFIG_ACPI_CCA_REQUIRED) && |
	    device_get_dma_attr(&device->dev) == DEV_DMA_NOT_SUPPORTED) {
		pr_info("No ACPI _CCA found; assuming coherent device I/O\n");
2279 | device->flags.cca_seen = true; |
2280 | device->flags.coherent_dma = true; |
2281 | } |
2282 | |
	result = acpi_walk_resources(device->handle, METHOD_NAME__CRS,
				     vmbus_walk_resources, NULL);
2285 | |
2286 | if (ACPI_FAILURE(result)) |
2287 | goto acpi_walk_err; |
2288 | /* |
2289 | * Some ancestor of the vmbus acpi device (Gen1 or Gen2 |
2290 | * firmware) is the VMOD that has the mmio ranges. Get that. |
2291 | */ |
	for (ancestor = acpi_dev_parent(device);
	     ancestor && ancestor->handle != ACPI_ROOT_OBJECT;
	     ancestor = acpi_dev_parent(ancestor)) {
		result = acpi_walk_resources(ancestor->handle, METHOD_NAME__CRS,
					     vmbus_walk_resources, NULL);
2297 | |
2298 | if (ACPI_FAILURE(result)) |
2299 | continue; |
2300 | if (hyperv_mmio) { |
2301 | vmbus_reserve_fb(); |
2302 | break; |
2303 | } |
2304 | } |
2305 | ret_val = 0; |
2306 | |
2307 | acpi_walk_err: |
2308 | if (ret_val) |
2309 | vmbus_mmio_remove(); |
2310 | return ret_val; |
2311 | } |
2312 | #else |
2313 | static int vmbus_acpi_add(struct platform_device *pdev) |
2314 | { |
2315 | return 0; |
2316 | } |
2317 | #endif |
2318 | |
2319 | static int vmbus_device_add(struct platform_device *pdev) |
2320 | { |
2321 | struct resource **cur_res = &hyperv_mmio; |
2322 | struct of_range range; |
2323 | struct of_range_parser parser; |
2324 | struct device_node *np = pdev->dev.of_node; |
2325 | int ret; |
2326 | |
2327 | hv_dev = &pdev->dev; |
2328 | |
	ret = of_range_parser_init(&parser, np);
2330 | if (ret) |
2331 | return ret; |
2332 | |
2333 | for_each_of_range(&parser, &range) { |
2334 | struct resource *res; |
2335 | |
		res = kzalloc(sizeof(*res), GFP_KERNEL);
2337 | if (!res) { |
2338 | vmbus_mmio_remove(); |
2339 | return -ENOMEM; |
2340 | } |
2341 | |
		res->name = "hyperv mmio";
2343 | res->flags = range.flags; |
2344 | res->start = range.cpu_addr; |
		res->end = range.cpu_addr + range.size - 1;
2346 | |
2347 | *cur_res = res; |
2348 | cur_res = &res->sibling; |
2349 | } |
2350 | |
2351 | return ret; |
2352 | } |
2353 | |
2354 | static int vmbus_platform_driver_probe(struct platform_device *pdev) |
2355 | { |
2356 | if (acpi_disabled) |
2357 | return vmbus_device_add(pdev); |
2358 | else |
2359 | return vmbus_acpi_add(pdev); |
2360 | } |
2361 | |
2362 | static int vmbus_platform_driver_remove(struct platform_device *pdev) |
2363 | { |
2364 | vmbus_mmio_remove(); |
2365 | return 0; |
2366 | } |
2367 | |
2368 | #ifdef CONFIG_PM_SLEEP |
2369 | static int vmbus_bus_suspend(struct device *dev) |
2370 | { |
2371 | struct hv_per_cpu_context *hv_cpu = per_cpu_ptr( |
2372 | hv_context.cpu_context, VMBUS_CONNECT_CPU); |
2373 | struct vmbus_channel *channel, *sc; |
2374 | |
	tasklet_disable(&hv_cpu->msg_dpc);
	vmbus_connection.ignore_any_offer_msg = true;
	/* The tasklet_enable() takes care of providing a memory barrier */
	tasklet_enable(&hv_cpu->msg_dpc);

	/* Drain all the workqueues as we are in suspend */
	drain_workqueue(vmbus_connection.rescind_work_queue);
	drain_workqueue(vmbus_connection.work_queue);
	drain_workqueue(vmbus_connection.handle_primary_chan_wq);
	drain_workqueue(vmbus_connection.handle_sub_chan_wq);
2385 | |
2386 | mutex_lock(&vmbus_connection.channel_mutex); |
2387 | list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { |
		if (!is_hvsock_channel(channel))
2389 | continue; |
2390 | |
2391 | vmbus_force_channel_rescinded(channel); |
2392 | } |
	mutex_unlock(&vmbus_connection.channel_mutex);
2394 | |
2395 | /* |
2396 | * Wait until all the sub-channels and hv_sock channels have been |
2397 | * cleaned up. Sub-channels should be destroyed upon suspend, otherwise |
2398 | * they would conflict with the new sub-channels that will be created |
2399 | * in the resume path. hv_sock channels should also be destroyed, but |
	 * a hv_sock channel of an established hv_sock connection cannot
	 * really be destroyed since it may still be referenced by the userspace
2402 | * application, so we just force the hv_sock channel to be rescinded |
2403 | * by vmbus_force_channel_rescinded(), and the userspace application |
2404 | * will thoroughly destroy the channel after hibernation. |
2405 | * |
2406 | * Note: the counter nr_chan_close_on_suspend may never go above 0 if |
2407 | * the VM has no sub-channel and hv_sock channel, e.g. a 1-vCPU VM. |
2408 | */ |
	if (atomic_read(&vmbus_connection.nr_chan_close_on_suspend) > 0)
2410 | wait_for_completion(&vmbus_connection.ready_for_suspend_event); |
2411 | |
	if (atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) != 0) {
		pr_err("Cannot suspend due to a previously failed resume\n");
2414 | return -EBUSY; |
2415 | } |
2416 | |
2417 | mutex_lock(&vmbus_connection.channel_mutex); |
2418 | |
2419 | list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) { |
2420 | /* |
2421 | * Remove the channel from the array of channels and invalidate |
2422 | * the channel's relid. Upon resume, vmbus_onoffer() will fix |
2423 | * up the relid (and other fields, if necessary) and add the |
2424 | * channel back to the array. |
2425 | */ |
2426 | vmbus_channel_unmap_relid(channel); |
2427 | channel->offermsg.child_relid = INVALID_RELID; |
2428 | |
		if (is_hvsock_channel(channel)) {
			if (!channel->rescind) {
				pr_err("hv_sock channel not rescinded!\n");
2432 | WARN_ON_ONCE(1); |
2433 | } |
2434 | continue; |
2435 | } |
2436 | |
2437 | list_for_each_entry(sc, &channel->sc_list, sc_list) { |
			pr_err("Sub-channel not deleted!\n");
2439 | WARN_ON_ONCE(1); |
2440 | } |
2441 | |
		atomic_inc(&vmbus_connection.nr_chan_fixup_on_resume);
2443 | } |
2444 | |
	mutex_unlock(&vmbus_connection.channel_mutex);

	vmbus_initiate_unload(false);
2448 | |
2449 | /* Reset the event for the next resume. */ |
	reinit_completion(&vmbus_connection.ready_for_resume_event);
2451 | |
2452 | return 0; |
2453 | } |
2454 | |
2455 | static int vmbus_bus_resume(struct device *dev) |
2456 | { |
2457 | struct vmbus_channel_msginfo *msginfo; |
2458 | size_t msgsize; |
2459 | int ret; |
2460 | |
2461 | vmbus_connection.ignore_any_offer_msg = false; |
2462 | |
2463 | /* |
2464 | * We only use the 'vmbus_proto_version', which was in use before |
2465 | * hibernation, to re-negotiate with the host. |
2466 | */ |
2467 | if (!vmbus_proto_version) { |
		pr_err("Invalid proto version = 0x%x\n", vmbus_proto_version);
2469 | return -EINVAL; |
2470 | } |
2471 | |
2472 | msgsize = sizeof(*msginfo) + |
2473 | sizeof(struct vmbus_channel_initiate_contact); |
2474 | |
	msginfo = kzalloc(msgsize, GFP_KERNEL);
2476 | |
2477 | if (msginfo == NULL) |
2478 | return -ENOMEM; |
2479 | |
	ret = vmbus_negotiate_version(msginfo, vmbus_proto_version);

	kfree(msginfo);
2483 | |
2484 | if (ret != 0) |
2485 | return ret; |
2486 | |
2487 | WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) == 0); |
2488 | |
2489 | vmbus_request_offers(); |
2490 | |
	if (wait_for_completion_timeout(
		&vmbus_connection.ready_for_resume_event, 10 * HZ) == 0)
		pr_err("Some vmbus device is missing after suspending?\n");
2494 | |
2495 | /* Reset the event for the next suspend. */ |
	reinit_completion(&vmbus_connection.ready_for_suspend_event);
2497 | |
2498 | return 0; |
2499 | } |
2500 | #else |
2501 | #define vmbus_bus_suspend NULL |
2502 | #define vmbus_bus_resume NULL |
2503 | #endif /* CONFIG_PM_SLEEP */ |
2504 | |
2505 | static const __maybe_unused struct of_device_id vmbus_of_match[] = { |
2506 | { |
		.compatible = "microsoft,vmbus",
2508 | }, |
2509 | { |
2510 | /* sentinel */ |
2511 | }, |
2512 | }; |
2513 | MODULE_DEVICE_TABLE(of, vmbus_of_match); |
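
/*
 * Illustrative device-tree node matched by the table above (the unit
 * address and "ranges" values are hypothetical); the "ranges" property
 * provides the MMIO windows that vmbus_device_add() parses:
 *
 *	vmbus@ff0000000 {
 *		compatible = "microsoft,vmbus";
 *		#address-cells = <2>;
 *		#size-cells = <1>;
 *		ranges = <0x0f 0xf0000000 0x0f 0xf0000000 0x10000000>;
 *	};
 */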
2514 | |
2515 | static const __maybe_unused struct acpi_device_id vmbus_acpi_device_ids[] = { |
2516 | {"VMBUS" , 0}, |
2517 | {"VMBus" , 0}, |
2518 | {"" , 0}, |
2519 | }; |
2520 | MODULE_DEVICE_TABLE(acpi, vmbus_acpi_device_ids); |
2521 | |
2522 | /* |
 * Note: we must use the "no_irq" ops, otherwise hibernation cannot work with
2524 | * PCI device assignment, because "pci_dev_pm_ops" uses the "noirq" ops: in |
2525 | * the resume path, the pci "noirq" restore op runs before "non-noirq" op (see |
2526 | * resume_target_kernel() -> dpm_resume_start(), and hibernation_restore() -> |
2527 | * dpm_resume_end()). This means vmbus_bus_resume() and the pci-hyperv's |
2528 | * resume callback must also run via the "noirq" ops. |
2529 | * |
2530 | * Set suspend_noirq/resume_noirq to NULL for Suspend-to-Idle: see the comment |
2531 | * earlier in this file before vmbus_pm. |
2532 | */ |
2533 | |
2534 | static const struct dev_pm_ops vmbus_bus_pm = { |
2535 | .suspend_noirq = NULL, |
2536 | .resume_noirq = NULL, |
2537 | .freeze_noirq = vmbus_bus_suspend, |
2538 | .thaw_noirq = vmbus_bus_resume, |
2539 | .poweroff_noirq = vmbus_bus_suspend, |
2540 | .restore_noirq = vmbus_bus_resume |
2541 | }; |
2542 | |
2543 | static struct platform_driver vmbus_platform_driver = { |
2544 | .probe = vmbus_platform_driver_probe, |
2545 | .remove = vmbus_platform_driver_remove, |
2546 | .driver = { |
		.name = "vmbus",
2548 | .acpi_match_table = ACPI_PTR(vmbus_acpi_device_ids), |
2549 | .of_match_table = of_match_ptr(vmbus_of_match), |
2550 | .pm = &vmbus_bus_pm, |
2551 | .probe_type = PROBE_FORCE_SYNCHRONOUS, |
2552 | } |
2553 | }; |
2554 | |
2555 | static void hv_kexec_handler(void) |
2556 | { |
2557 | hv_stimer_global_cleanup(); |
	vmbus_initiate_unload(false);
2559 | /* Make sure conn_state is set as hv_synic_cleanup checks for it */ |
2560 | mb(); |
	cpuhp_remove_state(hyperv_cpuhp_online);
2562 | }; |
2563 | |
2564 | static void hv_crash_handler(struct pt_regs *regs) |
2565 | { |
2566 | int cpu; |
2567 | |
	vmbus_initiate_unload(true);
2569 | /* |
2570 | * In crash handler we can't schedule synic cleanup for all CPUs, |
2571 | * doing the cleanup for current CPU only. This should be sufficient |
2572 | * for kdump. |
2573 | */ |
2574 | cpu = smp_processor_id(); |
2575 | hv_stimer_cleanup(cpu); |
2576 | hv_synic_disable_regs(cpu); |
2577 | }; |
2578 | |
2579 | static int hv_synic_suspend(void) |
2580 | { |
2581 | /* |
2582 | * When we reach here, all the non-boot CPUs have been offlined. |
2583 | * If we're in a legacy configuration where stimer Direct Mode is |
2584 | * not enabled, the stimers on the non-boot CPUs have been unbound |
2585 | * in hv_synic_cleanup() -> hv_stimer_legacy_cleanup() -> |
2586 | * hv_stimer_cleanup() -> clockevents_unbind_device(). |
2587 | * |
2588 | * hv_synic_suspend() only runs on CPU0 with interrupts disabled. |
2589 | * Here we do not call hv_stimer_legacy_cleanup() on CPU0 because: |
2590 | * 1) it's unnecessary as interrupts remain disabled between |
2591 | * syscore_suspend() and syscore_resume(): see create_image() and |
2592 | * resume_target_kernel() |
2593 | * 2) the stimer on CPU0 is automatically disabled later by |
2594 | * syscore_suspend() -> timekeeping_suspend() -> tick_suspend() -> ... |
2595 | * -> clockevents_shutdown() -> ... -> hv_ce_shutdown() |
2596 | * 3) a warning would be triggered if we call |
2597 | * clockevents_unbind_device(), which may sleep, in an |
2598 | * interrupts-disabled context. |
2599 | */ |
2600 | |
	hv_synic_disable_regs(0);
2602 | |
2603 | return 0; |
2604 | } |
2605 | |
2606 | static void hv_synic_resume(void) |
2607 | { |
	hv_synic_enable_regs(0);
2609 | |
2610 | /* |
2611 | * Note: we don't need to call hv_stimer_init(0), because the timer |
2612 | * on CPU0 is not unbound in hv_synic_suspend(), and the timer is |
2613 | * automatically re-enabled in timekeeping_resume(). |
2614 | */ |
2615 | } |
2616 | |
2617 | /* The callbacks run only on CPU0, with irqs_disabled. */ |
2618 | static struct syscore_ops hv_synic_syscore_ops = { |
2619 | .suspend = hv_synic_suspend, |
2620 | .resume = hv_synic_resume, |
2621 | }; |
2622 | |
2623 | static int __init hv_acpi_init(void) |
2624 | { |
2625 | int ret; |
2626 | |
2627 | if (!hv_is_hyperv_initialized()) |
2628 | return -ENODEV; |
2629 | |
2630 | if (hv_root_partition && !hv_nested) |
2631 | return 0; |
2632 | |
2633 | /* |
2634 | * Get ACPI resources first. |
2635 | */ |
2636 | ret = platform_driver_register(&vmbus_platform_driver); |
2637 | if (ret) |
2638 | return ret; |
2639 | |
2640 | if (!hv_dev) { |
2641 | ret = -ENODEV; |
2642 | goto cleanup; |
2643 | } |
2644 | |
2645 | /* |
2646 | * If we're on an architecture with a hardcoded hypervisor |
2647 | * vector (i.e. x86/x64), override the VMbus interrupt found |
2648 | * in the ACPI tables. Ensure vmbus_irq is not set since the |
2649 | * normal Linux IRQ mechanism is not used in this case. |
2650 | */ |
2651 | #ifdef HYPERVISOR_CALLBACK_VECTOR |
2652 | vmbus_interrupt = HYPERVISOR_CALLBACK_VECTOR; |
2653 | vmbus_irq = -1; |
2654 | #endif |
2655 | |
2656 | hv_debug_init(); |
2657 | |
2658 | ret = vmbus_bus_init(); |
2659 | if (ret) |
2660 | goto cleanup; |
2661 | |
	hv_setup_kexec_handler(hv_kexec_handler);
	hv_setup_crash_handler(hv_crash_handler);
2664 | |
	register_syscore_ops(&hv_synic_syscore_ops);
2666 | |
2667 | return 0; |
2668 | |
2669 | cleanup: |
2670 | platform_driver_unregister(&vmbus_platform_driver); |
2671 | hv_dev = NULL; |
2672 | return ret; |
2673 | } |
2674 | |
2675 | static void __exit vmbus_exit(void) |
2676 | { |
2677 | int cpu; |
2678 | |
	unregister_syscore_ops(&hv_synic_syscore_ops);
2680 | |
2681 | hv_remove_kexec_handler(); |
2682 | hv_remove_crash_handler(); |
2683 | vmbus_connection.conn_state = DISCONNECTED; |
2684 | hv_stimer_global_cleanup(); |
2685 | vmbus_disconnect(); |
2686 | if (vmbus_irq == -1) { |
2687 | hv_remove_vmbus_handler(); |
2688 | } else { |
2689 | free_percpu_irq(vmbus_irq, vmbus_evt); |
		free_percpu(vmbus_evt);
2691 | } |
2692 | for_each_online_cpu(cpu) { |
2693 | struct hv_per_cpu_context *hv_cpu |
2694 | = per_cpu_ptr(hv_context.cpu_context, cpu); |
2695 | |
		tasklet_kill(&hv_cpu->msg_dpc);
2697 | } |
2698 | hv_debug_rm_all_dir(); |
2699 | |
2700 | vmbus_free_channels(); |
	kfree(vmbus_connection.channels);
2702 | |
2703 | /* |
2704 | * The vmbus panic notifier is always registered, hence we should |
2705 | * also unconditionally unregister it here as well. |
2706 | */ |
	atomic_notifier_chain_unregister(&panic_notifier_list,
					 &hyperv_panic_vmbus_unload_block);
2709 | |
	bus_unregister(&hv_bus);
2711 | |
	cpuhp_remove_state(hyperv_cpuhp_online);
2713 | hv_synic_free(); |
2714 | platform_driver_unregister(&vmbus_platform_driver); |
2715 | } |
2716 | |
2717 | |
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Microsoft Hyper-V VMBus Driver");
2720 | |
2721 | subsys_initcall(hv_acpi_init); |
2722 | module_exit(vmbus_exit); |
2723 | |