1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * |
4 | * Copyright (c) 2009, Microsoft Corporation. |
5 | * |
6 | * Authors: |
7 | * Haiyang Zhang <haiyangz@microsoft.com> |
8 | * Hank Janssen <hjanssen@microsoft.com> |
9 | */ |
10 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
11 | |
12 | #include <linux/kernel.h> |
13 | #include <linux/sched.h> |
14 | #include <linux/wait.h> |
15 | #include <linux/delay.h> |
16 | #include <linux/mm.h> |
17 | #include <linux/module.h> |
18 | #include <linux/slab.h> |
19 | #include <linux/vmalloc.h> |
20 | #include <linux/hyperv.h> |
21 | #include <linux/export.h> |
22 | #include <linux/io.h> |
23 | #include <linux/set_memory.h> |
24 | #include <asm/mshyperv.h> |
25 | |
26 | #include "hyperv_vmbus.h" |
27 | |
28 | |
29 | struct vmbus_connection vmbus_connection = { |
30 | .conn_state = DISCONNECTED, |
31 | .unload_event = COMPLETION_INITIALIZER( |
32 | vmbus_connection.unload_event), |
33 | .next_gpadl_handle = ATOMIC_INIT(0xE1E10), |
34 | |
35 | .ready_for_suspend_event = COMPLETION_INITIALIZER( |
36 | vmbus_connection.ready_for_suspend_event), |
37 | .ready_for_resume_event = COMPLETION_INITIALIZER( |
38 | vmbus_connection.ready_for_resume_event), |
39 | }; |
40 | EXPORT_SYMBOL_GPL(vmbus_connection); |
41 | |
42 | /* |
43 | * Negotiated protocol version with the host. |
44 | */ |
45 | __u32 vmbus_proto_version; |
46 | EXPORT_SYMBOL_GPL(vmbus_proto_version); |
47 | |
48 | /* |
49 | * Table of VMBus versions listed from newest to oldest. |
50 | * VERSION_WIN7 and VERSION_WS2008 are no longer supported in |
51 | * Linux guests and are not listed. |
52 | */ |
53 | static __u32 vmbus_versions[] = { |
54 | VERSION_WIN10_V5_3, |
55 | VERSION_WIN10_V5_2, |
56 | VERSION_WIN10_V5_1, |
57 | VERSION_WIN10_V5, |
58 | VERSION_WIN10_V4_1, |
59 | VERSION_WIN10, |
60 | VERSION_WIN8_1, |
61 | VERSION_WIN8 |
62 | }; |
63 | |
64 | /* |
65 | * Maximal VMBus protocol version guests can negotiate. Useful to cap the |
66 | * VMBus version for testing and debugging purpose. |
67 | */ |
68 | static uint max_version = VERSION_WIN10_V5_3; |
69 | |
70 | module_param(max_version, uint, S_IRUGO); |
71 | MODULE_PARM_DESC(max_version, |
72 | "Maximal VMBus protocol version which can be negotiated" ); |
73 | |
74 | int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version) |
75 | { |
76 | int ret = 0; |
77 | struct vmbus_channel_initiate_contact *msg; |
78 | unsigned long flags; |
79 | |
80 | init_completion(x: &msginfo->waitevent); |
81 | |
82 | msg = (struct vmbus_channel_initiate_contact *)msginfo->msg; |
83 | |
84 | memset(msg, 0, sizeof(*msg)); |
85 | msg->header.msgtype = CHANNELMSG_INITIATE_CONTACT; |
86 | msg->vmbus_version_requested = version; |
87 | |
88 | /* |
89 | * VMBus protocol 5.0 (VERSION_WIN10_V5) and higher require that we must |
90 | * use VMBUS_MESSAGE_CONNECTION_ID_4 for the Initiate Contact Message, |
91 | * and for subsequent messages, we must use the Message Connection ID |
92 | * field in the host-returned Version Response Message. And, with |
93 | * VERSION_WIN10_V5 and higher, we don't use msg->interrupt_page, but we |
94 | * tell the host explicitly that we still use VMBUS_MESSAGE_SINT(2) for |
95 | * compatibility. |
96 | * |
97 | * On old hosts, we should always use VMBUS_MESSAGE_CONNECTION_ID (1). |
98 | */ |
99 | if (version >= VERSION_WIN10_V5) { |
100 | msg->msg_sint = VMBUS_MESSAGE_SINT; |
101 | msg->msg_vtl = ms_hyperv.vtl; |
102 | vmbus_connection.msg_conn_id = VMBUS_MESSAGE_CONNECTION_ID_4; |
103 | } else { |
104 | msg->interrupt_page = virt_to_phys(address: vmbus_connection.int_page); |
105 | vmbus_connection.msg_conn_id = VMBUS_MESSAGE_CONNECTION_ID; |
106 | } |
107 | |
108 | /* |
109 | * shared_gpa_boundary is zero in non-SNP VMs, so it's safe to always |
110 | * bitwise OR it |
111 | */ |
112 | msg->monitor_page1 = virt_to_phys(address: vmbus_connection.monitor_pages[0]) | |
113 | ms_hyperv.shared_gpa_boundary; |
114 | msg->monitor_page2 = virt_to_phys(address: vmbus_connection.monitor_pages[1]) | |
115 | ms_hyperv.shared_gpa_boundary; |
116 | |
117 | msg->target_vcpu = hv_cpu_number_to_vp_number(VMBUS_CONNECT_CPU); |
118 | |
119 | /* |
120 | * Add to list before we send the request since we may |
121 | * receive the response before returning from this routine |
122 | */ |
123 | spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); |
124 | list_add_tail(new: &msginfo->msglistentry, |
125 | head: &vmbus_connection.chn_msg_list); |
126 | |
127 | spin_unlock_irqrestore(lock: &vmbus_connection.channelmsg_lock, flags); |
128 | |
129 | ret = vmbus_post_msg(buffer: msg, |
130 | buflen: sizeof(struct vmbus_channel_initiate_contact), |
131 | can_sleep: true); |
132 | |
133 | trace_vmbus_negotiate_version(msg, ret); |
134 | |
135 | if (ret != 0) { |
136 | spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); |
137 | list_del(entry: &msginfo->msglistentry); |
138 | spin_unlock_irqrestore(lock: &vmbus_connection.channelmsg_lock, |
139 | flags); |
140 | return ret; |
141 | } |
142 | |
143 | /* Wait for the connection response */ |
144 | wait_for_completion(&msginfo->waitevent); |
145 | |
146 | spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); |
147 | list_del(entry: &msginfo->msglistentry); |
148 | spin_unlock_irqrestore(lock: &vmbus_connection.channelmsg_lock, flags); |
149 | |
150 | /* Check if successful */ |
151 | if (msginfo->response.version_response.version_supported) { |
152 | vmbus_connection.conn_state = CONNECTED; |
153 | |
154 | if (version >= VERSION_WIN10_V5) |
155 | vmbus_connection.msg_conn_id = |
156 | msginfo->response.version_response.msg_conn_id; |
157 | } else { |
158 | return -ECONNREFUSED; |
159 | } |
160 | |
161 | return ret; |
162 | } |
163 | |
164 | /* |
165 | * vmbus_connect - Sends a connect request on the partition service connection |
166 | */ |
167 | int vmbus_connect(void) |
168 | { |
169 | struct vmbus_channel_msginfo *msginfo = NULL; |
170 | int i, ret = 0; |
171 | __u32 version; |
172 | |
173 | /* Initialize the vmbus connection */ |
174 | vmbus_connection.conn_state = CONNECTING; |
175 | vmbus_connection.work_queue = create_workqueue("hv_vmbus_con" ); |
176 | if (!vmbus_connection.work_queue) { |
177 | ret = -ENOMEM; |
178 | goto cleanup; |
179 | } |
180 | |
181 | vmbus_connection.rescind_work_queue = |
182 | create_workqueue("hv_vmbus_rescind" ); |
183 | if (!vmbus_connection.rescind_work_queue) { |
184 | ret = -ENOMEM; |
185 | goto cleanup; |
186 | } |
187 | vmbus_connection.ignore_any_offer_msg = false; |
188 | |
189 | vmbus_connection.handle_primary_chan_wq = |
190 | create_workqueue("hv_pri_chan" ); |
191 | if (!vmbus_connection.handle_primary_chan_wq) { |
192 | ret = -ENOMEM; |
193 | goto cleanup; |
194 | } |
195 | |
196 | vmbus_connection.handle_sub_chan_wq = |
197 | create_workqueue("hv_sub_chan" ); |
198 | if (!vmbus_connection.handle_sub_chan_wq) { |
199 | ret = -ENOMEM; |
200 | goto cleanup; |
201 | } |
202 | |
203 | INIT_LIST_HEAD(list: &vmbus_connection.chn_msg_list); |
204 | spin_lock_init(&vmbus_connection.channelmsg_lock); |
205 | |
206 | INIT_LIST_HEAD(list: &vmbus_connection.chn_list); |
207 | mutex_init(&vmbus_connection.channel_mutex); |
208 | |
209 | /* |
210 | * Setup the vmbus event connection for channel interrupt |
211 | * abstraction stuff |
212 | */ |
213 | vmbus_connection.int_page = hv_alloc_hyperv_zeroed_page(); |
214 | if (vmbus_connection.int_page == NULL) { |
215 | ret = -ENOMEM; |
216 | goto cleanup; |
217 | } |
218 | |
219 | vmbus_connection.recv_int_page = vmbus_connection.int_page; |
220 | vmbus_connection.send_int_page = |
221 | (void *)((unsigned long)vmbus_connection.int_page + |
222 | (HV_HYP_PAGE_SIZE >> 1)); |
223 | |
224 | /* |
225 | * Setup the monitor notification facility. The 1st page for |
226 | * parent->child and the 2nd page for child->parent |
227 | */ |
228 | vmbus_connection.monitor_pages[0] = hv_alloc_hyperv_page(); |
229 | vmbus_connection.monitor_pages[1] = hv_alloc_hyperv_page(); |
230 | if ((vmbus_connection.monitor_pages[0] == NULL) || |
231 | (vmbus_connection.monitor_pages[1] == NULL)) { |
232 | ret = -ENOMEM; |
233 | goto cleanup; |
234 | } |
235 | |
236 | ret = set_memory_decrypted(addr: (unsigned long) |
237 | vmbus_connection.monitor_pages[0], numpages: 1); |
238 | ret |= set_memory_decrypted(addr: (unsigned long) |
239 | vmbus_connection.monitor_pages[1], numpages: 1); |
240 | if (ret) |
241 | goto cleanup; |
242 | |
243 | /* |
244 | * Set_memory_decrypted() will change the memory contents if |
245 | * decryption occurs, so zero monitor pages here. |
246 | */ |
247 | memset(vmbus_connection.monitor_pages[0], 0x00, HV_HYP_PAGE_SIZE); |
248 | memset(vmbus_connection.monitor_pages[1], 0x00, HV_HYP_PAGE_SIZE); |
249 | |
250 | msginfo = kzalloc(size: sizeof(*msginfo) + |
251 | sizeof(struct vmbus_channel_initiate_contact), |
252 | GFP_KERNEL); |
253 | if (msginfo == NULL) { |
254 | ret = -ENOMEM; |
255 | goto cleanup; |
256 | } |
257 | |
258 | /* |
259 | * Negotiate a compatible VMBUS version number with the |
260 | * host. We start with the highest number we can support |
261 | * and work our way down until we negotiate a compatible |
262 | * version. |
263 | */ |
264 | |
265 | for (i = 0; ; i++) { |
266 | if (i == ARRAY_SIZE(vmbus_versions)) { |
267 | ret = -EDOM; |
268 | goto cleanup; |
269 | } |
270 | |
271 | version = vmbus_versions[i]; |
272 | if (version > max_version) |
273 | continue; |
274 | |
275 | ret = vmbus_negotiate_version(msginfo, version); |
276 | if (ret == -ETIMEDOUT) |
277 | goto cleanup; |
278 | |
279 | if (vmbus_connection.conn_state == CONNECTED) |
280 | break; |
281 | } |
282 | |
283 | if (hv_is_isolation_supported() && version < VERSION_WIN10_V5_2) { |
284 | pr_err("Invalid VMBus version %d.%d (expected >= %d.%d) from the host supporting isolation\n" , |
285 | version >> 16, version & 0xFFFF, VERSION_WIN10_V5_2 >> 16, VERSION_WIN10_V5_2 & 0xFFFF); |
286 | ret = -EINVAL; |
287 | goto cleanup; |
288 | } |
289 | |
290 | vmbus_proto_version = version; |
291 | pr_info("Vmbus version:%d.%d\n" , |
292 | version >> 16, version & 0xFFFF); |
293 | |
294 | vmbus_connection.channels = kcalloc(MAX_CHANNEL_RELIDS, |
295 | size: sizeof(struct vmbus_channel *), |
296 | GFP_KERNEL); |
297 | if (vmbus_connection.channels == NULL) { |
298 | ret = -ENOMEM; |
299 | goto cleanup; |
300 | } |
301 | |
302 | kfree(objp: msginfo); |
303 | return 0; |
304 | |
305 | cleanup: |
306 | pr_err("Unable to connect to host\n" ); |
307 | |
308 | vmbus_connection.conn_state = DISCONNECTED; |
309 | vmbus_disconnect(); |
310 | |
311 | kfree(objp: msginfo); |
312 | |
313 | return ret; |
314 | } |
315 | |
316 | void vmbus_disconnect(void) |
317 | { |
318 | /* |
319 | * First send the unload request to the host. |
320 | */ |
321 | vmbus_initiate_unload(crash: false); |
322 | |
323 | if (vmbus_connection.handle_sub_chan_wq) |
324 | destroy_workqueue(wq: vmbus_connection.handle_sub_chan_wq); |
325 | |
326 | if (vmbus_connection.handle_primary_chan_wq) |
327 | destroy_workqueue(wq: vmbus_connection.handle_primary_chan_wq); |
328 | |
329 | if (vmbus_connection.rescind_work_queue) |
330 | destroy_workqueue(wq: vmbus_connection.rescind_work_queue); |
331 | |
332 | if (vmbus_connection.work_queue) |
333 | destroy_workqueue(wq: vmbus_connection.work_queue); |
334 | |
335 | if (vmbus_connection.int_page) { |
336 | hv_free_hyperv_page(addr: vmbus_connection.int_page); |
337 | vmbus_connection.int_page = NULL; |
338 | } |
339 | |
340 | set_memory_encrypted(addr: (unsigned long)vmbus_connection.monitor_pages[0], numpages: 1); |
341 | set_memory_encrypted(addr: (unsigned long)vmbus_connection.monitor_pages[1], numpages: 1); |
342 | |
343 | hv_free_hyperv_page(addr: vmbus_connection.monitor_pages[0]); |
344 | hv_free_hyperv_page(addr: vmbus_connection.monitor_pages[1]); |
345 | vmbus_connection.monitor_pages[0] = NULL; |
346 | vmbus_connection.monitor_pages[1] = NULL; |
347 | } |
348 | |
349 | /* |
350 | * relid2channel - Get the channel object given its |
351 | * child relative id (ie channel id) |
352 | */ |
353 | struct vmbus_channel *relid2channel(u32 relid) |
354 | { |
355 | if (vmbus_connection.channels == NULL) { |
356 | pr_warn_once("relid2channel: relid=%d: No channels mapped!\n" , relid); |
357 | return NULL; |
358 | } |
359 | if (WARN_ON(relid >= MAX_CHANNEL_RELIDS)) |
360 | return NULL; |
361 | return READ_ONCE(vmbus_connection.channels[relid]); |
362 | } |
363 | |
364 | /* |
365 | * vmbus_on_event - Process a channel event notification |
366 | * |
367 | * For batched channels (default) optimize host to guest signaling |
368 | * by ensuring: |
369 | * 1. While reading the channel, we disable interrupts from host. |
370 | * 2. Ensure that we process all posted messages from the host |
371 | * before returning from this callback. |
372 | * 3. Once we return, enable signaling from the host. Once this |
373 | * state is set we check to see if additional packets are |
374 | * available to read. In this case we repeat the process. |
375 | * If this tasklet has been running for a long time |
376 | * then reschedule ourselves. |
377 | */ |
378 | void vmbus_on_event(unsigned long data) |
379 | { |
380 | struct vmbus_channel *channel = (void *) data; |
381 | void (*callback_fn)(void *context); |
382 | |
383 | trace_vmbus_on_event(channel); |
384 | |
385 | hv_debug_delay_test(channel, delay_type: INTERRUPT_DELAY); |
386 | |
387 | /* A channel once created is persistent even when |
388 | * there is no driver handling the device. An |
389 | * unloading driver sets the onchannel_callback to NULL. |
390 | */ |
391 | callback_fn = READ_ONCE(channel->onchannel_callback); |
392 | if (unlikely(!callback_fn)) |
393 | return; |
394 | |
395 | (*callback_fn)(channel->channel_callback_context); |
396 | |
397 | if (channel->callback_mode != HV_CALL_BATCHED) |
398 | return; |
399 | |
400 | if (likely(hv_end_read(&channel->inbound) == 0)) |
401 | return; |
402 | |
403 | hv_begin_read(rbi: &channel->inbound); |
404 | tasklet_schedule(t: &channel->callback_event); |
405 | } |
406 | |
407 | /* |
408 | * vmbus_post_msg - Send a msg on the vmbus's message connection |
409 | */ |
410 | int vmbus_post_msg(void *buffer, size_t buflen, bool can_sleep) |
411 | { |
412 | struct vmbus_channel_message_header *hdr; |
413 | union hv_connection_id conn_id; |
414 | int ret = 0; |
415 | int retries = 0; |
416 | u32 usec = 1; |
417 | |
418 | conn_id.asu32 = 0; |
419 | conn_id.u.id = vmbus_connection.msg_conn_id; |
420 | |
421 | /* |
422 | * hv_post_message() can have transient failures because of |
423 | * insufficient resources. Retry the operation a couple of |
424 | * times before giving up. |
425 | */ |
426 | while (retries < 100) { |
427 | ret = hv_post_message(connection_id: conn_id, message_type: 1, payload: buffer, payload_size: buflen); |
428 | |
429 | switch (ret) { |
430 | case HV_STATUS_INVALID_CONNECTION_ID: |
431 | /* |
432 | * See vmbus_negotiate_version(): VMBus protocol 5.0 |
433 | * and higher require that we must use |
434 | * VMBUS_MESSAGE_CONNECTION_ID_4 for the Initiate |
435 | * Contact message, but on old hosts that only |
436 | * support VMBus protocol 4.0 or lower, here we get |
437 | * HV_STATUS_INVALID_CONNECTION_ID and we should |
438 | * return an error immediately without retrying. |
439 | */ |
440 | hdr = buffer; |
441 | if (hdr->msgtype == CHANNELMSG_INITIATE_CONTACT) |
442 | return -EINVAL; |
443 | /* |
444 | * We could get this if we send messages too |
445 | * frequently. |
446 | */ |
447 | ret = -EAGAIN; |
448 | break; |
449 | case HV_STATUS_INSUFFICIENT_MEMORY: |
450 | case HV_STATUS_INSUFFICIENT_BUFFERS: |
451 | ret = -ENOBUFS; |
452 | break; |
453 | case HV_STATUS_SUCCESS: |
454 | return ret; |
455 | default: |
456 | pr_err("hv_post_msg() failed; error code:%d\n" , ret); |
457 | return -EINVAL; |
458 | } |
459 | |
460 | retries++; |
461 | if (can_sleep && usec > 1000) |
462 | msleep(msecs: usec / 1000); |
463 | else if (usec < MAX_UDELAY_MS * 1000) |
464 | udelay(usec); |
465 | else |
466 | mdelay(usec / 1000); |
467 | |
468 | if (retries < 22) |
469 | usec *= 2; |
470 | } |
471 | return ret; |
472 | } |
473 | |
474 | /* |
475 | * vmbus_set_event - Send an event notification to the parent |
476 | */ |
477 | void vmbus_set_event(struct vmbus_channel *channel) |
478 | { |
479 | u32 child_relid = channel->offermsg.child_relid; |
480 | |
481 | if (!channel->is_dedicated_interrupt) |
482 | vmbus_send_interrupt(relid: child_relid); |
483 | |
484 | ++channel->sig_events; |
485 | |
486 | if (ms_hyperv.paravisor_present) { |
487 | if (hv_isolation_type_snp()) |
488 | hv_ghcb_hypercall(HVCALL_SIGNAL_EVENT, input: &channel->sig_event, |
489 | NULL, input_size: sizeof(channel->sig_event)); |
490 | else if (hv_isolation_type_tdx()) |
491 | hv_tdx_hypercall(HVCALL_SIGNAL_EVENT | HV_HYPERCALL_FAST_BIT, |
492 | param1: channel->sig_event, param2: 0); |
493 | else |
494 | WARN_ON_ONCE(1); |
495 | } else { |
496 | hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, input8: channel->sig_event); |
497 | } |
498 | } |
499 | EXPORT_SYMBOL_GPL(vmbus_set_event); |
500 | |