/* SPDX-License-Identifier: GPL-2.0-only */
/*
 *
 * Copyright (c) 2011, Microsoft Corporation.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 *   K. Y. Srinivasan <kys@microsoft.com>
 */

#ifndef _HYPERV_VMBUS_H
#define _HYPERV_VMBUS_H

#include <linux/list.h>
#include <linux/bitops.h>
#include <asm/sync_bitops.h>
#include <asm/hyperv-tlfs.h>
#include <linux/atomic.h>
#include <linux/hyperv.h>
#include <linux/interrupt.h>

#include "hv_trace.h"

/*
 * Timeout in seconds for services such as KVP and fcopy.
 */
#define HV_UTIL_TIMEOUT 30

/*
 * Timeout in seconds for the guest-host handshake for services.
 */
#define HV_UTIL_NEGO_TIMEOUT 55


/* Definitions for the monitored notification facility */
union hv_monitor_trigger_group {
	u64 as_uint64;
	struct {
		u32 pending;
		u32 armed;
	};
};

struct hv_monitor_parameter {
	union hv_connection_id connectionid;
	u16 flagnumber;
	u16 rsvdz;
};

union hv_monitor_trigger_state {
	u32 asu32;

	struct {
		u32 group_enable:4;
		u32 rsvdz:28;
	};
};

/* struct hv_monitor_page layout (byte offsets in hex) */
/* ------------------------------------------------------ */
/* | 0   | TriggerState (4 bytes) | Rsvd1 (4 bytes)     | */
/* | 8   | TriggerGroup[0]                              | */
/* | 10  | TriggerGroup[1]                              | */
/* | 18  | TriggerGroup[2]                              | */
/* | 20  | TriggerGroup[3]                              | */
/* | 28  | Rsvd2[0]                                     | */
/* | 30  | Rsvd2[1]                                     | */
/* | 38  | Rsvd2[2]                                     | */
/* | 40  | NextCheckTime[0][0]    | NextCheckTime[0][1] | */
/* | ...                                                | */
/* | 240 | Latency[0][0..3]                             | */
/* | 340 | Rsvdz3[0]                                    | */
/* | 440 | Parameter[0][0]                              | */
/* | 448 | Parameter[0][1]                              | */
/* | ...                                                | */
/* | 840 | Rsvd4[0]                                     | */
/* ------------------------------------------------------ */
struct hv_monitor_page {
	union hv_monitor_trigger_state trigger_state;
	u32 rsvdz1;

	union hv_monitor_trigger_group trigger_group[4];
	u64 rsvdz2[3];

	s32 next_checktime[4][32];

	u16 latency[4][32];
	u64 rsvdz3[32];

	struct hv_monitor_parameter parameter[4][32];

	u8 rsvdz4[1984];
};
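
/*
 * Illustrative sketch, not a declaration of this header: to signal the
 * host on a monitored channel, the guest sets the channel's bit in the
 * "pending" mask of its trigger group in the child->parent monitor page,
 * roughly what vmbus_setevent() does:
 *
 *	struct hv_monitor_page *mp = vmbus_connection.monitor_pages[1];
 *
 *	sync_set_bit(channel->monitor_bit,
 *		     (unsigned long *)&mp->trigger_group
 *				[channel->monitor_grp].pending);
 *
 * The hypervisor may delay and batch monitored notifications (hence the
 * next_checktime[]/latency[] bookkeeping above) to reduce the interrupt
 * rate.
 */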

#define HV_HYPERCALL_PARAM_ALIGN sizeof(u64)

/* Definition of the hv_post_message hypercall input structure. */
struct hv_input_post_message {
	union hv_connection_id connectionid;
	u32 reserved;
	u32 message_type;
	u32 payload_size;
	u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT];
};
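
/*
 * Sketch of intended use (see hv_post_message() for the real logic;
 * details may differ): the caller builds the input in a buffer aligned
 * to HV_HYPERCALL_PARAM_ALIGN and visible to the hypervisor, then
 * issues the HVCALL_POST_MESSAGE hypercall:
 *
 *	aligned_msg->connectionid = connection_id;
 *	aligned_msg->reserved = 0;
 *	aligned_msg->message_type = message_type;
 *	aligned_msg->payload_size = payload_size;
 *	memcpy((void *)aligned_msg->payload, payload, payload_size);
 *
 *	status = hv_do_hypercall(HVCALL_POST_MESSAGE, aligned_msg, NULL);
 */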


enum {
	VMBUS_MESSAGE_CONNECTION_ID = 1,
	VMBUS_MESSAGE_CONNECTION_ID_4 = 4,
	VMBUS_MESSAGE_PORT_ID = 1,
	VMBUS_EVENT_CONNECTION_ID = 2,
	VMBUS_EVENT_PORT_ID = 2,
	VMBUS_MONITOR_CONNECTION_ID = 3,
	VMBUS_MONITOR_PORT_ID = 3,
	VMBUS_MESSAGE_SINT = 2,
};

/*
 * Per-cpu state for channel handling
 */
struct hv_per_cpu_context {
	void *synic_message_page;
	void *synic_event_page;

	/*
	 * The page is only used in hv_post_message() for a TDX VM (with the
	 * paravisor) to post messages to Hyper-V: when such a VM calls
	 * HVCALL_POST_MESSAGE, it can't use the hyperv_pcpu_input_arg (which
	 * is encrypted in such a VM) as the hypercall input page, because
	 * the input page for HVCALL_POST_MESSAGE must be decrypted in such a
	 * VM, so post_msg_page (which is decrypted in hv_synic_alloc()) is
	 * introduced for this purpose. See hyperv_init() for more comments.
	 */
	void *post_msg_page;

	/*
	 * Starting with win8, we can take channel interrupts on any CPU;
	 * we will manage the tasklet that handles event messages on a
	 * per-CPU basis.
	 */
	struct tasklet_struct msg_dpc;
};
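
/*
 * Typical access pattern (a sketch; cf. vmbus_isr()): look up the
 * context of the current CPU and kick its message DPC:
 *
 *	struct hv_per_cpu_context *hv_cpu =
 *		this_cpu_ptr(hv_context.cpu_context);
 *
 *	tasklet_schedule(&hv_cpu->msg_dpc);
 */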

struct hv_context {
	/*
	 * We only support running on top of Hyper-V, so at this point
	 * this really can only contain the Hyper-V ID.
	 */
	u64 guestid;

	struct hv_per_cpu_context __percpu *cpu_context;

	/*
	 * To manage allocations in a NUMA node.
	 * Array indexed by NUMA node ID.
	 */
	struct cpumask *hv_numa_map;
};

extern struct hv_context hv_context;

/* Hv Interface */

extern int hv_init(void);

extern int hv_post_message(union hv_connection_id connection_id,
			   enum hv_message_type message_type,
			   void *payload, size_t payload_size);

extern int hv_synic_alloc(void);

extern void hv_synic_free(void);

extern void hv_synic_enable_regs(unsigned int cpu);
extern int hv_synic_init(unsigned int cpu);

extern void hv_synic_disable_regs(unsigned int cpu);
extern int hv_synic_cleanup(unsigned int cpu);

/* Interface */

void hv_ringbuffer_pre_init(struct vmbus_channel *channel);

int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
		       struct page *pages, u32 pagecnt, u32 max_pkt_size);

void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info);

int hv_ringbuffer_write(struct vmbus_channel *channel,
			const struct kvec *kv_list, u32 kv_count,
			u64 requestid, u64 *trans_id);

int hv_ringbuffer_read(struct vmbus_channel *channel,
		       void *buffer, u32 buflen, u32 *buffer_actual_len,
		       u64 *requestid, bool raw);

/*
 * The maximum number of channels (16384) is determined by the size of the
 * interrupt page, which is HV_HYP_PAGE_SIZE: half of the page is used for
 * sending endpoint interrupts, and the other half for receiving them.
 */
#define MAX_NUM_CHANNELS ((HV_HYP_PAGE_SIZE >> 1) << 3)
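/*
 * E.g. with HV_HYP_PAGE_SIZE == 4096: (4096 / 2) bytes * 8 bits/byte
 * = 16384 channel bits, one per channel.
 */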

/* The value here must be a multiple of 32 */
#define MAX_NUM_CHANNELS_SUPPORTED 256

#define MAX_CHANNEL_RELIDS \
	max(MAX_NUM_CHANNELS_SUPPORTED, HV_EVENT_FLAGS_COUNT)

enum vmbus_connect_state {
	DISCONNECTED,
	CONNECTING,
	CONNECTED,
	DISCONNECTING
};

#define MAX_SIZE_CHANNEL_MESSAGE HV_MESSAGE_PAYLOAD_BYTE_COUNT

/*
 * The CPU that Hyper-V will interrupt for VMBUS messages, such as
 * CHANNELMSG_OFFERCHANNEL and CHANNELMSG_RESCIND_CHANNELOFFER.
 */
#define VMBUS_CONNECT_CPU 0

struct vmbus_connection {
	u32 msg_conn_id;

	atomic_t offer_in_progress;

	enum vmbus_connect_state conn_state;

	atomic_t next_gpadl_handle;

	struct completion unload_event;
	/*
	 * Represents channel interrupts. Each bit position represents a
	 * channel. When a channel sends an interrupt via VMBUS, it finds its
	 * bit in the sendInterruptPage, sets it and calls Hv to generate a
	 * port event. The other end receives the port event and parses the
	 * recvInterruptPage to see which bit is set.
	 */
	void *int_page;
	void *send_int_page;
	void *recv_int_page;

	/*
	 * Two pages: the first for parent->child notification and the
	 * second for child->parent notification.
	 */
	struct hv_monitor_page *monitor_pages[2];
	struct list_head chn_msg_list;
	spinlock_t channelmsg_lock;

	/* List of channels */
	struct list_head chn_list;
	struct mutex channel_mutex;

	/* Array of channels */
	struct vmbus_channel **channels;

	/*
	 * An offer message is handled first on the work_queue, and then
	 * is further handled on handle_primary_chan_wq or
	 * handle_sub_chan_wq.
	 */
	struct workqueue_struct *work_queue;
	struct workqueue_struct *handle_primary_chan_wq;
	struct workqueue_struct *handle_sub_chan_wq;
	struct workqueue_struct *rescind_work_queue;

	/*
	 * On suspension of the vmbus, the accumulated offer messages
	 * must be dropped.
	 */
	bool ignore_any_offer_msg;

	/*
	 * The number of sub-channels and hv_sock channels that should be
	 * cleaned up upon suspend: sub-channels will be re-created upon
	 * resume, and hv_sock channels should not survive suspend.
	 */
	atomic_t nr_chan_close_on_suspend;
	/*
	 * vmbus_bus_suspend() waits for "nr_chan_close_on_suspend" to
	 * drop to zero.
	 */
	struct completion ready_for_suspend_event;

	/*
	 * The number of primary channels that should be "fixed up"
	 * upon resume: these channels are re-offered upon resume, and some
	 * fields of the channel offers (i.e. child_relid and connection_id)
	 * can change, so the old offermsg must be fixed up, before the resume
	 * callbacks of the VSC drivers start to further touch the channels.
	 */
	atomic_t nr_chan_fixup_on_resume;
	/*
	 * vmbus_bus_resume() waits for "nr_chan_fixup_on_resume" to
	 * drop to zero.
	 */
	struct completion ready_for_resume_event;
};


struct vmbus_msginfo {
	/* Bookkeeping stuff */
	struct list_head msglist_entry;

	/* The message itself */
	unsigned char msg[];
};
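
/*
 * Since msg[] is a flexible array member, an instance is allocated
 * together with its payload in one shot; a sketch, with payload and
 * payload_size as hypothetical inputs:
 *
 *	struct vmbus_msginfo *msginfo =
 *		kzalloc(struct_size(msginfo, msg, payload_size),
 *			GFP_ATOMIC);
 *
 *	if (msginfo)
 *		memcpy(msginfo->msg, payload, payload_size);
 */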


extern struct vmbus_connection vmbus_connection;

int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version);

static inline void vmbus_send_interrupt(u32 relid)
{
	sync_set_bit(relid, vmbus_connection.send_int_page);
}

enum vmbus_message_handler_type {
	/* The related handler can sleep. */
	VMHT_BLOCKING = 0,

	/* The related handler must NOT sleep. */
	VMHT_NON_BLOCKING = 1,
};

struct vmbus_channel_message_table_entry {
	enum vmbus_channel_message_type message_type;
	enum vmbus_message_handler_type handler_type;
	void (*message_handler)(struct vmbus_channel_message_header *msg);
	u32 min_payload_len;
};

extern const struct vmbus_channel_message_table_entry
	channel_message_table[CHANNELMSG_COUNT];
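
/*
 * Dispatch sketch (cf. vmbus_on_msg_dpc(); details may differ): the
 * message type indexes this table; VMHT_NON_BLOCKING handlers run
 * directly in the tasklet, while VMHT_BLOCKING ones are deferred to a
 * work queue so they may sleep:
 *
 *	const struct vmbus_channel_message_table_entry *entry =
 *		&channel_message_table[hdr->msgtype];
 *
 *	if (entry->handler_type == VMHT_NON_BLOCKING)
 *		entry->message_handler(hdr);
 *	else
 *		queue_work(vmbus_connection.work_queue, ...);
 */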


/* General vmbus interface */

struct hv_device *vmbus_device_create(const guid_t *type,
				      const guid_t *instance,
				      struct vmbus_channel *channel);

int vmbus_device_register(struct hv_device *child_device_obj);
void vmbus_device_unregister(struct hv_device *device_obj);
int vmbus_add_channel_kobj(struct hv_device *device_obj,
			   struct vmbus_channel *channel);

void vmbus_remove_channel_attr_group(struct vmbus_channel *channel);

void vmbus_channel_map_relid(struct vmbus_channel *channel);
void vmbus_channel_unmap_relid(struct vmbus_channel *channel);

struct vmbus_channel *relid2channel(u32 relid);

void vmbus_free_channels(void);

/* Connection interface */

int vmbus_connect(void);
void vmbus_disconnect(void);

int vmbus_post_msg(void *buffer, size_t buflen, bool can_sleep);

void vmbus_on_event(unsigned long data);
void vmbus_on_msg_dpc(unsigned long data);

int hv_kvp_init(struct hv_util_service *srv);
void hv_kvp_deinit(void);
int hv_kvp_pre_suspend(void);
int hv_kvp_pre_resume(void);
void hv_kvp_onchannelcallback(void *context);

int hv_vss_init(struct hv_util_service *srv);
void hv_vss_deinit(void);
int hv_vss_pre_suspend(void);
int hv_vss_pre_resume(void);
void hv_vss_onchannelcallback(void *context);

int hv_fcopy_init(struct hv_util_service *srv);
void hv_fcopy_deinit(void);
int hv_fcopy_pre_suspend(void);
int hv_fcopy_pre_resume(void);
void hv_fcopy_onchannelcallback(void *context);
void vmbus_initiate_unload(bool crash);

static inline void hv_poll_channel(struct vmbus_channel *channel,
				   void (*cb)(void *))
{
	if (!channel)
		return;
	cb(channel);
}

enum hvutil_device_state {
	HVUTIL_DEVICE_INIT = 0,  /* driver is loaded, waiting for userspace */
	HVUTIL_READY,            /* userspace is registered */
	HVUTIL_HOSTMSG_RECEIVED, /* message from the host was received */
	HVUTIL_USERSPACE_REQ,    /* request to userspace was sent */
	HVUTIL_USERSPACE_RECV,   /* reply from userspace was received */
	HVUTIL_DEVICE_DYING,     /* driver unload is in progress */
};

enum delay {
	INTERRUPT_DELAY = 0,
	MESSAGE_DELAY = 1,
};

extern const struct vmbus_device vmbus_devs[];

static inline bool hv_is_perf_channel(struct vmbus_channel *channel)
{
	return vmbus_devs[channel->device_id].perf_device;
}

static inline bool hv_is_allocated_cpu(unsigned int cpu)
{
	struct vmbus_channel *channel, *sc;

	lockdep_assert_held(&vmbus_connection.channel_mutex);
	/*
	 * List additions/deletions as well as updates of the target CPUs are
	 * protected by channel_mutex.
	 */
	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
		if (!hv_is_perf_channel(channel))
			continue;
		if (channel->target_cpu == cpu)
			return true;
		list_for_each_entry(sc, &channel->sc_list, sc_list) {
			if (sc->target_cpu == cpu)
				return true;
		}
	}
	return false;
}

static inline void hv_set_allocated_cpu(unsigned int cpu)
{
	cpumask_set_cpu(cpu, &hv_context.hv_numa_map[cpu_to_node(cpu)]);
}

static inline void hv_clear_allocated_cpu(unsigned int cpu)
{
	/* Keep the bit set if another perf channel still targets this CPU. */
	if (hv_is_allocated_cpu(cpu))
		return;
	cpumask_clear_cpu(cpu, &hv_context.hv_numa_map[cpu_to_node(cpu)]);
}

static inline void hv_update_allocated_cpus(unsigned int old_cpu,
					    unsigned int new_cpu)
{
	hv_set_allocated_cpu(new_cpu);
	hv_clear_allocated_cpu(old_cpu);
}
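
/*
 * Usage sketch (cf. target_cpu_store() in vmbus_drv.c; details may
 * differ): when a channel is retargeted, both the old and the new CPU's
 * NUMA masks are refreshed under channel_mutex:
 *
 *	hv_update_allocated_cpus(channel->target_cpu, target_cpu);
 *	channel->target_cpu = target_cpu;
 */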

#ifdef CONFIG_HYPERV_TESTING

int hv_debug_add_dev_dir(struct hv_device *dev);
void hv_debug_rm_dev_dir(struct hv_device *dev);
void hv_debug_rm_all_dir(void);
int hv_debug_init(void);
void hv_debug_delay_test(struct vmbus_channel *channel, enum delay delay_type);

#else /* CONFIG_HYPERV_TESTING */

static inline void hv_debug_rm_dev_dir(struct hv_device *dev) {}
static inline void hv_debug_rm_all_dir(void) {}
static inline void hv_debug_delay_test(struct vmbus_channel *channel,
				       enum delay delay_type) {}
static inline int hv_debug_init(void)
{
	return -1;
}

static inline int hv_debug_add_dev_dir(struct hv_device *dev)
{
	return -1;
}

#endif /* CONFIG_HYPERV_TESTING */

#endif /* _HYPERV_VMBUS_H */