// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/cpu.h>
#include <linux/hyperv.h>
#include <asm/mshyperv.h>
#include <linux/sched/isolation.h>

#include "hyperv_vmbus.h"

static void init_vp_index(struct vmbus_channel *channel);

const struct vmbus_device vmbus_devs[] = {
	/* IDE */
	{ .dev_type = HV_IDE,
	  HV_IDE_GUID,
	  .perf_device = true,
	  .allowed_in_isolated = false,
	},

	/* SCSI */
	{ .dev_type = HV_SCSI,
	  HV_SCSI_GUID,
	  .perf_device = true,
	  .allowed_in_isolated = true,
	},

	/* Fibre Channel */
	{ .dev_type = HV_FC,
	  HV_SYNTHFC_GUID,
	  .perf_device = true,
	  .allowed_in_isolated = false,
	},

	/* Synthetic NIC */
	{ .dev_type = HV_NIC,
	  HV_NIC_GUID,
	  .perf_device = true,
	  .allowed_in_isolated = true,
	},

	/* Network Direct */
	{ .dev_type = HV_ND,
	  HV_ND_GUID,
	  .perf_device = true,
	  .allowed_in_isolated = false,
	},

	/* PCIE */
	{ .dev_type = HV_PCIE,
	  HV_PCIE_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = true,
	},

	/* Synthetic Frame Buffer */
	{ .dev_type = HV_FB,
	  HV_SYNTHVID_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* Synthetic Keyboard */
	{ .dev_type = HV_KBD,
	  HV_KBD_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* Synthetic Mouse */
	{ .dev_type = HV_MOUSE,
	  HV_MOUSE_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* KVP */
	{ .dev_type = HV_KVP,
	  HV_KVP_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* Time Synch */
	{ .dev_type = HV_TS,
	  HV_TS_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = true,
	},

	/* Heartbeat */
	{ .dev_type = HV_HB,
	  HV_HEART_BEAT_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = true,
	},

	/* Shutdown */
	{ .dev_type = HV_SHUTDOWN,
	  HV_SHUTDOWN_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = true,
	},

	/* File copy */
	{ .dev_type = HV_FCOPY,
	  HV_FCOPY_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* Backup */
	{ .dev_type = HV_BACKUP,
	  HV_VSS_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* Dynamic Memory */
	{ .dev_type = HV_DM,
	  HV_DM_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* Unknown GUID */
	{ .dev_type = HV_UNKNOWN,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},
};
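
/*
 * Offers from the host that Linux deliberately leaves unclaimed.  Per the
 * GUID definitions in include/linux/hyperv.h, these correspond to Windows
 * guest features with no Linux support (Automatic Virtual Machine
 * Activation, Remote Desktop Virtualization, and Initial Machine
 * Configuration); hv_get_dev_type() maps them to HV_UNKNOWN.
 */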
static const struct {
	guid_t guid;
} vmbus_unsupported_devs[] = {
	{ HV_AVMA1_GUID },
	{ HV_AVMA2_GUID },
	{ HV_RDV_GUID },
	{ HV_IMC_GUID },
};

/*
 * The rescinded channel may be blocked waiting for a response from the host;
 * take care of that.
 */
static void vmbus_rescind_cleanup(struct vmbus_channel *channel)
{
	struct vmbus_channel_msginfo *msginfo;
	unsigned long flags;

	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
	channel->rescind = true;
	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
			    msglistentry) {

		if (msginfo->waiting_channel == channel) {
			complete(&msginfo->waitevent);
			break;
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

static bool is_unsupported_vmbus_devs(const guid_t *guid)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(vmbus_unsupported_devs); i++)
		if (guid_equal(guid, &vmbus_unsupported_devs[i].guid))
			return true;
	return false;
}
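
/*
 * Note: vmbus_devs[] is indexed by the HV_* dev_type values, which is why
 * the matching index in the loop below can be returned directly as the
 * device type.
 */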
static u16 hv_get_dev_type(const struct vmbus_channel *channel)
{
	const guid_t *guid = &channel->offermsg.offer.if_type;
	u16 i;

	if (is_hvsock_channel(channel) || is_unsupported_vmbus_devs(guid))
		return HV_UNKNOWN;

	for (i = HV_IDE; i < HV_UNKNOWN; i++) {
		if (guid_equal(guid, &vmbus_devs[i].guid))
			return i;
	}
	pr_info("Unknown GUID: %pUl\n", guid);
	return i;
}

/**
 * vmbus_prep_negotiate_resp() - Create default response for Negotiate message
 * @icmsghdrp: Pointer to msg header structure
 * @buf: Raw buffer channel data
 * @buflen: Length of the raw buffer channel data.
 * @fw_version: The framework versions we can support.
 * @fw_vercnt: The size of @fw_version.
 * @srv_version: The service versions we can support.
 * @srv_vercnt: The size of @srv_version.
 * @nego_fw_version: The selected framework version.
 * @nego_srv_version: The selected service version.
 *
 * Note: Versions are given in decreasing order.
 *
 * Set up and fill in default negotiate response message.
 * Mainly used by Hyper-V drivers.
 */
bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf,
				u32 buflen, const int *fw_version, int fw_vercnt,
				const int *srv_version, int srv_vercnt,
				int *nego_fw_version, int *nego_srv_version)
{
	int icframe_major, icframe_minor;
	int icmsg_major, icmsg_minor;
	int fw_major, fw_minor;
	int srv_major, srv_minor;
	int i, j;
	bool found_match = false;
	struct icmsg_negotiate *negop;

	/* Check that there's enough space for icframe_vercnt, icmsg_vercnt */
	if (buflen < ICMSG_HDR + offsetof(struct icmsg_negotiate, reserved)) {
		pr_err_ratelimited("Invalid icmsg negotiate\n");
		return false;
	}

	icmsghdrp->icmsgsize = 0x10;
	negop = (struct icmsg_negotiate *)&buf[ICMSG_HDR];

	icframe_major = negop->icframe_vercnt;
	icframe_minor = 0;

	icmsg_major = negop->icmsg_vercnt;
	icmsg_minor = 0;

	/* Validate negop packet */
	if (icframe_major > IC_VERSION_NEGOTIATION_MAX_VER_COUNT ||
	    icmsg_major > IC_VERSION_NEGOTIATION_MAX_VER_COUNT ||
	    ICMSG_NEGOTIATE_PKT_SIZE(icframe_major, icmsg_major) > buflen) {
		pr_err_ratelimited("Invalid icmsg negotiate - icframe_major: %u, icmsg_major: %u\n",
				   icframe_major, icmsg_major);
		goto fw_error;
	}

	/*
	 * Select the framework version number we will
	 * support.
	 */

	for (i = 0; i < fw_vercnt; i++) {
		fw_major = (fw_version[i] >> 16);
		fw_minor = (fw_version[i] & 0xFFFF);

		for (j = 0; j < negop->icframe_vercnt; j++) {
			if ((negop->icversion_data[j].major == fw_major) &&
			    (negop->icversion_data[j].minor == fw_minor)) {
				icframe_major = negop->icversion_data[j].major;
				icframe_minor = negop->icversion_data[j].minor;
				found_match = true;
				break;
			}
		}

		if (found_match)
			break;
	}

	if (!found_match)
		goto fw_error;

	found_match = false;

	for (i = 0; i < srv_vercnt; i++) {
		srv_major = (srv_version[i] >> 16);
		srv_minor = (srv_version[i] & 0xFFFF);

		for (j = negop->icframe_vercnt;
		     (j < negop->icframe_vercnt + negop->icmsg_vercnt);
		     j++) {

			if ((negop->icversion_data[j].major == srv_major) &&
			    (negop->icversion_data[j].minor == srv_minor)) {

				icmsg_major = negop->icversion_data[j].major;
				icmsg_minor = negop->icversion_data[j].minor;
				found_match = true;
				break;
			}
		}

		if (found_match)
			break;
	}

	/*
	 * Respond with the framework and service
	 * version numbers we can support.
	 */

fw_error:
	if (!found_match) {
		negop->icframe_vercnt = 0;
		negop->icmsg_vercnt = 0;
	} else {
		negop->icframe_vercnt = 1;
		negop->icmsg_vercnt = 1;
	}

	if (nego_fw_version)
		*nego_fw_version = (icframe_major << 16) | icframe_minor;

	if (nego_srv_version)
		*nego_srv_version = (icmsg_major << 16) | icmsg_minor;

	negop->icversion_data[0].major = icframe_major;
	negop->icversion_data[0].minor = icframe_minor;
	negop->icversion_data[1].major = icmsg_major;
	negop->icversion_data[1].minor = icmsg_minor;
	return found_match;
}
EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);
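
/*
 * Illustrative sketch (not code from this file): an integration-component
 * driver's channel callback typically feeds an ICMSGTYPE_NEGOTIATE request
 * through this helper before echoing the buffer back to the host, along
 * the lines of:
 *
 *	if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE)
 *		vmbus_prep_negotiate_resp(icmsghdrp, buf, buflen,
 *					  fw_versions, FW_VER_COUNT,
 *					  srv_versions, SRV_VER_COUNT,
 *					  NULL, &srv_version);
 *
 * where fw_versions[]/srv_versions[] (names hypothetical) list the
 * supported versions in decreasing order, per the kernel-doc above.
 */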

/*
 * alloc_channel - Allocate and initialize a vmbus channel object
 */
static struct vmbus_channel *alloc_channel(void)
{
	struct vmbus_channel *channel;

	channel = kzalloc(sizeof(*channel), GFP_ATOMIC);
	if (!channel)
		return NULL;

	spin_lock_init(&channel->sched_lock);
	init_completion(&channel->rescind_event);

	INIT_LIST_HEAD(&channel->sc_list);

	tasklet_init(&channel->callback_event,
		     vmbus_on_event, (unsigned long)channel);

	hv_ringbuffer_pre_init(channel);

	return channel;
}

/*
 * free_channel - Release the resources used by the vmbus channel object
 */
static void free_channel(struct vmbus_channel *channel)
{
	tasklet_kill(&channel->callback_event);
	vmbus_remove_channel_attr_group(channel);

	kobject_put(&channel->kobj);
}

void vmbus_channel_map_relid(struct vmbus_channel *channel)
{
	if (WARN_ON(channel->offermsg.child_relid >= MAX_CHANNEL_RELIDS))
		return;
	/*
	 * The mapping of the channel's relid is visible from the CPUs that
	 * execute vmbus_chan_sched() by the time that vmbus_chan_sched() will
	 * execute:
	 *
	 * (a) In the "normal (i.e., not resuming from hibernation)" path,
	 *     the full barrier in virt_store_mb() guarantees that the store
	 *     is propagated to all CPUs before the add_channel_work work
	 *     is queued.  In turn, add_channel_work is queued before the
	 *     channel's ring buffer is allocated/initialized and the
	 *     OPENCHANNEL message for the channel is sent in vmbus_open().
	 *     Hyper-V won't start sending the interrupts for the channel
	 *     before the OPENCHANNEL message is acked.  The memory barrier
	 *     in vmbus_chan_sched() -> sync_test_and_clear_bit() ensures
	 *     that vmbus_chan_sched() must find the channel's relid in
	 *     recv_int_page before retrieving the channel pointer from the
	 *     array of channels.
	 *
	 * (b) In the "resuming from hibernation" path, the virt_store_mb()
	 *     guarantees that the store is propagated to all CPUs before
	 *     the VMBus connection is marked as ready for the resume event
	 *     (cf. check_ready_for_resume_event()).  The interrupt handler
	 *     of the VMBus driver and vmbus_chan_sched() can not run before
	 *     vmbus_bus_resume() has completed execution (cf. resume_noirq).
	 */
	virt_store_mb(
		vmbus_connection.channels[channel->offermsg.child_relid],
		channel);
}

void vmbus_channel_unmap_relid(struct vmbus_channel *channel)
{
	if (WARN_ON(channel->offermsg.child_relid >= MAX_CHANNEL_RELIDS))
		return;
	WRITE_ONCE(
		vmbus_connection.channels[channel->offermsg.child_relid],
		NULL);
}

static void vmbus_release_relid(u32 relid)
{
	struct vmbus_channel_relid_released msg;
	int ret;

	memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
	msg.child_relid = relid;
	msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
	ret = vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released),
			     true);

	trace_vmbus_release_relid(&msg, ret);
}

void hv_process_channel_removal(struct vmbus_channel *channel)
{
	lockdep_assert_held(&vmbus_connection.channel_mutex);
	BUG_ON(!channel->rescind);

	/*
	 * hv_process_channel_removal() could find INVALID_RELID only for
	 * hv_sock channels.  See the inline comments in vmbus_onoffer().
	 */
	WARN_ON(channel->offermsg.child_relid == INVALID_RELID &&
		!is_hvsock_channel(channel));

	/*
	 * Upon suspend, an in-use hv_sock channel is removed from the array of
	 * channels and the relid is invalidated.  After hibernation, when the
	 * user-space application destroys the channel, it's unnecessary and
	 * unsafe to remove the channel from the array of channels.  See also
	 * the inline comments before the call of vmbus_release_relid() below.
	 */
	if (channel->offermsg.child_relid != INVALID_RELID)
		vmbus_channel_unmap_relid(channel);

	if (channel->primary_channel == NULL)
		list_del(&channel->listentry);
	else
		list_del(&channel->sc_list);

	/*
	 * If this is a "perf" channel, update the hv_numa_map[] masks so that
	 * init_vp_index() can (re-)use the CPU.
	 */
	if (hv_is_perf_channel(channel))
		hv_clear_allocated_cpu(channel->target_cpu);

	/*
	 * Upon suspend, an in-use hv_sock channel is marked as "rescinded" and
	 * the relid is invalidated; after hibernation, when the user-space app
	 * destroys the channel, the relid is INVALID_RELID, and in this case
	 * it's unnecessary and unsafe to release the old relid, since the same
	 * relid can refer to a completely different channel now.
	 */
	if (channel->offermsg.child_relid != INVALID_RELID)
		vmbus_release_relid(channel->offermsg.child_relid);

	free_channel(channel);
}

void vmbus_free_channels(void)
{
	struct vmbus_channel *channel, *tmp;

	list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list,
				 listentry) {
		/* hv_process_channel_removal() needs this */
		channel->rescind = true;

		vmbus_device_unregister(channel->device_obj);
	}
}

/* Note: the function can run concurrently for primary/sub channels. */
static void vmbus_add_channel_work(struct work_struct *work)
{
	struct vmbus_channel *newchannel =
		container_of(work, struct vmbus_channel, add_channel_work);
	struct vmbus_channel *primary_channel = newchannel->primary_channel;
	int ret;

	/*
	 * This state is used to indicate a successful open
	 * so that when we do close the channel normally, we
	 * can cleanup properly.
	 */
	newchannel->state = CHANNEL_OPEN_STATE;

	if (primary_channel != NULL) {
		/* newchannel is a sub-channel. */
		struct hv_device *dev = primary_channel->device_obj;

		if (vmbus_add_channel_kobj(dev, newchannel))
			goto err_deq_chan;

		if (primary_channel->sc_creation_callback != NULL)
			primary_channel->sc_creation_callback(newchannel);

		newchannel->probe_done = true;
		return;
	}

	/*
	 * Start the process of binding the primary channel to the driver
	 */
	newchannel->device_obj = vmbus_device_create(
		&newchannel->offermsg.offer.if_type,
		&newchannel->offermsg.offer.if_instance,
		newchannel);
	if (!newchannel->device_obj)
		goto err_deq_chan;

	newchannel->device_obj->device_id = newchannel->device_id;
	/*
	 * Add the new device to the bus. This will kick off device-driver
	 * binding which eventually invokes the device driver's AddDevice()
	 * method.
	 *
	 * If vmbus_device_register() fails, the 'device_obj' is freed in
	 * vmbus_device_release() as called by device_unregister() in the
	 * error path of vmbus_device_register(). In the outside error
	 * path, there's no need to free it.
	 */
	ret = vmbus_device_register(newchannel->device_obj);

	if (ret != 0) {
		pr_err("unable to add child device object (relid %d)\n",
		       newchannel->offermsg.child_relid);
		goto err_deq_chan;
	}

	newchannel->probe_done = true;
	return;

err_deq_chan:
	mutex_lock(&vmbus_connection.channel_mutex);

	/*
	 * We need to set the flag, otherwise
	 * vmbus_onoffer_rescind() can be blocked.
	 */
	newchannel->probe_done = true;

	if (primary_channel == NULL)
		list_del(&newchannel->listentry);
	else
		list_del(&newchannel->sc_list);

	/* vmbus_process_offer() has mapped the channel. */
	vmbus_channel_unmap_relid(newchannel);

	mutex_unlock(&vmbus_connection.channel_mutex);

	vmbus_release_relid(newchannel->offermsg.child_relid);

	free_channel(newchannel);
}

/*
 * vmbus_process_offer - Process the offer by creating a channel/device
 * associated with this offer
 */
static void vmbus_process_offer(struct vmbus_channel *newchannel)
{
	struct vmbus_channel *channel;
	struct workqueue_struct *wq;
	bool fnew = true;

	/*
	 * Synchronize vmbus_process_offer() and CPU hotplugging:
	 *
	 * CPU1				CPU2
	 *
	 * [vmbus_process_offer()]	[Hot removal of the CPU]
	 *
	 * CPUS_READ_LOCK		CPUS_WRITE_LOCK
	 * LOAD cpu_online_mask		SEARCH chn_list
	 * STORE target_cpu		LOAD target_cpu
	 * INSERT chn_list		STORE cpu_online_mask
	 * CPUS_READ_UNLOCK		CPUS_WRITE_UNLOCK
	 *
	 * Forbids: CPU1's LOAD from *not* seeing CPU2's STORE &&
	 *	    CPU2's SEARCH from *not* seeing CPU1's INSERT
	 *
	 * Forbids: CPU2's SEARCH from seeing CPU1's INSERT &&
	 *	    CPU2's LOAD from *not* seeing CPU1's STORE
	 */
	cpus_read_lock();

	/*
	 * Serializes the modifications of the chn_list list as well as
	 * the accesses to next_numa_node_id in init_vp_index().
	 */
	mutex_lock(&vmbus_connection.channel_mutex);

	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
		if (guid_equal(&channel->offermsg.offer.if_type,
			       &newchannel->offermsg.offer.if_type) &&
		    guid_equal(&channel->offermsg.offer.if_instance,
			       &newchannel->offermsg.offer.if_instance)) {
			fnew = false;
			newchannel->primary_channel = channel;
			break;
		}
	}

	init_vp_index(newchannel);

	/* Remember the channels that should be cleaned up upon suspend. */
	if (is_hvsock_channel(newchannel) || is_sub_channel(newchannel))
		atomic_inc(&vmbus_connection.nr_chan_close_on_suspend);

	/*
	 * Now that we have acquired the channel_mutex,
	 * we can release the potentially racing rescind thread.
	 */
	atomic_dec(&vmbus_connection.offer_in_progress);

	if (fnew) {
		list_add_tail(&newchannel->listentry,
			      &vmbus_connection.chn_list);
	} else {
		/*
		 * Check to see if this is a valid sub-channel.
		 */
		if (newchannel->offermsg.offer.sub_channel_index == 0) {
			mutex_unlock(&vmbus_connection.channel_mutex);
			cpus_read_unlock();
			/*
			 * Don't call free_channel(), because newchannel->kobj
			 * is not initialized yet.
			 */
			kfree(newchannel);
			WARN_ON_ONCE(1);
			return;
		}
		/*
		 * Process the sub-channel.
		 */
		list_add_tail(&newchannel->sc_list, &channel->sc_list);
	}

	vmbus_channel_map_relid(newchannel);

	mutex_unlock(&vmbus_connection.channel_mutex);
	cpus_read_unlock();

	/*
	 * vmbus_process_offer() mustn't call channel->sc_creation_callback()
	 * directly for sub-channels, because sc_creation_callback() ->
	 * vmbus_open() may never get the host's response to the
	 * OPEN_CHANNEL message (the host may rescind a channel at any time,
	 * e.g. in the case of hot removing a NIC), and vmbus_onoffer_rescind()
	 * may not wake up the vmbus_open() as it's blocked due to a non-zero
	 * vmbus_connection.offer_in_progress, and finally we have a deadlock.
	 *
	 * The above is also true for primary channels, if the related device
	 * drivers use sync probing mode by default.
	 *
	 * And, usually the handling of primary channels and sub-channels can
	 * depend on each other, so we should offload them to different
	 * workqueues to avoid possible deadlock, e.g. in sync-probing mode,
	 * NIC1's netvsc_subchan_work() can race with NIC2's netvsc_probe() ->
	 * rtnl_lock(), causing a deadlock: the former gets the rtnl_lock
	 * and waits for all the sub-channels to appear, but the latter
	 * can't get the rtnl_lock and this blocks the handling of
	 * sub-channels.
	 */
	INIT_WORK(&newchannel->add_channel_work, vmbus_add_channel_work);
	wq = fnew ? vmbus_connection.handle_primary_chan_wq :
		    vmbus_connection.handle_sub_chan_wq;
	queue_work(wq, &newchannel->add_channel_work);
}

/*
 * Check if the given CPU is used by other channels of the same device.
 * It should only be called by init_vp_index().
 */
static bool hv_cpuself_used(u32 cpu, struct vmbus_channel *chn)
{
	struct vmbus_channel *primary = chn->primary_channel;
	struct vmbus_channel *sc;

	lockdep_assert_held(&vmbus_connection.channel_mutex);

	if (!primary)
		return false;

	if (primary->target_cpu == cpu)
		return true;

	list_for_each_entry(sc, &primary->sc_list, sc_list)
		if (sc != chn && sc->target_cpu == cpu)
			return true;

	return false;
}

/*
 * We use this state to statically distribute the channel interrupt load.
 */
static int next_numa_node_id;

/*
 * We can statically distribute the incoming channel interrupt load
 * by binding a channel to a VCPU.
 *
 * For non-performance critical channels we assign the VMBUS_CONNECT_CPU.
 * Performance critical channels will be distributed evenly among all
 * the available NUMA nodes.  Once the node is assigned, we will assign
 * the CPU based on a simple round robin scheme.
 */
static void init_vp_index(struct vmbus_channel *channel)
{
	bool perf_chn = hv_is_perf_channel(channel);
	u32 i, ncpu = num_online_cpus();
	cpumask_var_t available_mask;
	struct cpumask *allocated_mask;
	const struct cpumask *hk_mask = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ);
	u32 target_cpu;
	int numa_node;

	if (!perf_chn ||
	    !alloc_cpumask_var(&available_mask, GFP_KERNEL) ||
	    cpumask_empty(hk_mask)) {
		/*
		 * If the channel is not a performance critical
		 * channel, bind it to VMBUS_CONNECT_CPU.
		 * In case alloc_cpumask_var() fails, bind it to
		 * VMBUS_CONNECT_CPU.
		 * If all the cpus are isolated, bind it to
		 * VMBUS_CONNECT_CPU.
		 */
		channel->target_cpu = VMBUS_CONNECT_CPU;
		if (perf_chn)
			hv_set_allocated_cpu(VMBUS_CONNECT_CPU);
		return;
	}

	for (i = 1; i <= ncpu + 1; i++) {
		while (true) {
			numa_node = next_numa_node_id++;
			if (numa_node == nr_node_ids) {
				next_numa_node_id = 0;
				continue;
			}
			if (cpumask_empty(cpumask_of_node(numa_node)))
				continue;
			break;
		}
		allocated_mask = &hv_context.hv_numa_map[numa_node];

retry:
		cpumask_xor(available_mask, allocated_mask, cpumask_of_node(numa_node));
		cpumask_and(available_mask, available_mask, hk_mask);

		if (cpumask_empty(available_mask)) {
			/*
			 * We have cycled through all the CPUs in the node;
			 * reset the allocated map.
			 */
			cpumask_clear(allocated_mask);
			goto retry;
		}

		target_cpu = cpumask_first(available_mask);
		cpumask_set_cpu(target_cpu, allocated_mask);

		if (channel->offermsg.offer.sub_channel_index >= ncpu ||
		    i > ncpu || !hv_cpuself_used(target_cpu, channel))
			break;
	}

	channel->target_cpu = target_cpu;

	free_cpumask_var(available_mask);
}
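
/*
 * Worked example of the scheme above (illustrative numbers): with two NUMA
 * nodes, successive perf channels draw nodes 0, 1, 0, 1, ... from
 * next_numa_node_id; within the chosen node, the lowest-numbered
 * housekeeping CPU not yet set in hv_numa_map[] is picked.  Once every CPU
 * of a node has been handed out, the node's allocated mask is cleared and
 * assignment starts over from that node's first CPU.
 */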

#define UNLOAD_DELAY_UNIT_MS	10		/* 10 milliseconds */
#define UNLOAD_WAIT_MS		(100*1000)	/* 100 seconds */
#define UNLOAD_WAIT_LOOPS	(UNLOAD_WAIT_MS/UNLOAD_DELAY_UNIT_MS)
#define UNLOAD_MSG_MS		(5*1000)	/* Every 5 seconds */
#define UNLOAD_MSG_LOOPS	(UNLOAD_MSG_MS/UNLOAD_DELAY_UNIT_MS)
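/* At 10 ms per loop: 100 s => 10000 wait loops, with a notice every 500. */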

static void vmbus_wait_for_unload(void)
{
	int cpu;
	void *page_addr;
	struct hv_message *msg;
	struct vmbus_channel_message_header *hdr;
	u32 message_type, i;

	/*
	 * CHANNELMSG_UNLOAD_RESPONSE is always delivered to the CPU which was
	 * used for initial contact or to CPU0 depending on host version. When
	 * we're crashing on a different CPU let's hope that the IRQ handler on
	 * the cpu which receives CHANNELMSG_UNLOAD_RESPONSE is still
	 * functional and vmbus_unload_response() will complete
	 * vmbus_connection.unload_event. If not, the last thing we can do is
	 * read message pages for all CPUs directly.
	 *
	 * Wait up to 100 seconds since an Azure host must write back any dirty
	 * data in its disk cache before the VMbus UNLOAD request will
	 * complete. This flushing has been empirically observed to take up
	 * to 50 seconds in cases with a lot of dirty data, so allow additional
	 * leeway and for inaccuracies in mdelay(). But eventually time out so
	 * that the panic path can't get hung forever in case the response
	 * message isn't seen.
	 */
	for (i = 1; i <= UNLOAD_WAIT_LOOPS; i++) {
		if (completion_done(&vmbus_connection.unload_event))
			goto completed;

		for_each_present_cpu(cpu) {
			struct hv_per_cpu_context *hv_cpu
				= per_cpu_ptr(hv_context.cpu_context, cpu);

			/*
			 * In a CoCo VM the synic_message_page is not allocated
			 * in hv_synic_alloc(). Instead it is set/cleared in
			 * hv_synic_enable_regs() and hv_synic_disable_regs()
			 * such that it is set only when the CPU is online. If
			 * not all present CPUs are online, the message page
			 * might be NULL, so skip such CPUs.
			 */
			page_addr = hv_cpu->synic_message_page;
			if (!page_addr)
				continue;

			msg = (struct hv_message *)page_addr
				+ VMBUS_MESSAGE_SINT;

			message_type = READ_ONCE(msg->header.message_type);
			if (message_type == HVMSG_NONE)
				continue;

			hdr = (struct vmbus_channel_message_header *)
				msg->u.payload;

			if (hdr->msgtype == CHANNELMSG_UNLOAD_RESPONSE)
				complete(&vmbus_connection.unload_event);

			vmbus_signal_eom(msg, message_type);
		}

		/*
		 * Give a notice periodically so someone watching the
		 * serial output won't think it is completely hung.
		 */
		if (!(i % UNLOAD_MSG_LOOPS))
			pr_notice("Waiting for VMBus UNLOAD to complete\n");

		mdelay(UNLOAD_DELAY_UNIT_MS);
	}
	pr_err("Continuing even though VMBus UNLOAD did not complete\n");

completed:
	/*
	 * We're crashing and already got the UNLOAD_RESPONSE, cleanup all
	 * maybe-pending messages on all CPUs to be able to receive new
	 * messages after we reconnect.
	 */
	for_each_present_cpu(cpu) {
		struct hv_per_cpu_context *hv_cpu
			= per_cpu_ptr(hv_context.cpu_context, cpu);

		page_addr = hv_cpu->synic_message_page;
		if (!page_addr)
			continue;

		msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
		msg->header.message_type = HVMSG_NONE;
	}
}

/*
 * vmbus_unload_response - Handler for the unload response.
 */
static void vmbus_unload_response(struct vmbus_channel_message_header *hdr)
{
	/*
	 * This is a global event; just wakeup the waiting thread.
	 * Once we successfully unload, we can cleanup the monitor state.
	 *
	 * NB.  A malicious or compromised Hyper-V could send a spurious
	 * message of type CHANNELMSG_UNLOAD_RESPONSE, and trigger a call
	 * of the complete() below.  Make sure that unload_event has been
	 * initialized by the time this complete() is executed.
	 */
	complete(&vmbus_connection.unload_event);
}

void vmbus_initiate_unload(bool crash)
{
	struct vmbus_channel_message_header hdr;

	if (xchg(&vmbus_connection.conn_state, DISCONNECTED) == DISCONNECTED)
		return;

	/* Pre-Win2012R2 hosts don't support reconnect */
	if (vmbus_proto_version < VERSION_WIN8_1)
		return;

	reinit_completion(&vmbus_connection.unload_event);
	memset(&hdr, 0, sizeof(struct vmbus_channel_message_header));
	hdr.msgtype = CHANNELMSG_UNLOAD;
	vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header),
		       !crash);

	/*
	 * vmbus_initiate_unload() is also called on crash, and the crash can
	 * happen in an interrupt context, where scheduling is impossible.
	 */
	if (!crash)
		wait_for_completion(&vmbus_connection.unload_event);
	else
		vmbus_wait_for_unload();
}

static void check_ready_for_resume_event(void)
{
	/*
	 * If all the old primary channels have been fixed up, then it's safe
	 * to resume.
	 */
	if (atomic_dec_and_test(&vmbus_connection.nr_chan_fixup_on_resume))
		complete(&vmbus_connection.ready_for_resume_event);
}

static void vmbus_setup_channel_state(struct vmbus_channel *channel,
				      struct vmbus_channel_offer_channel *offer)
{
	/*
	 * Setup state for signalling the host.
	 */
	channel->sig_event = VMBUS_EVENT_CONNECTION_ID;

	channel->is_dedicated_interrupt =
			(offer->is_dedicated_interrupt != 0);
	channel->sig_event = offer->connection_id;

	memcpy(&channel->offermsg, offer,
	       sizeof(struct vmbus_channel_offer_channel));
	channel->monitor_grp = (u8)offer->monitorid / 32;
	channel->monitor_bit = (u8)offer->monitorid % 32;
	channel->device_id = hv_get_dev_type(channel);
}

/*
 * find_primary_channel_by_offer - Get the channel object given the new offer.
 * This is only used in the resume path of hibernation.
 */
static struct vmbus_channel *
find_primary_channel_by_offer(const struct vmbus_channel_offer_channel *offer)
{
	struct vmbus_channel *channel = NULL, *iter;
	const guid_t *inst1, *inst2;

	/* Ignore sub-channel offers. */
	if (offer->offer.sub_channel_index != 0)
		return NULL;

	mutex_lock(&vmbus_connection.channel_mutex);

	list_for_each_entry(iter, &vmbus_connection.chn_list, listentry) {
		inst1 = &iter->offermsg.offer.if_instance;
		inst2 = &offer->offer.if_instance;

		if (guid_equal(inst1, inst2)) {
			channel = iter;
			break;
		}
	}

	mutex_unlock(&vmbus_connection.channel_mutex);

	return channel;
}

static bool vmbus_is_valid_offer(const struct vmbus_channel_offer_channel *offer)
{
	const guid_t *guid = &offer->offer.if_type;
	u16 i;

	if (!hv_is_isolation_supported())
		return true;

	if (is_hvsock_offer(offer))
		return true;

	for (i = 0; i < ARRAY_SIZE(vmbus_devs); i++) {
		if (guid_equal(guid, &vmbus_devs[i].guid))
			return vmbus_devs[i].allowed_in_isolated;
	}
	return false;
}

/*
 * vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
 */
static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_offer_channel *offer;
	struct vmbus_channel *oldchannel, *newchannel;
	size_t offer_sz;

	offer = (struct vmbus_channel_offer_channel *)hdr;

	trace_vmbus_onoffer(offer);

	if (!vmbus_is_valid_offer(offer)) {
		pr_err_ratelimited("Invalid offer %d from the host supporting isolation\n",
				   offer->child_relid);
		atomic_dec(&vmbus_connection.offer_in_progress);
		return;
	}

	oldchannel = find_primary_channel_by_offer(offer);

	if (oldchannel != NULL) {
		/*
		 * We're resuming from hibernation: all the sub-channel and
		 * hv_sock channels we had before the hibernation should have
		 * been cleaned up, and now we must be seeing a re-offered
		 * primary channel that we had before the hibernation.
		 */

		/*
		 * { Initially: channel relid = INVALID_RELID,
		 *		channels[valid_relid] = NULL }
		 *
		 * CPU1					CPU2
		 *
		 * [vmbus_onoffer()]			[vmbus_device_release()]
		 *
		 * LOCK channel_mutex			LOCK channel_mutex
		 * STORE channel relid = valid_relid	LOAD r1 = channel relid
		 * MAP_RELID channel			if (r1 != INVALID_RELID)
		 * UNLOCK channel_mutex			  UNMAP_RELID channel
		 *					UNLOCK channel_mutex
		 *
		 * Forbids: r1 == valid_relid &&
		 *	    channels[valid_relid] == channel
		 *
		 * Note.  r1 can be INVALID_RELID only for an hv_sock channel.
		 * None of the hv_sock channels which were present before the
		 * suspend are re-offered upon the resume.  See the WARN_ON()
		 * in hv_process_channel_removal().
		 */
		mutex_lock(&vmbus_connection.channel_mutex);

		atomic_dec(&vmbus_connection.offer_in_progress);

		WARN_ON(oldchannel->offermsg.child_relid != INVALID_RELID);
		/* Fix up the relid. */
		oldchannel->offermsg.child_relid = offer->child_relid;

		offer_sz = sizeof(*offer);
		if (memcmp(offer, &oldchannel->offermsg, offer_sz) != 0) {
			/*
			 * This is not an error, since the host can also change
			 * the other field(s) of the offer, e.g. on WS RS5
			 * (Build 17763), the offer->connection_id of the
			 * Mellanox VF vmbus device can change when the host
			 * reoffers the device upon resume.
			 */
			pr_debug("vmbus offer changed: relid=%d\n",
				 offer->child_relid);

			print_hex_dump_debug("Old vmbus offer: ",
					     DUMP_PREFIX_OFFSET, 16, 4,
					     &oldchannel->offermsg, offer_sz,
					     false);
			print_hex_dump_debug("New vmbus offer: ",
					     DUMP_PREFIX_OFFSET, 16, 4,
					     offer, offer_sz, false);

			/* Fix up the old channel. */
			vmbus_setup_channel_state(oldchannel, offer);
		}

		/* Add the channel back to the array of channels. */
		vmbus_channel_map_relid(oldchannel);
		check_ready_for_resume_event();

		mutex_unlock(&vmbus_connection.channel_mutex);
		return;
	}

	/* Allocate the channel object and save this offer. */
	newchannel = alloc_channel();
	if (!newchannel) {
		vmbus_release_relid(offer->child_relid);
		atomic_dec(&vmbus_connection.offer_in_progress);
		pr_err("Unable to allocate channel object\n");
		return;
	}

	vmbus_setup_channel_state(newchannel, offer);

	vmbus_process_offer(newchannel);
}

static void check_ready_for_suspend_event(void)
{
	/*
	 * If all the sub-channels or hv_sock channels have been cleaned up,
	 * then it's safe to suspend.
	 */
	if (atomic_dec_and_test(&vmbus_connection.nr_chan_close_on_suspend))
		complete(&vmbus_connection.ready_for_suspend_event);
}

/*
 * vmbus_onoffer_rescind - Rescind offer handler.
 *
 * We queue a work item to process this offer synchronously
 */
static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_rescind_offer *rescind;
	struct vmbus_channel *channel;
	struct device *dev;
	bool clean_up_chan_for_suspend;

	rescind = (struct vmbus_channel_rescind_offer *)hdr;

	trace_vmbus_onoffer_rescind(rescind);

	/*
	 * The offer msg and the corresponding rescind msg
	 * from the host are guaranteed to be ordered -
	 * offer comes in first and then the rescind.
	 * Since we process these events in work elements,
	 * and with preemption, we may end up processing
	 * the events out of order.  We rely on the synchronization
	 * provided by offer_in_progress and by channel_mutex for
	 * ordering these events:
	 *
	 * { Initially: offer_in_progress = 1 }
	 *
	 * CPU1				CPU2
	 *
	 * [vmbus_onoffer()]		[vmbus_onoffer_rescind()]
	 *
	 * LOCK channel_mutex		WAIT_ON offer_in_progress == 0
	 * DECREMENT offer_in_progress	LOCK channel_mutex
	 * STORE channels[]		LOAD channels[]
	 * UNLOCK channel_mutex		UNLOCK channel_mutex
	 *
	 * Forbids: CPU2's LOAD from *not* seeing CPU1's STORE
	 */

	while (atomic_read(&vmbus_connection.offer_in_progress) != 0) {
		/*
		 * We wait here while any channel offer is still being
		 * processed.
		 */
		msleep(1);
	}

	mutex_lock(&vmbus_connection.channel_mutex);
	channel = relid2channel(rescind->child_relid);
	if (channel != NULL) {
		/*
		 * Guarantee that no other instance of vmbus_onoffer_rescind()
		 * has got a reference to the channel object.  Synchronize on
		 * &vmbus_connection.channel_mutex.
		 */
		if (channel->rescind_ref) {
			mutex_unlock(&vmbus_connection.channel_mutex);
			return;
		}
		channel->rescind_ref = true;
	}
	mutex_unlock(&vmbus_connection.channel_mutex);

	if (channel == NULL) {
		/*
		 * We failed in processing the offer message;
		 * we would have cleaned up the relid in that
		 * failure path.
		 */
		return;
	}

	clean_up_chan_for_suspend = is_hvsock_channel(channel) ||
				    is_sub_channel(channel);
	/*
	 * Before setting channel->rescind in vmbus_rescind_cleanup(), we
	 * should make sure the channel callback is not running any more.
	 */
	vmbus_reset_channel_cb(channel);

	/*
	 * Now wait for offer handling to complete.
	 */
	vmbus_rescind_cleanup(channel);
	while (READ_ONCE(channel->probe_done) == false) {
		/*
		 * Wait here until the device probe for the channel has
		 * completed.
		 */
		msleep(1);
	}

	/*
	 * At this point, the rescind handling can proceed safely.
	 */

	if (channel->device_obj) {
		if (channel->chn_rescind_callback) {
			channel->chn_rescind_callback(channel);

			if (clean_up_chan_for_suspend)
				check_ready_for_suspend_event();

			return;
		}
		/*
		 * We will have to unregister this device from the
		 * driver core.
		 */
		dev = get_device(&channel->device_obj->device);
		if (dev) {
			vmbus_device_unregister(channel->device_obj);
			put_device(dev);
		}
	} else if (channel->primary_channel != NULL) {
		/*
		 * Sub-channel is being rescinded. Following is the channel
		 * close sequence when initiated from the driver (refer to
		 * vmbus_close() for details):
		 * 1. Close all sub-channels first
		 * 2. Then close the primary channel.
		 */
		mutex_lock(&vmbus_connection.channel_mutex);
		if (channel->state == CHANNEL_OPEN_STATE) {
			/*
			 * The channel is currently not open;
			 * it is safe for us to cleanup the channel.
			 */
			hv_process_channel_removal(channel);
		} else {
			complete(&channel->rescind_event);
		}
		mutex_unlock(&vmbus_connection.channel_mutex);
	}

	/* The "channel" may have been freed. Do not access it any longer. */

	if (clean_up_chan_for_suspend)
		check_ready_for_suspend_event();
}

void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
{
	BUG_ON(!is_hvsock_channel(channel));

	/* We always get a rescind msg when a connection is closed. */
	while (!READ_ONCE(channel->probe_done) || !READ_ONCE(channel->rescind))
		msleep(1);

	vmbus_device_unregister(channel->device_obj);
}
EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister);

/*
 * vmbus_onoffers_delivered -
 * This is invoked when all offers have been delivered.
 *
 * Nothing to do here.
 */
static void vmbus_onoffers_delivered(
			struct vmbus_channel_message_header *hdr)
{
}

/*
 * vmbus_onopen_result - Open result handler.
 *
 * This is invoked when we receive a response to our channel open request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_open_result *result;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_open_channel *openmsg;
	unsigned long flags;

	result = (struct vmbus_channel_open_result *)hdr;

	trace_vmbus_onopen_result(result);

	/*
	 * Find the open msg, copy the result and signal/unblock the wait event
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
			    msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) {
			openmsg =
				(struct vmbus_channel_open_channel *)msginfo->msg;
			if (openmsg->child_relid == result->child_relid &&
			    openmsg->openid == result->openid) {
				memcpy(&msginfo->response.open_result,
				       result,
				       sizeof(
					struct vmbus_channel_open_result));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_ongpadl_created - GPADL created handler.
 *
 * This is invoked when we receive a response to our gpadl create request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_gpadl_created *gpadlcreated;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_gpadl_header *gpadlheader;
	unsigned long flags;

	gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr;

	trace_vmbus_ongpadl_created(gpadlcreated);

	/*
	 * Find the establish msg, copy the result and signal/unblock the wait
	 * event
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
			    msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) {
			gpadlheader =
				(struct vmbus_channel_gpadl_header *)requestheader;

			if ((gpadlcreated->child_relid ==
			     gpadlheader->child_relid) &&
			    (gpadlcreated->gpadl == gpadlheader->gpadl)) {
				memcpy(&msginfo->response.gpadl_created,
				       gpadlcreated,
				       sizeof(
					struct vmbus_channel_gpadl_created));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_onmodifychannel_response - Modify Channel response handler.
 *
 * This is invoked when we receive a response to our channel modify request.
 * Find the matching request, copy the response and signal the requesting thread.
 */
static void vmbus_onmodifychannel_response(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_modifychannel_response *response;
	struct vmbus_channel_msginfo *msginfo;
	unsigned long flags;

	response = (struct vmbus_channel_modifychannel_response *)hdr;

	trace_vmbus_onmodifychannel_response(response);

	/*
	 * Find the modify msg, copy the response and signal/unblock the wait event.
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, msglistentry) {
		struct vmbus_channel_message_header *responseheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (responseheader->msgtype == CHANNELMSG_MODIFYCHANNEL) {
			struct vmbus_channel_modifychannel *modifymsg;

			modifymsg = (struct vmbus_channel_modifychannel *)msginfo->msg;
			if (modifymsg->child_relid == response->child_relid) {
				memcpy(&msginfo->response.modify_response, response,
				       sizeof(*response));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_ongpadl_torndown - GPADL torndown handler.
 *
 * This is invoked when we receive a response to our gpadl teardown request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_ongpadl_torndown(
			struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_gpadl_torndown *gpadl_torndown;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_gpadl_teardown *gpadl_teardown;
	unsigned long flags;

	gpadl_torndown = (struct vmbus_channel_gpadl_torndown *)hdr;

	trace_vmbus_ongpadl_torndown(gpadl_torndown);

	/*
	 * Find the open msg, copy the result and signal/unblock the wait event
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
			    msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype == CHANNELMSG_GPADL_TEARDOWN) {
			gpadl_teardown =
				(struct vmbus_channel_gpadl_teardown *)requestheader;

			if (gpadl_torndown->gpadl == gpadl_teardown->gpadl) {
				memcpy(&msginfo->response.gpadl_torndown,
				       gpadl_torndown,
				       sizeof(
					struct vmbus_channel_gpadl_torndown));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_onversion_response - Version response handler
 *
 * This is invoked when we receive a response to our initiate contact request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_onversion_response(
		struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_version_response *version_response;
	unsigned long flags;

	version_response = (struct vmbus_channel_version_response *)hdr;

	trace_vmbus_onversion_response(version_response);

	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
			    msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype ==
		    CHANNELMSG_INITIATE_CONTACT) {
			memcpy(&msginfo->response.version_response,
			       version_response,
			       sizeof(struct vmbus_channel_version_response));
			complete(&msginfo->waitevent);
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/* Channel message dispatch table */
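/*
 * Each entry below is { message type, handler type, handler, minimum
 * payload length } (cf. struct vmbus_channel_message_table_entry in
 * hyperv_vmbus.h).  A handler type of 1 marks handlers that merely
 * complete() a waiter and can run directly in the message DPC; 0 marks
 * handlers that may sleep, which vmbus_on_msg_dpc() defers to a work
 * queue.
 */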
const struct vmbus_channel_message_table_entry
channel_message_table[CHANNELMSG_COUNT] = {
	{ CHANNELMSG_INVALID,			0, NULL, 0},
	{ CHANNELMSG_OFFERCHANNEL,		0, vmbus_onoffer,
		sizeof(struct vmbus_channel_offer_channel)},
	{ CHANNELMSG_RESCIND_CHANNELOFFER,	0, vmbus_onoffer_rescind,
		sizeof(struct vmbus_channel_rescind_offer) },
	{ CHANNELMSG_REQUESTOFFERS,		0, NULL, 0},
	{ CHANNELMSG_ALLOFFERS_DELIVERED,	1, vmbus_onoffers_delivered, 0},
	{ CHANNELMSG_OPENCHANNEL,		0, NULL, 0},
	{ CHANNELMSG_OPENCHANNEL_RESULT,	1, vmbus_onopen_result,
		sizeof(struct vmbus_channel_open_result)},
	{ CHANNELMSG_CLOSECHANNEL,		0, NULL, 0},
	{ CHANNELMSG_GPADL_HEADER,		0, NULL, 0},
	{ CHANNELMSG_GPADL_BODY,		0, NULL, 0},
	{ CHANNELMSG_GPADL_CREATED,		1, vmbus_ongpadl_created,
		sizeof(struct vmbus_channel_gpadl_created)},
	{ CHANNELMSG_GPADL_TEARDOWN,		0, NULL, 0},
	{ CHANNELMSG_GPADL_TORNDOWN,		1, vmbus_ongpadl_torndown,
		sizeof(struct vmbus_channel_gpadl_torndown) },
	{ CHANNELMSG_RELID_RELEASED,		0, NULL, 0},
	{ CHANNELMSG_INITIATE_CONTACT,		0, NULL, 0},
	{ CHANNELMSG_VERSION_RESPONSE,		1, vmbus_onversion_response,
		sizeof(struct vmbus_channel_version_response)},
	{ CHANNELMSG_UNLOAD,			0, NULL, 0},
	{ CHANNELMSG_UNLOAD_RESPONSE,		1, vmbus_unload_response, 0},
	{ CHANNELMSG_18,			0, NULL, 0},
	{ CHANNELMSG_19,			0, NULL, 0},
	{ CHANNELMSG_20,			0, NULL, 0},
	{ CHANNELMSG_TL_CONNECT_REQUEST,	0, NULL, 0},
	{ CHANNELMSG_MODIFYCHANNEL,		0, NULL, 0},
	{ CHANNELMSG_TL_CONNECT_RESULT,		0, NULL, 0},
	{ CHANNELMSG_MODIFYCHANNEL_RESPONSE,	1, vmbus_onmodifychannel_response,
		sizeof(struct vmbus_channel_modifychannel_response)},
};

/*
 * vmbus_onmessage - Handler for channel protocol messages.
 *
 * This is invoked in the vmbus worker thread context.
 */
void vmbus_onmessage(struct vmbus_channel_message_header *hdr)
{
	trace_vmbus_on_message(hdr);

	/*
	 * vmbus_on_msg_dpc() makes sure the hdr->msgtype here cannot go
	 * out of bounds and the message_handler pointer cannot be NULL.
	 */
	channel_message_table[hdr->msgtype].message_handler(hdr);
}

/*
 * vmbus_request_offers - Send a request to get all our pending offers.
 */
int vmbus_request_offers(void)
{
	struct vmbus_channel_message_header *msg;
	struct vmbus_channel_msginfo *msginfo;
	int ret;

	msginfo = kzalloc(sizeof(*msginfo) +
			  sizeof(struct vmbus_channel_message_header),
			  GFP_KERNEL);
	if (!msginfo)
		return -ENOMEM;

	msg = (struct vmbus_channel_message_header *)msginfo->msg;

	msg->msgtype = CHANNELMSG_REQUESTOFFERS;

	ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_message_header),
			     true);

	trace_vmbus_request_offers(ret);

	if (ret != 0)
		pr_err("Unable to request offers - %d\n", ret);

	kfree(msginfo);

	return ret;
}

void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel,
				  void (*sc_cr_cb)(struct vmbus_channel *new_sc))
{
	primary_channel->sc_creation_callback = sc_cr_cb;
}
EXPORT_SYMBOL_GPL(vmbus_set_sc_create_callback);

void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel,
				    void (*chn_rescind_cb)(struct vmbus_channel *))
{
	channel->chn_rescind_callback = chn_rescind_cb;
}
EXPORT_SYMBOL_GPL(vmbus_set_chn_rescind_callback);