channel_mgmt.c source code [linux/drivers/hv/channel_mgmt.c]

1	// SPDX-License-Identifier: GPL-2.0-only
2	/*
3	* Copyright (c) 2009, Microsoft Corporation.
4	*
5	* Authors:
6	* Haiyang Zhang <haiyangz@microsoft.com>
7	* Hank Janssen <hjanssen@microsoft.com>
8	*/
9	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10
11	#include <linux/kernel.h>
12	#include <linux/interrupt.h>
13	#include <linux/sched.h>
14	#include <linux/wait.h>
15	#include <linux/mm.h>
16	#include <linux/slab.h>
17	#include <linux/list.h>
18	#include <linux/module.h>
19	#include <linux/completion.h>
20	#include <linux/delay.h>
21	#include <linux/cpu.h>
22	#include <linux/hyperv.h>
23	#include <asm/mshyperv.h>
24	#include <linux/sched/isolation.h>
25
26	#include "hyperv_vmbus.h"
27
28	static void init_vp_index(struct vmbus_channel *channel);
29
30	const struct vmbus_device vmbus_devs[] = {
31	/ IDE /
32	{ .dev_type = HV_IDE,
33	HV_IDE_GUID,
34	.perf_device = true,
35	.allowed_in_isolated = false,
36	},
37
38	/ SCSI /
39	{ .dev_type = HV_SCSI,
40	HV_SCSI_GUID,
41	.perf_device = true,
42	.allowed_in_isolated = true,
43	},
44
45	/ Fibre Channel /
46	{ .dev_type = HV_FC,
47	HV_SYNTHFC_GUID,
48	.perf_device = true,
49	.allowed_in_isolated = false,
50	},
51
52	/ Synthetic NIC /
53	{ .dev_type = HV_NIC,
54	HV_NIC_GUID,
55	.perf_device = true,
56	.allowed_in_isolated = true,
57	},
58
59	/ Network Direct /
60	{ .dev_type = HV_ND,
61	HV_ND_GUID,
62	.perf_device = true,
63	.allowed_in_isolated = false,
64	},
65
66	/ PCIE /
67	{ .dev_type = HV_PCIE,
68	HV_PCIE_GUID,
69	.perf_device = false,
70	.allowed_in_isolated = true,
71	},
72
73	/ Synthetic Frame Buffer /
74	{ .dev_type = HV_FB,
75	HV_SYNTHVID_GUID,
76	.perf_device = false,
77	.allowed_in_isolated = false,
78	},
79
80	/ Synthetic Keyboard /
81	{ .dev_type = HV_KBD,
82	HV_KBD_GUID,
83	.perf_device = false,
84	.allowed_in_isolated = false,
85	},
86
87	/ Synthetic MOUSE /
88	{ .dev_type = HV_MOUSE,
89	HV_MOUSE_GUID,
90	.perf_device = false,
91	.allowed_in_isolated = false,
92	},
93
94	/ KVP /
95	{ .dev_type = HV_KVP,
96	HV_KVP_GUID,
97	.perf_device = false,
98	.allowed_in_isolated = false,
99	},
100
101	/ Time Synch /
102	{ .dev_type = HV_TS,
103	HV_TS_GUID,
104	.perf_device = false,
105	.allowed_in_isolated = true,
106	},
107
108	/ Heartbeat /
109	{ .dev_type = HV_HB,
110	HV_HEART_BEAT_GUID,
111	.perf_device = false,
112	.allowed_in_isolated = true,
113	},
114
115	/ Shutdown /
116	{ .dev_type = HV_SHUTDOWN,
117	HV_SHUTDOWN_GUID,
118	.perf_device = false,
119	.allowed_in_isolated = true,
120	},
121
122	/ File copy /
123	{ .dev_type = HV_FCOPY,
124	HV_FCOPY_GUID,
125	.perf_device = false,
126	.allowed_in_isolated = false,
127	},
128
129	/ Backup /
130	{ .dev_type = HV_BACKUP,
131	HV_VSS_GUID,
132	.perf_device = false,
133	.allowed_in_isolated = false,
134	},
135
136	/ Dynamic Memory /
137	{ .dev_type = HV_DM,
138	HV_DM_GUID,
139	.perf_device = false,
140	.allowed_in_isolated = false,
141	},
142
143	/ Unknown GUID /
144	{ .dev_type = HV_UNKNOWN,
145	.perf_device = false,
146	.allowed_in_isolated = false,
147	},
148	};
149
150	static const struct {
151	guid_t guid;
152	} vmbus_unsupported_devs[] = {
153	{ HV_AVMA1_GUID },
154	{ HV_AVMA2_GUID },
155	{ HV_RDV_GUID },
156	{ HV_IMC_GUID },
157	};
158
159	/*
160	* The rescinded channel may be blocked waiting for a response from the host;
161	* take care of that.
162	*/
163	static void vmbus_rescind_cleanup(struct vmbus_channel *channel)
164	{
165	struct vmbus_channel_msginfo *msginfo;
166	unsigned long flags;
167
168
169	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
170	channel->rescind = true;
171	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
172	msglistentry) {
173
174	if (msginfo->waiting_channel == channel) {
175	complete(&msginfo->waitevent);
176	break;
177	}
178	}
179	spin_unlock_irqrestore(lock: &vmbus_connection.channelmsg_lock, flags);
180	}
181
182	static bool is_unsupported_vmbus_devs(const guid_t *guid)
183	{
184	int i;
185
186	for (i = `0`; i < ARRAY_SIZE(vmbus_unsupported_devs); i++)
187	if (guid_equal(u1: guid, u2: &vmbus_unsupported_devs[i].guid))
188	return true;
189	return false;
190	}
191
192	static u16 hv_get_dev_type(const struct vmbus_channel *channel)
193	{
194	const guid_t *guid = &channel->offermsg.offer.if_type;
195	u16 i;
196
197	if (is_hvsock_channel(c: channel) \|\| is_unsupported_vmbus_devs(guid))
198	return HV_UNKNOWN;
199
200	for (i = HV_IDE; i < HV_UNKNOWN; i++) {
201	if (guid_equal(u1: guid, u2: &vmbus_devs[i].guid))
202	return i;
203	}
204	pr_info("Unknown GUID: %pUl\n", guid);
205	return i;
206	}
207
208	/**
209	* vmbus_prep_negotiate_resp() - Create default response for Negotiate message
210	* @icmsghdrp: Pointer to msg header structure
211	* @buf: Raw buffer channel data
212	* @buflen: Length of the raw buffer channel data.
213	* @fw_version: The framework versions we can support.
214	* @fw_vercnt: The size of @fw_version.
215	* @srv_version: The service versions we can support.
216	* @srv_vercnt: The size of @srv_version.
217	* @nego_fw_version: The selected framework version.
218	* @nego_srv_version: The selected service version.
219	*
220	* Note: Versions are given in decreasing order.
221	*
222	* Set up and fill in default negotiate response message.
223	* Mainly used by Hyper-V drivers.
224	*/
225	bool vmbus_prep_negotiate_resp(struct icmsg_hdr icmsghdrp, u8 buf,
226	u32 buflen, const int fw_version, int* fw_vercnt,
227	const int srv_version, int* srv_vercnt,
228	int nego_fw_version, int* *nego_srv_version)
229	{
230	int icframe_major, icframe_minor;
231	int icmsg_major, icmsg_minor;
232	int fw_major, fw_minor;
233	int srv_major, srv_minor;
234	int i, j;
235	bool found_match = false;
236	struct icmsg_negotiate *negop;
237
238	/ Check that there's enough space for icframe_vercnt, icmsg_vercnt /
239	if (buflen < ICMSG_HDR + offsetof(struct icmsg_negotiate, reserved)) {
240	pr_err_ratelimited("Invalid icmsg negotiate\n");
241	return false;
242	}
243
244	icmsghdrp->icmsgsize = `0x10`;
245	negop = (struct icmsg_negotiate *)&buf[ICMSG_HDR];
246
247	icframe_major = negop->icframe_vercnt;
248	icframe_minor = `0`;
249
250	icmsg_major = negop->icmsg_vercnt;
251	icmsg_minor = `0`;
252
253	/ Validate negop packet /
254	if (icframe_major > IC_VERSION_NEGOTIATION_MAX_VER_COUNT \|\|
255	icmsg_major > IC_VERSION_NEGOTIATION_MAX_VER_COUNT \|\|
256	ICMSG_NEGOTIATE_PKT_SIZE(icframe_major, icmsg_major) > buflen) {
257	pr_err_ratelimited("Invalid icmsg negotiate - icframe_major: %u, icmsg_major: %u\n",
258	icframe_major, icmsg_major);
259	goto fw_error;
260	}
261
262	/*
263	* Select the framework version number we will
264	* support.
265	*/
266
267	for (i = `0`; i < fw_vercnt; i++) {
268	fw_major = (fw_version[i] >> `16`);
269	fw_minor = (fw_version[i] & `0xFFFF`);
270
271	for (j = `0`; j < negop->icframe_vercnt; j++) {
272	if ((negop->icversion_data[j].major == fw_major) &&
273	(negop->icversion_data[j].minor == fw_minor)) {
274	icframe_major = negop->icversion_data[j].major;
275	icframe_minor = negop->icversion_data[j].minor;
276	found_match = true;
277	break;
278	}
279	}
280
281	if (found_match)
282	break;
283	}
284
285	if (!found_match)
286	goto fw_error;
287
288	found_match = false;
289
290	for (i = `0`; i < srv_vercnt; i++) {
291	srv_major = (srv_version[i] >> `16`);
292	srv_minor = (srv_version[i] & `0xFFFF`);
293
294	for (j = negop->icframe_vercnt;
295	(j < negop->icframe_vercnt + negop->icmsg_vercnt);
296	j++) {
297
298	if ((negop->icversion_data[j].major == srv_major) &&
299	(negop->icversion_data[j].minor == srv_minor)) {
300
301	icmsg_major = negop->icversion_data[j].major;
302	icmsg_minor = negop->icversion_data[j].minor;
303	found_match = true;
304	break;
305	}
306	}
307
308	if (found_match)
309	break;
310	}
311
312	/*
313	* Respond with the framework and service
314	* version numbers we can support.
315	*/
316
317	fw_error:
318	if (!found_match) {
319	negop->icframe_vercnt = `0`;
320	negop->icmsg_vercnt = `0`;
321	} else {
322	negop->icframe_vercnt = `1`;
323	negop->icmsg_vercnt = `1`;
324	}
325
326	if (nego_fw_version)
327	*nego_fw_version = (icframe_major << `16`) \| icframe_minor;
328
329	if (nego_srv_version)
330	*nego_srv_version = (icmsg_major << `16`) \| icmsg_minor;
331
332	negop->icversion_data[`0`].major = icframe_major;
333	negop->icversion_data[`0`].minor = icframe_minor;
334	negop->icversion_data[`1`].major = icmsg_major;
335	negop->icversion_data[`1`].minor = icmsg_minor;
336	return found_match;
337	}
338	EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);
339
340	/*
341	* alloc_channel - Allocate and initialize a vmbus channel object
342	*/
343	static struct vmbus_channel alloc_channel(void*)
344	{
345	struct vmbus_channel *channel;
346
347	channel = kzalloc(size: sizeof(*channel), GFP_ATOMIC);
348	if (!channel)
349	return NULL;
350
351	spin_lock_init(&channel->sched_lock);
352	init_completion(x: &channel->rescind_event);
353
354	INIT_LIST_HEAD(list: &channel->sc_list);
355
356	tasklet_init(t: &channel->callback_event,
357	func: vmbus_on_event, data: (unsigned long)channel);
358
359	hv_ringbuffer_pre_init(channel);
360
361	return channel;
362	}
363
364	/*
365	* free_channel - Release the resources used by the vmbus channel object
366	*/
367	static void free_channel(struct vmbus_channel *channel)
368	{
369	tasklet_kill(t: &channel->callback_event);
370	vmbus_remove_channel_attr_group(channel);
371
372	kobject_put(kobj: &channel->kobj);
373	}
374
375	void vmbus_channel_map_relid(struct vmbus_channel *channel)
376	{
377	if (WARN_ON(channel->offermsg.child_relid >= MAX_CHANNEL_RELIDS))
378	return;
379	/*
380	* The mapping of the channel's relid is visible from the CPUs that
381	* execute vmbus_chan_sched() by the time that vmbus_chan_sched() will
382	* execute:
383	*
384	* (a) In the "normal (i.e., not resuming from hibernation)" path,
385	* the full barrier in virt_store_mb() guarantees that the store
386	* is propagated to all CPUs before the add_channel_work work
387	* is queued. In turn, add_channel_work is queued before the
388	* channel's ring buffer is allocated/initialized and the
389	* OPENCHANNEL message for the channel is sent in vmbus_open().
390	* Hyper-V won't start sending the interrupts for the channel
391	* before the OPENCHANNEL message is acked. The memory barrier
392	* in vmbus_chan_sched() -> sync_test_and_clear_bit() ensures
393	* that vmbus_chan_sched() must find the channel's relid in
394	* recv_int_page before retrieving the channel pointer from the
395	* array of channels.
396	*
397	* (b) In the "resuming from hibernation" path, the virt_store_mb()
398	* guarantees that the store is propagated to all CPUs before
399	* the VMBus connection is marked as ready for the resume event
400	* (cf. check_ready_for_resume_event()). The interrupt handler
401	* of the VMBus driver and vmbus_chan_sched() can not run before
402	* vmbus_bus_resume() has completed execution (cf. resume_noirq).
403	*/
404	virt_store_mb(
405	vmbus_connection.channels[channel->offermsg.child_relid],
406	channel);
407	}
408
409	void vmbus_channel_unmap_relid(struct vmbus_channel *channel)
410	{
411	if (WARN_ON(channel->offermsg.child_relid >= MAX_CHANNEL_RELIDS))
412	return;
413	WRITE_ONCE(
414	vmbus_connection.channels[channel->offermsg.child_relid],
415	NULL);
416	}
417
418	static void vmbus_release_relid(u32 relid)
419	{
420	struct vmbus_channel_relid_released msg;
421	int ret;
422
423	memset(&msg, `0`, sizeof(struct vmbus_channel_relid_released));
424	msg.child_relid = relid;
425	msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
426	ret = vmbus_post_msg(buffer: &msg, buflen: sizeof(struct vmbus_channel_relid_released),
427	can_sleep: true);
428
429	trace_vmbus_release_relid(msg: &msg, ret);
430	}
431
432	void hv_process_channel_removal(struct vmbus_channel *channel)
433	{
434	lockdep_assert_held(&vmbus_connection.channel_mutex);
435	BUG_ON(!channel->rescind);
436
437	/*
438	* hv_process_channel_removal() could find INVALID_RELID only for
439	* hv_sock channels. See the inline comments in vmbus_onoffer().
440	*/
441	WARN_ON(channel->offermsg.child_relid == INVALID_RELID &&
442	!is_hvsock_channel(channel));
443
444	/*
445	* Upon suspend, an in-use hv_sock channel is removed from the array of
446	* channels and the relid is invalidated. After hibernation, when the
447	* user-space application destroys the channel, it's unnecessary and
448	* unsafe to remove the channel from the array of channels. See also
449	* the inline comments before the call of vmbus_release_relid() below.
450	*/
451	if (channel->offermsg.child_relid != INVALID_RELID)
452	vmbus_channel_unmap_relid(channel);
453
454	if (channel->primary_channel == NULL)
455	list_del(entry: &channel->listentry);
456	else
457	list_del(entry: &channel->sc_list);
458
459	/*
460	* If this is a "perf" channel, updates the hv_numa_map[] masks so that
461	* init_vp_index() can (re-)use the CPU.
462	*/
463	if (hv_is_perf_channel(channel))
464	hv_clear_allocated_cpu(cpu: channel->target_cpu);
465
466	/*
467	* Upon suspend, an in-use hv_sock channel is marked as "rescinded" and
468	* the relid is invalidated; after hibernation, when the user-space app
469	* destroys the channel, the relid is INVALID_RELID, and in this case
470	* it's unnecessary and unsafe to release the old relid, since the same
471	* relid can refer to a completely different channel now.
472	*/
473	if (channel->offermsg.child_relid != INVALID_RELID)
474	vmbus_release_relid(relid: channel->offermsg.child_relid);
475
476	free_channel(channel);
477	}
478
479	void vmbus_free_channels(void)
480	{
481	struct vmbus_channel channel, tmp;
482
483	list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list,
484	listentry) {
485	/ hv_process_channel_removal() needs this /
486	channel->rescind = true;
487
488	vmbus_device_unregister(device_obj: channel->device_obj);
489	}
490	}
491
492	/ Note: the function can run concurrently for primary/sub channels. /
493	static void vmbus_add_channel_work(struct work_struct *work)
494	{
495	struct vmbus_channel *newchannel =
496	container_of(work, struct vmbus_channel, add_channel_work);
497	struct vmbus_channel *primary_channel = newchannel->primary_channel;
498	int ret;
499
500	/*
501	* This state is used to indicate a successful open
502	* so that when we do close the channel normally, we
503	* can cleanup properly.
504	*/
505	newchannel->state = CHANNEL_OPEN_STATE;
506
507	if (primary_channel != NULL) {
508	/ newchannel is a sub-channel. /
509	struct hv_device *dev = primary_channel->device_obj;
510
511	if (vmbus_add_channel_kobj(device_obj: dev, channel: newchannel))
512	goto err_deq_chan;
513
514	if (primary_channel->sc_creation_callback != NULL)
515	primary_channel->sc_creation_callback(newchannel);
516
517	newchannel->probe_done = true;
518	return;
519	}
520
521	/*
522	* Start the process of binding the primary channel to the driver
523	*/
524	newchannel->device_obj = vmbus_device_create(
525	type: &newchannel->offermsg.offer.if_type,
526	instance: &newchannel->offermsg.offer.if_instance,
527	channel: newchannel);
528	if (!newchannel->device_obj)
529	goto err_deq_chan;
530
531	newchannel->device_obj->device_id = newchannel->device_id;
532	/*
533	* Add the new device to the bus. This will kick off device-driver
534	* binding which eventually invokes the device driver's AddDevice()
535	* method.
536	*
537	* If vmbus_device_register() fails, the 'device_obj' is freed in
538	* vmbus_device_release() as called by device_unregister() in the
539	* error path of vmbus_device_register(). In the outside error
540	* path, there's no need to free it.
541	*/
542	ret = vmbus_device_register(child_device_obj: newchannel->device_obj);
543
544	if (ret != `0`) {
545	pr_err("unable to add child device object (relid %d)\n",
546	newchannel->offermsg.child_relid);
547	goto err_deq_chan;
548	}
549
550	newchannel->probe_done = true;
551	return;
552
553	err_deq_chan:
554	mutex_lock(&vmbus_connection.channel_mutex);
555
556	/*
557	* We need to set the flag, otherwise
558	* vmbus_onoffer_rescind() can be blocked.
559	*/
560	newchannel->probe_done = true;
561
562	if (primary_channel == NULL)
563	list_del(entry: &newchannel->listentry);
564	else
565	list_del(entry: &newchannel->sc_list);
566
567	/ vmbus_process_offer() has mapped the channel. /
568	vmbus_channel_unmap_relid(channel: newchannel);
569
570	mutex_unlock(lock: &vmbus_connection.channel_mutex);
571
572	vmbus_release_relid(relid: newchannel->offermsg.child_relid);
573
574	free_channel(channel: newchannel);
575	}
576
577	/*
578	* vmbus_process_offer - Process the offer by creating a channel/device
579	* associated with this offer
580	*/
581	static void vmbus_process_offer(struct vmbus_channel *newchannel)
582	{
583	struct vmbus_channel *channel;
584	struct workqueue_struct *wq;
585	bool fnew = true;
586
587	/*
588	* Synchronize vmbus_process_offer() and CPU hotplugging:
589	*
590	* CPU1 CPU2
591	*
592	* [vmbus_process_offer()] [Hot removal of the CPU]
593	*
594	* CPU_READ_LOCK CPUS_WRITE_LOCK
595	* LOAD cpu_online_mask SEARCH chn_list
596	* STORE target_cpu LOAD target_cpu
597	* INSERT chn_list STORE cpu_online_mask
598	* CPUS_READ_UNLOCK CPUS_WRITE_UNLOCK
599	*
600	* Forbids: CPU1's LOAD from not seing CPU2's STORE &&
601	* CPU2's SEARCH from not seeing CPU1's INSERT
602	*
603	* Forbids: CPU2's SEARCH from seeing CPU1's INSERT &&
604	* CPU2's LOAD from not seing CPU1's STORE
605	*/
606	cpus_read_lock();
607
608	/*
609	* Serializes the modifications of the chn_list list as well as
610	* the accesses to next_numa_node_id in init_vp_index().
611	*/
612	mutex_lock(&vmbus_connection.channel_mutex);
613
614	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
615	if (guid_equal(u1: &channel->offermsg.offer.if_type,
616	u2: &newchannel->offermsg.offer.if_type) &&
617	guid_equal(u1: &channel->offermsg.offer.if_instance,
618	u2: &newchannel->offermsg.offer.if_instance)) {
619	fnew = false;
620	newchannel->primary_channel = channel;
621	break;
622	}
623	}
624
625	init_vp_index(channel: newchannel);
626
627	/ Remember the channels that should be cleaned up upon suspend. /
628	if (is_hvsock_channel(c: newchannel) \|\| is_sub_channel(c: newchannel))
629	atomic_inc(v: &vmbus_connection.nr_chan_close_on_suspend);
630
631	/*
632	* Now that we have acquired the channel_mutex,
633	* we can release the potentially racing rescind thread.
634	*/
635	atomic_dec(v: &vmbus_connection.offer_in_progress);
636
637	if (fnew) {
638	list_add_tail(new: &newchannel->listentry,
639	head: &vmbus_connection.chn_list);
640	} else {
641	/*
642	* Check to see if this is a valid sub-channel.
643	*/
644	if (newchannel->offermsg.offer.sub_channel_index == `0`) {
645	mutex_unlock(lock: &vmbus_connection.channel_mutex);
646	cpus_read_unlock();
647	/*
648	* Don't call free_channel(), because newchannel->kobj
649	* is not initialized yet.
650	*/
651	kfree(objp: newchannel);
652	WARN_ON_ONCE(`1`);
653	return;
654	}
655	/*
656	* Process the sub-channel.
657	*/
658	list_add_tail(new: &newchannel->sc_list, head: &channel->sc_list);
659	}
660
661	vmbus_channel_map_relid(channel: newchannel);
662
663	mutex_unlock(lock: &vmbus_connection.channel_mutex);
664	cpus_read_unlock();
665
666	/*
667	* vmbus_process_offer() mustn't call channel->sc_creation_callback()
668	* directly for sub-channels, because sc_creation_callback() ->
669	* vmbus_open() may never get the host's response to the
670	* OPEN_CHANNEL message (the host may rescind a channel at any time,
671	* e.g. in the case of hot removing a NIC), and vmbus_onoffer_rescind()
672	* may not wake up the vmbus_open() as it's blocked due to a non-zero
673	* vmbus_connection.offer_in_progress, and finally we have a deadlock.
674	*
675	* The above is also true for primary channels, if the related device
676	* drivers use sync probing mode by default.
677	*
678	* And, usually the handling of primary channels and sub-channels can
679	* depend on each other, so we should offload them to different
680	* workqueues to avoid possible deadlock, e.g. in sync-probing mode,
681	* NIC1's netvsc_subchan_work() can race with NIC2's netvsc_probe() ->
682	* rtnl_lock(), and causes deadlock: the former gets the rtnl_lock
683	* and waits for all the sub-channels to appear, but the latter
684	* can't get the rtnl_lock and this blocks the handling of
685	* sub-channels.
686	*/
687	INIT_WORK(&newchannel->add_channel_work, vmbus_add_channel_work);
688	wq = fnew ? vmbus_connection.handle_primary_chan_wq :
689	vmbus_connection.handle_sub_chan_wq;
690	queue_work(wq, work: &newchannel->add_channel_work);
691	}
692
693	/*
694	* Check if CPUs used by other channels of the same device.
695	* It should only be called by init_vp_index().
696	*/
697	static bool hv_cpuself_used(u32 cpu, struct vmbus_channel *chn)
698	{
699	struct vmbus_channel *primary = chn->primary_channel;
700	struct vmbus_channel *sc;
701
702	lockdep_assert_held(&vmbus_connection.channel_mutex);
703
704	if (!primary)
705	return false;
706
707	if (primary->target_cpu == cpu)
708	return true;
709
710	list_for_each_entry(sc, &primary->sc_list, sc_list)
711	if (sc != chn && sc->target_cpu == cpu)
712	return true;
713
714	return false;
715	}
716
717	/*
718	* We use this state to statically distribute the channel interrupt load.
719	*/
720	static int next_numa_node_id;
721
722	/*
723	* We can statically distribute the incoming channel interrupt load
724	* by binding a channel to VCPU.
725	*
726	* For non-performance critical channels we assign the VMBUS_CONNECT_CPU.
727	* Performance critical channels will be distributed evenly among all
728	* the available NUMA nodes. Once the node is assigned, we will assign
729	* the CPU based on a simple round robin scheme.
730	*/
731	static void init_vp_index(struct vmbus_channel *channel)
732	{
733	bool perf_chn = hv_is_perf_channel(channel);
734	u32 i, ncpu = num_online_cpus();
735	cpumask_var_t available_mask;
736	struct cpumask *allocated_mask;
737	const struct cpumask *hk_mask = housekeeping_cpumask(type: HK_TYPE_MANAGED_IRQ);
738	u32 target_cpu;
739	int numa_node;
740
741	if (!perf_chn \|\|
742	!alloc_cpumask_var(mask: &available_mask, GFP_KERNEL) \|\|
743	cpumask_empty(srcp: hk_mask)) {
744	/*
745	* If the channel is not a performance critical
746	* channel, bind it to VMBUS_CONNECT_CPU.
747	* In case alloc_cpumask_var() fails, bind it to
748	* VMBUS_CONNECT_CPU.
749	* If all the cpus are isolated, bind it to
750	* VMBUS_CONNECT_CPU.
751	*/
752	channel->target_cpu = VMBUS_CONNECT_CPU;
753	if (perf_chn)
754	hv_set_allocated_cpu(VMBUS_CONNECT_CPU);
755	return;
756	}
757
758	for (i = `1`; i <= ncpu + `1`; i++) {
759	while (true) {
760	numa_node = next_numa_node_id++;
761	if (numa_node == nr_node_ids) {
762	next_numa_node_id = `0`;
763	continue;
764	}
765	if (cpumask_empty(srcp: cpumask_of_node(node: numa_node)))
766	continue;
767	break;
768	}
769	allocated_mask = &hv_context.hv_numa_map[numa_node];
770
771	retry:
772	cpumask_xor(dstp: available_mask, src1p: allocated_mask, src2p: cpumask_of_node(node: numa_node));
773	cpumask_and(dstp: available_mask, src1p: available_mask, src2p: hk_mask);
774
775	if (cpumask_empty(srcp: available_mask)) {
776	/*
777	* We have cycled through all the CPUs in the node;
778	* reset the allocated map.
779	*/
780	cpumask_clear(dstp: allocated_mask);
781	goto retry;
782	}
783
784	target_cpu = cpumask_first(srcp: available_mask);
785	cpumask_set_cpu(cpu: target_cpu, dstp: allocated_mask);
786
787	if (channel->offermsg.offer.sub_channel_index >= ncpu \|\|
788	i > ncpu \|\| !hv_cpuself_used(cpu: target_cpu, chn: channel))
789	break;
790	}
791
792	channel->target_cpu = target_cpu;
793
794	free_cpumask_var(mask: available_mask);
795	}
796
/* Timing parameters of the polling loop in vmbus_wait_for_unload(). */
#define UNLOAD_DELAY_UNIT_MS	10		/* 10 milliseconds */
#define UNLOAD_WAIT_MS		(100*1000)	/* 100 seconds */
#define UNLOAD_WAIT_LOOPS	(UNLOAD_WAIT_MS/UNLOAD_DELAY_UNIT_MS)
#define UNLOAD_MSG_MS		(5*1000)	/* Every 5 seconds */
#define UNLOAD_MSG_LOOPS	(UNLOAD_MSG_MS/UNLOAD_DELAY_UNIT_MS)
802
803	static void vmbus_wait_for_unload(void)
804	{
805	int cpu;
806	void *page_addr;
807	struct hv_message *msg;
808	struct vmbus_channel_message_header *hdr;
809	u32 message_type, i;
810
811	/*
812	* CHANNELMSG_UNLOAD_RESPONSE is always delivered to the CPU which was
813	* used for initial contact or to CPU0 depending on host version. When
814	* we're crashing on a different CPU let's hope that IRQ handler on
815	* the cpu which receives CHANNELMSG_UNLOAD_RESPONSE is still
816	* functional and vmbus_unload_response() will complete
817	* vmbus_connection.unload_event. If not, the last thing we can do is
818	* read message pages for all CPUs directly.
819	*
820	* Wait up to 100 seconds since an Azure host must writeback any dirty
821	* data in its disk cache before the VMbus UNLOAD request will
822	* complete. This flushing has been empirically observed to take up
823	* to 50 seconds in cases with a lot of dirty data, so allow additional
824	* leeway and for inaccuracies in mdelay(). But eventually time out so
825	* that the panic path can't get hung forever in case the response
826	* message isn't seen.
827	*/
828	for (i = `1`; i <= UNLOAD_WAIT_LOOPS; i++) {
829	if (completion_done(x: &vmbus_connection.unload_event))
830	goto completed;
831
832	for_each_present_cpu(cpu) {
833	struct hv_per_cpu_context *hv_cpu
834	= per_cpu_ptr(hv_context.cpu_context, cpu);
835
836	/*
837	* In a CoCo VM the synic_message_page is not allocated
838	* in hv_synic_alloc(). Instead it is set/cleared in
839	* hv_synic_enable_regs() and hv_synic_disable_regs()
840	* such that it is set only when the CPU is online. If
841	* not all present CPUs are online, the message page
842	* might be NULL, so skip such CPUs.
843	*/
844	page_addr = hv_cpu->synic_message_page;
845	if (!page_addr)
846	continue;
847
848	msg = (struct hv_message *)page_addr
849	+ VMBUS_MESSAGE_SINT;
850
851	message_type = READ_ONCE(msg->header.message_type);
852	if (message_type == HVMSG_NONE)
853	continue;
854
855	hdr = (struct vmbus_channel_message_header *)
856	msg->u.payload;
857
858	if (hdr->msgtype == CHANNELMSG_UNLOAD_RESPONSE)
859	complete(&vmbus_connection.unload_event);
860
861	vmbus_signal_eom(msg, old_msg_type: message_type);
862	}
863
864	/*
865	* Give a notice periodically so someone watching the
866	* serial output won't think it is completely hung.
867	*/
868	if (!(i % UNLOAD_MSG_LOOPS))
869	pr_notice("Waiting for VMBus UNLOAD to complete\n");
870
871	mdelay(UNLOAD_DELAY_UNIT_MS);
872	}
873	pr_err("Continuing even though VMBus UNLOAD did not complete\n");
874
875	completed:
876	/*
877	* We're crashing and already got the UNLOAD_RESPONSE, cleanup all
878	* maybe-pending messages on all CPUs to be able to receive new
879	* messages after we reconnect.
880	*/
881	for_each_present_cpu(cpu) {
882	struct hv_per_cpu_context *hv_cpu
883	= per_cpu_ptr(hv_context.cpu_context, cpu);
884
885	page_addr = hv_cpu->synic_message_page;
886	if (!page_addr)
887	continue;
888
889	msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
890	msg->header.message_type = HVMSG_NONE;
891	}
892	}
893
894	/*
895	* vmbus_unload_response - Handler for the unload response.
896	*/
897	static void vmbus_unload_response(struct vmbus_channel_message_header *hdr)
898	{
899	/*
900	* This is a global event; just wakeup the waiting thread.
901	* Once we successfully unload, we can cleanup the monitor state.
902	*
903	* NB. A malicious or compromised Hyper-V could send a spurious
904	* message of type CHANNELMSG_UNLOAD_RESPONSE, and trigger a call
905	* of the complete() below. Make sure that unload_event has been
906	* initialized by the time this complete() is executed.
907	*/
908	complete(&vmbus_connection.unload_event);
909	}
910
911	void vmbus_initiate_unload(bool crash)
912	{
913	struct vmbus_channel_message_header hdr;
914
915	if (xchg(&vmbus_connection.conn_state, DISCONNECTED) == DISCONNECTED)
916	return;
917
918	/ Pre-Win2012R2 hosts don't support reconnect /
919	if (vmbus_proto_version < VERSION_WIN8_1)
920	return;
921
922	reinit_completion(x: &vmbus_connection.unload_event);
923	memset(&hdr, `0`, sizeof(struct vmbus_channel_message_header));
924	hdr.msgtype = CHANNELMSG_UNLOAD;
925	vmbus_post_msg(buffer: &hdr, buflen: sizeof(struct vmbus_channel_message_header),
926	can_sleep: !crash);
927
928	/*
929	* vmbus_initiate_unload() is also called on crash and the crash can be
930	* happening in an interrupt context, where scheduling is impossible.
931	*/
932	if (!crash)
933	wait_for_completion(&vmbus_connection.unload_event);
934	else
935	vmbus_wait_for_unload();
936	}
937
938	static void check_ready_for_resume_event(void)
939	{
940	/*
941	* If all the old primary channels have been fixed up, then it's safe
942	* to resume.
943	*/
944	if (atomic_dec_and_test(v: &vmbus_connection.nr_chan_fixup_on_resume))
945	complete(&vmbus_connection.ready_for_resume_event);
946	}
947
948	static void vmbus_setup_channel_state(struct vmbus_channel *channel,
949	struct vmbus_channel_offer_channel *offer)
950	{
951	/*
952	* Setup state for signalling the host.
953	*/
954	channel->sig_event = VMBUS_EVENT_CONNECTION_ID;
955
956	channel->is_dedicated_interrupt =
957	(offer->is_dedicated_interrupt != `0`);
958	channel->sig_event = offer->connection_id;
959
960	memcpy(&channel->offermsg, offer,
961	sizeof(struct vmbus_channel_offer_channel));
962	channel->monitor_grp = (u8)offer->monitorid / `32`;
963	channel->monitor_bit = (u8)offer->monitorid % `32`;
964	channel->device_id = hv_get_dev_type(channel);
965	}
966
967	/*
968	* find_primary_channel_by_offer - Get the channel object given the new offer.
969	* This is only used in the resume path of hibernation.
970	*/
971	static struct vmbus_channel *
972	find_primary_channel_by_offer(const struct vmbus_channel_offer_channel *offer)
973	{
974	struct vmbus_channel channel = NULL, iter;
975	const guid_t inst1, inst2;
976
977	/ Ignore sub-channel offers. /
978	if (offer->offer.sub_channel_index != `0`)
979	return NULL;
980
981	mutex_lock(&vmbus_connection.channel_mutex);
982
983	list_for_each_entry(iter, &vmbus_connection.chn_list, listentry) {
984	inst1 = &iter->offermsg.offer.if_instance;
985	inst2 = &offer->offer.if_instance;
986
987	if (guid_equal(u1: inst1, u2: inst2)) {
988	channel = iter;
989	break;
990	}
991	}
992
993	mutex_unlock(lock: &vmbus_connection.channel_mutex);
994
995	return channel;
996	}
997
998	static bool vmbus_is_valid_offer(const struct vmbus_channel_offer_channel *offer)
999	{
1000	const guid_t *guid = &offer->offer.if_type;
1001	u16 i;
1002
1003	if (!hv_is_isolation_supported())
1004	return true;
1005
1006	if (is_hvsock_offer(o: offer))
1007	return true;
1008
1009	for (i = `0`; i < ARRAY_SIZE(vmbus_devs); i++) {
1010	if (guid_equal(u1: guid, u2: &vmbus_devs[i].guid))
1011	return vmbus_devs[i].allowed_in_isolated;
1012	}
1013	return false;
1014	}
1015
1016	/*
1017	* vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
1018	*
1019	*/
1020	static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
1021	{
1022	struct vmbus_channel_offer_channel *offer;
1023	struct vmbus_channel oldchannel, newchannel;
1024	size_t offer_sz;
1025
1026	offer = (struct vmbus_channel_offer_channel *)hdr;
1027
1028	trace_vmbus_onoffer(offer);
1029
1030	if (!vmbus_is_valid_offer(offer)) {
1031	pr_err_ratelimited("Invalid offer %d from the host supporting isolation\n",
1032	offer->child_relid);
1033	atomic_dec(v: &vmbus_connection.offer_in_progress);
1034	return;
1035	}
1036
1037	oldchannel = find_primary_channel_by_offer(offer);
1038
1039	if (oldchannel != NULL) {
1040	/*
1041	* We're resuming from hibernation: all the sub-channel and
1042	* hv_sock channels we had before the hibernation should have
1043	* been cleaned up, and now we must be seeing a re-offered
1044	* primary channel that we had before the hibernation.
1045	*/
1046
1047	/*
1048	* { Initially: channel relid = INVALID_RELID,
1049	* channels[valid_relid] = NULL }
1050	*
1051	* CPU1 CPU2
1052	*
1053	* [vmbus_onoffer()] [vmbus_device_release()]
1054	*
1055	* LOCK channel_mutex LOCK channel_mutex
1056	* STORE channel relid = valid_relid LOAD r1 = channel relid
1057	* MAP_RELID channel if (r1 != INVALID_RELID)
1058	* UNLOCK channel_mutex UNMAP_RELID channel
1059	* UNLOCK channel_mutex
1060	*
1061	* Forbids: r1 == valid_relid &&
1062	* channels[valid_relid] == channel
1063	*
1064	* Note. r1 can be INVALID_RELID only for an hv_sock channel.
1065	* None of the hv_sock channels which were present before the
1066	* suspend are re-offered upon the resume. See the WARN_ON()
1067	* in hv_process_channel_removal().
1068	*/
1069	mutex_lock(&vmbus_connection.channel_mutex);
1070
1071	atomic_dec(v: &vmbus_connection.offer_in_progress);
1072
1073	WARN_ON(oldchannel->offermsg.child_relid != INVALID_RELID);
1074	/ Fix up the relid. /
1075	oldchannel->offermsg.child_relid = offer->child_relid;
1076
1077	offer_sz = sizeof(*offer);
1078	if (memcmp(p: offer, q: &oldchannel->offermsg, size: offer_sz) != `0`) {
1079	/*
1080	* This is not an error, since the host can also change
1081	* the other field(s) of the offer, e.g. on WS RS5
1082	* (Build 17763), the offer->connection_id of the
1083	* Mellanox VF vmbus device can change when the host
1084	* reoffers the device upon resume.
1085	*/
1086	pr_debug("vmbus offer changed: relid=%d\n",
1087	offer->child_relid);
1088
1089	print_hex_dump_debug("Old vmbus offer: ",
1090	DUMP_PREFIX_OFFSET, `16`, `4`,
1091	&oldchannel->offermsg, offer_sz,
1092	false);
1093	print_hex_dump_debug("New vmbus offer: ",
1094	DUMP_PREFIX_OFFSET, `16`, `4`,
1095	offer, offer_sz, false);
1096
1097	/ Fix up the old channel. /
1098	vmbus_setup_channel_state(channel: oldchannel, offer);
1099	}
1100
1101	/ Add the channel back to the array of channels. /
1102	vmbus_channel_map_relid(channel: oldchannel);
1103	check_ready_for_resume_event();
1104
1105	mutex_unlock(lock: &vmbus_connection.channel_mutex);
1106	return;
1107	}
1108
1109	/ Allocate the channel object and save this offer. /
1110	newchannel = alloc_channel();
1111	if (!newchannel) {
1112	vmbus_release_relid(relid: offer->child_relid);
1113	atomic_dec(v: &vmbus_connection.offer_in_progress);
1114	pr_err("Unable to allocate channel object\n");
1115	return;
1116	}
1117
1118	vmbus_setup_channel_state(channel: newchannel, offer);
1119
1120	vmbus_process_offer(newchannel);
1121	}
1122
1123	static void check_ready_for_suspend_event(void)
1124	{
1125	/*
1126	* If all the sub-channels or hv_sock channels have been cleaned up,
1127	* then it's safe to suspend.
1128	*/
1129	if (atomic_dec_and_test(v: &vmbus_connection.nr_chan_close_on_suspend))
1130	complete(&vmbus_connection.ready_for_suspend_event);
1131	}
1132
1133	/*
1134	* vmbus_onoffer_rescind - Rescind offer handler.
1135	*
1136	* We queue a work item to process this offer synchronously
1137	*/
1138	static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
1139	{
1140	struct vmbus_channel_rescind_offer *rescind;
1141	struct vmbus_channel *channel;
1142	struct device *dev;
1143	bool clean_up_chan_for_suspend;
1144
1145	rescind = (struct vmbus_channel_rescind_offer *)hdr;
1146
1147	trace_vmbus_onoffer_rescind(offer: rescind);
1148
1149	/*
1150	* The offer msg and the corresponding rescind msg
1151	* from the host are guranteed to be ordered -
1152	* offer comes in first and then the rescind.
1153	* Since we process these events in work elements,
1154	* and with preemption, we may end up processing
1155	* the events out of order. We rely on the synchronization
1156	* provided by offer_in_progress and by channel_mutex for
1157	* ordering these events:
1158	*
1159	* { Initially: offer_in_progress = 1 }
1160	*
1161	* CPU1 CPU2
1162	*
1163	* [vmbus_onoffer()] [vmbus_onoffer_rescind()]
1164	*
1165	* LOCK channel_mutex WAIT_ON offer_in_progress == 0
1166	* DECREMENT offer_in_progress LOCK channel_mutex
1167	* STORE channels[] LOAD channels[]
1168	* UNLOCK channel_mutex UNLOCK channel_mutex
1169	*
1170	* Forbids: CPU2's LOAD from not seeing CPU1's STORE
1171	*/
1172
1173	while (atomic_read(v: &vmbus_connection.offer_in_progress) != `0`) {
1174	/*
1175	* We wait here until any channel offer is currently
1176	* being processed.
1177	*/
1178	msleep(msecs: `1`);
1179	}
1180
1181	mutex_lock(&vmbus_connection.channel_mutex);
1182	channel = relid2channel(relid: rescind->child_relid);
1183	if (channel != NULL) {
1184	/*
1185	* Guarantee that no other instance of vmbus_onoffer_rescind()
1186	* has got a reference to the channel object. Synchronize on
1187	* &vmbus_connection.channel_mutex.
1188	*/
1189	if (channel->rescind_ref) {
1190	mutex_unlock(lock: &vmbus_connection.channel_mutex);
1191	return;
1192	}
1193	channel->rescind_ref = true;
1194	}
1195	mutex_unlock(lock: &vmbus_connection.channel_mutex);
1196
1197	if (channel == NULL) {
1198	/*
1199	* We failed in processing the offer message;
1200	* we would have cleaned up the relid in that
1201	* failure path.
1202	*/
1203	return;
1204	}
1205
1206	clean_up_chan_for_suspend = is_hvsock_channel(c: channel) \|\|
1207	is_sub_channel(c: channel);
1208	/*
1209	* Before setting channel->rescind in vmbus_rescind_cleanup(), we
1210	* should make sure the channel callback is not running any more.
1211	*/
1212	vmbus_reset_channel_cb(channel);
1213
1214	/*
1215	* Now wait for offer handling to complete.
1216	*/
1217	vmbus_rescind_cleanup(channel);
1218	while (READ_ONCE(channel->probe_done) == false) {
1219	/*
1220	* We wait here until any channel offer is currently
1221	* being processed.
1222	*/
1223	msleep(msecs: `1`);
1224	}
1225
1226	/*
1227	* At this point, the rescind handling can proceed safely.
1228	*/
1229
1230	if (channel->device_obj) {
1231	if (channel->chn_rescind_callback) {
1232	channel->chn_rescind_callback(channel);
1233
1234	if (clean_up_chan_for_suspend)
1235	check_ready_for_suspend_event();
1236
1237	return;
1238	}
1239	/*
1240	* We will have to unregister this device from the
1241	* driver core.
1242	*/
1243	dev = get_device(dev: &channel->device_obj->device);
1244	if (dev) {
1245	vmbus_device_unregister(device_obj: channel->device_obj);
1246	put_device(dev);
1247	}
1248	} else if (channel->primary_channel != NULL) {
1249	/*
1250	* Sub-channel is being rescinded. Following is the channel
1251	* close sequence when initiated from the driveri (refer to
1252	* vmbus_close() for details):
1253	* 1. Close all sub-channels first
1254	* 2. Then close the primary channel.
1255	*/
1256	mutex_lock(&vmbus_connection.channel_mutex);
1257	if (channel->state == CHANNEL_OPEN_STATE) {
1258	/*
1259	* The channel is currently not open;
1260	* it is safe for us to cleanup the channel.
1261	*/
1262	hv_process_channel_removal(channel);
1263	} else {
1264	complete(&channel->rescind_event);
1265	}
1266	mutex_unlock(lock: &vmbus_connection.channel_mutex);
1267	}
1268
1269	/ The "channel" may have been freed. Do not access it any longer. /
1270
1271	if (clean_up_chan_for_suspend)
1272	check_ready_for_suspend_event();
1273	}
1274
1275	void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
1276	{
1277	BUG_ON(!is_hvsock_channel(channel));
1278
1279	/ We always get a rescind msg when a connection is closed. /
1280	while (!READ_ONCE(channel->probe_done) \|\| !READ_ONCE(channel->rescind))
1281	msleep(msecs: `1`);
1282
1283	vmbus_device_unregister(device_obj: channel->device_obj);
1284	}
1285	EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister);
1286
1287
/*
 * vmbus_onoffers_delivered - Handler for the "all offers delivered" message.
 * @hdr: the received message (unused).
 *
 * This is invoked when all offers have been delivered.
 */
static void vmbus_onoffers_delivered(struct vmbus_channel_message_header *hdr)
{
	/* Nothing to do here. */
}
1298
1299	/*
1300	* vmbus_onopen_result - Open result handler.
1301	*
1302	* This is invoked when we received a response to our channel open request.
1303	* Find the matching request, copy the response and signal the requesting
1304	* thread.
1305	*/
1306	static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr)
1307	{
1308	struct vmbus_channel_open_result *result;
1309	struct vmbus_channel_msginfo *msginfo;
1310	struct vmbus_channel_message_header *requestheader;
1311	struct vmbus_channel_open_channel *openmsg;
1312	unsigned long flags;
1313
1314	result = (struct vmbus_channel_open_result *)hdr;
1315
1316	trace_vmbus_onopen_result(result);
1317
1318	/*
1319	* Find the open msg, copy the result and signal/unblock the wait event
1320	*/
1321	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
1322
1323	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
1324	msglistentry) {
1325	requestheader =
1326	(struct vmbus_channel_message_header *)msginfo->msg;
1327
1328	if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) {
1329	openmsg =
1330	(struct vmbus_channel_open_channel *)msginfo->msg;
1331	if (openmsg->child_relid == result->child_relid &&
1332	openmsg->openid == result->openid) {
1333	memcpy(&msginfo->response.open_result,
1334	result,
1335	sizeof(
1336	struct vmbus_channel_open_result));
1337	complete(&msginfo->waitevent);
1338	break;
1339	}
1340	}
1341	}
1342	spin_unlock_irqrestore(lock: &vmbus_connection.channelmsg_lock, flags);
1343	}
1344
1345	/*
1346	* vmbus_ongpadl_created - GPADL created handler.
1347	*
1348	* This is invoked when we received a response to our gpadl create request.
1349	* Find the matching request, copy the response and signal the requesting
1350	* thread.
1351	*/
1352	static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr)
1353	{
1354	struct vmbus_channel_gpadl_created *gpadlcreated;
1355	struct vmbus_channel_msginfo *msginfo;
1356	struct vmbus_channel_message_header *requestheader;
1357	struct vmbus_channel_gpadl_header *gpadlheader;
1358	unsigned long flags;
1359
1360	gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr;
1361
1362	trace_vmbus_ongpadl_created(gpadlcreated);
1363
1364	/*
1365	* Find the establish msg, copy the result and signal/unblock the wait
1366	* event
1367	*/
1368	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
1369
1370	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
1371	msglistentry) {
1372	requestheader =
1373	(struct vmbus_channel_message_header *)msginfo->msg;
1374
1375	if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) {
1376	gpadlheader =
1377	(struct vmbus_channel_gpadl_header *)requestheader;
1378
1379	if ((gpadlcreated->child_relid ==
1380	gpadlheader->child_relid) &&
1381	(gpadlcreated->gpadl == gpadlheader->gpadl)) {
1382	memcpy(&msginfo->response.gpadl_created,
1383	gpadlcreated,
1384	sizeof(
1385	struct vmbus_channel_gpadl_created));
1386	complete(&msginfo->waitevent);
1387	break;
1388	}
1389	}
1390	}
1391	spin_unlock_irqrestore(lock: &vmbus_connection.channelmsg_lock, flags);
1392	}
1393
1394	/*
1395	* vmbus_onmodifychannel_response - Modify Channel response handler.
1396	*
1397	* This is invoked when we received a response to our channel modify request.
1398	* Find the matching request, copy the response and signal the requesting thread.
1399	*/
1400	static void vmbus_onmodifychannel_response(struct vmbus_channel_message_header *hdr)
1401	{
1402	struct vmbus_channel_modifychannel_response *response;
1403	struct vmbus_channel_msginfo *msginfo;
1404	unsigned long flags;
1405
1406	response = (struct vmbus_channel_modifychannel_response *)hdr;
1407
1408	trace_vmbus_onmodifychannel_response(response);
1409
1410	/*
1411	* Find the modify msg, copy the response and signal/unblock the wait event.
1412	*/
1413	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
1414
1415	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, msglistentry) {
1416	struct vmbus_channel_message_header *responseheader =
1417	(struct vmbus_channel_message_header *)msginfo->msg;
1418
1419	if (responseheader->msgtype == CHANNELMSG_MODIFYCHANNEL) {
1420	struct vmbus_channel_modifychannel *modifymsg;
1421
1422	modifymsg = (struct vmbus_channel_modifychannel *)msginfo->msg;
1423	if (modifymsg->child_relid == response->child_relid) {
1424	memcpy(&msginfo->response.modify_response, response,
1425	sizeof(*response));
1426	complete(&msginfo->waitevent);
1427	break;
1428	}
1429	}
1430	}
1431	spin_unlock_irqrestore(lock: &vmbus_connection.channelmsg_lock, flags);
1432	}
1433
1434	/*
1435	* vmbus_ongpadl_torndown - GPADL torndown handler.
1436	*
1437	* This is invoked when we received a response to our gpadl teardown request.
1438	* Find the matching request, copy the response and signal the requesting
1439	* thread.
1440	*/
1441	static void vmbus_ongpadl_torndown(
1442	struct vmbus_channel_message_header *hdr)
1443	{
1444	struct vmbus_channel_gpadl_torndown *gpadl_torndown;
1445	struct vmbus_channel_msginfo *msginfo;
1446	struct vmbus_channel_message_header *requestheader;
1447	struct vmbus_channel_gpadl_teardown *gpadl_teardown;
1448	unsigned long flags;
1449
1450	gpadl_torndown = (struct vmbus_channel_gpadl_torndown *)hdr;
1451
1452	trace_vmbus_ongpadl_torndown(gpadltorndown: gpadl_torndown);
1453
1454	/*
1455	* Find the open msg, copy the result and signal/unblock the wait event
1456	*/
1457	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
1458
1459	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
1460	msglistentry) {
1461	requestheader =
1462	(struct vmbus_channel_message_header *)msginfo->msg;
1463
1464	if (requestheader->msgtype == CHANNELMSG_GPADL_TEARDOWN) {
1465	gpadl_teardown =
1466	(struct vmbus_channel_gpadl_teardown *)requestheader;
1467
1468	if (gpadl_torndown->gpadl == gpadl_teardown->gpadl) {
1469	memcpy(&msginfo->response.gpadl_torndown,
1470	gpadl_torndown,
1471	sizeof(
1472	struct vmbus_channel_gpadl_torndown));
1473	complete(&msginfo->waitevent);
1474	break;
1475	}
1476	}
1477	}
1478	spin_unlock_irqrestore(lock: &vmbus_connection.channelmsg_lock, flags);
1479	}
1480
1481	/*
1482	* vmbus_onversion_response - Version response handler
1483	*
1484	* This is invoked when we received a response to our initiate contact request.
1485	* Find the matching request, copy the response and signal the requesting
1486	* thread.
1487	*/
1488	static void vmbus_onversion_response(
1489	struct vmbus_channel_message_header *hdr)
1490	{
1491	struct vmbus_channel_msginfo *msginfo;
1492	struct vmbus_channel_message_header *requestheader;
1493	struct vmbus_channel_version_response *version_response;
1494	unsigned long flags;
1495
1496	version_response = (struct vmbus_channel_version_response *)hdr;
1497
1498	trace_vmbus_onversion_response(response: version_response);
1499
1500	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
1501
1502	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
1503	msglistentry) {
1504	requestheader =
1505	(struct vmbus_channel_message_header *)msginfo->msg;
1506
1507	if (requestheader->msgtype ==
1508	CHANNELMSG_INITIATE_CONTACT) {
1509	memcpy(&msginfo->response.version_response,
1510	version_response,
1511	sizeof(struct vmbus_channel_version_response));
1512	complete(&msginfo->waitevent);
1513	}
1514	}
1515	spin_unlock_irqrestore(lock: &vmbus_connection.channelmsg_lock, flags);
1516	}
1517
1518	/ Channel message dispatch table /
1519	const struct vmbus_channel_message_table_entry
1520	channel_message_table[CHANNELMSG_COUNT] = {
1521	{ CHANNELMSG_INVALID, `0`, NULL, `0`},
1522	{ CHANNELMSG_OFFERCHANNEL, `0`, vmbus_onoffer,
1523	sizeof(struct vmbus_channel_offer_channel)},
1524	{ CHANNELMSG_RESCIND_CHANNELOFFER, `0`, vmbus_onoffer_rescind,
1525	sizeof(struct vmbus_channel_rescind_offer) },
1526	{ CHANNELMSG_REQUESTOFFERS, `0`, NULL, `0`},
1527	{ CHANNELMSG_ALLOFFERS_DELIVERED, `1`, vmbus_onoffers_delivered, `0`},
1528	{ CHANNELMSG_OPENCHANNEL, `0`, NULL, `0`},
1529	{ CHANNELMSG_OPENCHANNEL_RESULT, `1`, vmbus_onopen_result,
1530	sizeof(struct vmbus_channel_open_result)},
1531	{ CHANNELMSG_CLOSECHANNEL, `0`, NULL, `0`},
1532	{ CHANNELMSG_GPADL_HEADER, `0`, NULL, `0`},
1533	{ CHANNELMSG_GPADL_BODY, `0`, NULL, `0`},
1534	{ CHANNELMSG_GPADL_CREATED, `1`, vmbus_ongpadl_created,
1535	sizeof(struct vmbus_channel_gpadl_created)},
1536	{ CHANNELMSG_GPADL_TEARDOWN, `0`, NULL, `0`},
1537	{ CHANNELMSG_GPADL_TORNDOWN, `1`, vmbus_ongpadl_torndown,
1538	sizeof(struct vmbus_channel_gpadl_torndown) },
1539	{ CHANNELMSG_RELID_RELEASED, `0`, NULL, `0`},
1540	{ CHANNELMSG_INITIATE_CONTACT, `0`, NULL, `0`},
1541	{ CHANNELMSG_VERSION_RESPONSE, `1`, vmbus_onversion_response,
1542	sizeof(struct vmbus_channel_version_response)},
1543	{ CHANNELMSG_UNLOAD, `0`, NULL, `0`},
1544	{ CHANNELMSG_UNLOAD_RESPONSE, `1`, vmbus_unload_response, `0`},
1545	{ CHANNELMSG_18, `0`, NULL, `0`},
1546	{ CHANNELMSG_19, `0`, NULL, `0`},
1547	{ CHANNELMSG_20, `0`, NULL, `0`},
1548	{ CHANNELMSG_TL_CONNECT_REQUEST, `0`, NULL, `0`},
1549	{ CHANNELMSG_MODIFYCHANNEL, `0`, NULL, `0`},
1550	{ CHANNELMSG_TL_CONNECT_RESULT, `0`, NULL, `0`},
1551	{ CHANNELMSG_MODIFYCHANNEL_RESPONSE, `1`, vmbus_onmodifychannel_response,
1552	sizeof(struct vmbus_channel_modifychannel_response)},
1553	};
1554
1555	/*
1556	* vmbus_onmessage - Handler for channel protocol messages.
1557	*
1558	* This is invoked in the vmbus worker thread context.
1559	*/
1560	void vmbus_onmessage(struct vmbus_channel_message_header *hdr)
1561	{
1562	trace_vmbus_on_message(hdr);
1563
1564	/*
1565	* vmbus_on_msg_dpc() makes sure the hdr->msgtype here can not go
1566	* out of bound and the message_handler pointer can not be NULL.
1567	*/
1568	channel_message_table[hdr->msgtype].message_handler(hdr);
1569	}
1570
1571	/*
1572	* vmbus_request_offers - Send a request to get all our pending offers.
1573	*/
1574	int vmbus_request_offers(void)
1575	{
1576	struct vmbus_channel_message_header *msg;
1577	struct vmbus_channel_msginfo *msginfo;
1578	int ret;
1579
1580	msginfo = kzalloc(size: sizeof(*msginfo) +
1581	sizeof(struct vmbus_channel_message_header),
1582	GFP_KERNEL);
1583	if (!msginfo)
1584	return -ENOMEM;
1585
1586	msg = (struct vmbus_channel_message_header *)msginfo->msg;
1587
1588	msg->msgtype = CHANNELMSG_REQUESTOFFERS;
1589
1590	ret = vmbus_post_msg(buffer: msg, buflen: sizeof(struct vmbus_channel_message_header),
1591	can_sleep: true);
1592
1593	trace_vmbus_request_offers(ret);
1594
1595	if (ret != `0`) {
1596	pr_err("Unable to request offers - %d\n", ret);
1597
1598	goto cleanup;
1599	}
1600
1601	cleanup:
1602	kfree(objp: msginfo);
1603
1604	return ret;
1605	}
1606
1607	void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel,
1608	void (sc_cr_cb)(struct* vmbus_channel *new_sc))
1609	{
1610	primary_channel->sc_creation_callback = sc_cr_cb;
1611	}
1612	EXPORT_SYMBOL_GPL(vmbus_set_sc_create_callback);
1613
1614	void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel,
1615	void (chn_rescind_cb)(struct* vmbus_channel *))
1616	{
1617	channel->chn_rescind_callback = chn_rescind_cb;
1618	}
1619	EXPORT_SYMBOL_GPL(vmbus_set_chn_rescind_callback);
1620

source code of linux/drivers/hv/channel_mgmt.c