// SPDX-License-Identifier: GPL-2.0-only
/*
 * VMware VMCI Driver
 *
 * Copyright (C) 2012 VMware, Inc. All rights reserved.
 */

#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
#include <linux/moduleparam.h>
#include <linux/interrupt.h>
#include <linux/highmem.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/processor.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/smp.h>
#include <linux/io.h>
#include <linux/vmalloc.h>

#include "vmci_datagram.h"
#include "vmci_doorbell.h"
#include "vmci_context.h"
#include "vmci_driver.h"
#include "vmci_event.h"

#define PCI_DEVICE_ID_VMWARE_VMCI 0x0740

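/* Number of resources queried in the host capability check below. */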
#define VMCI_UTIL_NUM_RESOURCES 1

/*
 * Datagram buffers for DMA send/receive must accommodate at least
 * a maximum sized datagram and the header.
 */
#define VMCI_DMA_DG_BUFFER_SIZE (VMCI_MAX_DG_SIZE + PAGE_SIZE)

static bool vmci_disable_msi;
module_param_named(disable_msi, vmci_disable_msi, bool, 0);
MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)");

static bool vmci_disable_msix;
module_param_named(disable_msix, vmci_disable_msix, bool, 0);
MODULE_PARM_DESC(disable_msix, "Disable MSI-X use in driver - (default=0)");
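
/*
 * Example (assuming the module is built under its usual name, vmw_vmci):
 * booting with vmw_vmci.disable_msix=1, or loading with
 * "modprobe vmw_vmci disable_msix=1", makes the probe path below skip
 * MSI-X and fall back to MSI or legacy interrupts.
 */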

static u32 ctx_update_sub_id = VMCI_INVALID_ID;
static u32 vm_context_id = VMCI_INVALID_ID;

struct vmci_guest_device {
	struct device *dev;	/* PCI device we are attached to */
	void __iomem *iobase;	/* Legacy port I/O register access */
	void __iomem *mmio_base;	/* MMIO register access, if available */

	bool exclusive_vectors;	/* MSI-X with one vector per cause */

	struct wait_queue_head inout_wq;	/* DMA datagram completion */

	void *data_buffer;	/* Datagram receive buffer */
	dma_addr_t data_buffer_base;
	void *tx_buffer;	/* Datagram send buffer (MMIO/DMA only) */
	dma_addr_t tx_buffer_base;
	void *notification_bitmap;
	dma_addr_t notification_base;
};

static bool use_ppn64;

bool vmci_use_ppn64(void)
{
	return use_ppn64;
}

/* vmci_dev singleton device and supporting data */
struct pci_dev *vmci_pdev;
static struct vmci_guest_device *vmci_dev_g;
static DEFINE_SPINLOCK(vmci_dev_spinlock);

static atomic_t vmci_num_guest_devices = ATOMIC_INIT(0);

bool vmci_guest_code_active(void)
{
	return atomic_read(&vmci_num_guest_devices) != 0;
}

u32 vmci_get_vm_context_id(void)
{
	if (vm_context_id == VMCI_INVALID_ID) {
		struct vmci_datagram get_cid_msg;
		get_cid_msg.dst =
		    vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
				     VMCI_GET_CONTEXT_ID);
		get_cid_msg.src = VMCI_ANON_SRC_HANDLE;
		get_cid_msg.payload_size = 0;
		vm_context_id = vmci_send_datagram(&get_cid_msg);
	}
	return vm_context_id;
}

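/*
 * Register accessors: the device is programmed through MMIO when BAR1
 * has been mapped, and through legacy port I/O otherwise.
 */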
static unsigned int vmci_read_reg(struct vmci_guest_device *dev, u32 reg)
{
	if (dev->mmio_base != NULL)
		return readl(dev->mmio_base + reg);
	return ioread32(dev->iobase + reg);
}

static void vmci_write_reg(struct vmci_guest_device *dev, u32 val, u32 reg)
{
	if (dev->mmio_base != NULL)
		writel(val, dev->mmio_base + reg);
	else
		iowrite32(val, dev->iobase + reg);
}

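/*
 * Read @size bytes of incoming datagram data into @dest. For port I/O
 * devices the bytes are pulled straight from the data-in port. For MMIO
 * devices a DMA receive is programmed and the caller sleeps until the
 * device sets the busy flag (the DMA datagram interrupt wakes inout_wq).
 */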
static void vmci_read_data(struct vmci_guest_device *vmci_dev,
			   void *dest, size_t size)
{
	if (vmci_dev->mmio_base == NULL)
		ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
			    dest, size);
	else {
		/*
		 * For DMA datagrams, the data_buffer will contain the header on the
		 * first page, followed by the incoming datagram(s) on the following
		 * pages. The header uses an S/G element immediately following the
		 * header on the first page to point to the data area.
		 */
		struct vmci_data_in_out_header *buffer_header = vmci_dev->data_buffer;
		struct vmci_sg_elem *sg_array = (struct vmci_sg_elem *)(buffer_header + 1);
		size_t buffer_offset = dest - vmci_dev->data_buffer;

		buffer_header->opcode = 1;
		buffer_header->size = 1;
		buffer_header->busy = 0;
		sg_array[0].addr = vmci_dev->data_buffer_base + buffer_offset;
		sg_array[0].size = size;

		vmci_write_reg(vmci_dev, lower_32_bits(vmci_dev->data_buffer_base),
			       VMCI_DATA_IN_LOW_ADDR);

		wait_event(vmci_dev->inout_wq, buffer_header->busy == 1);
	}
}

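/*
 * Hand the datagram @dg to the device. For MMIO devices the datagram is
 * staged in the coherent tx buffer with busy set, and the device clears
 * busy once it has consumed it; the result comes from the result
 * register or the buffer header. For port I/O devices the datagram
 * bytes are written to the data-out port. Returns a VMCI_* status code.
 */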
static int vmci_write_data(struct vmci_guest_device *dev,
			   struct vmci_datagram *dg)
{
	int result;

	if (dev->mmio_base != NULL) {
		struct vmci_data_in_out_header *buffer_header = dev->tx_buffer;
		u8 *dg_out_buffer = (u8 *)(buffer_header + 1);

		if (VMCI_DG_SIZE(dg) > VMCI_MAX_DG_SIZE)
			return VMCI_ERROR_INVALID_ARGS;

		/*
		 * Initialize send buffer with outgoing datagram
		 * and set up header for inline data. Device will
		 * not access buffer asynchronously - only after
		 * the write to VMCI_DATA_OUT_LOW_ADDR.
		 */
		memcpy(dg_out_buffer, dg, VMCI_DG_SIZE(dg));
		buffer_header->opcode = 0;
		buffer_header->size = VMCI_DG_SIZE(dg);
		buffer_header->busy = 1;

		vmci_write_reg(dev, lower_32_bits(dev->tx_buffer_base),
			       VMCI_DATA_OUT_LOW_ADDR);

		/* Caller holds a spinlock, so cannot block. */
		spin_until_cond(buffer_header->busy == 0);

		result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
		if (result == VMCI_SUCCESS)
			result = (int)buffer_header->result;
	} else {
		iowrite8_rep(dev->iobase + VMCI_DATA_OUT_ADDR,
			     dg, VMCI_DG_SIZE(dg));
		result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
	}

	return result;
}

/*
 * VM to hypervisor call mechanism. We use the standard VMware naming
 * convention since shared code is calling this function as well.
 */
int vmci_send_datagram(struct vmci_datagram *dg)
{
	unsigned long flags;
	int result;

	/* Check args. */
	if (dg == NULL)
		return VMCI_ERROR_INVALID_ARGS;

	/*
	 * Need to acquire spinlock on the device because the datagram
	 * data may be spread over multiple pages and the monitor may
	 * interleave device user rpc calls from multiple
	 * VCPUs. Acquiring the spinlock precludes that
	 * possibility. Disabling interrupts to avoid incoming
	 * datagrams during a "rep out" and possibly landing up in
	 * this function.
	 */
	spin_lock_irqsave(&vmci_dev_spinlock, flags);

	if (vmci_dev_g)
		result = vmci_write_data(vmci_dev_g, dg);
	else
		result = VMCI_ERROR_UNAVAILABLE;

	spin_unlock_irqrestore(&vmci_dev_spinlock, flags);

	return result;
}
EXPORT_SYMBOL_GPL(vmci_send_datagram);

/*
 * Gets called with the new context id if the context id is updated
 * or the VM is resumed.
 */
static void vmci_guest_cid_update(u32 sub_id,
				  const struct vmci_event_data *event_data,
				  void *client_data)
{
	const struct vmci_event_payld_ctx *ev_payload =
				vmci_event_data_const_payload(event_data);

	if (sub_id != ctx_update_sub_id) {
		pr_devel("Invalid subscriber (ID=0x%x)\n", sub_id);
		return;
	}

	if (!event_data || ev_payload->context_id == VMCI_INVALID_ID) {
		pr_devel("Invalid event data\n");
		return;
	}

	pr_devel("Updating context from (ID=0x%x) to (ID=0x%x) on event (type=%d)\n",
		 vm_context_id, ev_payload->context_id, event_data->event);

	vm_context_id = ev_payload->context_id;
}

/*
 * Verify that the host supports the hypercalls we need. If it does not,
 * try to find fallback hypercalls and use those instead. Returns 0 if
 * required hypercalls (or fallback hypercalls) are supported by the host,
 * an error code otherwise.
 */
static int vmci_check_host_caps(struct pci_dev *pdev)
{
	bool result;
	struct vmci_resource_query_msg *msg;
	u32 msg_size = sizeof(struct vmci_resource_query_hdr) +
				VMCI_UTIL_NUM_RESOURCES * sizeof(u32);
	struct vmci_datagram *check_msg;

	check_msg = kzalloc(msg_size, GFP_KERNEL);
	if (!check_msg) {
		dev_err(&pdev->dev, "%s: Insufficient memory\n", __func__);
		return -ENOMEM;
	}

	check_msg->dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
					  VMCI_RESOURCES_QUERY);
	check_msg->src = VMCI_ANON_SRC_HANDLE;
	check_msg->payload_size = msg_size - VMCI_DG_HEADERSIZE;
	msg = (struct vmci_resource_query_msg *)VMCI_DG_PAYLOAD(check_msg);

	msg->num_resources = VMCI_UTIL_NUM_RESOURCES;
	msg->resources[0] = VMCI_GET_CONTEXT_ID;

	/* Checks that hypercalls are supported */
	result = vmci_send_datagram(check_msg) == 0x01;
	kfree(check_msg);

	dev_dbg(&pdev->dev, "%s: Host capability check: %s\n",
		__func__, result ? "PASSED" : "FAILED");

	/* We need the vector. There are no fallbacks. */
	return result ? 0 : -ENXIO;
}

/*
 * Reads datagrams from the device and dispatches them. For IO port
 * based access to the device, we always start reading datagrams into
 * only the first page of the datagram buffer. If the datagrams don't
 * fit into one page, we use the maximum datagram buffer size for the
 * remainder of the invocation. This is a simple heuristic for not
 * penalizing small datagrams. For DMA-based datagrams, we always
 * use the maximum datagram buffer size, since there is no performance
 * penalty for doing so.
 *
 * This function assumes that it has exclusive access to the data
 * in register(s) for the duration of the call.
 */
static void vmci_dispatch_dgs(struct vmci_guest_device *vmci_dev)
{
	u8 *dg_in_buffer = vmci_dev->data_buffer;
	struct vmci_datagram *dg;
	size_t dg_in_buffer_size = VMCI_MAX_DG_SIZE;
	size_t current_dg_in_buffer_size;
	size_t remaining_bytes;
	bool is_io_port = vmci_dev->mmio_base == NULL;

	BUILD_BUG_ON(VMCI_MAX_DG_SIZE < PAGE_SIZE);

	if (!is_io_port) {
		/* For mmio, the first page is used for the header. */
		dg_in_buffer += PAGE_SIZE;

		/*
		 * For DMA-based datagram operations, there is no performance
		 * penalty for reading the maximum buffer size.
		 */
		current_dg_in_buffer_size = VMCI_MAX_DG_SIZE;
	} else {
		current_dg_in_buffer_size = PAGE_SIZE;
	}
	vmci_read_data(vmci_dev, dg_in_buffer, current_dg_in_buffer_size);
	dg = (struct vmci_datagram *)dg_in_buffer;
	remaining_bytes = current_dg_in_buffer_size;

	/*
	 * Read through the buffer until an invalid datagram header is
	 * encountered. The exit condition for datagrams read through
	 * VMCI_DATA_IN_ADDR is a bit more complicated, since a datagram
	 * can start on any page boundary in the buffer.
	 */
	while (dg->dst.resource != VMCI_INVALID_ID ||
	       (is_io_port && remaining_bytes > PAGE_SIZE)) {
		unsigned dg_in_size;

		/*
		 * If using VMCI_DATA_IN_ADDR, skip to the next page
		 * as a datagram can start on any page boundary.
		 */
		if (dg->dst.resource == VMCI_INVALID_ID) {
			dg = (struct vmci_datagram *)roundup(
				(uintptr_t)dg + 1, PAGE_SIZE);
			remaining_bytes =
				(size_t)(dg_in_buffer +
					 current_dg_in_buffer_size -
					 (u8 *)dg);
			continue;
		}

		dg_in_size = VMCI_DG_SIZE_ALIGNED(dg);

		if (dg_in_size <= dg_in_buffer_size) {
			int result;

			/*
			 * If the remaining bytes in the datagram
			 * buffer don't contain the complete
			 * datagram, we first make sure we have enough
			 * room for it and then we read the remainder
			 * of the datagram and possibly any following
			 * datagrams.
			 */
			if (dg_in_size > remaining_bytes) {
				if (remaining_bytes !=
				    current_dg_in_buffer_size) {

					/*
					 * We move the partial
					 * datagram to the front and
					 * read the remainder of the
					 * datagram and possibly
					 * following calls into the
					 * following bytes.
					 */
					memmove(dg_in_buffer, dg_in_buffer +
						current_dg_in_buffer_size -
						remaining_bytes,
						remaining_bytes);
					dg = (struct vmci_datagram *)
						dg_in_buffer;
				}

				if (current_dg_in_buffer_size !=
				    dg_in_buffer_size)
					current_dg_in_buffer_size =
						dg_in_buffer_size;

				vmci_read_data(vmci_dev,
					       dg_in_buffer +
						remaining_bytes,
					       current_dg_in_buffer_size -
						remaining_bytes);
			}

			/*
			 * We special case event datagrams from the
			 * hypervisor.
			 */
			if (dg->src.context == VMCI_HYPERVISOR_CONTEXT_ID &&
			    dg->dst.resource == VMCI_EVENT_HANDLER) {
				result = vmci_event_dispatch(dg);
			} else {
				result = vmci_datagram_invoke_guest_handler(dg);
			}
			if (result < VMCI_SUCCESS)
				dev_dbg(vmci_dev->dev,
					"Datagram with resource (ID=0x%x) failed (err=%d)\n",
					dg->dst.resource, result);

			/* On to the next datagram. */
			dg = (struct vmci_datagram *)((u8 *)dg +
						      dg_in_size);
		} else {
			size_t bytes_to_skip;

			/*
			 * Datagram doesn't fit in datagram buffer of maximal
			 * size. We drop it.
			 */
			dev_dbg(vmci_dev->dev,
				"Failed to receive datagram (size=%u bytes)\n",
				dg_in_size);

			bytes_to_skip = dg_in_size - remaining_bytes;
			if (current_dg_in_buffer_size != dg_in_buffer_size)
				current_dg_in_buffer_size = dg_in_buffer_size;

			for (;;) {
				vmci_read_data(vmci_dev, dg_in_buffer,
					       current_dg_in_buffer_size);
				if (bytes_to_skip <= current_dg_in_buffer_size)
					break;

				bytes_to_skip -= current_dg_in_buffer_size;
			}
			dg = (struct vmci_datagram *)(dg_in_buffer +
						      bytes_to_skip);
		}

		remaining_bytes =
			(size_t) (dg_in_buffer + current_dg_in_buffer_size -
				  (u8 *)dg);

		if (remaining_bytes < VMCI_DG_HEADERSIZE) {
			/* Get the next batch of datagrams. */

			vmci_read_data(vmci_dev, dg_in_buffer,
				       current_dg_in_buffer_size);
			dg = (struct vmci_datagram *)dg_in_buffer;
			remaining_bytes = current_dg_in_buffer_size;
		}
	}
}

/*
 * Scans the notification bitmap for raised flags, clears them
 * and handles the notifications.
 */
static void vmci_process_bitmap(struct vmci_guest_device *dev)
{
	if (!dev->notification_bitmap) {
		dev_dbg(dev->dev, "No bitmap present in %s\n", __func__);
		return;
	}

	vmci_dbell_scan_notification_entries(dev->notification_bitmap);
}

/*
 * Interrupt handler for legacy or MSI interrupt, or for first MSI-X
 * interrupt (vector VMCI_INTR_DATAGRAM).
 */
static irqreturn_t vmci_interrupt(int irq, void *_dev)
{
	struct vmci_guest_device *dev = _dev;

	/*
	 * If we are using MSI-X with exclusive vectors then we simply call
	 * vmci_dispatch_dgs(), since we know the interrupt was meant for us.
	 * Otherwise we must read the ICR to determine what to do.
	 */

	if (dev->exclusive_vectors) {
		vmci_dispatch_dgs(dev);
	} else {
		unsigned int icr;

		/* Acknowledge interrupt and determine what needs doing. */
		icr = vmci_read_reg(dev, VMCI_ICR_ADDR);
		if (icr == 0 || icr == ~0)
			return IRQ_NONE;

		if (icr & VMCI_ICR_DATAGRAM) {
			vmci_dispatch_dgs(dev);
			icr &= ~VMCI_ICR_DATAGRAM;
		}

		if (icr & VMCI_ICR_NOTIFICATION) {
			vmci_process_bitmap(dev);
			icr &= ~VMCI_ICR_NOTIFICATION;
		}

		if (icr & VMCI_ICR_DMA_DATAGRAM) {
			wake_up_all(&dev->inout_wq);
			icr &= ~VMCI_ICR_DMA_DATAGRAM;
		}

		if (icr != 0)
			dev_warn(dev->dev,
				 "Ignoring unknown interrupt cause (%d)\n",
				 icr);
	}

	return IRQ_HANDLED;
}

/*
 * Interrupt handler for MSI-X interrupt vector VMCI_INTR_NOTIFICATION,
 * which is for the notification bitmap. Will only get called if we are
 * using MSI-X with exclusive vectors.
 */
static irqreturn_t vmci_interrupt_bm(int irq, void *_dev)
{
	struct vmci_guest_device *dev = _dev;

	/* For MSI-X we can just assume it was meant for us. */
	vmci_process_bitmap(dev);

	return IRQ_HANDLED;
}

/*
 * Interrupt handler for MSI-X interrupt vector VMCI_INTR_DMA_DATAGRAM,
 * which is for the completion of a DMA datagram send or receive operation.
 * Will only get called if we are using MSI-X with exclusive vectors.
 */
static irqreturn_t vmci_interrupt_dma_datagram(int irq, void *_dev)
{
	struct vmci_guest_device *dev = _dev;

	wake_up_all(&dev->inout_wq);

	return IRQ_HANDLED;
}

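/*
 * Free the datagram send/receive buffers: DMA-coherent buffers when the
 * device uses MMIO register access, a single vmalloc'ed receive buffer
 * otherwise.
 */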
static void vmci_free_dg_buffers(struct vmci_guest_device *vmci_dev)
{
	if (vmci_dev->mmio_base != NULL) {
		if (vmci_dev->tx_buffer != NULL)
			dma_free_coherent(vmci_dev->dev,
					  VMCI_DMA_DG_BUFFER_SIZE,
					  vmci_dev->tx_buffer,
					  vmci_dev->tx_buffer_base);
		if (vmci_dev->data_buffer != NULL)
			dma_free_coherent(vmci_dev->dev,
					  VMCI_DMA_DG_BUFFER_SIZE,
					  vmci_dev->data_buffer,
					  vmci_dev->data_buffer_base);
	} else {
		vfree(vmci_dev->data_buffer);
	}
}

/*
 * Most of the initialization at module load time is done here.
 */
static int vmci_guest_probe_device(struct pci_dev *pdev,
				   const struct pci_device_id *id)
{
	struct vmci_guest_device *vmci_dev;
	void __iomem *iobase = NULL;
	void __iomem *mmio_base = NULL;
	unsigned int num_irq_vectors;
	unsigned int capabilities;
	unsigned int caps_in_use;
	unsigned long cmd;
	int vmci_err;
	int error;

	dev_dbg(&pdev->dev, "Probing for vmci/PCI guest device\n");

	error = pcim_enable_device(pdev);
	if (error) {
		dev_err(&pdev->dev,
			"Failed to enable VMCI device: %d\n", error);
		return error;
	}

	/*
	 * The VMCI device with mmio access to registers requests 256KB
	 * for BAR1. If present, the driver will use the new VMCI device
	 * functionality for register access and datagram send/recv.
	 */

	if (pci_resource_len(pdev, 1) == VMCI_WITH_MMIO_ACCESS_BAR_SIZE) {
		dev_info(&pdev->dev, "MMIO register access is available\n");
		mmio_base = pci_iomap_range(pdev, 1, VMCI_MMIO_ACCESS_OFFSET,
					    VMCI_MMIO_ACCESS_SIZE);
		/* If the map fails, we fall back to IOIO access. */
		if (!mmio_base)
			dev_warn(&pdev->dev, "Failed to map MMIO register access\n");
	}

	if (!mmio_base) {
		if (IS_ENABLED(CONFIG_ARM64)) {
			dev_err(&pdev->dev, "MMIO base is invalid\n");
			return -ENXIO;
		}
		error = pcim_iomap_regions(pdev, BIT(0), KBUILD_MODNAME);
		if (error) {
			dev_err(&pdev->dev, "Failed to reserve/map IO regions\n");
			return error;
		}
		iobase = pcim_iomap_table(pdev)[0];
	}

	vmci_dev = devm_kzalloc(&pdev->dev, sizeof(*vmci_dev), GFP_KERNEL);
	if (!vmci_dev) {
		dev_err(&pdev->dev,
			"Can't allocate memory for VMCI device\n");
		return -ENOMEM;
	}

	vmci_dev->dev = &pdev->dev;
	vmci_dev->exclusive_vectors = false;
	vmci_dev->iobase = iobase;
	vmci_dev->mmio_base = mmio_base;

	init_waitqueue_head(&vmci_dev->inout_wq);

	if (mmio_base != NULL) {
		vmci_dev->tx_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE,
							 &vmci_dev->tx_buffer_base,
							 GFP_KERNEL);
		if (!vmci_dev->tx_buffer) {
			dev_err(&pdev->dev,
				"Can't allocate memory for datagram tx buffer\n");
			return -ENOMEM;
		}

		vmci_dev->data_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE,
							   &vmci_dev->data_buffer_base,
							   GFP_KERNEL);
	} else {
		vmci_dev->data_buffer = vmalloc(VMCI_MAX_DG_SIZE);
	}
	if (!vmci_dev->data_buffer) {
		dev_err(&pdev->dev,
			"Can't allocate memory for datagram buffer\n");
		error = -ENOMEM;
		goto err_free_data_buffers;
	}

	pci_set_master(pdev);	/* To enable queue_pair functionality. */

	/*
	 * Verify that the VMCI Device supports the capabilities that
	 * we need. If the device is missing capabilities that we would
	 * like to use, check for fallback capabilities and use those
	 * instead (so we can run a new VM on old hosts). Fail the load if
	 * a required capability is missing and there is no fallback.
	 *
	 * Right now, we need datagrams. There are no fallbacks.
	 */
	capabilities = vmci_read_reg(vmci_dev, VMCI_CAPS_ADDR);
	if (!(capabilities & VMCI_CAPS_DATAGRAM)) {
		dev_err(&pdev->dev, "Device does not support datagrams\n");
		error = -ENXIO;
		goto err_free_data_buffers;
	}
	caps_in_use = VMCI_CAPS_DATAGRAM;

	/*
	 * Use 64-bit PPNs if the device supports them.
	 *
	 * There is no check for the return value of dma_set_mask_and_coherent
	 * since this driver can handle the default mask values if
	 * dma_set_mask_and_coherent fails.
	 */
	if (capabilities & VMCI_CAPS_PPN64) {
		dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
		use_ppn64 = true;
		caps_in_use |= VMCI_CAPS_PPN64;
	} else {
		dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(44));
		use_ppn64 = false;
	}

	/*
	 * If the hardware supports notifications, we will use that as
	 * well.
	 */
	if (capabilities & VMCI_CAPS_NOTIFICATIONS) {
		vmci_dev->notification_bitmap = dma_alloc_coherent(
			&pdev->dev, PAGE_SIZE, &vmci_dev->notification_base,
			GFP_KERNEL);
		if (!vmci_dev->notification_bitmap)
			dev_warn(&pdev->dev,
				 "Unable to allocate notification bitmap\n");
		else
			caps_in_use |= VMCI_CAPS_NOTIFICATIONS;
	}

	if (mmio_base != NULL) {
		if (capabilities & VMCI_CAPS_DMA_DATAGRAM) {
			caps_in_use |= VMCI_CAPS_DMA_DATAGRAM;
		} else {
			dev_err(&pdev->dev,
				"Missing capability: VMCI_CAPS_DMA_DATAGRAM\n");
			error = -ENXIO;
			goto err_free_notification_bitmap;
		}
	}

	dev_info(&pdev->dev, "Using capabilities 0x%x\n", caps_in_use);

	/* Let the host know which capabilities we intend to use. */
	vmci_write_reg(vmci_dev, caps_in_use, VMCI_CAPS_ADDR);

	if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) {
		/* Let the device know the size for pages passed down. */
		vmci_write_reg(vmci_dev, PAGE_SHIFT, VMCI_GUEST_PAGE_SHIFT);

		/* Configure the high order parts of the data in/out buffers. */
		vmci_write_reg(vmci_dev, upper_32_bits(vmci_dev->data_buffer_base),
			       VMCI_DATA_IN_HIGH_ADDR);
		vmci_write_reg(vmci_dev, upper_32_bits(vmci_dev->tx_buffer_base),
			       VMCI_DATA_OUT_HIGH_ADDR);
	}

737 | |
738 | /* Set up global device so that we can start sending datagrams */ |
739 | spin_lock_irq(lock: &vmci_dev_spinlock); |
740 | vmci_dev_g = vmci_dev; |
741 | vmci_pdev = pdev; |
742 | spin_unlock_irq(lock: &vmci_dev_spinlock); |
743 | |
744 | /* |
745 | * Register notification bitmap with device if that capability is |
746 | * used. |
747 | */ |
748 | if (caps_in_use & VMCI_CAPS_NOTIFICATIONS) { |
749 | unsigned long bitmap_ppn = |
750 | vmci_dev->notification_base >> PAGE_SHIFT; |
751 | if (!vmci_dbell_register_notification_bitmap(bitmap_ppn)) { |
752 | dev_warn(&pdev->dev, |
753 | "VMCI device unable to register notification bitmap with PPN 0x%lx\n" , |
754 | bitmap_ppn); |
755 | error = -ENXIO; |
756 | goto err_remove_vmci_dev_g; |
757 | } |
758 | } |
759 | |
760 | /* Check host capabilities. */ |
761 | error = vmci_check_host_caps(pdev); |
762 | if (error) |
763 | goto err_remove_vmci_dev_g; |
764 | |
765 | /* Enable device. */ |
766 | |
767 | /* |
768 | * We subscribe to the VMCI_EVENT_CTX_ID_UPDATE here so we can |
769 | * update the internal context id when needed. |
770 | */ |
771 | vmci_err = vmci_event_subscribe(event: VMCI_EVENT_CTX_ID_UPDATE, |
772 | callback: vmci_guest_cid_update, NULL, |
773 | subid: &ctx_update_sub_id); |
774 | if (vmci_err < VMCI_SUCCESS) |
775 | dev_warn(&pdev->dev, |
776 | "Failed to subscribe to event (type=%d): %d\n" , |
777 | VMCI_EVENT_CTX_ID_UPDATE, vmci_err); |
778 | |
779 | /* |
780 | * Enable interrupts. Try MSI-X first, then MSI, and then fallback on |
781 | * legacy interrupts. |
782 | */ |
783 | if (vmci_dev->mmio_base != NULL) |
784 | num_irq_vectors = VMCI_MAX_INTRS; |
785 | else |
786 | num_irq_vectors = VMCI_MAX_INTRS_NOTIFICATION; |
787 | error = pci_alloc_irq_vectors(dev: pdev, min_vecs: num_irq_vectors, max_vecs: num_irq_vectors, |
788 | PCI_IRQ_MSIX); |
789 | if (error < 0) { |
790 | error = pci_alloc_irq_vectors(dev: pdev, min_vecs: 1, max_vecs: 1, |
791 | PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY); |
792 | if (error < 0) |
793 | goto err_unsubscribe_event; |
794 | } else { |
795 | vmci_dev->exclusive_vectors = true; |
796 | } |
797 | |
798 | /* |
799 | * Request IRQ for legacy or MSI interrupts, or for first |
800 | * MSI-X vector. |
801 | */ |
802 | error = request_threaded_irq(irq: pci_irq_vector(dev: pdev, nr: 0), NULL, |
803 | thread_fn: vmci_interrupt, IRQF_SHARED, |
804 | KBUILD_MODNAME, dev: vmci_dev); |
805 | if (error) { |
806 | dev_err(&pdev->dev, "Irq %u in use: %d\n" , |
807 | pci_irq_vector(pdev, 0), error); |
808 | goto err_disable_msi; |
809 | } |
810 | |
811 | /* |
812 | * For MSI-X with exclusive vectors we need to request an |
813 | * interrupt for each vector so that we get a separate |
814 | * interrupt handler routine. This allows us to distinguish |
815 | * between the vectors. |
816 | */ |
817 | if (vmci_dev->exclusive_vectors) { |
818 | error = request_threaded_irq(irq: pci_irq_vector(dev: pdev, nr: 1), NULL, |
819 | thread_fn: vmci_interrupt_bm, flags: 0, |
820 | KBUILD_MODNAME, dev: vmci_dev); |
821 | if (error) { |
822 | dev_err(&pdev->dev, |
823 | "Failed to allocate irq %u: %d\n" , |
824 | pci_irq_vector(pdev, 1), error); |
825 | goto err_free_irq; |
826 | } |
827 | if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) { |
828 | error = request_threaded_irq(irq: pci_irq_vector(dev: pdev, nr: 2), |
829 | NULL, |
830 | thread_fn: vmci_interrupt_dma_datagram, |
831 | flags: 0, KBUILD_MODNAME, |
832 | dev: vmci_dev); |
833 | if (error) { |
834 | dev_err(&pdev->dev, |
835 | "Failed to allocate irq %u: %d\n" , |
836 | pci_irq_vector(pdev, 2), error); |
837 | goto err_free_bm_irq; |
838 | } |
839 | } |
840 | } |
841 | |
842 | dev_dbg(&pdev->dev, "Registered device\n" ); |
843 | |
844 | atomic_inc(v: &vmci_num_guest_devices); |
845 | |
846 | /* Enable specific interrupt bits. */ |
847 | cmd = VMCI_IMR_DATAGRAM; |
848 | if (caps_in_use & VMCI_CAPS_NOTIFICATIONS) |
849 | cmd |= VMCI_IMR_NOTIFICATION; |
850 | if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) |
851 | cmd |= VMCI_IMR_DMA_DATAGRAM; |
852 | vmci_write_reg(dev: vmci_dev, val: cmd, VMCI_IMR_ADDR); |
853 | |
854 | /* Enable interrupts. */ |
855 | vmci_write_reg(dev: vmci_dev, VMCI_CONTROL_INT_ENABLE, VMCI_CONTROL_ADDR); |
856 | |
857 | pci_set_drvdata(pdev, data: vmci_dev); |
858 | |
859 | vmci_call_vsock_callback(is_host: false); |
860 | return 0; |
861 | |
err_free_bm_irq:
	if (vmci_dev->exclusive_vectors)
		free_irq(pci_irq_vector(pdev, 1), vmci_dev);

err_free_irq:
	free_irq(pci_irq_vector(pdev, 0), vmci_dev);

err_disable_msi:
	pci_free_irq_vectors(pdev);

err_unsubscribe_event:
	vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
	if (vmci_err < VMCI_SUCCESS)
		dev_warn(&pdev->dev,
			 "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
			 VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);

err_remove_vmci_dev_g:
	spin_lock_irq(&vmci_dev_spinlock);
	vmci_pdev = NULL;
	vmci_dev_g = NULL;
	spin_unlock_irq(&vmci_dev_spinlock);

err_free_notification_bitmap:
	if (vmci_dev->notification_bitmap) {
		vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR);
		dma_free_coherent(&pdev->dev, PAGE_SIZE,
				  vmci_dev->notification_bitmap,
				  vmci_dev->notification_base);
	}

err_free_data_buffers:
	vmci_free_dg_buffers(vmci_dev);

	/* The rest are managed resources and will be freed by PCI core */
	return error;
}

static void vmci_guest_remove_device(struct pci_dev *pdev)
{
	struct vmci_guest_device *vmci_dev = pci_get_drvdata(pdev);
	int vmci_err;

	dev_dbg(&pdev->dev, "Removing device\n");

	atomic_dec(&vmci_num_guest_devices);

	vmci_qp_guest_endpoints_exit();

	vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
	if (vmci_err < VMCI_SUCCESS)
		dev_warn(&pdev->dev,
			 "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
			 VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);

	spin_lock_irq(&vmci_dev_spinlock);
	vmci_dev_g = NULL;
	vmci_pdev = NULL;
	spin_unlock_irq(&vmci_dev_spinlock);

	dev_dbg(&pdev->dev, "Resetting vmci device\n");
	vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR);

	/*
	 * Free IRQ and then disable MSI/MSI-X as appropriate. For
	 * MSI-X, we might have multiple vectors, each with their own
	 * IRQ, which we must free too.
	 */
	if (vmci_dev->exclusive_vectors) {
		free_irq(pci_irq_vector(pdev, 1), vmci_dev);
		if (vmci_dev->mmio_base != NULL)
			free_irq(pci_irq_vector(pdev, 2), vmci_dev);
	}
	free_irq(pci_irq_vector(pdev, 0), vmci_dev);
	pci_free_irq_vectors(pdev);

	if (vmci_dev->notification_bitmap) {
		/*
		 * The device reset above cleared the bitmap state of the
		 * device, so we can safely free it here.
		 */

		dma_free_coherent(&pdev->dev, PAGE_SIZE,
				  vmci_dev->notification_bitmap,
				  vmci_dev->notification_base);
	}

	vmci_free_dg_buffers(vmci_dev);

	if (vmci_dev->mmio_base != NULL)
		pci_iounmap(pdev, vmci_dev->mmio_base);

	/* The rest are managed resources and will be freed by PCI core */
}

static const struct pci_device_id vmci_ids[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_VMCI), },
	{ 0 },
};
MODULE_DEVICE_TABLE(pci, vmci_ids);

static struct pci_driver vmci_guest_driver = {
	.name = KBUILD_MODNAME,
	.id_table = vmci_ids,
	.probe = vmci_guest_probe_device,
	.remove = vmci_guest_remove_device,
};

int __init vmci_guest_init(void)
{
	return pci_register_driver(&vmci_guest_driver);
}

void __exit vmci_guest_exit(void)
{
	pci_unregister_driver(&vmci_guest_driver);
}