cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

vmci_guest.c (27824B)


// SPDX-License-Identifier: GPL-2.0-only
/*
 * VMware VMCI Driver
 *
 * Copyright (C) 2012 VMware, Inc. All rights reserved.
 */

#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
#include <linux/moduleparam.h>
#include <linux/interrupt.h>
#include <linux/highmem.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/processor.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/smp.h>
#include <linux/io.h>
#include <linux/vmalloc.h>

#include "vmci_datagram.h"
#include "vmci_doorbell.h"
#include "vmci_context.h"
#include "vmci_driver.h"
#include "vmci_event.h"

#define PCI_DEVICE_ID_VMWARE_VMCI	0x0740

#define VMCI_UTIL_NUM_RESOURCES 1

/*
 * Datagram buffers for DMA send/receive must accommodate at least
 * a maximum sized datagram and the header.
 */
#define VMCI_DMA_DG_BUFFER_SIZE (VMCI_MAX_DG_SIZE + PAGE_SIZE)

static bool vmci_disable_msi;
module_param_named(disable_msi, vmci_disable_msi, bool, 0);
MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)");

static bool vmci_disable_msix;
module_param_named(disable_msix, vmci_disable_msix, bool, 0);
MODULE_PARM_DESC(disable_msix, "Disable MSI-X use in driver - (default=0)");

static u32 ctx_update_sub_id = VMCI_INVALID_ID;
static u32 vm_context_id = VMCI_INVALID_ID;

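/*
 * Per-device state for a VMCI PCI guest device. Registers are accessed
 * either through the legacy I/O port window (iobase) or, on newer
 * devices, through the MMIO window (mmio_base). When MMIO is in use,
 * datagrams are exchanged through the coherent DMA buffers below rather
 * than through port I/O.
 */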
struct vmci_guest_device {
	struct device *dev;	/* PCI device we are attached to */
	void __iomem *iobase;
	void __iomem *mmio_base;

	bool exclusive_vectors;

	struct tasklet_struct datagram_tasklet;
	struct tasklet_struct bm_tasklet;
	struct wait_queue_head inout_wq;

	void *data_buffer;
	dma_addr_t data_buffer_base;
	void *tx_buffer;
	dma_addr_t tx_buffer_base;
	void *notification_bitmap;
	dma_addr_t notification_base;
};

static bool use_ppn64;

bool vmci_use_ppn64(void)
{
	return use_ppn64;
}

/* vmci_dev singleton device and supporting data. */
struct pci_dev *vmci_pdev;
static struct vmci_guest_device *vmci_dev_g;
static DEFINE_SPINLOCK(vmci_dev_spinlock);

static atomic_t vmci_num_guest_devices = ATOMIC_INIT(0);

bool vmci_guest_code_active(void)
{
	return atomic_read(&vmci_num_guest_devices) != 0;
}
u32 vmci_get_vm_context_id(void)
{
	if (vm_context_id == VMCI_INVALID_ID) {
		struct vmci_datagram get_cid_msg;
		get_cid_msg.dst =
		    vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
				     VMCI_GET_CONTEXT_ID);
		get_cid_msg.src = VMCI_ANON_SRC_HANDLE;
		get_cid_msg.payload_size = 0;
		vm_context_id = vmci_send_datagram(&get_cid_msg);
	}
	return vm_context_id;
}

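/*
 * Register accessors: prefer the MMIO window when it was mapped at probe
 * time, otherwise fall back to the legacy I/O port window.
 */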
static unsigned int vmci_read_reg(struct vmci_guest_device *dev, u32 reg)
{
	if (dev->mmio_base != NULL)
		return readl(dev->mmio_base + reg);
	return ioread32(dev->iobase + reg);
}

static void vmci_write_reg(struct vmci_guest_device *dev, u32 val, u32 reg)
{
	if (dev->mmio_base != NULL)
		writel(val, dev->mmio_base + reg);
	else
		iowrite32(val, dev->iobase + reg);
}

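/*
 * Read incoming datagram data into dest. With port I/O the bytes are read
 * directly from VMCI_DATA_IN_ADDR. With MMIO the transfer is a DMA
 * datagram: a header plus one S/G element on the first page of data_buffer
 * describes where the device should place the data, the write to
 * VMCI_DATA_IN_LOW_ADDR kicks off the transfer, and completion is signalled
 * by the device setting the header's busy flag (the VMCI_ICR_DMA_DATAGRAM
 * interrupt wakes the waiter on inout_wq).
 */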
static void vmci_read_data(struct vmci_guest_device *vmci_dev,
			   void *dest, size_t size)
{
	if (vmci_dev->mmio_base == NULL)
		ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
			    dest, size);
	else {
		/*
		 * For DMA datagrams, the data_buffer will contain the header on the
		 * first page, followed by the incoming datagram(s) on the following
		 * pages. The header uses an S/G element immediately following the
		 * header on the first page to point to the data area.
		 */
		struct vmci_data_in_out_header *buffer_header = vmci_dev->data_buffer;
		struct vmci_sg_elem *sg_array = (struct vmci_sg_elem *)(buffer_header + 1);
		size_t buffer_offset = dest - vmci_dev->data_buffer;

		buffer_header->opcode = 1;
		buffer_header->size = 1;
		buffer_header->busy = 0;
		sg_array[0].addr = vmci_dev->data_buffer_base + buffer_offset;
		sg_array[0].size = size;

		vmci_write_reg(vmci_dev, lower_32_bits(vmci_dev->data_buffer_base),
			       VMCI_DATA_IN_LOW_ADDR);

		wait_event(vmci_dev->inout_wq, buffer_header->busy == 1);
	}
}

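/*
 * Send an outgoing datagram. With MMIO the datagram is copied inline after
 * the header in tx_buffer and the device is kicked by writing the buffer's
 * bus address to VMCI_DATA_OUT_LOW_ADDR; since callers hold the device
 * spinlock, completion is busy-waited via the header's busy flag. With
 * port I/O the datagram is written directly to VMCI_DATA_OUT_ADDR.
 */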
static int vmci_write_data(struct vmci_guest_device *dev,
			   struct vmci_datagram *dg)
{
	int result;

	if (dev->mmio_base != NULL) {
		struct vmci_data_in_out_header *buffer_header = dev->tx_buffer;
		u8 *dg_out_buffer = (u8 *)(buffer_header + 1);

		if (VMCI_DG_SIZE(dg) > VMCI_MAX_DG_SIZE)
			return VMCI_ERROR_INVALID_ARGS;

		/*
		 * Initialize send buffer with outgoing datagram
		 * and set up header for inline data. Device will
		 * not access buffer asynchronously - only after
		 * the write to VMCI_DATA_OUT_LOW_ADDR.
		 */
		memcpy(dg_out_buffer, dg, VMCI_DG_SIZE(dg));
		buffer_header->opcode = 0;
		buffer_header->size = VMCI_DG_SIZE(dg);
		buffer_header->busy = 1;

		vmci_write_reg(dev, lower_32_bits(dev->tx_buffer_base),
			       VMCI_DATA_OUT_LOW_ADDR);

		/* Caller holds a spinlock, so cannot block. */
		spin_until_cond(buffer_header->busy == 0);

		result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
		if (result == VMCI_SUCCESS)
			result = (int)buffer_header->result;
	} else {
		iowrite8_rep(dev->iobase + VMCI_DATA_OUT_ADDR,
			     dg, VMCI_DG_SIZE(dg));
		result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
	}

	return result;
}

/*
 * VM to hypervisor call mechanism. We use the standard VMware naming
 * convention since shared code is calling this function as well.
 */
int vmci_send_datagram(struct vmci_datagram *dg)
{
	unsigned long flags;
	int result;

	/* Check args. */
	if (dg == NULL)
		return VMCI_ERROR_INVALID_ARGS;

	/*
	 * Need to acquire spinlock on the device because the datagram
	 * data may be spread over multiple pages and the monitor may
	 * interleave device user rpc calls from multiple
	 * VCPUs. Acquiring the spinlock precludes that
	 * possibility. Interrupts are disabled to avoid incoming
	 * datagrams during a "rep out" and possibly ending up back
	 * in this function.
	 */
	spin_lock_irqsave(&vmci_dev_spinlock, flags);

	if (vmci_dev_g) {
		vmci_write_data(vmci_dev_g, dg);
		result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
	} else {
		result = VMCI_ERROR_UNAVAILABLE;
	}

	spin_unlock_irqrestore(&vmci_dev_spinlock, flags);

	return result;
}
EXPORT_SYMBOL_GPL(vmci_send_datagram);

/*
 * Gets called with the new context id when the VM's context id is
 * updated or the VM is resumed.
 */
static void vmci_guest_cid_update(u32 sub_id,
				  const struct vmci_event_data *event_data,
				  void *client_data)
{
	const struct vmci_event_payld_ctx *ev_payload =
				vmci_event_data_const_payload(event_data);

	if (sub_id != ctx_update_sub_id) {
		pr_devel("Invalid subscriber (ID=0x%x)\n", sub_id);
		return;
	}

	if (!event_data || ev_payload->context_id == VMCI_INVALID_ID) {
		pr_devel("Invalid event data\n");
		return;
	}

	pr_devel("Updating context from (ID=0x%x) to (ID=0x%x) on event (type=%d)\n",
		 vm_context_id, ev_payload->context_id, event_data->event);

	vm_context_id = ev_payload->context_id;
}

/*
 * Verify that the host supports the hypercalls we need. If it does not,
 * try to find fallback hypercalls and use those instead.  Returns 0 if
 * required hypercalls (or fallback hypercalls) are supported by the host,
 * an error code otherwise.
 */
static int vmci_check_host_caps(struct pci_dev *pdev)
{
	bool result;
	struct vmci_resource_query_msg *msg;
	u32 msg_size = sizeof(struct vmci_resource_query_hdr) +
				VMCI_UTIL_NUM_RESOURCES * sizeof(u32);
	struct vmci_datagram *check_msg;

	check_msg = kzalloc(msg_size, GFP_KERNEL);
	if (!check_msg) {
		dev_err(&pdev->dev, "%s: Insufficient memory\n", __func__);
		return -ENOMEM;
	}

	check_msg->dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
					  VMCI_RESOURCES_QUERY);
	check_msg->src = VMCI_ANON_SRC_HANDLE;
	check_msg->payload_size = msg_size - VMCI_DG_HEADERSIZE;
	msg = (struct vmci_resource_query_msg *)VMCI_DG_PAYLOAD(check_msg);

	msg->num_resources = VMCI_UTIL_NUM_RESOURCES;
	msg->resources[0] = VMCI_GET_CONTEXT_ID;

	/* Checks that hypercalls are supported */
	result = vmci_send_datagram(check_msg) == 0x01;
	kfree(check_msg);

	dev_dbg(&pdev->dev, "%s: Host capability check: %s\n",
		__func__, result ? "PASSED" : "FAILED");

	/* We need the vector. There are no fallbacks. */
	return result ? 0 : -ENXIO;
}

/*
 * Reads datagrams from the device and dispatches them. For IO port
 * based access to the device, we always start reading datagrams into
 * only the first page of the datagram buffer. If the datagrams don't
 * fit into one page, we use the maximum datagram buffer size for the
 * remainder of the invocation. This is a simple heuristic for not
 * penalizing small datagrams. For DMA-based datagrams, we always
 * use the maximum datagram buffer size, since there is no performance
 * penalty for doing so.
 *
 * This function assumes that it has exclusive access to the data
 * in register(s) for the duration of the call.
 */
static void vmci_dispatch_dgs(unsigned long data)
{
	struct vmci_guest_device *vmci_dev = (struct vmci_guest_device *)data;
	u8 *dg_in_buffer = vmci_dev->data_buffer;
	struct vmci_datagram *dg;
	size_t dg_in_buffer_size = VMCI_MAX_DG_SIZE;
	size_t current_dg_in_buffer_size;
	size_t remaining_bytes;
	bool is_io_port = vmci_dev->mmio_base == NULL;

	BUILD_BUG_ON(VMCI_MAX_DG_SIZE < PAGE_SIZE);

	if (!is_io_port) {
		/* For mmio, the first page is used for the header. */
		dg_in_buffer += PAGE_SIZE;

		/*
		 * For DMA-based datagram operations, there is no performance
		 * penalty for reading the maximum buffer size.
		 */
		current_dg_in_buffer_size = VMCI_MAX_DG_SIZE;
	} else {
		current_dg_in_buffer_size = PAGE_SIZE;
	}
	vmci_read_data(vmci_dev, dg_in_buffer, current_dg_in_buffer_size);
	dg = (struct vmci_datagram *)dg_in_buffer;
	remaining_bytes = current_dg_in_buffer_size;

	/*
	 * Read through the buffer until an invalid datagram header is
	 * encountered. The exit condition for datagrams read through
	 * VMCI_DATA_IN_ADDR is a bit more complicated, since a datagram
	 * can start on any page boundary in the buffer.
	 */
	while (dg->dst.resource != VMCI_INVALID_ID ||
	       (is_io_port && remaining_bytes > PAGE_SIZE)) {
		unsigned dg_in_size;

		/*
		 * If using VMCI_DATA_IN_ADDR, skip to the next page
		 * as a datagram can start on any page boundary.
		 */
		if (dg->dst.resource == VMCI_INVALID_ID) {
			dg = (struct vmci_datagram *)roundup(
				(uintptr_t)dg + 1, PAGE_SIZE);
			remaining_bytes =
				(size_t)(dg_in_buffer +
					 current_dg_in_buffer_size -
					 (u8 *)dg);
			continue;
		}

		dg_in_size = VMCI_DG_SIZE_ALIGNED(dg);

		if (dg_in_size <= dg_in_buffer_size) {
			int result;

			/*
			 * If the remaining bytes in the datagram
			 * buffer don't contain the complete
			 * datagram, we first make sure we have enough
			 * room for it and then we read the remainder
			 * of the datagram and possibly any following
			 * datagrams.
			 */
			if (dg_in_size > remaining_bytes) {
				if (remaining_bytes !=
				    current_dg_in_buffer_size) {

					/*
					 * We move the partial
					 * datagram to the front and
					 * read the remainder of the
					 * datagram and possibly
					 * following calls into the
					 * following bytes.
					 */
					memmove(dg_in_buffer, dg_in_buffer +
						current_dg_in_buffer_size -
						remaining_bytes,
						remaining_bytes);
					dg = (struct vmci_datagram *)
					    dg_in_buffer;
				}

				if (current_dg_in_buffer_size !=
				    dg_in_buffer_size)
					current_dg_in_buffer_size =
					    dg_in_buffer_size;

				vmci_read_data(vmci_dev,
					       dg_in_buffer +
						remaining_bytes,
					       current_dg_in_buffer_size -
						remaining_bytes);
			}

			/*
			 * We special case event datagrams from the
			 * hypervisor.
			 */
			if (dg->src.context == VMCI_HYPERVISOR_CONTEXT_ID &&
			    dg->dst.resource == VMCI_EVENT_HANDLER) {
				result = vmci_event_dispatch(dg);
			} else {
				result = vmci_datagram_invoke_guest_handler(dg);
			}
			if (result < VMCI_SUCCESS)
				dev_dbg(vmci_dev->dev,
					"Datagram with resource (ID=0x%x) failed (err=%d)\n",
					 dg->dst.resource, result);

			/* On to the next datagram. */
			dg = (struct vmci_datagram *)((u8 *)dg +
						      dg_in_size);
		} else {
			size_t bytes_to_skip;

			/*
			 * Datagram doesn't fit in datagram buffer of maximal
			 * size. We drop it.
			 */
			dev_dbg(vmci_dev->dev,
				"Failed to receive datagram (size=%u bytes)\n",
				 dg_in_size);

			bytes_to_skip = dg_in_size - remaining_bytes;
			if (current_dg_in_buffer_size != dg_in_buffer_size)
				current_dg_in_buffer_size = dg_in_buffer_size;

			for (;;) {
				vmci_read_data(vmci_dev, dg_in_buffer,
					       current_dg_in_buffer_size);
				if (bytes_to_skip <= current_dg_in_buffer_size)
					break;

				bytes_to_skip -= current_dg_in_buffer_size;
			}
			dg = (struct vmci_datagram *)(dg_in_buffer +
						      bytes_to_skip);
		}

		remaining_bytes =
		    (size_t) (dg_in_buffer + current_dg_in_buffer_size -
			      (u8 *)dg);

		if (remaining_bytes < VMCI_DG_HEADERSIZE) {
			/* Get the next batch of datagrams. */

			vmci_read_data(vmci_dev, dg_in_buffer,
				    current_dg_in_buffer_size);
			dg = (struct vmci_datagram *)dg_in_buffer;
			remaining_bytes = current_dg_in_buffer_size;
		}
	}
}

/*
 * Scans the notification bitmap for raised flags, clears them
 * and handles the notifications.
 */
static void vmci_process_bitmap(unsigned long data)
{
	struct vmci_guest_device *dev = (struct vmci_guest_device *)data;

	if (!dev->notification_bitmap) {
		dev_dbg(dev->dev, "No bitmap present in %s\n", __func__);
		return;
	}

	vmci_dbell_scan_notification_entries(dev->notification_bitmap);
}

/*
 * Interrupt handler for legacy or MSI interrupt, or for first MSI-X
 * interrupt (vector VMCI_INTR_DATAGRAM).
 */
static irqreturn_t vmci_interrupt(int irq, void *_dev)
{
	struct vmci_guest_device *dev = _dev;

	/*
	 * If we are using MSI-X with exclusive vectors then we simply schedule
	 * the datagram tasklet, since we know the interrupt was meant for us.
	 * Otherwise we must read the ICR to determine what to do.
	 */

	if (dev->exclusive_vectors) {
		tasklet_schedule(&dev->datagram_tasklet);
	} else {
		unsigned int icr;

		/* Acknowledge interrupt and determine what needs doing. */
		icr = vmci_read_reg(dev, VMCI_ICR_ADDR);
		if (icr == 0 || icr == ~0)
			return IRQ_NONE;

		if (icr & VMCI_ICR_DATAGRAM) {
			tasklet_schedule(&dev->datagram_tasklet);
			icr &= ~VMCI_ICR_DATAGRAM;
		}

		if (icr & VMCI_ICR_NOTIFICATION) {
			tasklet_schedule(&dev->bm_tasklet);
			icr &= ~VMCI_ICR_NOTIFICATION;
		}

		if (icr & VMCI_ICR_DMA_DATAGRAM) {
			wake_up_all(&dev->inout_wq);
			icr &= ~VMCI_ICR_DMA_DATAGRAM;
		}

		if (icr != 0)
			dev_warn(dev->dev,
				 "Ignoring unknown interrupt cause (%d)\n",
				 icr);
	}

	return IRQ_HANDLED;
}

/*
 * Interrupt handler for MSI-X interrupt vector VMCI_INTR_NOTIFICATION,
 * which is for the notification bitmap.  Will only get called if we are
 * using MSI-X with exclusive vectors.
 */
static irqreturn_t vmci_interrupt_bm(int irq, void *_dev)
{
	struct vmci_guest_device *dev = _dev;

	/* For MSI-X we can just assume it was meant for us. */
	tasklet_schedule(&dev->bm_tasklet);

	return IRQ_HANDLED;
}

/*
 * Interrupt handler for MSI-X interrupt vector VMCI_INTR_DMA_DATAGRAM,
 * which is for the completion of a DMA datagram send or receive operation.
 * Will only get called if we are using MSI-X with exclusive vectors.
 */
static irqreturn_t vmci_interrupt_dma_datagram(int irq, void *_dev)
{
	struct vmci_guest_device *dev = _dev;

	wake_up_all(&dev->inout_wq);

	return IRQ_HANDLED;
}

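/*
 * Release the datagram buffers: the coherent DMA buffers when MMIO/DMA
 * datagrams are in use, or the vmalloc'ed receive buffer for port I/O.
 */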
static void vmci_free_dg_buffers(struct vmci_guest_device *vmci_dev)
{
	if (vmci_dev->mmio_base != NULL) {
		if (vmci_dev->tx_buffer != NULL)
			dma_free_coherent(vmci_dev->dev,
					  VMCI_DMA_DG_BUFFER_SIZE,
					  vmci_dev->tx_buffer,
					  vmci_dev->tx_buffer_base);
		if (vmci_dev->data_buffer != NULL)
			dma_free_coherent(vmci_dev->dev,
					  VMCI_DMA_DG_BUFFER_SIZE,
					  vmci_dev->data_buffer,
					  vmci_dev->data_buffer_base);
	} else {
		vfree(vmci_dev->data_buffer);
	}
}

/*
 * Most of the initialization at module load time is done here.
 */
static int vmci_guest_probe_device(struct pci_dev *pdev,
				   const struct pci_device_id *id)
{
	struct vmci_guest_device *vmci_dev;
	void __iomem *iobase = NULL;
	void __iomem *mmio_base = NULL;
	unsigned int num_irq_vectors;
	unsigned int capabilities;
	unsigned int caps_in_use;
	unsigned long cmd;
	int vmci_err;
	int error;

	dev_dbg(&pdev->dev, "Probing for vmci/PCI guest device\n");

	error = pcim_enable_device(pdev);
	if (error) {
		dev_err(&pdev->dev,
			"Failed to enable VMCI device: %d\n", error);
		return error;
	}

	/*
	 * A VMCI device with MMIO access to registers requests 256KB
	 * for BAR1. If present, the driver will use the new VMCI device
	 * functionality for register access and datagram send/recv.
	 */

	if (pci_resource_len(pdev, 1) == VMCI_WITH_MMIO_ACCESS_BAR_SIZE) {
		dev_info(&pdev->dev, "MMIO register access is available\n");
		mmio_base = pci_iomap_range(pdev, 1, VMCI_MMIO_ACCESS_OFFSET,
					    VMCI_MMIO_ACCESS_SIZE);
		/* If the map fails, we fall back to IOIO access. */
		if (!mmio_base)
			dev_warn(&pdev->dev, "Failed to map MMIO register access\n");
	}

	if (!mmio_base) {
		if (IS_ENABLED(CONFIG_ARM64)) {
			dev_err(&pdev->dev, "MMIO base is invalid\n");
			return -ENXIO;
		}
		error = pcim_iomap_regions(pdev, BIT(0), KBUILD_MODNAME);
		if (error) {
			dev_err(&pdev->dev, "Failed to reserve/map IO regions\n");
			return error;
		}
		iobase = pcim_iomap_table(pdev)[0];
	}

	vmci_dev = devm_kzalloc(&pdev->dev, sizeof(*vmci_dev), GFP_KERNEL);
	if (!vmci_dev) {
		dev_err(&pdev->dev,
			"Can't allocate memory for VMCI device\n");
		return -ENOMEM;
	}

	vmci_dev->dev = &pdev->dev;
	vmci_dev->exclusive_vectors = false;
	vmci_dev->iobase = iobase;
	vmci_dev->mmio_base = mmio_base;

	tasklet_init(&vmci_dev->datagram_tasklet,
		     vmci_dispatch_dgs, (unsigned long)vmci_dev);
	tasklet_init(&vmci_dev->bm_tasklet,
		     vmci_process_bitmap, (unsigned long)vmci_dev);
	init_waitqueue_head(&vmci_dev->inout_wq);

	if (mmio_base != NULL) {
		vmci_dev->tx_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE,
							 &vmci_dev->tx_buffer_base,
							 GFP_KERNEL);
		if (!vmci_dev->tx_buffer) {
			dev_err(&pdev->dev,
				"Can't allocate memory for datagram tx buffer\n");
			return -ENOMEM;
		}

		vmci_dev->data_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE,
							   &vmci_dev->data_buffer_base,
							   GFP_KERNEL);
	} else {
		vmci_dev->data_buffer = vmalloc(VMCI_MAX_DG_SIZE);
	}
	if (!vmci_dev->data_buffer) {
		dev_err(&pdev->dev,
			"Can't allocate memory for datagram buffer\n");
		error = -ENOMEM;
		goto err_free_data_buffers;
	}

	pci_set_master(pdev);	/* To enable queue_pair functionality. */

	/*
	 * Verify that the VMCI Device supports the capabilities that
	 * we need. If the device is missing capabilities that we would
	 * like to use, check for fallback capabilities and use those
	 * instead (so we can run a new VM on old hosts). Fail the load if
	 * a required capability is missing and there is no fallback.
	 *
	 * Right now, we need datagrams. There are no fallbacks.
	 */
	capabilities = vmci_read_reg(vmci_dev, VMCI_CAPS_ADDR);
	if (!(capabilities & VMCI_CAPS_DATAGRAM)) {
		dev_err(&pdev->dev, "Device does not support datagrams\n");
		error = -ENXIO;
		goto err_free_data_buffers;
	}
	caps_in_use = VMCI_CAPS_DATAGRAM;

	/*
	 * Use 64-bit PPNs if the device supports them.
	 *
	 * There is no check for the return value of dma_set_mask_and_coherent
	 * since this driver can handle the default mask values if
	 * dma_set_mask_and_coherent fails.
	 */
	if (capabilities & VMCI_CAPS_PPN64) {
		dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
		use_ppn64 = true;
		caps_in_use |= VMCI_CAPS_PPN64;
	} else {
		dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(44));
		use_ppn64 = false;
	}

	/*
	 * If the hardware supports notifications, we will use that as
	 * well.
	 */
	if (capabilities & VMCI_CAPS_NOTIFICATIONS) {
		vmci_dev->notification_bitmap = dma_alloc_coherent(
			&pdev->dev, PAGE_SIZE, &vmci_dev->notification_base,
			GFP_KERNEL);
		if (!vmci_dev->notification_bitmap)
			dev_warn(&pdev->dev,
				 "Unable to allocate notification bitmap\n");
		else
			caps_in_use |= VMCI_CAPS_NOTIFICATIONS;
	}

	if (mmio_base != NULL) {
		if (capabilities & VMCI_CAPS_DMA_DATAGRAM) {
			caps_in_use |= VMCI_CAPS_DMA_DATAGRAM;
		} else {
			dev_err(&pdev->dev,
				"Missing capability: VMCI_CAPS_DMA_DATAGRAM\n");
			error = -ENXIO;
			goto err_free_notification_bitmap;
		}
	}

	dev_info(&pdev->dev, "Using capabilities 0x%x\n", caps_in_use);

	/* Let the host know which capabilities we intend to use. */
	vmci_write_reg(vmci_dev, caps_in_use, VMCI_CAPS_ADDR);

	if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) {
		/* Let the device know the size for pages passed down. */
		vmci_write_reg(vmci_dev, PAGE_SHIFT, VMCI_GUEST_PAGE_SHIFT);

		/* Configure the high order parts of the data in/out buffers. */
		vmci_write_reg(vmci_dev, upper_32_bits(vmci_dev->data_buffer_base),
			       VMCI_DATA_IN_HIGH_ADDR);
		vmci_write_reg(vmci_dev, upper_32_bits(vmci_dev->tx_buffer_base),
			       VMCI_DATA_OUT_HIGH_ADDR);
	}

	/* Set up global device so that we can start sending datagrams */
	spin_lock_irq(&vmci_dev_spinlock);
	vmci_dev_g = vmci_dev;
	vmci_pdev = pdev;
	spin_unlock_irq(&vmci_dev_spinlock);

	/*
	 * Register notification bitmap with device if that capability is
	 * used.
	 */
	if (caps_in_use & VMCI_CAPS_NOTIFICATIONS) {
		unsigned long bitmap_ppn =
			vmci_dev->notification_base >> PAGE_SHIFT;
		if (!vmci_dbell_register_notification_bitmap(bitmap_ppn)) {
			dev_warn(&pdev->dev,
				 "VMCI device unable to register notification bitmap with PPN 0x%lx\n",
				 bitmap_ppn);
			error = -ENXIO;
			goto err_remove_vmci_dev_g;
		}
	}

	/* Check host capabilities. */
	error = vmci_check_host_caps(pdev);
	if (error)
		goto err_remove_vmci_dev_g;

	/* Enable device. */

	/*
	 * We subscribe to the VMCI_EVENT_CTX_ID_UPDATE here so we can
	 * update the internal context id when needed.
	 */
	vmci_err = vmci_event_subscribe(VMCI_EVENT_CTX_ID_UPDATE,
					vmci_guest_cid_update, NULL,
					&ctx_update_sub_id);
	if (vmci_err < VMCI_SUCCESS)
		dev_warn(&pdev->dev,
			 "Failed to subscribe to event (type=%d): %d\n",
			 VMCI_EVENT_CTX_ID_UPDATE, vmci_err);

	/*
	 * Enable interrupts.  Try MSI-X first, then MSI, and then fallback on
	 * legacy interrupts.
	 */
	if (vmci_dev->mmio_base != NULL)
		num_irq_vectors = VMCI_MAX_INTRS;
	else
		num_irq_vectors = VMCI_MAX_INTRS_NOTIFICATION;
	error = pci_alloc_irq_vectors(pdev, num_irq_vectors, num_irq_vectors,
				      PCI_IRQ_MSIX);
	if (error < 0) {
		error = pci_alloc_irq_vectors(pdev, 1, 1,
				PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY);
		if (error < 0)
			goto err_unsubscribe_event;
	} else {
		vmci_dev->exclusive_vectors = true;
	}

	/*
	 * Request IRQ for legacy or MSI interrupts, or for first
	 * MSI-X vector.
	 */
	error = request_irq(pci_irq_vector(pdev, 0), vmci_interrupt,
			    IRQF_SHARED, KBUILD_MODNAME, vmci_dev);
	if (error) {
		dev_err(&pdev->dev, "Irq %u in use: %d\n",
			pci_irq_vector(pdev, 0), error);
		goto err_disable_msi;
	}

	/*
	 * For MSI-X with exclusive vectors we need to request an
	 * interrupt for each vector so that we get a separate
	 * interrupt handler routine.  This allows us to distinguish
	 * between the vectors.
	 */
	if (vmci_dev->exclusive_vectors) {
		error = request_irq(pci_irq_vector(pdev, 1),
				    vmci_interrupt_bm, 0, KBUILD_MODNAME,
				    vmci_dev);
		if (error) {
			dev_err(&pdev->dev,
				"Failed to allocate irq %u: %d\n",
				pci_irq_vector(pdev, 1), error);
			goto err_free_irq;
		}
		if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) {
			error = request_irq(pci_irq_vector(pdev, 2),
					    vmci_interrupt_dma_datagram,
					    0, KBUILD_MODNAME, vmci_dev);
			if (error) {
				dev_err(&pdev->dev,
					"Failed to allocate irq %u: %d\n",
					pci_irq_vector(pdev, 2), error);
				goto err_free_bm_irq;
			}
		}
	}

	dev_dbg(&pdev->dev, "Registered device\n");

	atomic_inc(&vmci_num_guest_devices);

	/* Enable specific interrupt bits. */
	cmd = VMCI_IMR_DATAGRAM;
	if (caps_in_use & VMCI_CAPS_NOTIFICATIONS)
		cmd |= VMCI_IMR_NOTIFICATION;
	if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM)
		cmd |= VMCI_IMR_DMA_DATAGRAM;
	vmci_write_reg(vmci_dev, cmd, VMCI_IMR_ADDR);

	/* Enable interrupts. */
	vmci_write_reg(vmci_dev, VMCI_CONTROL_INT_ENABLE, VMCI_CONTROL_ADDR);

	pci_set_drvdata(pdev, vmci_dev);

	vmci_call_vsock_callback(false);
	return 0;

err_free_bm_irq:
	if (vmci_dev->exclusive_vectors)
		free_irq(pci_irq_vector(pdev, 1), vmci_dev);

err_free_irq:
	free_irq(pci_irq_vector(pdev, 0), vmci_dev);
	tasklet_kill(&vmci_dev->datagram_tasklet);
	tasklet_kill(&vmci_dev->bm_tasklet);

err_disable_msi:
	pci_free_irq_vectors(pdev);

err_unsubscribe_event:
	vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
	if (vmci_err < VMCI_SUCCESS)
		dev_warn(&pdev->dev,
			 "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
			 VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);

err_remove_vmci_dev_g:
	spin_lock_irq(&vmci_dev_spinlock);
	vmci_pdev = NULL;
	vmci_dev_g = NULL;
	spin_unlock_irq(&vmci_dev_spinlock);

err_free_notification_bitmap:
	if (vmci_dev->notification_bitmap) {
		vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR);
		dma_free_coherent(&pdev->dev, PAGE_SIZE,
				  vmci_dev->notification_bitmap,
				  vmci_dev->notification_base);
	}

err_free_data_buffers:
	vmci_free_dg_buffers(vmci_dev);

	/* The rest are managed resources and will be freed by PCI core */
	return error;
}

static void vmci_guest_remove_device(struct pci_dev *pdev)
{
	struct vmci_guest_device *vmci_dev = pci_get_drvdata(pdev);
	int vmci_err;

	dev_dbg(&pdev->dev, "Removing device\n");

	atomic_dec(&vmci_num_guest_devices);

	vmci_qp_guest_endpoints_exit();

	vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
	if (vmci_err < VMCI_SUCCESS)
		dev_warn(&pdev->dev,
			 "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
			 VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);

	spin_lock_irq(&vmci_dev_spinlock);
	vmci_dev_g = NULL;
	vmci_pdev = NULL;
	spin_unlock_irq(&vmci_dev_spinlock);

	dev_dbg(&pdev->dev, "Resetting vmci device\n");
	vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR);

	/*
	 * Free IRQ and then disable MSI/MSI-X as appropriate.  For
	 * MSI-X, we might have multiple vectors, each with their own
	 * IRQ, which we must free too.
	 */
	if (vmci_dev->exclusive_vectors) {
		free_irq(pci_irq_vector(pdev, 1), vmci_dev);
		if (vmci_dev->mmio_base != NULL)
			free_irq(pci_irq_vector(pdev, 2), vmci_dev);
	}
	free_irq(pci_irq_vector(pdev, 0), vmci_dev);
	pci_free_irq_vectors(pdev);

	tasklet_kill(&vmci_dev->datagram_tasklet);
	tasklet_kill(&vmci_dev->bm_tasklet);

	if (vmci_dev->notification_bitmap) {
		/*
		 * The device reset above cleared the bitmap state of the
		 * device, so we can safely free it here.
		 */

		dma_free_coherent(&pdev->dev, PAGE_SIZE,
				  vmci_dev->notification_bitmap,
				  vmci_dev->notification_base);
	}

	vmci_free_dg_buffers(vmci_dev);

	if (vmci_dev->mmio_base != NULL)
		pci_iounmap(pdev, vmci_dev->mmio_base);

	/* The rest are managed resources and will be freed by PCI core */
}

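/* PCI IDs handled by this driver: the VMware VMCI device (0x15ad:0x0740). */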
static const struct pci_device_id vmci_ids[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_VMCI), },
	{ 0 },
};
MODULE_DEVICE_TABLE(pci, vmci_ids);

static struct pci_driver vmci_guest_driver = {
	.name		= KBUILD_MODNAME,
	.id_table	= vmci_ids,
	.probe		= vmci_guest_probe_device,
	.remove		= vmci_guest_remove_device,
};

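/*
 * Entry points for the guest personality of the VMCI driver. This file has
 * no module_init()/module_exit() of its own; these functions are presumably
 * declared in vmci_driver.h (included above) and called from the common
 * VMCI driver code, which registers and unregisters the PCI driver above.
 */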
int __init vmci_guest_init(void)
{
	return pci_register_driver(&vmci_guest_driver);
}

void __exit vmci_guest_exit(void)
{
	pci_unregister_driver(&vmci_guest_driver);
}