cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

vmci_queue_pair.c (95305B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * VMware VMCI Driver
      4 *
      5 * Copyright (C) 2012 VMware, Inc. All rights reserved.
      6 */
      7
      8#include <linux/vmw_vmci_defs.h>
      9#include <linux/vmw_vmci_api.h>
     10#include <linux/highmem.h>
     11#include <linux/kernel.h>
     12#include <linux/mm.h>
     13#include <linux/module.h>
     14#include <linux/mutex.h>
     15#include <linux/pagemap.h>
     16#include <linux/pci.h>
     17#include <linux/sched.h>
     18#include <linux/slab.h>
     19#include <linux/uio.h>
     20#include <linux/wait.h>
     21#include <linux/vmalloc.h>
     22#include <linux/skbuff.h>
     23
     24#include "vmci_handle_array.h"
     25#include "vmci_queue_pair.h"
     26#include "vmci_datagram.h"
     27#include "vmci_resource.h"
     28#include "vmci_context.h"
     29#include "vmci_driver.h"
     30#include "vmci_event.h"
     31#include "vmci_route.h"
     32
     33/*
     34 * In the following, we will distinguish between two kinds of VMX processes -
     35 * the ones with versions lower than VMCI_VERSION_NOVMVM that use specialized
      36 * VMCI page files in the VMX and support VM to VM communication, and the
      37 * newer ones that use the guest memory directly. We will refer to the
      38 * older VMX versions as old-style VMX'en, and the newer ones as
     39 * new-style VMX'en.
     40 *
      41 * The state transition diagram is as follows (the VMCIQPB_ prefix has been
      42 * removed for readability) - see below for more details on the transitions:
     43 *
     44 *            --------------  NEW  -------------
     45 *            |                                |
     46 *           \_/                              \_/
     47 *     CREATED_NO_MEM <-----------------> CREATED_MEM
     48 *            |    |                           |
     49 *            |    o-----------------------o   |
     50 *            |                            |   |
     51 *           \_/                          \_/ \_/
     52 *     ATTACHED_NO_MEM <----------------> ATTACHED_MEM
     53 *            |                            |   |
     54 *            |     o----------------------o   |
     55 *            |     |                          |
     56 *           \_/   \_/                        \_/
     57 *     SHUTDOWN_NO_MEM <----------------> SHUTDOWN_MEM
     58 *            |                                |
     59 *            |                                |
     60 *            -------------> gone <-------------
     61 *
      62 * In more detail: when a VMCI queue pair is first created, it will be in the
     63 * VMCIQPB_NEW state. It will then move into one of the following states:
     64 *
     65 * - VMCIQPB_CREATED_NO_MEM: this state indicates that either:
     66 *
      67 *     - the create was performed by a host endpoint, in which case there is
     68 *       no backing memory yet.
     69 *
     70 *     - the create was initiated by an old-style VMX, that uses
     71 *       vmci_qp_broker_set_page_store to specify the UVAs of the queue pair at
     72 *       a later point in time. This state can be distinguished from the one
     73 *       above by the context ID of the creator. A host side is not allowed to
     74 *       attach until the page store has been set.
     75 *
     76 * - VMCIQPB_CREATED_MEM: this state is the result when the queue pair
     77 *     is created by a VMX using the queue pair device backend that
     78 *     sets the UVAs of the queue pair immediately and stores the
     79 *     information for later attachers. At this point, it is ready for
     80 *     the host side to attach to it.
     81 *
     82 * Once the queue pair is in one of the created states (with the exception of
     83 * the case mentioned for older VMX'en above), it is possible to attach to the
     84 * queue pair. Again we have two new states possible:
     85 *
     86 * - VMCIQPB_ATTACHED_MEM: this state can be reached through the following
     87 *   paths:
     88 *
     89 *     - from VMCIQPB_CREATED_NO_MEM when a new-style VMX allocates a queue
     90 *       pair, and attaches to a queue pair previously created by the host side.
     91 *
     92 *     - from VMCIQPB_CREATED_MEM when the host side attaches to a queue pair
     93 *       already created by a guest.
     94 *
     95 *     - from VMCIQPB_ATTACHED_NO_MEM, when an old-style VMX calls
     96 *       vmci_qp_broker_set_page_store (see below).
     97 *
      98 * - VMCIQPB_ATTACHED_NO_MEM: if the queue pair was already in the
      99 *     VMCIQPB_CREATED_NO_MEM state due to a host side create, an old-style
     100 *     VMX will bring the queue pair into this state. Once
     101 *     vmci_qp_broker_set_page_store is called to register the user memory,
     102 *     the VMCIQPB_ATTACHED_MEM state will be entered.
    103 *
    104 * From the attached queue pair, the queue pair can enter the shutdown states
    105 * when either side of the queue pair detaches. If the guest side detaches
    106 * first, the queue pair will enter the VMCIQPB_SHUTDOWN_NO_MEM state, where
    107 * the content of the queue pair will no longer be available. If the host
    108 * side detaches first, the queue pair will either enter the
    109 * VMCIQPB_SHUTDOWN_MEM, if the guest memory is currently mapped, or
    110 * VMCIQPB_SHUTDOWN_NO_MEM, if the guest memory is not mapped
    111 * (e.g., the host detaches while a guest is stunned).
    112 *
    113 * New-style VMX'en will also unmap guest memory, if the guest is
    114 * quiesced, e.g., during a snapshot operation. In that case, the guest
    115 * memory will no longer be available, and the queue pair will transition from
    116 * *_MEM state to a *_NO_MEM state. The VMX may later map the memory once more,
    117 * in which case the queue pair will transition from the *_NO_MEM state at that
    118 * point back to the *_MEM state. Note that the *_NO_MEM state may have changed,
    119 * since the peer may have either attached or detached in the meantime. The
    120 * values are laid out such that ++ on a state will move from a *_NO_MEM to a
    121 * *_MEM state, and vice versa.
    122 */
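/*
 * Illustrative sketch (added for clarity, not part of the original driver
 * source): for the common case of a new-style VMX creating a queue pair
 * that a host endpoint later attaches to, a broker entry would walk
 *
 *	VMCIQPB_NEW -> VMCIQPB_CREATED_MEM -> VMCIQPB_ATTACHED_MEM
 *	            -> VMCIQPB_SHUTDOWN_MEM (or SHUTDOWN_NO_MEM) -> VMCIQPB_GONE
 *
 * and, because each NO_MEM value is immediately followed by its MEM
 * counterpart in the enum below, a mapping change while attached is just
 * "entry->state++" (memory mapped again) or "entry->state--" (memory
 * unmapped).
 */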
    123
    124/* The Kernel specific component of the struct vmci_queue structure. */
    125struct vmci_queue_kern_if {
    126	struct mutex __mutex;	/* Protects the queue. */
    127	struct mutex *mutex;	/* Shared by producer and consumer queues. */
    128	size_t num_pages;	/* Number of pages incl. header. */
    129	bool host;		/* Host or guest? */
    130	union {
    131		struct {
    132			dma_addr_t *pas;
    133			void **vas;
    134		} g;		/* Used by the guest. */
    135		struct {
    136			struct page **page;
    137			struct page **header_page;
    138		} h;		/* Used by the host. */
    139	} u;
    140};
    141
    142/*
    143 * This structure is opaque to the clients.
    144 */
    145struct vmci_qp {
    146	struct vmci_handle handle;
    147	struct vmci_queue *produce_q;
    148	struct vmci_queue *consume_q;
    149	u64 produce_q_size;
    150	u64 consume_q_size;
    151	u32 peer;
    152	u32 flags;
    153	u32 priv_flags;
    154	bool guest_endpoint;
    155	unsigned int blocked;
    156	unsigned int generation;
    157	wait_queue_head_t event;
    158};
    159
    160enum qp_broker_state {
    161	VMCIQPB_NEW,
    162	VMCIQPB_CREATED_NO_MEM,
    163	VMCIQPB_CREATED_MEM,
    164	VMCIQPB_ATTACHED_NO_MEM,
    165	VMCIQPB_ATTACHED_MEM,
    166	VMCIQPB_SHUTDOWN_NO_MEM,
    167	VMCIQPB_SHUTDOWN_MEM,
    168	VMCIQPB_GONE
    169};
    170
    171#define QPBROKERSTATE_HAS_MEM(_qpb) (_qpb->state == VMCIQPB_CREATED_MEM || \
    172				     _qpb->state == VMCIQPB_ATTACHED_MEM || \
    173				     _qpb->state == VMCIQPB_SHUTDOWN_MEM)
    174
    175/*
    176 * In the queue pair broker, we always use the guest point of view for
    177 * the produce and consume queue values and references, e.g., the
     178 * produce queue size stored is the guest's produce queue size. The
    179 * host endpoint will need to swap these around. The only exception is
    180 * the local queue pairs on the host, in which case the host endpoint
    181 * that creates the queue pair will have the right orientation, and
    182 * the attaching host endpoint will need to swap.
    183 */
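/*
 * Illustrative example (not from the original sources): if a guest
 * creates a queue pair with a 64 KiB produce queue and a 32 KiB consume
 * queue, the broker entry stores produce_size = 64 KiB and
 * consume_size = 32 KiB.  A host endpoint attaching to that pair treats
 * the guest's produce queue as its consume queue, so from the host's
 * point of view the sizes are swapped (produce 32 KiB, consume 64 KiB).
 */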
    184struct qp_entry {
    185	struct list_head list_item;
    186	struct vmci_handle handle;
    187	u32 peer;
    188	u32 flags;
    189	u64 produce_size;
    190	u64 consume_size;
    191	u32 ref_count;
    192};
    193
    194struct qp_broker_entry {
    195	struct vmci_resource resource;
    196	struct qp_entry qp;
    197	u32 create_id;
    198	u32 attach_id;
    199	enum qp_broker_state state;
    200	bool require_trusted_attach;
    201	bool created_by_trusted;
    202	bool vmci_page_files;	/* Created by VMX using VMCI page files */
    203	struct vmci_queue *produce_q;
    204	struct vmci_queue *consume_q;
    205	struct vmci_queue_header saved_produce_q;
    206	struct vmci_queue_header saved_consume_q;
    207	vmci_event_release_cb wakeup_cb;
    208	void *client_data;
    209	void *local_mem;	/* Kernel memory for local queue pair */
    210};
    211
    212struct qp_guest_endpoint {
    213	struct vmci_resource resource;
    214	struct qp_entry qp;
    215	u64 num_ppns;
    216	void *produce_q;
    217	void *consume_q;
    218	struct ppn_set ppn_set;
    219};
    220
    221struct qp_list {
    222	struct list_head head;
    223	struct mutex mutex;	/* Protect queue list. */
    224};
    225
    226static struct qp_list qp_broker_list = {
    227	.head = LIST_HEAD_INIT(qp_broker_list.head),
    228	.mutex = __MUTEX_INITIALIZER(qp_broker_list.mutex),
    229};
    230
    231static struct qp_list qp_guest_endpoints = {
    232	.head = LIST_HEAD_INIT(qp_guest_endpoints.head),
    233	.mutex = __MUTEX_INITIALIZER(qp_guest_endpoints.mutex),
    234};
    235
    236#define INVALID_VMCI_GUEST_MEM_ID  0
    237#define QPE_NUM_PAGES(_QPE) ((u32) \
    238			     (DIV_ROUND_UP(_QPE.produce_size, PAGE_SIZE) + \
    239			      DIV_ROUND_UP(_QPE.consume_size, PAGE_SIZE) + 2))
    240#define QP_SIZES_ARE_VALID(_prod_qsize, _cons_qsize) \
    241	((_prod_qsize) + (_cons_qsize) >= max(_prod_qsize, _cons_qsize) && \
    242	 (_prod_qsize) + (_cons_qsize) <= VMCI_MAX_GUEST_QP_MEMORY)
    243
    244/*
    245 * Frees kernel VA space for a given queue and its queue header, and
    246 * frees physical data pages.
    247 */
    248static void qp_free_queue(void *q, u64 size)
    249{
    250	struct vmci_queue *queue = q;
    251
    252	if (queue) {
    253		u64 i;
    254
    255		/* Given size does not include header, so add in a page here. */
    256		for (i = 0; i < DIV_ROUND_UP(size, PAGE_SIZE) + 1; i++) {
    257			dma_free_coherent(&vmci_pdev->dev, PAGE_SIZE,
    258					  queue->kernel_if->u.g.vas[i],
    259					  queue->kernel_if->u.g.pas[i]);
    260		}
    261
    262		vfree(queue);
    263	}
    264}
    265
    266/*
    267 * Allocates kernel queue pages of specified size with IOMMU mappings,
    268 * plus space for the queue structure/kernel interface and the queue
    269 * header.
    270 */
    271static void *qp_alloc_queue(u64 size, u32 flags)
    272{
    273	u64 i;
    274	struct vmci_queue *queue;
    275	size_t pas_size;
    276	size_t vas_size;
    277	size_t queue_size = sizeof(*queue) + sizeof(*queue->kernel_if);
    278	u64 num_pages;
    279
    280	if (size > SIZE_MAX - PAGE_SIZE)
    281		return NULL;
    282	num_pages = DIV_ROUND_UP(size, PAGE_SIZE) + 1;
    283	if (num_pages >
    284		 (SIZE_MAX - queue_size) /
    285		 (sizeof(*queue->kernel_if->u.g.pas) +
    286		  sizeof(*queue->kernel_if->u.g.vas)))
    287		return NULL;
    288
    289	pas_size = num_pages * sizeof(*queue->kernel_if->u.g.pas);
    290	vas_size = num_pages * sizeof(*queue->kernel_if->u.g.vas);
    291	queue_size += pas_size + vas_size;
    292
    293	queue = vmalloc(queue_size);
    294	if (!queue)
    295		return NULL;
    296
    297	queue->q_header = NULL;
    298	queue->saved_header = NULL;
    299	queue->kernel_if = (struct vmci_queue_kern_if *)(queue + 1);
    300	queue->kernel_if->mutex = NULL;
    301	queue->kernel_if->num_pages = num_pages;
    302	queue->kernel_if->u.g.pas = (dma_addr_t *)(queue->kernel_if + 1);
    303	queue->kernel_if->u.g.vas =
    304		(void **)((u8 *)queue->kernel_if->u.g.pas + pas_size);
    305	queue->kernel_if->host = false;
    306
    307	for (i = 0; i < num_pages; i++) {
    308		queue->kernel_if->u.g.vas[i] =
    309			dma_alloc_coherent(&vmci_pdev->dev, PAGE_SIZE,
    310					   &queue->kernel_if->u.g.pas[i],
    311					   GFP_KERNEL);
    312		if (!queue->kernel_if->u.g.vas[i]) {
    313			/* Size excl. the header. */
    314			qp_free_queue(queue, i * PAGE_SIZE);
    315			return NULL;
    316		}
    317	}
    318
    319	/* Queue header is the first page. */
    320	queue->q_header = queue->kernel_if->u.g.vas[0];
    321
    322	return queue;
    323}
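/*
 * Layout sketch of the allocation built by qp_alloc_queue() above
 * (illustrative, derived from the code rather than the original
 * comments).  A single vmalloc'ed region holds the bookkeeping:
 *
 *	[struct vmci_queue][struct vmci_queue_kern_if][pas[0..n-1]][vas[0..n-1]]
 *
 * while the actual queue pages (n = num_pages, including one header
 * page) are separate PAGE_SIZE dma_alloc_coherent() allocations whose
 * kernel addresses and DMA addresses are recorded in vas[] and pas[];
 * vas[0] doubles as the queue header (queue->q_header).
 */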
    324
    325/*
    326 * Copies from a given buffer or iovector to a VMCI Queue.  Uses
    327 * kmap()/kunmap() to dynamically map/unmap required portions of the queue
    328 * by traversing the offset -> page translation structure for the queue.
    329 * Assumes that offset + size does not wrap around in the queue.
    330 */
    331static int qp_memcpy_to_queue_iter(struct vmci_queue *queue,
    332				  u64 queue_offset,
    333				  struct iov_iter *from,
    334				  size_t size)
    335{
    336	struct vmci_queue_kern_if *kernel_if = queue->kernel_if;
    337	size_t bytes_copied = 0;
    338
    339	while (bytes_copied < size) {
    340		const u64 page_index =
    341			(queue_offset + bytes_copied) / PAGE_SIZE;
    342		const size_t page_offset =
    343		    (queue_offset + bytes_copied) & (PAGE_SIZE - 1);
    344		void *va;
    345		size_t to_copy;
    346
    347		if (kernel_if->host)
    348			va = kmap(kernel_if->u.h.page[page_index]);
    349		else
    350			va = kernel_if->u.g.vas[page_index + 1];
    351			/* Skip header. */
    352
    353		if (size - bytes_copied > PAGE_SIZE - page_offset)
    354			/* Enough payload to fill up from this page. */
    355			to_copy = PAGE_SIZE - page_offset;
    356		else
    357			to_copy = size - bytes_copied;
    358
    359		if (!copy_from_iter_full((u8 *)va + page_offset, to_copy,
    360					 from)) {
    361			if (kernel_if->host)
    362				kunmap(kernel_if->u.h.page[page_index]);
    363			return VMCI_ERROR_INVALID_ARGS;
    364		}
    365		bytes_copied += to_copy;
    366		if (kernel_if->host)
    367			kunmap(kernel_if->u.h.page[page_index]);
    368	}
    369
    370	return VMCI_SUCCESS;
    371}
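/*
 * Usage sketch (illustrative, not taken from this file): callers build an
 * iov_iter over a plain kernel buffer before handing it to
 * qp_memcpy_to_queue_iter().  "buf", "buf_size" and "tail_offset" are
 * placeholder names, and the iov_iter_kvec() direction argument (WRITE)
 * is an assumption matching v5.x-era kernels:
 *
 *	struct kvec v = { .iov_base = buf, .iov_len = buf_size };
 *	struct iov_iter from;
 *
 *	iov_iter_kvec(&from, WRITE, &v, 1, buf_size);
 *	result = qp_memcpy_to_queue_iter(produce_q, tail_offset, &from, buf_size);
 */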
    372
    373/*
    374 * Copies to a given buffer or iovector from a VMCI Queue.  Uses
    375 * kmap()/kunmap() to dynamically map/unmap required portions of the queue
    376 * by traversing the offset -> page translation structure for the queue.
    377 * Assumes that offset + size does not wrap around in the queue.
    378 */
    379static int qp_memcpy_from_queue_iter(struct iov_iter *to,
    380				    const struct vmci_queue *queue,
    381				    u64 queue_offset, size_t size)
    382{
    383	struct vmci_queue_kern_if *kernel_if = queue->kernel_if;
    384	size_t bytes_copied = 0;
    385
    386	while (bytes_copied < size) {
    387		const u64 page_index =
    388			(queue_offset + bytes_copied) / PAGE_SIZE;
    389		const size_t page_offset =
    390		    (queue_offset + bytes_copied) & (PAGE_SIZE - 1);
    391		void *va;
    392		size_t to_copy;
    393		int err;
    394
    395		if (kernel_if->host)
    396			va = kmap(kernel_if->u.h.page[page_index]);
    397		else
    398			va = kernel_if->u.g.vas[page_index + 1];
    399			/* Skip header. */
    400
    401		if (size - bytes_copied > PAGE_SIZE - page_offset)
    402			/* Enough payload to fill up this page. */
    403			to_copy = PAGE_SIZE - page_offset;
    404		else
    405			to_copy = size - bytes_copied;
    406
    407		err = copy_to_iter((u8 *)va + page_offset, to_copy, to);
    408		if (err != to_copy) {
    409			if (kernel_if->host)
    410				kunmap(kernel_if->u.h.page[page_index]);
    411			return VMCI_ERROR_INVALID_ARGS;
    412		}
    413		bytes_copied += to_copy;
    414		if (kernel_if->host)
    415			kunmap(kernel_if->u.h.page[page_index]);
    416	}
    417
    418	return VMCI_SUCCESS;
    419}
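/*
 * The dequeue direction mirrors the enqueue sketch above (again
 * illustrative; READ as the iov_iter_kvec() direction is a v5.x-era
 * assumption, and "head_offset" is a placeholder name):
 *
 *	struct kvec v = { .iov_base = buf, .iov_len = buf_size };
 *	struct iov_iter to;
 *
 *	iov_iter_kvec(&to, READ, &v, 1, buf_size);
 *	result = qp_memcpy_from_queue_iter(&to, consume_q, head_offset, buf_size);
 */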
    420
    421/*
     422 * Allocates two lists of PPNs --- one for the pages in the produce queue,
     423 * and the other for the pages in the consume queue. Initializes the lists
    424 * of PPNs with the page frame numbers of the KVA for the two queues (and
    425 * the queue headers).
    426 */
    427static int qp_alloc_ppn_set(void *prod_q,
    428			    u64 num_produce_pages,
    429			    void *cons_q,
    430			    u64 num_consume_pages, struct ppn_set *ppn_set)
    431{
    432	u64 *produce_ppns;
    433	u64 *consume_ppns;
    434	struct vmci_queue *produce_q = prod_q;
    435	struct vmci_queue *consume_q = cons_q;
    436	u64 i;
    437
    438	if (!produce_q || !num_produce_pages || !consume_q ||
    439	    !num_consume_pages || !ppn_set)
    440		return VMCI_ERROR_INVALID_ARGS;
    441
    442	if (ppn_set->initialized)
    443		return VMCI_ERROR_ALREADY_EXISTS;
    444
    445	produce_ppns =
    446	    kmalloc_array(num_produce_pages, sizeof(*produce_ppns),
    447			  GFP_KERNEL);
    448	if (!produce_ppns)
    449		return VMCI_ERROR_NO_MEM;
    450
    451	consume_ppns =
    452	    kmalloc_array(num_consume_pages, sizeof(*consume_ppns),
    453			  GFP_KERNEL);
    454	if (!consume_ppns) {
    455		kfree(produce_ppns);
    456		return VMCI_ERROR_NO_MEM;
    457	}
    458
    459	for (i = 0; i < num_produce_pages; i++)
    460		produce_ppns[i] =
    461			produce_q->kernel_if->u.g.pas[i] >> PAGE_SHIFT;
    462
    463	for (i = 0; i < num_consume_pages; i++)
    464		consume_ppns[i] =
    465			consume_q->kernel_if->u.g.pas[i] >> PAGE_SHIFT;
    466
    467	ppn_set->num_produce_pages = num_produce_pages;
    468	ppn_set->num_consume_pages = num_consume_pages;
    469	ppn_set->produce_ppns = produce_ppns;
    470	ppn_set->consume_ppns = consume_ppns;
    471	ppn_set->initialized = true;
    472	return VMCI_SUCCESS;
    473}
    474
    475/*
     477 * Frees the two lists of PPNs for a queue pair.
    477 */
    478static void qp_free_ppn_set(struct ppn_set *ppn_set)
    479{
    480	if (ppn_set->initialized) {
    481		/* Do not call these functions on NULL inputs. */
    482		kfree(ppn_set->produce_ppns);
    483		kfree(ppn_set->consume_ppns);
    484	}
    485	memset(ppn_set, 0, sizeof(*ppn_set));
    486}
    487
    488/*
     489 * Populates the list of PPNs in the hypercall structure with the PPNs
    490 * of the produce queue and the consume queue.
    491 */
    492static int qp_populate_ppn_set(u8 *call_buf, const struct ppn_set *ppn_set)
    493{
    494	if (vmci_use_ppn64()) {
    495		memcpy(call_buf, ppn_set->produce_ppns,
    496		       ppn_set->num_produce_pages *
    497		       sizeof(*ppn_set->produce_ppns));
    498		memcpy(call_buf +
    499		       ppn_set->num_produce_pages *
    500		       sizeof(*ppn_set->produce_ppns),
    501		       ppn_set->consume_ppns,
    502		       ppn_set->num_consume_pages *
    503		       sizeof(*ppn_set->consume_ppns));
    504	} else {
    505		int i;
    506		u32 *ppns = (u32 *) call_buf;
    507
    508		for (i = 0; i < ppn_set->num_produce_pages; i++)
    509			ppns[i] = (u32) ppn_set->produce_ppns[i];
    510
    511		ppns = &ppns[ppn_set->num_produce_pages];
    512
    513		for (i = 0; i < ppn_set->num_consume_pages; i++)
    514			ppns[i] = (u32) ppn_set->consume_ppns[i];
    515	}
    516
    517	return VMCI_SUCCESS;
    518}
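/*
 * Buffer layout sketch for qp_populate_ppn_set() (illustrative): the
 * hypercall buffer is filled with the produce queue PPNs immediately
 * followed by the consume queue PPNs, each entry being a u64 when
 * vmci_use_ppn64() is true and a truncated u32 otherwise:
 *
 *	[produce_ppn 0 .. num_produce_pages-1][consume_ppn 0 .. num_consume_pages-1]
 */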
    519
    520/*
    521 * Allocates kernel VA space of specified size plus space for the queue
    522 * and kernel interface.  This is different from the guest queue allocator,
    523 * because we do not allocate our own queue header/data pages here but
    524 * share those of the guest.
    525 */
    526static struct vmci_queue *qp_host_alloc_queue(u64 size)
    527{
    528	struct vmci_queue *queue;
    529	size_t queue_page_size;
    530	u64 num_pages;
    531	const size_t queue_size = sizeof(*queue) + sizeof(*(queue->kernel_if));
    532
    533	if (size > min_t(size_t, VMCI_MAX_GUEST_QP_MEMORY, SIZE_MAX - PAGE_SIZE))
    534		return NULL;
    535	num_pages = DIV_ROUND_UP(size, PAGE_SIZE) + 1;
    536	if (num_pages > (SIZE_MAX - queue_size) /
    537		 sizeof(*queue->kernel_if->u.h.page))
    538		return NULL;
    539
    540	queue_page_size = num_pages * sizeof(*queue->kernel_if->u.h.page);
    541
    542	if (queue_size + queue_page_size > KMALLOC_MAX_SIZE)
    543		return NULL;
    544
    545	queue = kzalloc(queue_size + queue_page_size, GFP_KERNEL);
    546	if (queue) {
    547		queue->q_header = NULL;
    548		queue->saved_header = NULL;
    549		queue->kernel_if = (struct vmci_queue_kern_if *)(queue + 1);
    550		queue->kernel_if->host = true;
    551		queue->kernel_if->mutex = NULL;
    552		queue->kernel_if->num_pages = num_pages;
    553		queue->kernel_if->u.h.header_page =
    554		    (struct page **)((u8 *)queue + queue_size);
    555		queue->kernel_if->u.h.page =
    556			&queue->kernel_if->u.h.header_page[1];
    557	}
    558
    559	return queue;
    560}
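/*
 * Layout sketch for qp_host_alloc_queue() (illustrative, derived from
 * the code): a single kzalloc'ed region holds
 *
 *	[struct vmci_queue][struct vmci_queue_kern_if][header_page][page[0..n-2]]
 *
 * where u.h.header_page points at the first struct page pointer slot
 * and u.h.page at the slot right after it, so the num_pages pointers
 * form one contiguous array with the queue header page first.  The
 * slots are only filled in later by get_user_pages_fast() in
 * qp_host_get_user_memory().
 */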
    561
    562/*
    563 * Frees kernel memory for a given queue (header plus translation
    564 * structure).
    565 */
    566static void qp_host_free_queue(struct vmci_queue *queue, u64 queue_size)
    567{
    568	kfree(queue);
    569}
    570
    571/*
    572 * Initialize the mutex for the pair of queues.  This mutex is used to
    573 * protect the q_header and the buffer from changing out from under any
    574 * users of either queue.  Of course, it's only any good if the mutexes
     575 * are actually acquired.  The queue structure must lie in non-paged memory
    576 * or we cannot guarantee access to the mutex.
    577 */
    578static void qp_init_queue_mutex(struct vmci_queue *produce_q,
    579				struct vmci_queue *consume_q)
    580{
    581	/*
    582	 * Only the host queue has shared state - the guest queues do not
    583	 * need to synchronize access using a queue mutex.
    584	 */
    585
    586	if (produce_q->kernel_if->host) {
    587		produce_q->kernel_if->mutex = &produce_q->kernel_if->__mutex;
    588		consume_q->kernel_if->mutex = &produce_q->kernel_if->__mutex;
    589		mutex_init(produce_q->kernel_if->mutex);
    590	}
    591}
    592
    593/*
    594 * Cleans up the mutex for the pair of queues.
    595 */
    596static void qp_cleanup_queue_mutex(struct vmci_queue *produce_q,
    597				   struct vmci_queue *consume_q)
    598{
    599	if (produce_q->kernel_if->host) {
    600		produce_q->kernel_if->mutex = NULL;
    601		consume_q->kernel_if->mutex = NULL;
    602	}
    603}
    604
    605/*
    606 * Acquire the mutex for the queue.  Note that the produce_q and
     607 * the consume_q share a mutex.  So, only one of the two needs to
    608 * be passed in to this routine.  Either will work just fine.
    609 */
    610static void qp_acquire_queue_mutex(struct vmci_queue *queue)
    611{
    612	if (queue->kernel_if->host)
    613		mutex_lock(queue->kernel_if->mutex);
    614}
    615
    616/*
    617 * Release the mutex for the queue.  Note that the produce_q and
     618 * the consume_q share a mutex.  So, only one of the two needs to
    619 * be passed in to this routine.  Either will work just fine.
    620 */
    621static void qp_release_queue_mutex(struct vmci_queue *queue)
    622{
    623	if (queue->kernel_if->host)
    624		mutex_unlock(queue->kernel_if->mutex);
    625}
    626
    627/*
    628 * Helper function to release pages in the PageStoreAttachInfo
    629 * previously obtained using get_user_pages.
    630 */
    631static void qp_release_pages(struct page **pages,
    632			     u64 num_pages, bool dirty)
    633{
    634	int i;
    635
    636	for (i = 0; i < num_pages; i++) {
    637		if (dirty)
    638			set_page_dirty_lock(pages[i]);
    639
    640		put_page(pages[i]);
    641		pages[i] = NULL;
    642	}
    643}
    644
    645/*
    646 * Lock the user pages referenced by the {produce,consume}Buffer
    647 * struct into memory and populate the {produce,consume}Pages
    648 * arrays in the attach structure with them.
    649 */
    650static int qp_host_get_user_memory(u64 produce_uva,
    651				   u64 consume_uva,
    652				   struct vmci_queue *produce_q,
    653				   struct vmci_queue *consume_q)
    654{
    655	int retval;
    656	int err = VMCI_SUCCESS;
    657
    658	retval = get_user_pages_fast((uintptr_t) produce_uva,
    659				     produce_q->kernel_if->num_pages,
    660				     FOLL_WRITE,
    661				     produce_q->kernel_if->u.h.header_page);
    662	if (retval < (int)produce_q->kernel_if->num_pages) {
    663		pr_debug("get_user_pages_fast(produce) failed (retval=%d)",
    664			retval);
    665		if (retval > 0)
    666			qp_release_pages(produce_q->kernel_if->u.h.header_page,
    667					retval, false);
    668		err = VMCI_ERROR_NO_MEM;
    669		goto out;
    670	}
    671
    672	retval = get_user_pages_fast((uintptr_t) consume_uva,
    673				     consume_q->kernel_if->num_pages,
    674				     FOLL_WRITE,
    675				     consume_q->kernel_if->u.h.header_page);
    676	if (retval < (int)consume_q->kernel_if->num_pages) {
    677		pr_debug("get_user_pages_fast(consume) failed (retval=%d)",
    678			retval);
    679		if (retval > 0)
    680			qp_release_pages(consume_q->kernel_if->u.h.header_page,
    681					retval, false);
    682		qp_release_pages(produce_q->kernel_if->u.h.header_page,
    683				 produce_q->kernel_if->num_pages, false);
    684		err = VMCI_ERROR_NO_MEM;
    685	}
    686
    687 out:
    688	return err;
    689}
    690
    691/*
    692 * Registers the specification of the user pages used for backing a queue
    693 * pair. Enough information to map in pages is stored in the OS specific
    694 * part of the struct vmci_queue structure.
    695 */
    696static int qp_host_register_user_memory(struct vmci_qp_page_store *page_store,
    697					struct vmci_queue *produce_q,
    698					struct vmci_queue *consume_q)
    699{
    700	u64 produce_uva;
    701	u64 consume_uva;
    702
    703	/*
     704 * The new style and the old style mapping differ only in
     705 * that we get either a single UVA or two UVAs, so we split the
    706	 * single UVA range at the appropriate spot.
    707	 */
    708	produce_uva = page_store->pages;
    709	consume_uva = page_store->pages +
    710	    produce_q->kernel_if->num_pages * PAGE_SIZE;
    711	return qp_host_get_user_memory(produce_uva, consume_uva, produce_q,
    712				       consume_q);
    713}
    714
    715/*
    716 * Releases and removes the references to user pages stored in the attach
    717 * struct.  Pages are released from the page cache and may become
    718 * swappable again.
    719 */
    720static void qp_host_unregister_user_memory(struct vmci_queue *produce_q,
    721					   struct vmci_queue *consume_q)
    722{
    723	qp_release_pages(produce_q->kernel_if->u.h.header_page,
    724			 produce_q->kernel_if->num_pages, true);
    725	memset(produce_q->kernel_if->u.h.header_page, 0,
    726	       sizeof(*produce_q->kernel_if->u.h.header_page) *
    727	       produce_q->kernel_if->num_pages);
    728	qp_release_pages(consume_q->kernel_if->u.h.header_page,
    729			 consume_q->kernel_if->num_pages, true);
    730	memset(consume_q->kernel_if->u.h.header_page, 0,
    731	       sizeof(*consume_q->kernel_if->u.h.header_page) *
    732	       consume_q->kernel_if->num_pages);
    733}
    734
    735/*
    736 * Once qp_host_register_user_memory has been performed on a
    737 * queue, the queue pair headers can be mapped into the
    738 * kernel. Once mapped, they must be unmapped with
    739 * qp_host_unmap_queues prior to calling
    740 * qp_host_unregister_user_memory.
    741 * Pages are pinned.
    742 */
    743static int qp_host_map_queues(struct vmci_queue *produce_q,
    744			      struct vmci_queue *consume_q)
    745{
    746	int result;
    747
    748	if (!produce_q->q_header || !consume_q->q_header) {
    749		struct page *headers[2];
    750
    751		if (produce_q->q_header != consume_q->q_header)
    752			return VMCI_ERROR_QUEUEPAIR_MISMATCH;
    753
    754		if (produce_q->kernel_if->u.h.header_page == NULL ||
    755		    *produce_q->kernel_if->u.h.header_page == NULL)
    756			return VMCI_ERROR_UNAVAILABLE;
    757
    758		headers[0] = *produce_q->kernel_if->u.h.header_page;
    759		headers[1] = *consume_q->kernel_if->u.h.header_page;
    760
    761		produce_q->q_header = vmap(headers, 2, VM_MAP, PAGE_KERNEL);
    762		if (produce_q->q_header != NULL) {
    763			consume_q->q_header =
    764			    (struct vmci_queue_header *)((u8 *)
    765							 produce_q->q_header +
    766							 PAGE_SIZE);
    767			result = VMCI_SUCCESS;
    768		} else {
    769			pr_warn("vmap failed\n");
    770			result = VMCI_ERROR_NO_MEM;
    771		}
    772	} else {
    773		result = VMCI_SUCCESS;
    774	}
    775
    776	return result;
    777}
    778
    779/*
    780 * Unmaps previously mapped queue pair headers from the kernel.
    781 * Pages are unpinned.
    782 */
    783static int qp_host_unmap_queues(u32 gid,
    784				struct vmci_queue *produce_q,
    785				struct vmci_queue *consume_q)
    786{
    787	if (produce_q->q_header) {
    788		if (produce_q->q_header < consume_q->q_header)
    789			vunmap(produce_q->q_header);
    790		else
    791			vunmap(consume_q->q_header);
    792
    793		produce_q->q_header = NULL;
    794		consume_q->q_header = NULL;
    795	}
    796
    797	return VMCI_SUCCESS;
    798}
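/*
 * Call-order sketch for the host-side helpers above (an illustrative
 * summary of the rules stated in the comments, not new requirements):
 *
 *	qp_host_register_user_memory(page_store, produce_q, consume_q);
 *	qp_host_map_queues(produce_q, consume_q);
 *	... use produce_q->q_header / consume_q->q_header ...
 *	qp_host_unmap_queues(gid, produce_q, consume_q);
 *	qp_host_unregister_user_memory(produce_q, consume_q);
 */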
    799
    800/*
    801 * Finds the entry in the list corresponding to a given handle. Assumes
    802 * that the list is locked.
    803 */
    804static struct qp_entry *qp_list_find(struct qp_list *qp_list,
    805				     struct vmci_handle handle)
    806{
    807	struct qp_entry *entry;
    808
    809	if (vmci_handle_is_invalid(handle))
    810		return NULL;
    811
    812	list_for_each_entry(entry, &qp_list->head, list_item) {
    813		if (vmci_handle_is_equal(entry->handle, handle))
    814			return entry;
    815	}
    816
    817	return NULL;
    818}
    819
    820/*
    821 * Finds the entry in the list corresponding to a given handle.
    822 */
    823static struct qp_guest_endpoint *
    824qp_guest_handle_to_entry(struct vmci_handle handle)
    825{
    826	struct qp_guest_endpoint *entry;
    827	struct qp_entry *qp = qp_list_find(&qp_guest_endpoints, handle);
    828
    829	entry = qp ? container_of(
    830		qp, struct qp_guest_endpoint, qp) : NULL;
    831	return entry;
    832}
    833
    834/*
    835 * Finds the entry in the list corresponding to a given handle.
    836 */
    837static struct qp_broker_entry *
    838qp_broker_handle_to_entry(struct vmci_handle handle)
    839{
    840	struct qp_broker_entry *entry;
    841	struct qp_entry *qp = qp_list_find(&qp_broker_list, handle);
    842
    843	entry = qp ? container_of(
    844		qp, struct qp_broker_entry, qp) : NULL;
    845	return entry;
    846}
    847
    848/*
    849 * Dispatches a queue pair event message directly into the local event
    850 * queue.
    851 */
    852static int qp_notify_peer_local(bool attach, struct vmci_handle handle)
    853{
    854	u32 context_id = vmci_get_context_id();
    855	struct vmci_event_qp ev;
    856
    857	ev.msg.hdr.dst = vmci_make_handle(context_id, VMCI_EVENT_HANDLER);
    858	ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
    859					  VMCI_CONTEXT_RESOURCE_ID);
    860	ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr);
    861	ev.msg.event_data.event =
    862	    attach ? VMCI_EVENT_QP_PEER_ATTACH : VMCI_EVENT_QP_PEER_DETACH;
    863	ev.payload.peer_id = context_id;
    864	ev.payload.handle = handle;
    865
    866	return vmci_event_dispatch(&ev.msg.hdr);
    867}
    868
    869/*
    870 * Allocates and initializes a qp_guest_endpoint structure.
    871 * Allocates a queue_pair rid (and handle) iff the given entry has
    872 * an invalid handle.  0 through VMCI_RESERVED_RESOURCE_ID_MAX
    873 * are reserved handles.  Assumes that the QP list mutex is held
    874 * by the caller.
    875 */
    876static struct qp_guest_endpoint *
    877qp_guest_endpoint_create(struct vmci_handle handle,
    878			 u32 peer,
    879			 u32 flags,
    880			 u64 produce_size,
    881			 u64 consume_size,
    882			 void *produce_q,
    883			 void *consume_q)
    884{
    885	int result;
    886	struct qp_guest_endpoint *entry;
    887	/* One page each for the queue headers. */
    888	const u64 num_ppns = DIV_ROUND_UP(produce_size, PAGE_SIZE) +
    889	    DIV_ROUND_UP(consume_size, PAGE_SIZE) + 2;
    890
    891	if (vmci_handle_is_invalid(handle)) {
    892		u32 context_id = vmci_get_context_id();
    893
    894		handle = vmci_make_handle(context_id, VMCI_INVALID_ID);
    895	}
    896
    897	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
    898	if (entry) {
    899		entry->qp.peer = peer;
    900		entry->qp.flags = flags;
    901		entry->qp.produce_size = produce_size;
    902		entry->qp.consume_size = consume_size;
    903		entry->qp.ref_count = 0;
    904		entry->num_ppns = num_ppns;
    905		entry->produce_q = produce_q;
    906		entry->consume_q = consume_q;
    907		INIT_LIST_HEAD(&entry->qp.list_item);
    908
    909		/* Add resource obj */
    910		result = vmci_resource_add(&entry->resource,
    911					   VMCI_RESOURCE_TYPE_QPAIR_GUEST,
    912					   handle);
    913		entry->qp.handle = vmci_resource_handle(&entry->resource);
    914		if ((result != VMCI_SUCCESS) ||
    915		    qp_list_find(&qp_guest_endpoints, entry->qp.handle)) {
    916			pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d",
    917				handle.context, handle.resource, result);
    918			kfree(entry);
    919			entry = NULL;
    920		}
    921	}
    922	return entry;
    923}
    924
    925/*
    926 * Frees a qp_guest_endpoint structure.
    927 */
    928static void qp_guest_endpoint_destroy(struct qp_guest_endpoint *entry)
    929{
    930	qp_free_ppn_set(&entry->ppn_set);
    931	qp_cleanup_queue_mutex(entry->produce_q, entry->consume_q);
    932	qp_free_queue(entry->produce_q, entry->qp.produce_size);
    933	qp_free_queue(entry->consume_q, entry->qp.consume_size);
    934	/* Unlink from resource hash table and free callback */
    935	vmci_resource_remove(&entry->resource);
    936
    937	kfree(entry);
    938}
    939
    940/*
    941 * Helper to make a queue_pairAlloc hypercall when the driver is
    942 * supporting a guest device.
    943 */
    944static int qp_alloc_hypercall(const struct qp_guest_endpoint *entry)
    945{
    946	struct vmci_qp_alloc_msg *alloc_msg;
    947	size_t msg_size;
    948	size_t ppn_size;
    949	int result;
    950
    951	if (!entry || entry->num_ppns <= 2)
    952		return VMCI_ERROR_INVALID_ARGS;
    953
    954	ppn_size = vmci_use_ppn64() ? sizeof(u64) : sizeof(u32);
    955	msg_size = sizeof(*alloc_msg) +
    956	    (size_t) entry->num_ppns * ppn_size;
    957	alloc_msg = kmalloc(msg_size, GFP_KERNEL);
    958	if (!alloc_msg)
    959		return VMCI_ERROR_NO_MEM;
    960
    961	alloc_msg->hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
    962					      VMCI_QUEUEPAIR_ALLOC);
    963	alloc_msg->hdr.src = VMCI_ANON_SRC_HANDLE;
    964	alloc_msg->hdr.payload_size = msg_size - VMCI_DG_HEADERSIZE;
    965	alloc_msg->handle = entry->qp.handle;
    966	alloc_msg->peer = entry->qp.peer;
    967	alloc_msg->flags = entry->qp.flags;
    968	alloc_msg->produce_size = entry->qp.produce_size;
    969	alloc_msg->consume_size = entry->qp.consume_size;
    970	alloc_msg->num_ppns = entry->num_ppns;
    971
    972	result = qp_populate_ppn_set((u8 *)alloc_msg + sizeof(*alloc_msg),
    973				     &entry->ppn_set);
    974	if (result == VMCI_SUCCESS)
    975		result = vmci_send_datagram(&alloc_msg->hdr);
    976
    977	kfree(alloc_msg);
    978
    979	return result;
    980}
    981
    982/*
    983 * Helper to make a queue_pairDetach hypercall when the driver is
    984 * supporting a guest device.
    985 */
    986static int qp_detatch_hypercall(struct vmci_handle handle)
    987{
    988	struct vmci_qp_detach_msg detach_msg;
    989
    990	detach_msg.hdr.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
    991					      VMCI_QUEUEPAIR_DETACH);
    992	detach_msg.hdr.src = VMCI_ANON_SRC_HANDLE;
    993	detach_msg.hdr.payload_size = sizeof(handle);
    994	detach_msg.handle = handle;
    995
    996	return vmci_send_datagram(&detach_msg.hdr);
    997}
    998
    999/*
   1000 * Adds the given entry to the list. Assumes that the list is locked.
   1001 */
   1002static void qp_list_add_entry(struct qp_list *qp_list, struct qp_entry *entry)
   1003{
   1004	if (entry)
   1005		list_add(&entry->list_item, &qp_list->head);
   1006}
   1007
   1008/*
   1009 * Removes the given entry from the list. Assumes that the list is locked.
   1010 */
   1011static void qp_list_remove_entry(struct qp_list *qp_list,
   1012				 struct qp_entry *entry)
   1013{
   1014	if (entry)
   1015		list_del(&entry->list_item);
   1016}
   1017
   1018/*
   1019 * Helper for VMCI queue_pair detach interface. Frees the physical
   1020 * pages for the queue pair.
   1021 */
   1022static int qp_detatch_guest_work(struct vmci_handle handle)
   1023{
   1024	int result;
   1025	struct qp_guest_endpoint *entry;
   1026	u32 ref_count = ~0;	/* To avoid compiler warning below */
   1027
   1028	mutex_lock(&qp_guest_endpoints.mutex);
   1029
   1030	entry = qp_guest_handle_to_entry(handle);
   1031	if (!entry) {
   1032		mutex_unlock(&qp_guest_endpoints.mutex);
   1033		return VMCI_ERROR_NOT_FOUND;
   1034	}
   1035
   1036	if (entry->qp.flags & VMCI_QPFLAG_LOCAL) {
   1037		result = VMCI_SUCCESS;
   1038
   1039		if (entry->qp.ref_count > 1) {
   1040			result = qp_notify_peer_local(false, handle);
   1041			/*
   1042			 * We can fail to notify a local queuepair
   1043			 * because we can't allocate.  We still want
   1044			 * to release the entry if that happens, so
   1045			 * don't bail out yet.
   1046			 */
   1047		}
   1048	} else {
   1049		result = qp_detatch_hypercall(handle);
   1050		if (result < VMCI_SUCCESS) {
   1051			/*
   1052			 * We failed to notify a non-local queuepair.
   1053			 * That other queuepair might still be
   1054			 * accessing the shared memory, so don't
   1055			 * release the entry yet.  It will get cleaned
   1056			 * up by VMCIqueue_pair_Exit() if necessary
   1057			 * (assuming we are going away, otherwise why
   1058			 * did this fail?).
   1059			 */
   1060
   1061			mutex_unlock(&qp_guest_endpoints.mutex);
   1062			return result;
   1063		}
   1064	}
   1065
   1066	/*
   1067	 * If we get here then we either failed to notify a local queuepair, or
   1068	 * we succeeded in all cases.  Release the entry if required.
   1069	 */
   1070
   1071	entry->qp.ref_count--;
   1072	if (entry->qp.ref_count == 0)
   1073		qp_list_remove_entry(&qp_guest_endpoints, &entry->qp);
   1074
   1075	/* If we didn't remove the entry, this could change once we unlock. */
   1076	if (entry)
   1077		ref_count = entry->qp.ref_count;
   1078
   1079	mutex_unlock(&qp_guest_endpoints.mutex);
   1080
   1081	if (ref_count == 0)
   1082		qp_guest_endpoint_destroy(entry);
   1083
   1084	return result;
   1085}
   1086
   1087/*
    1088 * This function handles the actual allocation of a VMCI queue
   1089 * pair guest endpoint. Allocates physical pages for the queue
   1090 * pair. It makes OS dependent calls through generic wrappers.
   1091 */
   1092static int qp_alloc_guest_work(struct vmci_handle *handle,
   1093			       struct vmci_queue **produce_q,
   1094			       u64 produce_size,
   1095			       struct vmci_queue **consume_q,
   1096			       u64 consume_size,
   1097			       u32 peer,
   1098			       u32 flags,
   1099			       u32 priv_flags)
   1100{
   1101	const u64 num_produce_pages =
   1102	    DIV_ROUND_UP(produce_size, PAGE_SIZE) + 1;
   1103	const u64 num_consume_pages =
   1104	    DIV_ROUND_UP(consume_size, PAGE_SIZE) + 1;
   1105	void *my_produce_q = NULL;
   1106	void *my_consume_q = NULL;
   1107	int result;
   1108	struct qp_guest_endpoint *queue_pair_entry = NULL;
   1109
   1110	if (priv_flags != VMCI_NO_PRIVILEGE_FLAGS)
   1111		return VMCI_ERROR_NO_ACCESS;
   1112
   1113	mutex_lock(&qp_guest_endpoints.mutex);
   1114
   1115	queue_pair_entry = qp_guest_handle_to_entry(*handle);
   1116	if (queue_pair_entry) {
   1117		if (queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) {
   1118			/* Local attach case. */
   1119			if (queue_pair_entry->qp.ref_count > 1) {
   1120				pr_devel("Error attempting to attach more than once\n");
   1121				result = VMCI_ERROR_UNAVAILABLE;
   1122				goto error_keep_entry;
   1123			}
   1124
   1125			if (queue_pair_entry->qp.produce_size != consume_size ||
   1126			    queue_pair_entry->qp.consume_size !=
   1127			    produce_size ||
   1128			    queue_pair_entry->qp.flags !=
   1129			    (flags & ~VMCI_QPFLAG_ATTACH_ONLY)) {
   1130				pr_devel("Error mismatched queue pair in local attach\n");
   1131				result = VMCI_ERROR_QUEUEPAIR_MISMATCH;
   1132				goto error_keep_entry;
   1133			}
   1134
   1135			/*
   1136			 * Do a local attach.  We swap the consume and
   1137			 * produce queues for the attacher and deliver
   1138			 * an attach event.
   1139			 */
   1140			result = qp_notify_peer_local(true, *handle);
   1141			if (result < VMCI_SUCCESS)
   1142				goto error_keep_entry;
   1143
   1144			my_produce_q = queue_pair_entry->consume_q;
   1145			my_consume_q = queue_pair_entry->produce_q;
   1146			goto out;
   1147		}
   1148
   1149		result = VMCI_ERROR_ALREADY_EXISTS;
   1150		goto error_keep_entry;
   1151	}
   1152
   1153	my_produce_q = qp_alloc_queue(produce_size, flags);
   1154	if (!my_produce_q) {
   1155		pr_warn("Error allocating pages for produce queue\n");
   1156		result = VMCI_ERROR_NO_MEM;
   1157		goto error;
   1158	}
   1159
   1160	my_consume_q = qp_alloc_queue(consume_size, flags);
   1161	if (!my_consume_q) {
   1162		pr_warn("Error allocating pages for consume queue\n");
   1163		result = VMCI_ERROR_NO_MEM;
   1164		goto error;
   1165	}
   1166
   1167	queue_pair_entry = qp_guest_endpoint_create(*handle, peer, flags,
   1168						    produce_size, consume_size,
   1169						    my_produce_q, my_consume_q);
   1170	if (!queue_pair_entry) {
   1171		pr_warn("Error allocating memory in %s\n", __func__);
   1172		result = VMCI_ERROR_NO_MEM;
   1173		goto error;
   1174	}
   1175
   1176	result = qp_alloc_ppn_set(my_produce_q, num_produce_pages, my_consume_q,
   1177				  num_consume_pages,
   1178				  &queue_pair_entry->ppn_set);
   1179	if (result < VMCI_SUCCESS) {
   1180		pr_warn("qp_alloc_ppn_set failed\n");
   1181		goto error;
   1182	}
   1183
   1184	/*
   1185	 * It's only necessary to notify the host if this queue pair will be
   1186	 * attached to from another context.
   1187	 */
   1188	if (queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) {
   1189		/* Local create case. */
   1190		u32 context_id = vmci_get_context_id();
   1191
   1192		/*
   1193		 * Enforce similar checks on local queue pairs as we
   1194		 * do for regular ones.  The handle's context must
   1195		 * match the creator or attacher context id (here they
   1196		 * are both the current context id) and the
   1197		 * attach-only flag cannot exist during create.  We
   1198		 * also ensure specified peer is this context or an
   1199		 * invalid one.
   1200		 */
   1201		if (queue_pair_entry->qp.handle.context != context_id ||
   1202		    (queue_pair_entry->qp.peer != VMCI_INVALID_ID &&
   1203		     queue_pair_entry->qp.peer != context_id)) {
   1204			result = VMCI_ERROR_NO_ACCESS;
   1205			goto error;
   1206		}
   1207
   1208		if (queue_pair_entry->qp.flags & VMCI_QPFLAG_ATTACH_ONLY) {
   1209			result = VMCI_ERROR_NOT_FOUND;
   1210			goto error;
   1211		}
   1212	} else {
   1213		result = qp_alloc_hypercall(queue_pair_entry);
   1214		if (result < VMCI_SUCCESS) {
   1215			pr_devel("qp_alloc_hypercall result = %d\n", result);
   1216			goto error;
   1217		}
   1218	}
   1219
   1220	qp_init_queue_mutex((struct vmci_queue *)my_produce_q,
   1221			    (struct vmci_queue *)my_consume_q);
   1222
   1223	qp_list_add_entry(&qp_guest_endpoints, &queue_pair_entry->qp);
   1224
   1225 out:
   1226	queue_pair_entry->qp.ref_count++;
   1227	*handle = queue_pair_entry->qp.handle;
   1228	*produce_q = (struct vmci_queue *)my_produce_q;
   1229	*consume_q = (struct vmci_queue *)my_consume_q;
   1230
   1231	/*
   1232	 * We should initialize the queue pair header pages on a local
   1233	 * queue pair create.  For non-local queue pairs, the
   1234	 * hypervisor initializes the header pages in the create step.
   1235	 */
   1236	if ((queue_pair_entry->qp.flags & VMCI_QPFLAG_LOCAL) &&
   1237	    queue_pair_entry->qp.ref_count == 1) {
   1238		vmci_q_header_init((*produce_q)->q_header, *handle);
   1239		vmci_q_header_init((*consume_q)->q_header, *handle);
   1240	}
   1241
   1242	mutex_unlock(&qp_guest_endpoints.mutex);
   1243
   1244	return VMCI_SUCCESS;
   1245
   1246 error:
   1247	mutex_unlock(&qp_guest_endpoints.mutex);
   1248	if (queue_pair_entry) {
   1249		/* The queues will be freed inside the destroy routine. */
   1250		qp_guest_endpoint_destroy(queue_pair_entry);
   1251	} else {
   1252		qp_free_queue(my_produce_q, produce_size);
   1253		qp_free_queue(my_consume_q, consume_size);
   1254	}
   1255	return result;
   1256
   1257 error_keep_entry:
   1258	/* This path should only be used when an existing entry was found. */
   1259	mutex_unlock(&qp_guest_endpoints.mutex);
   1260	return result;
   1261}
   1262
   1263/*
   1264 * The first endpoint issuing a queue pair allocation will create the state
   1265 * of the queue pair in the queue pair broker.
   1266 *
   1267 * If the creator is a guest, it will associate a VMX virtual address range
   1268 * with the queue pair as specified by the page_store. For compatibility with
    1269 * older VMX'en, which use a separate step to set the VMX virtual
   1270 * address range, the virtual address range can be registered later using
   1271 * vmci_qp_broker_set_page_store. In that case, a page_store of NULL should be
   1272 * used.
   1273 *
   1274 * If the creator is the host, a page_store of NULL should be used as well,
   1275 * since the host is not able to supply a page store for the queue pair.
   1276 *
   1277 * For older VMX and host callers, the queue pair will be created in the
   1278 * VMCIQPB_CREATED_NO_MEM state, and for current VMX callers, it will be
    1279 * created in the VMCIQPB_CREATED_MEM state.
   1280 */
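/*
 * Summary sketch of the resulting create-time states (illustrative,
 * condensed from the description above and the code below):
 *
 *	creator                      page_store   resulting state
 *	new-style VMX (guest)        non-NULL     VMCIQPB_CREATED_MEM
 *	old-style VMX (guest)        NULL         VMCIQPB_CREATED_NO_MEM (set later)
 *	host endpoint                NULL         VMCIQPB_CREATED_NO_MEM
 *	local (VMCI_QPFLAG_LOCAL)    n/a          VMCIQPB_CREATED_MEM (kernel local_mem)
 */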
   1281static int qp_broker_create(struct vmci_handle handle,
   1282			    u32 peer,
   1283			    u32 flags,
   1284			    u32 priv_flags,
   1285			    u64 produce_size,
   1286			    u64 consume_size,
   1287			    struct vmci_qp_page_store *page_store,
   1288			    struct vmci_ctx *context,
   1289			    vmci_event_release_cb wakeup_cb,
   1290			    void *client_data, struct qp_broker_entry **ent)
   1291{
   1292	struct qp_broker_entry *entry = NULL;
   1293	const u32 context_id = vmci_ctx_get_id(context);
   1294	bool is_local = flags & VMCI_QPFLAG_LOCAL;
   1295	int result;
   1296	u64 guest_produce_size;
   1297	u64 guest_consume_size;
   1298
   1299	/* Do not create if the caller asked not to. */
   1300	if (flags & VMCI_QPFLAG_ATTACH_ONLY)
   1301		return VMCI_ERROR_NOT_FOUND;
   1302
   1303	/*
   1304	 * Creator's context ID should match handle's context ID or the creator
   1305	 * must allow the context in handle's context ID as the "peer".
   1306	 */
   1307	if (handle.context != context_id && handle.context != peer)
   1308		return VMCI_ERROR_NO_ACCESS;
   1309
   1310	if (VMCI_CONTEXT_IS_VM(context_id) && VMCI_CONTEXT_IS_VM(peer))
   1311		return VMCI_ERROR_DST_UNREACHABLE;
   1312
   1313	/*
   1314	 * Creator's context ID for local queue pairs should match the
   1315	 * peer, if a peer is specified.
   1316	 */
   1317	if (is_local && peer != VMCI_INVALID_ID && context_id != peer)
   1318		return VMCI_ERROR_NO_ACCESS;
   1319
   1320	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
   1321	if (!entry)
   1322		return VMCI_ERROR_NO_MEM;
   1323
   1324	if (vmci_ctx_get_id(context) == VMCI_HOST_CONTEXT_ID && !is_local) {
   1325		/*
   1326		 * The queue pair broker entry stores values from the guest
   1327		 * point of view, so a creating host side endpoint should swap
   1328		 * produce and consume values -- unless it is a local queue
   1329		 * pair, in which case no swapping is necessary, since the local
   1330		 * attacher will swap queues.
   1331		 */
   1332
   1333		guest_produce_size = consume_size;
   1334		guest_consume_size = produce_size;
   1335	} else {
   1336		guest_produce_size = produce_size;
   1337		guest_consume_size = consume_size;
   1338	}
   1339
   1340	entry->qp.handle = handle;
   1341	entry->qp.peer = peer;
   1342	entry->qp.flags = flags;
   1343	entry->qp.produce_size = guest_produce_size;
   1344	entry->qp.consume_size = guest_consume_size;
   1345	entry->qp.ref_count = 1;
   1346	entry->create_id = context_id;
   1347	entry->attach_id = VMCI_INVALID_ID;
   1348	entry->state = VMCIQPB_NEW;
   1349	entry->require_trusted_attach =
   1350	    !!(context->priv_flags & VMCI_PRIVILEGE_FLAG_RESTRICTED);
   1351	entry->created_by_trusted =
   1352	    !!(priv_flags & VMCI_PRIVILEGE_FLAG_TRUSTED);
   1353	entry->vmci_page_files = false;
   1354	entry->wakeup_cb = wakeup_cb;
   1355	entry->client_data = client_data;
   1356	entry->produce_q = qp_host_alloc_queue(guest_produce_size);
   1357	if (entry->produce_q == NULL) {
   1358		result = VMCI_ERROR_NO_MEM;
   1359		goto error;
   1360	}
   1361	entry->consume_q = qp_host_alloc_queue(guest_consume_size);
   1362	if (entry->consume_q == NULL) {
   1363		result = VMCI_ERROR_NO_MEM;
   1364		goto error;
   1365	}
   1366
   1367	qp_init_queue_mutex(entry->produce_q, entry->consume_q);
   1368
   1369	INIT_LIST_HEAD(&entry->qp.list_item);
   1370
   1371	if (is_local) {
   1372		u8 *tmp;
   1373
   1374		entry->local_mem = kcalloc(QPE_NUM_PAGES(entry->qp),
   1375					   PAGE_SIZE, GFP_KERNEL);
   1376		if (entry->local_mem == NULL) {
   1377			result = VMCI_ERROR_NO_MEM;
   1378			goto error;
   1379		}
   1380		entry->state = VMCIQPB_CREATED_MEM;
   1381		entry->produce_q->q_header = entry->local_mem;
   1382		tmp = (u8 *)entry->local_mem + PAGE_SIZE *
   1383		    (DIV_ROUND_UP(entry->qp.produce_size, PAGE_SIZE) + 1);
   1384		entry->consume_q->q_header = (struct vmci_queue_header *)tmp;
   1385	} else if (page_store) {
   1386		/*
   1387		 * The VMX already initialized the queue pair headers, so no
   1388		 * need for the kernel side to do that.
   1389		 */
   1390		result = qp_host_register_user_memory(page_store,
   1391						      entry->produce_q,
   1392						      entry->consume_q);
   1393		if (result < VMCI_SUCCESS)
   1394			goto error;
   1395
   1396		entry->state = VMCIQPB_CREATED_MEM;
   1397	} else {
   1398		/*
   1399		 * A create without a page_store may be either a host
   1400		 * side create (in which case we are waiting for the
   1401		 * guest side to supply the memory) or an old style
   1402		 * queue pair create (in which case we will expect a
   1403		 * set page store call as the next step).
   1404		 */
   1405		entry->state = VMCIQPB_CREATED_NO_MEM;
   1406	}
   1407
   1408	qp_list_add_entry(&qp_broker_list, &entry->qp);
   1409	if (ent != NULL)
   1410		*ent = entry;
   1411
   1412	/* Add to resource obj */
   1413	result = vmci_resource_add(&entry->resource,
   1414				   VMCI_RESOURCE_TYPE_QPAIR_HOST,
   1415				   handle);
   1416	if (result != VMCI_SUCCESS) {
   1417		pr_warn("Failed to add new resource (handle=0x%x:0x%x), error: %d",
   1418			handle.context, handle.resource, result);
   1419		goto error;
   1420	}
   1421
   1422	entry->qp.handle = vmci_resource_handle(&entry->resource);
   1423	if (is_local) {
   1424		vmci_q_header_init(entry->produce_q->q_header,
   1425				   entry->qp.handle);
   1426		vmci_q_header_init(entry->consume_q->q_header,
   1427				   entry->qp.handle);
   1428	}
   1429
   1430	vmci_ctx_qp_create(context, entry->qp.handle);
   1431
   1432	return VMCI_SUCCESS;
   1433
   1434 error:
   1435	if (entry != NULL) {
   1436		qp_host_free_queue(entry->produce_q, guest_produce_size);
   1437		qp_host_free_queue(entry->consume_q, guest_consume_size);
   1438		kfree(entry);
   1439	}
   1440
   1441	return result;
   1442}
   1443
   1444/*
   1445 * Enqueues an event datagram to notify the peer VM attached to
   1446 * the given queue pair handle about attach/detach event by the
   1447 * given VM.  Returns Payload size of datagram enqueued on
   1448 * success, error code otherwise.
   1449 */
   1450static int qp_notify_peer(bool attach,
   1451			  struct vmci_handle handle,
   1452			  u32 my_id,
   1453			  u32 peer_id)
   1454{
   1455	int rv;
   1456	struct vmci_event_qp ev;
   1457
   1458	if (vmci_handle_is_invalid(handle) || my_id == VMCI_INVALID_ID ||
   1459	    peer_id == VMCI_INVALID_ID)
   1460		return VMCI_ERROR_INVALID_ARGS;
   1461
   1462	/*
    1463 * In vmci_ctx_enqueue_datagram() we enforce the upper limit on the
    1464 * number of pending events from the hypervisor to a given VM;
    1465 * otherwise a rogue VM could do an arbitrary number of attach
    1466 * and detach operations, causing memory pressure in the host
   1467	 * kernel.
   1468	 */
   1469
   1470	ev.msg.hdr.dst = vmci_make_handle(peer_id, VMCI_EVENT_HANDLER);
   1471	ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
   1472					  VMCI_CONTEXT_RESOURCE_ID);
   1473	ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr);
   1474	ev.msg.event_data.event = attach ?
   1475	    VMCI_EVENT_QP_PEER_ATTACH : VMCI_EVENT_QP_PEER_DETACH;
   1476	ev.payload.handle = handle;
   1477	ev.payload.peer_id = my_id;
   1478
   1479	rv = vmci_datagram_dispatch(VMCI_HYPERVISOR_CONTEXT_ID,
   1480				    &ev.msg.hdr, false);
   1481	if (rv < VMCI_SUCCESS)
   1482		pr_warn("Failed to enqueue queue_pair %s event datagram for context (ID=0x%x)\n",
   1483			attach ? "ATTACH" : "DETACH", peer_id);
   1484
   1485	return rv;
   1486}
   1487
   1488/*
   1489 * The second endpoint issuing a queue pair allocation will attach to
   1490 * the queue pair registered with the queue pair broker.
   1491 *
   1492 * If the attacher is a guest, it will associate a VMX virtual address
   1493 * range with the queue pair as specified by the page_store. At this
    1494 * point, the already attached host endpoint may start using the queue
   1495 * pair, and an attach event is sent to it. For compatibility with
    1496 * older VMX'en, which used a separate step to set the VMX virtual
   1497 * address range, the virtual address range can be registered later
   1498 * using vmci_qp_broker_set_page_store. In that case, a page_store of
   1499 * NULL should be used, and the attach event will be generated once
   1500 * the actual page store has been set.
   1501 *
   1502 * If the attacher is the host, a page_store of NULL should be used as
   1503 * well, since the page store information is already set by the guest.
   1504 *
   1505 * For new VMX and host callers, the queue pair will be moved to the
   1506 * VMCIQPB_ATTACHED_MEM state, and for older VMX callers, it will be
    1507 * moved to the VMCIQPB_ATTACHED_NO_MEM state.
   1508 */
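/*
 * Summary sketch of the attach-time transitions (illustrative,
 * condensed from the description above):
 *
 *	attacher                 page_store   resulting state
 *	new-style VMX (guest)    non-NULL     VMCIQPB_ATTACHED_MEM
 *	old-style VMX (guest)    NULL         VMCIQPB_ATTACHED_NO_MEM (set later)
 *	host endpoint            NULL         VMCIQPB_ATTACHED_MEM (guest memory set)
 */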
   1509static int qp_broker_attach(struct qp_broker_entry *entry,
   1510			    u32 peer,
   1511			    u32 flags,
   1512			    u32 priv_flags,
   1513			    u64 produce_size,
   1514			    u64 consume_size,
   1515			    struct vmci_qp_page_store *page_store,
   1516			    struct vmci_ctx *context,
   1517			    vmci_event_release_cb wakeup_cb,
   1518			    void *client_data,
   1519			    struct qp_broker_entry **ent)
   1520{
   1521	const u32 context_id = vmci_ctx_get_id(context);
   1522	bool is_local = flags & VMCI_QPFLAG_LOCAL;
   1523	int result;
   1524
   1525	if (entry->state != VMCIQPB_CREATED_NO_MEM &&
   1526	    entry->state != VMCIQPB_CREATED_MEM)
   1527		return VMCI_ERROR_UNAVAILABLE;
   1528
   1529	if (is_local) {
   1530		if (!(entry->qp.flags & VMCI_QPFLAG_LOCAL) ||
   1531		    context_id != entry->create_id) {
   1532			return VMCI_ERROR_INVALID_ARGS;
   1533		}
   1534	} else if (context_id == entry->create_id ||
   1535		   context_id == entry->attach_id) {
   1536		return VMCI_ERROR_ALREADY_EXISTS;
   1537	}
   1538
   1539	if (VMCI_CONTEXT_IS_VM(context_id) &&
   1540	    VMCI_CONTEXT_IS_VM(entry->create_id))
   1541		return VMCI_ERROR_DST_UNREACHABLE;
   1542
   1543	/*
   1544	 * If we are attaching from a restricted context then the queuepair
   1545	 * must have been created by a trusted endpoint.
   1546	 */
   1547	if ((context->priv_flags & VMCI_PRIVILEGE_FLAG_RESTRICTED) &&
   1548	    !entry->created_by_trusted)
   1549		return VMCI_ERROR_NO_ACCESS;
   1550
   1551	/*
   1552	 * If we are attaching to a queuepair that was created by a restricted
   1553	 * context then we must be trusted.
   1554	 */
   1555	if (entry->require_trusted_attach &&
   1556	    (!(priv_flags & VMCI_PRIVILEGE_FLAG_TRUSTED)))
   1557		return VMCI_ERROR_NO_ACCESS;
   1558
   1559	/*
   1560	 * If the creator specifies VMCI_INVALID_ID in "peer" field, access
   1561	 * control check is not performed.
   1562	 */
   1563	if (entry->qp.peer != VMCI_INVALID_ID && entry->qp.peer != context_id)
   1564		return VMCI_ERROR_NO_ACCESS;
   1565
   1566	if (entry->create_id == VMCI_HOST_CONTEXT_ID) {
   1567		/*
   1568		 * Do not attach if the caller doesn't support Host Queue Pairs
   1569		 * and a host created this queue pair.
   1570		 */
   1571
   1572		if (!vmci_ctx_supports_host_qp(context))
   1573			return VMCI_ERROR_INVALID_RESOURCE;
   1574
   1575	} else if (context_id == VMCI_HOST_CONTEXT_ID) {
   1576		struct vmci_ctx *create_context;
   1577		bool supports_host_qp;
   1578
   1579		/*
   1580		 * Do not attach a host to a user created queue pair if that
   1581		 * user doesn't support host queue pair end points.
   1582		 */
   1583
   1584		create_context = vmci_ctx_get(entry->create_id);
   1585		supports_host_qp = vmci_ctx_supports_host_qp(create_context);
   1586		vmci_ctx_put(create_context);
   1587
   1588		if (!supports_host_qp)
   1589			return VMCI_ERROR_INVALID_RESOURCE;
   1590	}
   1591
   1592	if ((entry->qp.flags & ~VMCI_QP_ASYMM) != (flags & ~VMCI_QP_ASYMM_PEER))
   1593		return VMCI_ERROR_QUEUEPAIR_MISMATCH;
   1594
   1595	if (context_id != VMCI_HOST_CONTEXT_ID) {
   1596		/*
   1597		 * The queue pair broker entry stores values from the guest
   1598		 * point of view, so an attaching guest should match the values
   1599		 * stored in the entry.
   1600		 */
   1601
   1602		if (entry->qp.produce_size != produce_size ||
   1603		    entry->qp.consume_size != consume_size) {
   1604			return VMCI_ERROR_QUEUEPAIR_MISMATCH;
   1605		}
   1606	} else if (entry->qp.produce_size != consume_size ||
   1607		   entry->qp.consume_size != produce_size) {
   1608		return VMCI_ERROR_QUEUEPAIR_MISMATCH;
   1609	}
   1610
   1611	if (context_id != VMCI_HOST_CONTEXT_ID) {
   1612		/*
   1613		 * If a guest attached to a queue pair, it will supply
   1614		 * the backing memory.  If this is a pre NOVMVM vmx,
   1615		 * the backing memory will be supplied by calling
   1616		 * vmci_qp_broker_set_page_store() following the
   1617		 * return of the vmci_qp_broker_alloc() call. If it is
   1618		 * a vmx of version NOVMVM or later, the page store
   1619		 * must be supplied as part of the
    1620 * vmci_qp_broker_alloc call.  In all cases, the initially
    1621 * created queue pair must not already have any memory
    1622 * associated with it.
   1623		 */
   1624
   1625		if (entry->state != VMCIQPB_CREATED_NO_MEM)
   1626			return VMCI_ERROR_INVALID_ARGS;
   1627
   1628		if (page_store != NULL) {
   1629			/*
   1630			 * Patch up host state to point to guest
   1631			 * supplied memory. The VMX already
   1632			 * initialized the queue pair headers, so no
   1633			 * need for the kernel side to do that.
   1634			 */
   1635
   1636			result = qp_host_register_user_memory(page_store,
   1637							      entry->produce_q,
   1638							      entry->consume_q);
   1639			if (result < VMCI_SUCCESS)
   1640				return result;
   1641
   1642			entry->state = VMCIQPB_ATTACHED_MEM;
   1643		} else {
   1644			entry->state = VMCIQPB_ATTACHED_NO_MEM;
   1645		}
   1646	} else if (entry->state == VMCIQPB_CREATED_NO_MEM) {
   1647		/*
   1648		 * The host side is attempting to attach to a queue
   1649		 * pair that doesn't have any memory associated with
   1650		 * it. This must be a pre NOVMVM vmx that hasn't set
   1651		 * the page store information yet, or a quiesced VM.
   1652		 */
   1653
   1654		return VMCI_ERROR_UNAVAILABLE;
   1655	} else {
   1656		/* The host side has successfully attached to a queue pair. */
   1657		entry->state = VMCIQPB_ATTACHED_MEM;
   1658	}
   1659
   1660	if (entry->state == VMCIQPB_ATTACHED_MEM) {
   1661		result =
   1662		    qp_notify_peer(true, entry->qp.handle, context_id,
   1663				   entry->create_id);
   1664		if (result < VMCI_SUCCESS)
   1665			pr_warn("Failed to notify peer (ID=0x%x) of attach to queue pair (handle=0x%x:0x%x)\n",
   1666				entry->create_id, entry->qp.handle.context,
   1667				entry->qp.handle.resource);
   1668	}
   1669
   1670	entry->attach_id = context_id;
   1671	entry->qp.ref_count++;
   1672	if (wakeup_cb) {
   1673		entry->wakeup_cb = wakeup_cb;
   1674		entry->client_data = client_data;
   1675	}
   1676
   1677	/*
   1678	 * When attaching to local queue pairs, the context already has
   1679	 * an entry tracking the queue pair, so don't add another one.
   1680	 */
   1681	if (!is_local)
   1682		vmci_ctx_qp_create(context, entry->qp.handle);
   1683
   1684	if (ent != NULL)
   1685		*ent = entry;
   1686
   1687	return VMCI_SUCCESS;
   1688}
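
/*
 * Example (illustrative only): if a host endpoint created the pair, the
 * entry sits in VMCIQPB_CREATED_NO_MEM.  A new-style guest that attaches
 * with a page_store has its memory registered via
 * qp_host_register_user_memory(), the entry moves to
 * VMCIQPB_ATTACHED_MEM, and the creator is sent a
 * VMCI_EVENT_QP_PEER_ATTACH datagram by qp_notify_peer().
 */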
   1689
   1690/*
    1691 * Queue pair alloc for use when setting up queue pair endpoints
   1692 * on the host.
   1693 */
   1694static int qp_broker_alloc(struct vmci_handle handle,
   1695			   u32 peer,
   1696			   u32 flags,
   1697			   u32 priv_flags,
   1698			   u64 produce_size,
   1699			   u64 consume_size,
   1700			   struct vmci_qp_page_store *page_store,
   1701			   struct vmci_ctx *context,
   1702			   vmci_event_release_cb wakeup_cb,
   1703			   void *client_data,
   1704			   struct qp_broker_entry **ent,
   1705			   bool *swap)
   1706{
   1707	const u32 context_id = vmci_ctx_get_id(context);
   1708	bool create;
   1709	struct qp_broker_entry *entry = NULL;
   1710	bool is_local = flags & VMCI_QPFLAG_LOCAL;
   1711	int result;
   1712
   1713	if (vmci_handle_is_invalid(handle) ||
   1714	    (flags & ~VMCI_QP_ALL_FLAGS) || is_local ||
   1715	    !(produce_size || consume_size) ||
   1716	    !context || context_id == VMCI_INVALID_ID ||
   1717	    handle.context == VMCI_INVALID_ID) {
   1718		return VMCI_ERROR_INVALID_ARGS;
   1719	}
   1720
   1721	if (page_store && !VMCI_QP_PAGESTORE_IS_WELLFORMED(page_store))
   1722		return VMCI_ERROR_INVALID_ARGS;
   1723
   1724	/*
   1725	 * In the initial argument check, we ensure that non-vmkernel hosts
   1726	 * are not allowed to create local queue pairs.
   1727	 */
   1728
   1729	mutex_lock(&qp_broker_list.mutex);
   1730
   1731	if (!is_local && vmci_ctx_qp_exists(context, handle)) {
   1732		pr_devel("Context (ID=0x%x) already attached to queue pair (handle=0x%x:0x%x)\n",
   1733			 context_id, handle.context, handle.resource);
   1734		mutex_unlock(&qp_broker_list.mutex);
   1735		return VMCI_ERROR_ALREADY_EXISTS;
   1736	}
   1737
   1738	if (handle.resource != VMCI_INVALID_ID)
   1739		entry = qp_broker_handle_to_entry(handle);
   1740
   1741	if (!entry) {
   1742		create = true;
   1743		result =
   1744		    qp_broker_create(handle, peer, flags, priv_flags,
   1745				     produce_size, consume_size, page_store,
   1746				     context, wakeup_cb, client_data, ent);
   1747	} else {
   1748		create = false;
   1749		result =
   1750		    qp_broker_attach(entry, peer, flags, priv_flags,
   1751				     produce_size, consume_size, page_store,
   1752				     context, wakeup_cb, client_data, ent);
   1753	}
   1754
   1755	mutex_unlock(&qp_broker_list.mutex);
   1756
   1757	if (swap)
   1758		*swap = (context_id == VMCI_HOST_CONTEXT_ID) &&
   1759		    !(create && is_local);
   1760
   1761	return result;
   1762}
   1763
   1764/*
   1765 * This function implements the kernel API for allocating a queue
   1766 * pair.
   1767 */
   1768static int qp_alloc_host_work(struct vmci_handle *handle,
   1769			      struct vmci_queue **produce_q,
   1770			      u64 produce_size,
   1771			      struct vmci_queue **consume_q,
   1772			      u64 consume_size,
   1773			      u32 peer,
   1774			      u32 flags,
   1775			      u32 priv_flags,
   1776			      vmci_event_release_cb wakeup_cb,
   1777			      void *client_data)
   1778{
   1779	struct vmci_handle new_handle;
   1780	struct vmci_ctx *context;
   1781	struct qp_broker_entry *entry;
   1782	int result;
   1783	bool swap;
   1784
   1785	if (vmci_handle_is_invalid(*handle)) {
   1786		new_handle = vmci_make_handle(
   1787			VMCI_HOST_CONTEXT_ID, VMCI_INVALID_ID);
   1788	} else
   1789		new_handle = *handle;
   1790
   1791	context = vmci_ctx_get(VMCI_HOST_CONTEXT_ID);
   1792	entry = NULL;
   1793	result =
   1794	    qp_broker_alloc(new_handle, peer, flags, priv_flags,
   1795			    produce_size, consume_size, NULL, context,
   1796			    wakeup_cb, client_data, &entry, &swap);
   1797	if (result == VMCI_SUCCESS) {
   1798		if (swap) {
   1799			/*
   1800			 * If this is a local queue pair, the attacher
   1801			 * will swap around produce and consume
   1802			 * queues.
   1803			 */
   1804
   1805			*produce_q = entry->consume_q;
   1806			*consume_q = entry->produce_q;
   1807		} else {
   1808			*produce_q = entry->produce_q;
   1809			*consume_q = entry->consume_q;
   1810		}
   1811
   1812		*handle = vmci_resource_handle(&entry->resource);
   1813	} else {
   1814		*handle = VMCI_INVALID_HANDLE;
   1815		pr_devel("queue pair broker failed to alloc (result=%d)\n",
   1816			 result);
   1817	}
   1818	vmci_ctx_put(context);
   1819	return result;
   1820}
   1821
   1822/*
   1823 * Allocates a VMCI queue_pair. Only checks validity of input
   1824 * arguments. The real work is done in the host or guest
   1825 * specific function.
   1826 */
   1827int vmci_qp_alloc(struct vmci_handle *handle,
   1828		  struct vmci_queue **produce_q,
   1829		  u64 produce_size,
   1830		  struct vmci_queue **consume_q,
   1831		  u64 consume_size,
   1832		  u32 peer,
   1833		  u32 flags,
   1834		  u32 priv_flags,
   1835		  bool guest_endpoint,
   1836		  vmci_event_release_cb wakeup_cb,
   1837		  void *client_data)
   1838{
   1839	if (!handle || !produce_q || !consume_q ||
   1840	    (!produce_size && !consume_size) || (flags & ~VMCI_QP_ALL_FLAGS))
   1841		return VMCI_ERROR_INVALID_ARGS;
   1842
   1843	if (guest_endpoint) {
   1844		return qp_alloc_guest_work(handle, produce_q,
   1845					   produce_size, consume_q,
   1846					   consume_size, peer,
   1847					   flags, priv_flags);
   1848	} else {
   1849		return qp_alloc_host_work(handle, produce_q,
   1850					  produce_size, consume_q,
   1851					  consume_size, peer, flags,
   1852					  priv_flags, wakeup_cb, client_data);
   1853	}
   1854}
   1855
   1856/*
   1857 * This function implements the host kernel API for detaching from
   1858 * a queue pair.
   1859 */
   1860static int qp_detatch_host_work(struct vmci_handle handle)
   1861{
   1862	int result;
   1863	struct vmci_ctx *context;
   1864
   1865	context = vmci_ctx_get(VMCI_HOST_CONTEXT_ID);
   1866
   1867	result = vmci_qp_broker_detach(handle, context);
   1868
   1869	vmci_ctx_put(context);
   1870	return result;
   1871}
   1872
   1873/*
   1874 * Detaches from a VMCI queue_pair. Only checks validity of input argument.
   1875 * Real work is done in the host or guest specific function.
   1876 */
   1877static int qp_detatch(struct vmci_handle handle, bool guest_endpoint)
   1878{
   1879	if (vmci_handle_is_invalid(handle))
   1880		return VMCI_ERROR_INVALID_ARGS;
   1881
   1882	if (guest_endpoint)
   1883		return qp_detatch_guest_work(handle);
   1884	else
   1885		return qp_detatch_host_work(handle);
   1886}
   1887
   1888/*
   1889 * Returns the entry from the head of the list. Assumes that the list is
   1890 * locked.
   1891 */
   1892static struct qp_entry *qp_list_get_head(struct qp_list *qp_list)
   1893{
   1894	if (!list_empty(&qp_list->head)) {
   1895		struct qp_entry *entry =
   1896		    list_first_entry(&qp_list->head, struct qp_entry,
   1897				     list_item);
   1898		return entry;
   1899	}
   1900
   1901	return NULL;
   1902}
   1903
   1904void vmci_qp_broker_exit(void)
   1905{
   1906	struct qp_entry *entry;
   1907	struct qp_broker_entry *be;
   1908
   1909	mutex_lock(&qp_broker_list.mutex);
   1910
   1911	while ((entry = qp_list_get_head(&qp_broker_list))) {
   1912		be = (struct qp_broker_entry *)entry;
   1913
   1914		qp_list_remove_entry(&qp_broker_list, entry);
   1915		kfree(be);
   1916	}
   1917
   1918	mutex_unlock(&qp_broker_list.mutex);
   1919}
   1920
   1921/*
   1922 * Requests that a queue pair be allocated with the VMCI queue
   1923 * pair broker. Allocates a queue pair entry if one does not
   1924 * exist. Attaches to one if it exists, and retrieves the page
   1925 * files backing that queue_pair.  Assumes that the queue pair
   1926 * broker lock is held.
   1927 */
   1928int vmci_qp_broker_alloc(struct vmci_handle handle,
   1929			 u32 peer,
   1930			 u32 flags,
   1931			 u32 priv_flags,
   1932			 u64 produce_size,
   1933			 u64 consume_size,
   1934			 struct vmci_qp_page_store *page_store,
   1935			 struct vmci_ctx *context)
   1936{
   1937	if (!QP_SIZES_ARE_VALID(produce_size, consume_size))
   1938		return VMCI_ERROR_NO_RESOURCES;
   1939
   1940	return qp_broker_alloc(handle, peer, flags, priv_flags,
   1941			       produce_size, consume_size,
   1942			       page_store, context, NULL, NULL, NULL, NULL);
   1943}
   1944
   1945/*
   1946 * VMX'en with versions lower than VMCI_VERSION_NOVMVM use a separate
   1947 * step to add the UVAs of the VMX mapping of the queue pair. This function
   1948 * provides backwards compatibility with such VMX'en, and takes care of
   1949 * registering the page store for a queue pair previously allocated by the
   1950 * VMX during create or attach. This function will move the queue pair state
    1951 * either from VMCIQPB_CREATED_NO_MEM to VMCIQPB_CREATED_MEM or from
    1952 * VMCIQPB_ATTACHED_NO_MEM to VMCIQPB_ATTACHED_MEM. If moving to the
   1953 * attached state with memory, the queue pair is ready to be used by the
   1954 * host peer, and an attached event will be generated.
   1955 *
   1956 * Assumes that the queue pair broker lock is held.
   1957 *
   1958 * This function is only used by the hosted platform, since there is no
   1959 * issue with backwards compatibility for vmkernel.
   1960 */
   1961int vmci_qp_broker_set_page_store(struct vmci_handle handle,
   1962				  u64 produce_uva,
   1963				  u64 consume_uva,
   1964				  struct vmci_ctx *context)
   1965{
   1966	struct qp_broker_entry *entry;
   1967	int result;
   1968	const u32 context_id = vmci_ctx_get_id(context);
   1969
   1970	if (vmci_handle_is_invalid(handle) || !context ||
   1971	    context_id == VMCI_INVALID_ID)
   1972		return VMCI_ERROR_INVALID_ARGS;
   1973
   1974	/*
   1975	 * We only support guest to host queue pairs, so the VMX must
   1976	 * supply UVAs for the mapped page files.
   1977	 */
   1978
   1979	if (produce_uva == 0 || consume_uva == 0)
   1980		return VMCI_ERROR_INVALID_ARGS;
   1981
   1982	mutex_lock(&qp_broker_list.mutex);
   1983
   1984	if (!vmci_ctx_qp_exists(context, handle)) {
   1985		pr_warn("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n",
   1986			context_id, handle.context, handle.resource);
   1987		result = VMCI_ERROR_NOT_FOUND;
   1988		goto out;
   1989	}
   1990
   1991	entry = qp_broker_handle_to_entry(handle);
   1992	if (!entry) {
   1993		result = VMCI_ERROR_NOT_FOUND;
   1994		goto out;
   1995	}
   1996
   1997	/*
   1998	 * If I'm the owner then I can set the page store.
   1999	 *
   2000	 * Or, if a host created the queue_pair and I'm the attached peer
   2001	 * then I can set the page store.
   2002	 */
   2003	if (entry->create_id != context_id &&
   2004	    (entry->create_id != VMCI_HOST_CONTEXT_ID ||
   2005	     entry->attach_id != context_id)) {
   2006		result = VMCI_ERROR_QUEUEPAIR_NOTOWNER;
   2007		goto out;
   2008	}
   2009
   2010	if (entry->state != VMCIQPB_CREATED_NO_MEM &&
   2011	    entry->state != VMCIQPB_ATTACHED_NO_MEM) {
   2012		result = VMCI_ERROR_UNAVAILABLE;
   2013		goto out;
   2014	}
   2015
   2016	result = qp_host_get_user_memory(produce_uva, consume_uva,
   2017					 entry->produce_q, entry->consume_q);
   2018	if (result < VMCI_SUCCESS)
   2019		goto out;
   2020
   2021	result = qp_host_map_queues(entry->produce_q, entry->consume_q);
   2022	if (result < VMCI_SUCCESS) {
   2023		qp_host_unregister_user_memory(entry->produce_q,
   2024					       entry->consume_q);
   2025		goto out;
   2026	}
   2027
   2028	if (entry->state == VMCIQPB_CREATED_NO_MEM)
   2029		entry->state = VMCIQPB_CREATED_MEM;
   2030	else
   2031		entry->state = VMCIQPB_ATTACHED_MEM;
   2032
   2033	entry->vmci_page_files = true;
   2034
   2035	if (entry->state == VMCIQPB_ATTACHED_MEM) {
   2036		result =
   2037		    qp_notify_peer(true, handle, context_id, entry->create_id);
   2038		if (result < VMCI_SUCCESS) {
   2039			pr_warn("Failed to notify peer (ID=0x%x) of attach to queue pair (handle=0x%x:0x%x)\n",
   2040				entry->create_id, entry->qp.handle.context,
   2041				entry->qp.handle.resource);
   2042		}
   2043	}
   2044
   2045	result = VMCI_SUCCESS;
   2046 out:
   2047	mutex_unlock(&qp_broker_list.mutex);
   2048	return result;
   2049}
   2050
   2051/*
   2052 * Resets saved queue headers for the given QP broker
   2053 * entry. Should be used when guest memory becomes available
   2054 * again, or the guest detaches.
   2055 */
   2056static void qp_reset_saved_headers(struct qp_broker_entry *entry)
   2057{
   2058	entry->produce_q->saved_header = NULL;
   2059	entry->consume_q->saved_header = NULL;
   2060}
   2061
   2062/*
   2063 * The main entry point for detaching from a queue pair registered with the
   2064 * queue pair broker. If more than one endpoint is attached to the queue
   2065 * pair, the first endpoint will mainly decrement a reference count and
   2066 * generate a notification to its peer. The last endpoint will clean up
   2067 * the queue pair state registered with the broker.
   2068 *
   2069 * When a guest endpoint detaches, it will unmap and unregister the guest
   2070 * memory backing the queue pair. If the host is still attached, it will
   2071 * no longer be able to access the queue pair content.
   2072 *
   2073 * If the queue pair is already in a state where there is no memory
   2074 * registered for the queue pair (any *_NO_MEM state), it will transition to
   2075 * the VMCIQPB_SHUTDOWN_NO_MEM state. This will also happen, if a guest
   2076 * endpoint is the first of two endpoints to detach. If the host endpoint is
   2077 * the first out of two to detach, the queue pair will move to the
   2078 * VMCIQPB_SHUTDOWN_MEM state.
   2079 */
   2080int vmci_qp_broker_detach(struct vmci_handle handle, struct vmci_ctx *context)
   2081{
   2082	struct qp_broker_entry *entry;
   2083	const u32 context_id = vmci_ctx_get_id(context);
   2084	u32 peer_id;
   2085	bool is_local = false;
   2086	int result;
   2087
   2088	if (vmci_handle_is_invalid(handle) || !context ||
   2089	    context_id == VMCI_INVALID_ID) {
   2090		return VMCI_ERROR_INVALID_ARGS;
   2091	}
   2092
   2093	mutex_lock(&qp_broker_list.mutex);
   2094
   2095	if (!vmci_ctx_qp_exists(context, handle)) {
   2096		pr_devel("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n",
   2097			 context_id, handle.context, handle.resource);
   2098		result = VMCI_ERROR_NOT_FOUND;
   2099		goto out;
   2100	}
   2101
   2102	entry = qp_broker_handle_to_entry(handle);
   2103	if (!entry) {
    2104		pr_devel("Context (ID=0x%x) reports being attached to queue pair (handle=0x%x:0x%x) that isn't present in broker\n",
   2105			 context_id, handle.context, handle.resource);
   2106		result = VMCI_ERROR_NOT_FOUND;
   2107		goto out;
   2108	}
   2109
   2110	if (context_id != entry->create_id && context_id != entry->attach_id) {
   2111		result = VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
   2112		goto out;
   2113	}
   2114
   2115	if (context_id == entry->create_id) {
   2116		peer_id = entry->attach_id;
   2117		entry->create_id = VMCI_INVALID_ID;
   2118	} else {
   2119		peer_id = entry->create_id;
   2120		entry->attach_id = VMCI_INVALID_ID;
   2121	}
   2122	entry->qp.ref_count--;
   2123
   2124	is_local = entry->qp.flags & VMCI_QPFLAG_LOCAL;
   2125
   2126	if (context_id != VMCI_HOST_CONTEXT_ID) {
   2127		bool headers_mapped;
   2128
   2129		/*
   2130		 * Pre NOVMVM vmx'en may detach from a queue pair
   2131		 * before setting the page store, and in that case
   2132		 * there is no user memory to detach from. Also, more
   2133		 * recent VMX'en may detach from a queue pair in the
   2134		 * quiesced state.
   2135		 */
   2136
   2137		qp_acquire_queue_mutex(entry->produce_q);
   2138		headers_mapped = entry->produce_q->q_header ||
   2139		    entry->consume_q->q_header;
   2140		if (QPBROKERSTATE_HAS_MEM(entry)) {
   2141			result =
   2142			    qp_host_unmap_queues(INVALID_VMCI_GUEST_MEM_ID,
   2143						 entry->produce_q,
   2144						 entry->consume_q);
   2145			if (result < VMCI_SUCCESS)
   2146				pr_warn("Failed to unmap queue headers for queue pair (handle=0x%x:0x%x,result=%d)\n",
   2147					handle.context, handle.resource,
   2148					result);
   2149
   2150			qp_host_unregister_user_memory(entry->produce_q,
   2151						       entry->consume_q);
   2152
   2153		}
   2154
   2155		if (!headers_mapped)
   2156			qp_reset_saved_headers(entry);
   2157
   2158		qp_release_queue_mutex(entry->produce_q);
   2159
   2160		if (!headers_mapped && entry->wakeup_cb)
   2161			entry->wakeup_cb(entry->client_data);
   2162
   2163	} else {
   2164		if (entry->wakeup_cb) {
   2165			entry->wakeup_cb = NULL;
   2166			entry->client_data = NULL;
   2167		}
   2168	}
   2169
   2170	if (entry->qp.ref_count == 0) {
   2171		qp_list_remove_entry(&qp_broker_list, &entry->qp);
   2172
   2173		if (is_local)
   2174			kfree(entry->local_mem);
   2175
   2176		qp_cleanup_queue_mutex(entry->produce_q, entry->consume_q);
   2177		qp_host_free_queue(entry->produce_q, entry->qp.produce_size);
   2178		qp_host_free_queue(entry->consume_q, entry->qp.consume_size);
   2179		/* Unlink from resource hash table and free callback */
   2180		vmci_resource_remove(&entry->resource);
   2181
   2182		kfree(entry);
   2183
   2184		vmci_ctx_qp_destroy(context, handle);
   2185	} else {
   2186		qp_notify_peer(false, handle, context_id, peer_id);
   2187		if (context_id == VMCI_HOST_CONTEXT_ID &&
   2188		    QPBROKERSTATE_HAS_MEM(entry)) {
   2189			entry->state = VMCIQPB_SHUTDOWN_MEM;
   2190		} else {
   2191			entry->state = VMCIQPB_SHUTDOWN_NO_MEM;
   2192		}
   2193
   2194		if (!is_local)
   2195			vmci_ctx_qp_destroy(context, handle);
   2196
   2197	}
   2198	result = VMCI_SUCCESS;
   2199 out:
   2200	mutex_unlock(&qp_broker_list.mutex);
   2201	return result;
   2202}
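
/*
 * Example transition (illustrative only): with a guest and the host both
 * attached and memory registered (VMCIQPB_ATTACHED_MEM), a guest detach
 * unmaps and unregisters the guest memory and leaves the entry in
 * VMCIQPB_SHUTDOWN_NO_MEM; the later host detach drops the last
 * reference, frees the broker entry and removes it from the resource
 * hash table.
 */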
   2203
   2204/*
   2205 * Establishes the necessary mappings for a queue pair given a
   2206 * reference to the queue pair guest memory. This is usually
   2207 * called when a guest is unquiesced and the VMX is allowed to
   2208 * map guest memory once again.
   2209 */
   2210int vmci_qp_broker_map(struct vmci_handle handle,
   2211		       struct vmci_ctx *context,
   2212		       u64 guest_mem)
   2213{
   2214	struct qp_broker_entry *entry;
   2215	const u32 context_id = vmci_ctx_get_id(context);
   2216	int result;
   2217
   2218	if (vmci_handle_is_invalid(handle) || !context ||
   2219	    context_id == VMCI_INVALID_ID)
   2220		return VMCI_ERROR_INVALID_ARGS;
   2221
   2222	mutex_lock(&qp_broker_list.mutex);
   2223
   2224	if (!vmci_ctx_qp_exists(context, handle)) {
   2225		pr_devel("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n",
   2226			 context_id, handle.context, handle.resource);
   2227		result = VMCI_ERROR_NOT_FOUND;
   2228		goto out;
   2229	}
   2230
   2231	entry = qp_broker_handle_to_entry(handle);
   2232	if (!entry) {
   2233		pr_devel("Context (ID=0x%x) reports being attached to queue pair (handle=0x%x:0x%x) that isn't present in broker\n",
   2234			 context_id, handle.context, handle.resource);
   2235		result = VMCI_ERROR_NOT_FOUND;
   2236		goto out;
   2237	}
   2238
   2239	if (context_id != entry->create_id && context_id != entry->attach_id) {
   2240		result = VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
   2241		goto out;
   2242	}
   2243
   2244	result = VMCI_SUCCESS;
   2245
   2246	if (context_id != VMCI_HOST_CONTEXT_ID &&
   2247	    !QPBROKERSTATE_HAS_MEM(entry)) {
   2248		struct vmci_qp_page_store page_store;
   2249
   2250		page_store.pages = guest_mem;
   2251		page_store.len = QPE_NUM_PAGES(entry->qp);
   2252
   2253		qp_acquire_queue_mutex(entry->produce_q);
   2254		qp_reset_saved_headers(entry);
   2255		result =
   2256		    qp_host_register_user_memory(&page_store,
   2257						 entry->produce_q,
   2258						 entry->consume_q);
   2259		qp_release_queue_mutex(entry->produce_q);
   2260		if (result == VMCI_SUCCESS) {
   2261			/* Move state from *_NO_MEM to *_MEM */
   2262
   2263			entry->state++;
   2264
   2265			if (entry->wakeup_cb)
   2266				entry->wakeup_cb(entry->client_data);
   2267		}
   2268	}
   2269
   2270 out:
   2271	mutex_unlock(&qp_broker_list.mutex);
   2272	return result;
   2273}
   2274
   2275/*
   2276 * Saves a snapshot of the queue headers for the given QP broker
   2277 * entry. Should be used when guest memory is unmapped.
   2278 * Results:
   2279 * VMCI_SUCCESS on success, appropriate error code if guest memory
    2280 * can't be accessed.
   2281 */
   2282static int qp_save_headers(struct qp_broker_entry *entry)
   2283{
   2284	int result;
   2285
   2286	if (entry->produce_q->saved_header != NULL &&
   2287	    entry->consume_q->saved_header != NULL) {
   2288		/*
   2289		 *  If the headers have already been saved, we don't need to do
   2290		 *  it again, and we don't want to map in the headers
   2291		 *  unnecessarily.
   2292		 */
   2293
   2294		return VMCI_SUCCESS;
   2295	}
   2296
   2297	if (NULL == entry->produce_q->q_header ||
   2298	    NULL == entry->consume_q->q_header) {
   2299		result = qp_host_map_queues(entry->produce_q, entry->consume_q);
   2300		if (result < VMCI_SUCCESS)
   2301			return result;
   2302	}
   2303
   2304	memcpy(&entry->saved_produce_q, entry->produce_q->q_header,
   2305	       sizeof(entry->saved_produce_q));
   2306	entry->produce_q->saved_header = &entry->saved_produce_q;
   2307	memcpy(&entry->saved_consume_q, entry->consume_q->q_header,
   2308	       sizeof(entry->saved_consume_q));
   2309	entry->consume_q->saved_header = &entry->saved_consume_q;
   2310
   2311	return VMCI_SUCCESS;
   2312}
   2313
   2314/*
   2315 * Removes all references to the guest memory of a given queue pair, and
   2316 * will move the queue pair from state *_MEM to *_NO_MEM. It is usually
    2317 * called when a VM is being quiesced and access to guest memory
    2318 * should be avoided.
   2319 */
   2320int vmci_qp_broker_unmap(struct vmci_handle handle,
   2321			 struct vmci_ctx *context,
   2322			 u32 gid)
   2323{
   2324	struct qp_broker_entry *entry;
   2325	const u32 context_id = vmci_ctx_get_id(context);
   2326	int result;
   2327
   2328	if (vmci_handle_is_invalid(handle) || !context ||
   2329	    context_id == VMCI_INVALID_ID)
   2330		return VMCI_ERROR_INVALID_ARGS;
   2331
   2332	mutex_lock(&qp_broker_list.mutex);
   2333
   2334	if (!vmci_ctx_qp_exists(context, handle)) {
   2335		pr_devel("Context (ID=0x%x) not attached to queue pair (handle=0x%x:0x%x)\n",
   2336			 context_id, handle.context, handle.resource);
   2337		result = VMCI_ERROR_NOT_FOUND;
   2338		goto out;
   2339	}
   2340
   2341	entry = qp_broker_handle_to_entry(handle);
   2342	if (!entry) {
   2343		pr_devel("Context (ID=0x%x) reports being attached to queue pair (handle=0x%x:0x%x) that isn't present in broker\n",
   2344			 context_id, handle.context, handle.resource);
   2345		result = VMCI_ERROR_NOT_FOUND;
   2346		goto out;
   2347	}
   2348
   2349	if (context_id != entry->create_id && context_id != entry->attach_id) {
   2350		result = VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
   2351		goto out;
   2352	}
   2353
   2354	if (context_id != VMCI_HOST_CONTEXT_ID &&
   2355	    QPBROKERSTATE_HAS_MEM(entry)) {
   2356		qp_acquire_queue_mutex(entry->produce_q);
   2357		result = qp_save_headers(entry);
   2358		if (result < VMCI_SUCCESS)
   2359			pr_warn("Failed to save queue headers for queue pair (handle=0x%x:0x%x,result=%d)\n",
   2360				handle.context, handle.resource, result);
   2361
   2362		qp_host_unmap_queues(gid, entry->produce_q, entry->consume_q);
   2363
   2364		/*
   2365		 * On hosted, when we unmap queue pairs, the VMX will also
   2366		 * unmap the guest memory, so we invalidate the previously
   2367		 * registered memory. If the queue pair is mapped again at a
   2368		 * later point in time, we will need to reregister the user
   2369		 * memory with a possibly new user VA.
   2370		 */
   2371		qp_host_unregister_user_memory(entry->produce_q,
   2372					       entry->consume_q);
   2373
   2374		/*
   2375		 * Move state from *_MEM to *_NO_MEM.
   2376		 */
   2377		entry->state--;
   2378
   2379		qp_release_queue_mutex(entry->produce_q);
   2380	}
   2381
   2382	result = VMCI_SUCCESS;
   2383
   2384 out:
   2385	mutex_unlock(&qp_broker_list.mutex);
   2386	return result;
   2387}
   2388
   2389/*
   2390 * Destroys all guest queue pair endpoints. If active guest queue
   2391 * pairs still exist, hypercalls to attempt detach from these
   2392 * queue pairs will be made. Any failure to detach is silently
   2393 * ignored.
   2394 */
   2395void vmci_qp_guest_endpoints_exit(void)
   2396{
   2397	struct qp_entry *entry;
   2398	struct qp_guest_endpoint *ep;
   2399
   2400	mutex_lock(&qp_guest_endpoints.mutex);
   2401
   2402	while ((entry = qp_list_get_head(&qp_guest_endpoints))) {
   2403		ep = (struct qp_guest_endpoint *)entry;
   2404
   2405		/* Don't make a hypercall for local queue_pairs. */
   2406		if (!(entry->flags & VMCI_QPFLAG_LOCAL))
   2407			qp_detatch_hypercall(entry->handle);
   2408
   2409		/* We cannot fail the exit, so let's reset ref_count. */
   2410		entry->ref_count = 0;
   2411		qp_list_remove_entry(&qp_guest_endpoints, entry);
   2412
   2413		qp_guest_endpoint_destroy(ep);
   2414	}
   2415
   2416	mutex_unlock(&qp_guest_endpoints.mutex);
   2417}
   2418
   2419/*
   2420 * Helper routine that will lock the queue pair before subsequent
   2421 * operations.
   2422 * Note: Non-blocking on the host side is currently only implemented in ESX.
    2423 * Since non-blocking isn't yet implemented on the host personality, we
    2424 * have no reason to acquire a spin lock.  So, to avoid an unnecessary
    2425 * lock, only acquire the mutex if we can block.
   2426 */
   2427static void qp_lock(const struct vmci_qp *qpair)
   2428{
   2429	qp_acquire_queue_mutex(qpair->produce_q);
   2430}
   2431
   2432/*
   2433 * Helper routine that unlocks the queue pair after calling
   2434 * qp_lock.
   2435 */
   2436static void qp_unlock(const struct vmci_qp *qpair)
   2437{
   2438	qp_release_queue_mutex(qpair->produce_q);
   2439}
   2440
   2441/*
   2442 * The queue headers may not be mapped at all times. If a queue is
    2443 * currently not mapped, an attempt is made to map it.
   2444 */
   2445static int qp_map_queue_headers(struct vmci_queue *produce_q,
   2446				struct vmci_queue *consume_q)
   2447{
   2448	int result;
   2449
   2450	if (NULL == produce_q->q_header || NULL == consume_q->q_header) {
   2451		result = qp_host_map_queues(produce_q, consume_q);
   2452		if (result < VMCI_SUCCESS)
   2453			return (produce_q->saved_header &&
   2454				consume_q->saved_header) ?
   2455			    VMCI_ERROR_QUEUEPAIR_NOT_READY :
   2456			    VMCI_ERROR_QUEUEPAIR_NOTATTACHED;
   2457	}
   2458
   2459	return VMCI_SUCCESS;
   2460}
   2461
   2462/*
   2463 * Helper routine that will retrieve the produce and consume
   2464 * headers of a given queue pair. If the guest memory of the
   2465 * queue pair is currently not available, the saved queue headers
   2466 * will be returned, if these are available.
   2467 */
   2468static int qp_get_queue_headers(const struct vmci_qp *qpair,
   2469				struct vmci_queue_header **produce_q_header,
   2470				struct vmci_queue_header **consume_q_header)
   2471{
   2472	int result;
   2473
   2474	result = qp_map_queue_headers(qpair->produce_q, qpair->consume_q);
   2475	if (result == VMCI_SUCCESS) {
   2476		*produce_q_header = qpair->produce_q->q_header;
   2477		*consume_q_header = qpair->consume_q->q_header;
   2478	} else if (qpair->produce_q->saved_header &&
   2479		   qpair->consume_q->saved_header) {
   2480		*produce_q_header = qpair->produce_q->saved_header;
   2481		*consume_q_header = qpair->consume_q->saved_header;
   2482		result = VMCI_SUCCESS;
   2483	}
   2484
   2485	return result;
   2486}
   2487
   2488/*
   2489 * Callback from VMCI queue pair broker indicating that a queue
    2490 * pair that was previously not ready is now either ready or
   2491 * gone forever.
   2492 */
   2493static int qp_wakeup_cb(void *client_data)
   2494{
   2495	struct vmci_qp *qpair = (struct vmci_qp *)client_data;
   2496
   2497	qp_lock(qpair);
   2498	while (qpair->blocked > 0) {
   2499		qpair->blocked--;
   2500		qpair->generation++;
   2501		wake_up(&qpair->event);
   2502	}
   2503	qp_unlock(qpair);
   2504
   2505	return VMCI_SUCCESS;
   2506}
   2507
   2508/*
   2509 * Makes the calling thread wait for the queue pair to become
   2510 * ready for host side access.  Returns true when thread is
   2511 * woken up after queue pair state change, false otherwise.
   2512 */
   2513static bool qp_wait_for_ready_queue(struct vmci_qp *qpair)
   2514{
   2515	unsigned int generation;
   2516
   2517	qpair->blocked++;
   2518	generation = qpair->generation;
   2519	qp_unlock(qpair);
   2520	wait_event(qpair->event, generation != qpair->generation);
   2521	qp_lock(qpair);
   2522
   2523	return true;
   2524}
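
/*
 * A rough sketch of how qp_wakeup_cb() and qp_wait_for_ready_queue()
 * cooperate (illustrative only, not part of the driver):
 *
 *   waiter (enqueue/dequeue path)         qp_wakeup_cb()
 *   -----------------------------         --------------
 *   qpair->blocked++;
 *   generation = qpair->generation;
 *   qp_unlock(qpair);
 *                                         qp_lock(qpair);
 *                                         qpair->blocked--;   (per waiter)
 *                                         qpair->generation++;
 *                                         wake_up(&qpair->event);
 *                                         qp_unlock(qpair);
 *   wait_event(qpair->event,
 *              generation != qpair->generation);
 *   qp_lock(qpair);
 *
 * The generation counter ensures that a wakeup arriving between
 * qp_unlock() and wait_event() is not lost: the condition is already
 * true when the waiter checks it.
 */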
   2525
   2526/*
   2527 * Enqueues a given buffer to the produce queue using the provided
   2528 * function. As many bytes as possible (space available in the queue)
   2529 * are enqueued.  Assumes the queue->mutex has been acquired.  Returns
   2530 * VMCI_ERROR_QUEUEPAIR_NOSPACE if no space was available to enqueue
   2531 * data, VMCI_ERROR_INVALID_SIZE, if any queue pointer is outside the
   2532 * queue (as defined by the queue size), VMCI_ERROR_INVALID_ARGS, if
    2533 * an error occurred when accessing the buffer,
   2534 * VMCI_ERROR_QUEUEPAIR_NOTATTACHED, if the queue pair pages aren't
   2535 * available.  Otherwise, the number of bytes written to the queue is
   2536 * returned.  Updates the tail pointer of the produce queue.
   2537 */
   2538static ssize_t qp_enqueue_locked(struct vmci_queue *produce_q,
   2539				 struct vmci_queue *consume_q,
   2540				 const u64 produce_q_size,
   2541				 struct iov_iter *from)
   2542{
   2543	s64 free_space;
   2544	u64 tail;
   2545	size_t buf_size = iov_iter_count(from);
   2546	size_t written;
   2547	ssize_t result;
   2548
   2549	result = qp_map_queue_headers(produce_q, consume_q);
   2550	if (unlikely(result != VMCI_SUCCESS))
   2551		return result;
   2552
   2553	free_space = vmci_q_header_free_space(produce_q->q_header,
   2554					      consume_q->q_header,
   2555					      produce_q_size);
   2556	if (free_space == 0)
   2557		return VMCI_ERROR_QUEUEPAIR_NOSPACE;
   2558
   2559	if (free_space < VMCI_SUCCESS)
   2560		return (ssize_t) free_space;
   2561
   2562	written = (size_t) (free_space > buf_size ? buf_size : free_space);
   2563	tail = vmci_q_header_producer_tail(produce_q->q_header);
   2564	if (likely(tail + written < produce_q_size)) {
   2565		result = qp_memcpy_to_queue_iter(produce_q, tail, from, written);
   2566	} else {
   2567		/* Tail pointer wraps around. */
   2568
   2569		const size_t tmp = (size_t) (produce_q_size - tail);
   2570
   2571		result = qp_memcpy_to_queue_iter(produce_q, tail, from, tmp);
   2572		if (result >= VMCI_SUCCESS)
   2573			result = qp_memcpy_to_queue_iter(produce_q, 0, from,
   2574						 written - tmp);
   2575	}
   2576
   2577	if (result < VMCI_SUCCESS)
   2578		return result;
   2579
   2580	/*
   2581	 * This virt_wmb() ensures that data written to the queue
   2582	 * is observable before the new producer_tail is.
   2583	 */
   2584	virt_wmb();
   2585
   2586	vmci_q_header_add_producer_tail(produce_q->q_header, written,
   2587					produce_q_size);
   2588	return written;
   2589}
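
/*
 * Illustrative example of the wrap-around handling above (assumed
 * numbers, not taken from the driver): with produce_q_size = 4096,
 * tail = 4000 and written = 200, the first qp_memcpy_to_queue_iter()
 * copies tmp = 4096 - 4000 = 96 bytes at offset 4000, the second copies
 * the remaining 200 - 96 = 104 bytes at offset 0, and
 * vmci_q_header_add_producer_tail() advances the tail to
 * (4000 + 200) mod 4096 = 104.
 */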
   2590
   2591/*
   2592 * Dequeues data (if available) from the given consume queue. Writes data
   2593 * to the user provided buffer using the provided function.
   2594 * Assumes the queue->mutex has been acquired.
   2595 * Results:
   2596 * VMCI_ERROR_QUEUEPAIR_NODATA if no data was available to dequeue.
   2597 * VMCI_ERROR_INVALID_SIZE, if any queue pointer is outside the queue
   2598 * (as defined by the queue size).
    2599 * VMCI_ERROR_INVALID_ARGS, if an error occurred when accessing the buffer.
   2600 * Otherwise the number of bytes dequeued is returned.
   2601 * Side effects:
   2602 * Updates the head pointer of the consume queue.
   2603 */
   2604static ssize_t qp_dequeue_locked(struct vmci_queue *produce_q,
   2605				 struct vmci_queue *consume_q,
   2606				 const u64 consume_q_size,
   2607				 struct iov_iter *to,
   2608				 bool update_consumer)
   2609{
   2610	size_t buf_size = iov_iter_count(to);
   2611	s64 buf_ready;
   2612	u64 head;
   2613	size_t read;
   2614	ssize_t result;
   2615
   2616	result = qp_map_queue_headers(produce_q, consume_q);
   2617	if (unlikely(result != VMCI_SUCCESS))
   2618		return result;
   2619
   2620	buf_ready = vmci_q_header_buf_ready(consume_q->q_header,
   2621					    produce_q->q_header,
   2622					    consume_q_size);
   2623	if (buf_ready == 0)
   2624		return VMCI_ERROR_QUEUEPAIR_NODATA;
   2625
   2626	if (buf_ready < VMCI_SUCCESS)
   2627		return (ssize_t) buf_ready;
   2628
   2629	/*
   2630	 * This virt_rmb() ensures that data from the queue will be read
   2631	 * after we have determined how much is ready to be consumed.
   2632	 */
   2633	virt_rmb();
   2634
   2635	read = (size_t) (buf_ready > buf_size ? buf_size : buf_ready);
   2636	head = vmci_q_header_consumer_head(produce_q->q_header);
   2637	if (likely(head + read < consume_q_size)) {
   2638		result = qp_memcpy_from_queue_iter(to, consume_q, head, read);
   2639	} else {
   2640		/* Head pointer wraps around. */
   2641
   2642		const size_t tmp = (size_t) (consume_q_size - head);
   2643
   2644		result = qp_memcpy_from_queue_iter(to, consume_q, head, tmp);
   2645		if (result >= VMCI_SUCCESS)
   2646			result = qp_memcpy_from_queue_iter(to, consume_q, 0,
   2647						   read - tmp);
   2648
   2649	}
   2650
   2651	if (result < VMCI_SUCCESS)
   2652		return result;
   2653
   2654	if (update_consumer)
   2655		vmci_q_header_add_consumer_head(produce_q->q_header,
   2656						read, consume_q_size);
   2657
   2658	return read;
   2659}
   2660
   2661/*
   2662 * vmci_qpair_alloc() - Allocates a queue pair.
   2663 * @qpair:      Pointer for the new vmci_qp struct.
   2664 * @handle:     Handle to track the resource.
   2665 * @produce_qsize:      Desired size of the producer queue.
   2666 * @consume_qsize:      Desired size of the consumer queue.
   2667 * @peer:       ContextID of the peer.
   2668 * @flags:      VMCI flags.
    2669 * @priv_flags: VMCI privilege flags.
   2670 *
   2671 * This is the client interface for allocating the memory for a
   2672 * vmci_qp structure and then attaching to the underlying
   2673 * queue.  If an error occurs allocating the memory for the
   2674 * vmci_qp structure no attempt is made to attach.  If an
   2675 * error occurs attaching, then the structure is freed.
   2676 */
   2677int vmci_qpair_alloc(struct vmci_qp **qpair,
   2678		     struct vmci_handle *handle,
   2679		     u64 produce_qsize,
   2680		     u64 consume_qsize,
   2681		     u32 peer,
   2682		     u32 flags,
   2683		     u32 priv_flags)
   2684{
   2685	struct vmci_qp *my_qpair;
   2686	int retval;
   2687	struct vmci_handle src = VMCI_INVALID_HANDLE;
   2688	struct vmci_handle dst = vmci_make_handle(peer, VMCI_INVALID_ID);
   2689	enum vmci_route route;
   2690	vmci_event_release_cb wakeup_cb;
   2691	void *client_data;
   2692
   2693	/*
   2694	 * Restrict the size of a queuepair.  The device already
   2695	 * enforces a limit on the total amount of memory that can be
   2696	 * allocated to queuepairs for a guest.  However, we try to
   2697	 * allocate this memory before we make the queuepair
   2698	 * allocation hypercall.  On Linux, we allocate each page
   2699	 * separately, which means rather than fail, the guest will
   2700	 * thrash while it tries to allocate, and will become
   2701	 * increasingly unresponsive to the point where it appears to
   2702	 * be hung.  So we place a limit on the size of an individual
   2703	 * queuepair here, and leave the device to enforce the
   2704	 * restriction on total queuepair memory.  (Note that this
   2705	 * doesn't prevent all cases; a user with only this much
   2706	 * physical memory could still get into trouble.)  The error
   2707	 * used by the device is NO_RESOURCES, so use that here too.
   2708	 */
   2709
   2710	if (!QP_SIZES_ARE_VALID(produce_qsize, consume_qsize))
   2711		return VMCI_ERROR_NO_RESOURCES;
   2712
   2713	retval = vmci_route(&src, &dst, false, &route);
   2714	if (retval < VMCI_SUCCESS)
   2715		route = vmci_guest_code_active() ?
   2716		    VMCI_ROUTE_AS_GUEST : VMCI_ROUTE_AS_HOST;
   2717
   2718	if (flags & (VMCI_QPFLAG_NONBLOCK | VMCI_QPFLAG_PINNED)) {
    2719		pr_devel("NONBLOCK OR PINNED set\n");
   2720		return VMCI_ERROR_INVALID_ARGS;
   2721	}
   2722
   2723	my_qpair = kzalloc(sizeof(*my_qpair), GFP_KERNEL);
   2724	if (!my_qpair)
   2725		return VMCI_ERROR_NO_MEM;
   2726
   2727	my_qpair->produce_q_size = produce_qsize;
   2728	my_qpair->consume_q_size = consume_qsize;
   2729	my_qpair->peer = peer;
   2730	my_qpair->flags = flags;
   2731	my_qpair->priv_flags = priv_flags;
   2732
   2733	wakeup_cb = NULL;
   2734	client_data = NULL;
   2735
   2736	if (VMCI_ROUTE_AS_HOST == route) {
   2737		my_qpair->guest_endpoint = false;
   2738		if (!(flags & VMCI_QPFLAG_LOCAL)) {
   2739			my_qpair->blocked = 0;
   2740			my_qpair->generation = 0;
   2741			init_waitqueue_head(&my_qpair->event);
   2742			wakeup_cb = qp_wakeup_cb;
   2743			client_data = (void *)my_qpair;
   2744		}
   2745	} else {
   2746		my_qpair->guest_endpoint = true;
   2747	}
   2748
   2749	retval = vmci_qp_alloc(handle,
   2750			       &my_qpair->produce_q,
   2751			       my_qpair->produce_q_size,
   2752			       &my_qpair->consume_q,
   2753			       my_qpair->consume_q_size,
   2754			       my_qpair->peer,
   2755			       my_qpair->flags,
   2756			       my_qpair->priv_flags,
   2757			       my_qpair->guest_endpoint,
   2758			       wakeup_cb, client_data);
   2759
   2760	if (retval < VMCI_SUCCESS) {
   2761		kfree(my_qpair);
   2762		return retval;
   2763	}
   2764
   2765	*qpair = my_qpair;
   2766	my_qpair->handle = *handle;
   2767
   2768	return retval;
   2769}
   2770EXPORT_SYMBOL_GPL(vmci_qpair_alloc);
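
/*
 * Minimal usage sketch (illustrative only; peer_cid and the queue sizes
 * are assumptions, not values taken from this driver):
 *
 *	struct vmci_qp *qpair;
 *	struct vmci_handle handle = VMCI_INVALID_HANDLE;
 *	int rv;
 *
 *	rv = vmci_qpair_alloc(&qpair, &handle, 4096, 4096, peer_cid,
 *			      0, VMCI_NO_PRIVILEGE_FLAGS);
 *	if (rv < VMCI_SUCCESS)
 *		return rv;
 *	...
 *	vmci_qpair_detach(&qpair);
 *
 * The vsock VMCI transport (net/vmw_vsock) is one in-tree user of this
 * interface.
 */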
   2771
   2772/*
    2773 * vmci_qpair_detach() - Detaches the client from a queue pair.
   2774 * @qpair:      Reference of a pointer to the qpair struct.
   2775 *
   2776 * This is the client interface for detaching from a VMCIQPair.
   2777 * Note that this routine will free the memory allocated for the
   2778 * vmci_qp structure too.
   2779 */
   2780int vmci_qpair_detach(struct vmci_qp **qpair)
   2781{
   2782	int result;
   2783	struct vmci_qp *old_qpair;
   2784
   2785	if (!qpair || !(*qpair))
   2786		return VMCI_ERROR_INVALID_ARGS;
   2787
   2788	old_qpair = *qpair;
   2789	result = qp_detatch(old_qpair->handle, old_qpair->guest_endpoint);
   2790
   2791	/*
   2792	 * The guest can fail to detach for a number of reasons, and
   2793	 * if it does so, it will cleanup the entry (if there is one).
   2794	 * The host can fail too, but it won't cleanup the entry
   2795	 * immediately, it will do that later when the context is
   2796	 * freed.  Either way, we need to release the qpair struct
   2797	 * here; there isn't much the caller can do, and we don't want
   2798	 * to leak.
   2799	 */
   2800
   2801	memset(old_qpair, 0, sizeof(*old_qpair));
   2802	old_qpair->handle = VMCI_INVALID_HANDLE;
   2803	old_qpair->peer = VMCI_INVALID_ID;
   2804	kfree(old_qpair);
   2805	*qpair = NULL;
   2806
   2807	return result;
   2808}
   2809EXPORT_SYMBOL_GPL(vmci_qpair_detach);
   2810
   2811/*
   2812 * vmci_qpair_get_produce_indexes() - Retrieves the indexes of the producer.
   2813 * @qpair:      Pointer to the queue pair struct.
   2814 * @producer_tail:      Reference used for storing producer tail index.
   2815 * @consumer_head:      Reference used for storing the consumer head index.
   2816 *
   2817 * This is the client interface for getting the current indexes of the
    2818 * QPair from the point of view of the caller as the producer.
   2819 */
   2820int vmci_qpair_get_produce_indexes(const struct vmci_qp *qpair,
   2821				   u64 *producer_tail,
   2822				   u64 *consumer_head)
   2823{
   2824	struct vmci_queue_header *produce_q_header;
   2825	struct vmci_queue_header *consume_q_header;
   2826	int result;
   2827
   2828	if (!qpair)
   2829		return VMCI_ERROR_INVALID_ARGS;
   2830
   2831	qp_lock(qpair);
   2832	result =
   2833	    qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
   2834	if (result == VMCI_SUCCESS)
   2835		vmci_q_header_get_pointers(produce_q_header, consume_q_header,
   2836					   producer_tail, consumer_head);
   2837	qp_unlock(qpair);
   2838
   2839	if (result == VMCI_SUCCESS &&
   2840	    ((producer_tail && *producer_tail >= qpair->produce_q_size) ||
   2841	     (consumer_head && *consumer_head >= qpair->produce_q_size)))
   2842		return VMCI_ERROR_INVALID_SIZE;
   2843
   2844	return result;
   2845}
   2846EXPORT_SYMBOL_GPL(vmci_qpair_get_produce_indexes);
   2847
   2848/*
   2849 * vmci_qpair_get_consume_indexes() - Retrieves the indexes of the consumer.
   2850 * @qpair:      Pointer to the queue pair struct.
   2851 * @consumer_tail:      Reference used for storing consumer tail index.
   2852 * @producer_head:      Reference used for storing the producer head index.
   2853 *
   2854 * This is the client interface for getting the current indexes of the
    2855 * QPair from the point of view of the caller as the consumer.
   2856 */
   2857int vmci_qpair_get_consume_indexes(const struct vmci_qp *qpair,
   2858				   u64 *consumer_tail,
   2859				   u64 *producer_head)
   2860{
   2861	struct vmci_queue_header *produce_q_header;
   2862	struct vmci_queue_header *consume_q_header;
   2863	int result;
   2864
   2865	if (!qpair)
   2866		return VMCI_ERROR_INVALID_ARGS;
   2867
   2868	qp_lock(qpair);
   2869	result =
   2870	    qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
   2871	if (result == VMCI_SUCCESS)
   2872		vmci_q_header_get_pointers(consume_q_header, produce_q_header,
   2873					   consumer_tail, producer_head);
   2874	qp_unlock(qpair);
   2875
   2876	if (result == VMCI_SUCCESS &&
   2877	    ((consumer_tail && *consumer_tail >= qpair->consume_q_size) ||
   2878	     (producer_head && *producer_head >= qpair->consume_q_size)))
   2879		return VMCI_ERROR_INVALID_SIZE;
   2880
   2881	return result;
   2882}
   2883EXPORT_SYMBOL_GPL(vmci_qpair_get_consume_indexes);
   2884
   2885/*
   2886 * vmci_qpair_produce_free_space() - Retrieves free space in producer queue.
   2887 * @qpair:      Pointer to the queue pair struct.
   2888 *
   2889 * This is the client interface for getting the amount of free
    2890 * space in the QPair from the point of view of the caller as
   2891 * the producer which is the common case.  Returns < 0 if err, else
   2892 * available bytes into which data can be enqueued if > 0.
   2893 */
   2894s64 vmci_qpair_produce_free_space(const struct vmci_qp *qpair)
   2895{
   2896	struct vmci_queue_header *produce_q_header;
   2897	struct vmci_queue_header *consume_q_header;
   2898	s64 result;
   2899
   2900	if (!qpair)
   2901		return VMCI_ERROR_INVALID_ARGS;
   2902
   2903	qp_lock(qpair);
   2904	result =
   2905	    qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
   2906	if (result == VMCI_SUCCESS)
   2907		result = vmci_q_header_free_space(produce_q_header,
   2908						  consume_q_header,
   2909						  qpair->produce_q_size);
   2910	else
   2911		result = 0;
   2912
   2913	qp_unlock(qpair);
   2914
   2915	return result;
   2916}
   2917EXPORT_SYMBOL_GPL(vmci_qpair_produce_free_space);
   2918
   2919/*
   2920 * vmci_qpair_consume_free_space() - Retrieves free space in consumer queue.
   2921 * @qpair:      Pointer to the queue pair struct.
   2922 *
   2923 * This is the client interface for getting the amount of free
    2924 * space in the QPair from the point of view of the caller as
   2925 * the consumer which is not the common case.  Returns < 0 if err, else
   2926 * available bytes into which data can be enqueued if > 0.
   2927 */
   2928s64 vmci_qpair_consume_free_space(const struct vmci_qp *qpair)
   2929{
   2930	struct vmci_queue_header *produce_q_header;
   2931	struct vmci_queue_header *consume_q_header;
   2932	s64 result;
   2933
   2934	if (!qpair)
   2935		return VMCI_ERROR_INVALID_ARGS;
   2936
   2937	qp_lock(qpair);
   2938	result =
   2939	    qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
   2940	if (result == VMCI_SUCCESS)
   2941		result = vmci_q_header_free_space(consume_q_header,
   2942						  produce_q_header,
   2943						  qpair->consume_q_size);
   2944	else
   2945		result = 0;
   2946
   2947	qp_unlock(qpair);
   2948
   2949	return result;
   2950}
   2951EXPORT_SYMBOL_GPL(vmci_qpair_consume_free_space);
   2952
   2953/*
   2954 * vmci_qpair_produce_buf_ready() - Gets bytes ready to read from
   2955 * producer queue.
   2956 * @qpair:      Pointer to the queue pair struct.
   2957 *
   2958 * This is the client interface for getting the amount of
    2959 * enqueued data in the QPair from the point of view of the
   2960 * caller as the producer which is not the common case.  Returns < 0 if err,
   2961 * else available bytes that may be read.
   2962 */
   2963s64 vmci_qpair_produce_buf_ready(const struct vmci_qp *qpair)
   2964{
   2965	struct vmci_queue_header *produce_q_header;
   2966	struct vmci_queue_header *consume_q_header;
   2967	s64 result;
   2968
   2969	if (!qpair)
   2970		return VMCI_ERROR_INVALID_ARGS;
   2971
   2972	qp_lock(qpair);
   2973	result =
   2974	    qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
   2975	if (result == VMCI_SUCCESS)
   2976		result = vmci_q_header_buf_ready(produce_q_header,
   2977						 consume_q_header,
   2978						 qpair->produce_q_size);
   2979	else
   2980		result = 0;
   2981
   2982	qp_unlock(qpair);
   2983
   2984	return result;
   2985}
   2986EXPORT_SYMBOL_GPL(vmci_qpair_produce_buf_ready);
   2987
   2988/*
   2989 * vmci_qpair_consume_buf_ready() - Gets bytes ready to read from
   2990 * consumer queue.
   2991 * @qpair:      Pointer to the queue pair struct.
   2992 *
   2993 * This is the client interface for getting the amount of
    2994 * enqueued data in the QPair from the point of view of the
   2995 * caller as the consumer which is the normal case.  Returns < 0 if err,
   2996 * else available bytes that may be read.
   2997 */
   2998s64 vmci_qpair_consume_buf_ready(const struct vmci_qp *qpair)
   2999{
   3000	struct vmci_queue_header *produce_q_header;
   3001	struct vmci_queue_header *consume_q_header;
   3002	s64 result;
   3003
   3004	if (!qpair)
   3005		return VMCI_ERROR_INVALID_ARGS;
   3006
   3007	qp_lock(qpair);
   3008	result =
   3009	    qp_get_queue_headers(qpair, &produce_q_header, &consume_q_header);
   3010	if (result == VMCI_SUCCESS)
   3011		result = vmci_q_header_buf_ready(consume_q_header,
   3012						 produce_q_header,
   3013						 qpair->consume_q_size);
   3014	else
   3015		result = 0;
   3016
   3017	qp_unlock(qpair);
   3018
   3019	return result;
   3020}
   3021EXPORT_SYMBOL_GPL(vmci_qpair_consume_buf_ready);
   3022
   3023/*
   3024 * vmci_qpair_enqueue() - Throw data on the queue.
   3025 * @qpair:      Pointer to the queue pair struct.
   3026 * @buf:        Pointer to buffer containing data
   3027 * @buf_size:   Length of buffer.
   3028 * @buf_type:   Buffer type (Unused).
   3029 *
   3030 * This is the client interface for enqueueing data into the queue.
   3031 * Returns number of bytes enqueued or < 0 on error.
   3032 */
   3033ssize_t vmci_qpair_enqueue(struct vmci_qp *qpair,
   3034			   const void *buf,
   3035			   size_t buf_size,
   3036			   int buf_type)
   3037{
   3038	ssize_t result;
   3039	struct iov_iter from;
   3040	struct kvec v = {.iov_base = (void *)buf, .iov_len = buf_size};
   3041
   3042	if (!qpair || !buf)
   3043		return VMCI_ERROR_INVALID_ARGS;
   3044
   3045	iov_iter_kvec(&from, WRITE, &v, 1, buf_size);
   3046
   3047	qp_lock(qpair);
   3048
   3049	do {
   3050		result = qp_enqueue_locked(qpair->produce_q,
   3051					   qpair->consume_q,
   3052					   qpair->produce_q_size,
   3053					   &from);
   3054
   3055		if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
   3056		    !qp_wait_for_ready_queue(qpair))
   3057			result = VMCI_ERROR_WOULD_BLOCK;
   3058
   3059	} while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
   3060
   3061	qp_unlock(qpair);
   3062
   3063	return result;
   3064}
   3065EXPORT_SYMBOL_GPL(vmci_qpair_enqueue);
   3066
   3067/*
   3068 * vmci_qpair_dequeue() - Get data from the queue.
   3069 * @qpair:      Pointer to the queue pair struct.
   3070 * @buf:        Pointer to buffer for the data
   3071 * @buf_size:   Length of buffer.
   3072 * @buf_type:   Buffer type (Unused).
   3073 *
   3074 * This is the client interface for dequeueing data from the queue.
   3075 * Returns number of bytes dequeued or < 0 on error.
   3076 */
   3077ssize_t vmci_qpair_dequeue(struct vmci_qp *qpair,
   3078			   void *buf,
   3079			   size_t buf_size,
   3080			   int buf_type)
   3081{
   3082	ssize_t result;
   3083	struct iov_iter to;
   3084	struct kvec v = {.iov_base = buf, .iov_len = buf_size};
   3085
   3086	if (!qpair || !buf)
   3087		return VMCI_ERROR_INVALID_ARGS;
   3088
   3089	iov_iter_kvec(&to, READ, &v, 1, buf_size);
   3090
   3091	qp_lock(qpair);
   3092
   3093	do {
   3094		result = qp_dequeue_locked(qpair->produce_q,
   3095					   qpair->consume_q,
   3096					   qpair->consume_q_size,
   3097					   &to, true);
   3098
   3099		if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
   3100		    !qp_wait_for_ready_queue(qpair))
   3101			result = VMCI_ERROR_WOULD_BLOCK;
   3102
   3103	} while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
   3104
   3105	qp_unlock(qpair);
   3106
   3107	return result;
   3108}
   3109EXPORT_SYMBOL_GPL(vmci_qpair_dequeue);
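
/*
 * Illustrative sketch of moving data through an attached pair (assumed
 * buffers; the enqueueing and dequeueing sides are the two peers of the
 * pair, error handling trimmed):
 *
 *	On one endpoint:
 *		char out[64] = "ping";
 *		ssize_t n = vmci_qpair_enqueue(qpair, out, sizeof(out), 0);
 *
 *	On the peer:
 *		char in[64];
 *		ssize_t n = vmci_qpair_dequeue(qpair, in, sizeof(in), 0);
 *
 * Both calls return the number of bytes actually transferred, which may
 * be less than the buffer size when the produce queue is nearly full or
 * the consume queue holds less data than requested.
 */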
   3110
   3111/*
   3112 * vmci_qpair_peek() - Peek at the data in the queue.
   3113 * @qpair:      Pointer to the queue pair struct.
   3114 * @buf:        Pointer to buffer for the data
   3115 * @buf_size:   Length of buffer.
   3116 * @buf_type:   Buffer type (Unused on Linux).
   3117 *
   3118 * This is the client interface for peeking into a queue.  (I.e.,
   3119 * copy data from the queue without updating the head pointer.)
   3120 * Returns number of bytes dequeued or < 0 on error.
   3121 */
   3122ssize_t vmci_qpair_peek(struct vmci_qp *qpair,
   3123			void *buf,
   3124			size_t buf_size,
   3125			int buf_type)
   3126{
   3127	struct iov_iter to;
   3128	struct kvec v = {.iov_base = buf, .iov_len = buf_size};
   3129	ssize_t result;
   3130
   3131	if (!qpair || !buf)
   3132		return VMCI_ERROR_INVALID_ARGS;
   3133
   3134	iov_iter_kvec(&to, READ, &v, 1, buf_size);
   3135
   3136	qp_lock(qpair);
   3137
   3138	do {
   3139		result = qp_dequeue_locked(qpair->produce_q,
   3140					   qpair->consume_q,
   3141					   qpair->consume_q_size,
   3142					   &to, false);
   3143
   3144		if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
   3145		    !qp_wait_for_ready_queue(qpair))
   3146			result = VMCI_ERROR_WOULD_BLOCK;
   3147
   3148	} while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
   3149
   3150	qp_unlock(qpair);
   3151
   3152	return result;
   3153}
   3154EXPORT_SYMBOL_GPL(vmci_qpair_peek);
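
/*
 * Editorial illustration (not part of the original driver): peeking lets a
 * client inspect, for example, a fixed-size message header without committing
 * to the read, since the consumer head pointer only moves when the data is
 * later dequeued.  The helper name and header-framing scheme below are
 * assumptions for illustration.
 */
#if 0	/* example only */
static ssize_t example_qp_peek_header(struct vmci_qp *qpair,
				      void *hdr, size_t hdr_len)
{
	ssize_t peeked = vmci_qpair_peek(qpair, hdr, hdr_len, 0);

	if (peeked < 0 || (size_t)peeked < hdr_len)
		return peeked;	/* error, or full header not queued yet */

	/* Full header seen; the same bytes remain queued for dequeue. */
	return peeked;
}
#endif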
   3155
   3156/*
   3157 * vmci_qpair_enquev() - Throw data on the queue using iov.
   3158 * @qpair:      Pointer to the queue pair struct.
    3159 * @msg:        Pointer to a struct msghdr whose msg_iter describes the data.
    3160 * @iov_size:   Total length of the data described by @msg.
   3161 * @buf_type:   Buffer type (Unused).
   3162 *
   3163 * This is the client interface for enqueueing data into the queue.
   3164 * This function uses IO vectors to handle the work. Returns number
   3165 * of bytes enqueued or < 0 on error.
   3166 */
   3167ssize_t vmci_qpair_enquev(struct vmci_qp *qpair,
   3168			  struct msghdr *msg,
   3169			  size_t iov_size,
   3170			  int buf_type)
   3171{
   3172	ssize_t result;
   3173
   3174	if (!qpair)
   3175		return VMCI_ERROR_INVALID_ARGS;
   3176
   3177	qp_lock(qpair);
   3178
   3179	do {
   3180		result = qp_enqueue_locked(qpair->produce_q,
   3181					   qpair->consume_q,
   3182					   qpair->produce_q_size,
   3183					   &msg->msg_iter);
   3184
   3185		if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
   3186		    !qp_wait_for_ready_queue(qpair))
   3187			result = VMCI_ERROR_WOULD_BLOCK;
   3188
   3189	} while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
   3190
   3191	qp_unlock(qpair);
   3192
   3193	return result;
   3194}
   3195EXPORT_SYMBOL_GPL(vmci_qpair_enquev);
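
/*
 * Editorial illustration (not part of the original driver): in-kernel callers
 * typically hand vmci_qpair_enquev() a msghdr received from the socket layer,
 * whose msg_iter already describes the payload.  The sketch below instead
 * builds a msghdr around a kvec, mirroring what vmci_qpair_enqueue() itself
 * does above; the helper name is illustrative.
 */
#if 0	/* example only */
static ssize_t example_qp_sendv(struct vmci_qp *qpair, void *data, size_t len)
{
	struct kvec v = { .iov_base = data, .iov_len = len };
	struct msghdr msg = { };

	iov_iter_kvec(&msg.msg_iter, WRITE, &v, 1, len);

	return vmci_qpair_enquev(qpair, &msg, len, 0);
}
#endif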
   3196
   3197/*
   3198 * vmci_qpair_dequev() - Get data from the queue using iov.
   3199 * @qpair:      Pointer to the queue pair struct.
    3200 * @msg:        Pointer to a struct msghdr whose msg_iter receives the data.
    3201 * @iov_size:   Total length of the buffer space described by @msg.
   3202 * @buf_type:   Buffer type (Unused).
   3203 *
   3204 * This is the client interface for dequeueing data from the queue.
   3205 * This function uses IO vectors to handle the work. Returns number
   3206 * of bytes dequeued or < 0 on error.
   3207 */
   3208ssize_t vmci_qpair_dequev(struct vmci_qp *qpair,
   3209			  struct msghdr *msg,
   3210			  size_t iov_size,
   3211			  int buf_type)
   3212{
   3213	ssize_t result;
   3214
   3215	if (!qpair)
   3216		return VMCI_ERROR_INVALID_ARGS;
   3217
   3218	qp_lock(qpair);
   3219
   3220	do {
   3221		result = qp_dequeue_locked(qpair->produce_q,
   3222					   qpair->consume_q,
   3223					   qpair->consume_q_size,
   3224					   &msg->msg_iter, true);
   3225
   3226		if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
   3227		    !qp_wait_for_ready_queue(qpair))
   3228			result = VMCI_ERROR_WOULD_BLOCK;
   3229
   3230	} while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
   3231
   3232	qp_unlock(qpair);
   3233
   3234	return result;
   3235}
   3236EXPORT_SYMBOL_GPL(vmci_qpair_dequev);
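
/*
 * Editorial illustration (not part of the original driver): the iov receive
 * path mirrors the send sketch; when the destination is user space, the
 * caller would normally pass the msghdr supplied by recvmsg() so the copy
 * lands directly in the user iovec.  Names below are illustrative.
 */
#if 0	/* example only */
static ssize_t example_qp_recvv(struct vmci_qp *qpair, void *data, size_t len)
{
	struct kvec v = { .iov_base = data, .iov_len = len };
	struct msghdr msg = { };

	iov_iter_kvec(&msg.msg_iter, READ, &v, 1, len);

	return vmci_qpair_dequev(qpair, &msg, len, 0);
}
#endif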
   3237
   3238/*
   3239 * vmci_qpair_peekv() - Peek at the data in the queue using iov.
   3240 * @qpair:      Pointer to the queue pair struct.
    3241 * @msg:        Pointer to a struct msghdr whose msg_iter receives the data.
    3242 * @iov_size:   Total length of the buffer space described by @msg.
   3243 * @buf_type:   Buffer type (Unused on Linux).
   3244 *
   3245 * This is the client interface for peeking into a queue.  (I.e.,
   3246 * copy data from the queue without updating the head pointer.)
   3247 * This function uses IO vectors to handle the work. Returns number
   3248 * of bytes peeked or < 0 on error.
   3249 */
   3250ssize_t vmci_qpair_peekv(struct vmci_qp *qpair,
   3251			 struct msghdr *msg,
   3252			 size_t iov_size,
   3253			 int buf_type)
   3254{
   3255	ssize_t result;
   3256
   3257	if (!qpair)
   3258		return VMCI_ERROR_INVALID_ARGS;
   3259
   3260	qp_lock(qpair);
   3261
   3262	do {
   3263		result = qp_dequeue_locked(qpair->produce_q,
   3264					   qpair->consume_q,
   3265					   qpair->consume_q_size,
   3266					   &msg->msg_iter, false);
   3267
   3268		if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY &&
   3269		    !qp_wait_for_ready_queue(qpair))
   3270			result = VMCI_ERROR_WOULD_BLOCK;
   3271
   3272	} while (result == VMCI_ERROR_QUEUEPAIR_NOT_READY);
   3273
   3274	qp_unlock(qpair);
   3275	return result;
   3276}
   3277EXPORT_SYMBOL_GPL(vmci_qpair_peekv);
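
/*
 * Editorial illustration (not part of the original driver): peeking with an
 * iov copies data but leaves the queue head untouched, while the msg_iter
 * itself is still advanced by the copy.  A caller that wants to peek and then
 * dequeue the same bytes therefore re-initializes the iterator (or uses a
 * second msghdr) before calling vmci_qpair_dequev(), as sketched below with
 * illustrative names.
 */
#if 0	/* example only */
static ssize_t example_qp_peek_then_recv(struct vmci_qp *qpair,
					 void *data, size_t len)
{
	struct kvec v = { .iov_base = data, .iov_len = len };
	struct msghdr msg = { };
	ssize_t peeked;

	iov_iter_kvec(&msg.msg_iter, READ, &v, 1, len);
	peeked = vmci_qpair_peekv(qpair, &msg, len, 0);
	if (peeked < 0)
		return peeked;

	/* Re-arm the iterator before consuming the bytes for real. */
	iov_iter_kvec(&msg.msg_iter, READ, &v, 1, len);
	return vmci_qpair_dequev(qpair, &msg, len, 0);
}
#endif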