cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

vmci_context.c (31410B)


      1// SPDX-License-Identifier: GPL-2.0-only
      2/*
      3 * VMware VMCI Driver
      4 *
      5 * Copyright (C) 2012 VMware, Inc. All rights reserved.
      6 */
      7
#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
#include <linux/highmem.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/cred.h>
#include <linux/slab.h>
#include <linux/string.h>
     16
     17#include "vmci_queue_pair.h"
     18#include "vmci_datagram.h"
     19#include "vmci_doorbell.h"
     20#include "vmci_context.h"
     21#include "vmci_driver.h"
     22#include "vmci_event.h"
     23
     24/* Use a wide upper bound for the maximum contexts. */
     25#define VMCI_MAX_CONTEXTS 2000
     26
     27/*
     28 * List of current VMCI contexts.  Contexts can be added by
     29 * vmci_ctx_create() and removed via vmci_ctx_destroy().
     30 * These, along with context lookup, are protected by the
     31 * list structure's lock.
     32 */
     33static struct {
     34	struct list_head head;
     35	spinlock_t lock; /* Spinlock for context list operations */
     36} ctx_list = {
     37	.head = LIST_HEAD_INIT(ctx_list.head),
     38	.lock = __SPIN_LOCK_UNLOCKED(ctx_list.lock),
     39};
     40
     41/* Used by contexts that did not set up notify flag pointers */
     42static bool ctx_dummy_notify;
     43
     44static void ctx_signal_notify(struct vmci_ctx *context)
     45{
     46	*context->notify = true;
     47}
     48
     49static void ctx_clear_notify(struct vmci_ctx *context)
     50{
     51	*context->notify = false;
     52}
     53
     54/*
     55 * If nothing requires the attention of the guest, clears both
     56 * notify flag and call.
     57 */
     58static void ctx_clear_notify_call(struct vmci_ctx *context)
     59{
     60	if (context->pending_datagrams == 0 &&
     61	    vmci_handle_arr_get_size(context->pending_doorbell_array) == 0)
     62		ctx_clear_notify(context);
     63}
     64
     65/*
     66 * Sets the context's notify flag iff datagrams are pending for this
     67 * context.  Called from vmci_setup_notify().
     68 */
     69void vmci_ctx_check_signal_notify(struct vmci_ctx *context)
     70{
     71	spin_lock(&context->lock);
     72	if (context->pending_datagrams)
     73		ctx_signal_notify(context);
     74	spin_unlock(&context->lock);
     75}
     76
     77/*
     78 * Allocates and initializes a VMCI context.
     79 */
     80struct vmci_ctx *vmci_ctx_create(u32 cid, u32 priv_flags,
     81				 uintptr_t event_hnd,
     82				 int user_version,
     83				 const struct cred *cred)
     84{
     85	struct vmci_ctx *context;
     86	int error;
     87
     88	if (cid == VMCI_INVALID_ID) {
     89		pr_devel("Invalid context ID for VMCI context\n");
     90		error = -EINVAL;
     91		goto err_out;
     92	}
     93
     94	if (priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS) {
     95		pr_devel("Invalid flag (flags=0x%x) for VMCI context\n",
     96			 priv_flags);
     97		error = -EINVAL;
     98		goto err_out;
     99	}
    100
    101	if (user_version == 0) {
    102		pr_devel("Invalid suer_version %d\n", user_version);
    103		error = -EINVAL;
    104		goto err_out;
    105	}
    106
    107	context = kzalloc(sizeof(*context), GFP_KERNEL);
    108	if (!context) {
    109		pr_warn("Failed to allocate memory for VMCI context\n");
    110		error = -ENOMEM;
    111		goto err_out;
    112	}
    113
    114	kref_init(&context->kref);
    115	spin_lock_init(&context->lock);
    116	INIT_LIST_HEAD(&context->list_item);
    117	INIT_LIST_HEAD(&context->datagram_queue);
    118	INIT_LIST_HEAD(&context->notifier_list);
    119
    120	/* Initialize host-specific VMCI context. */
    121	init_waitqueue_head(&context->host_context.wait_queue);
    122
    123	context->queue_pair_array =
    124		vmci_handle_arr_create(0, VMCI_MAX_GUEST_QP_COUNT);
    125	if (!context->queue_pair_array) {
    126		error = -ENOMEM;
    127		goto err_free_ctx;
    128	}
    129
    130	context->doorbell_array =
    131		vmci_handle_arr_create(0, VMCI_MAX_GUEST_DOORBELL_COUNT);
    132	if (!context->doorbell_array) {
    133		error = -ENOMEM;
    134		goto err_free_qp_array;
    135	}
    136
    137	context->pending_doorbell_array =
    138		vmci_handle_arr_create(0, VMCI_MAX_GUEST_DOORBELL_COUNT);
    139	if (!context->pending_doorbell_array) {
    140		error = -ENOMEM;
    141		goto err_free_db_array;
    142	}
    143
    144	context->user_version = user_version;
    145
    146	context->priv_flags = priv_flags;
    147
    148	if (cred)
    149		context->cred = get_cred(cred);
    150
    151	context->notify = &ctx_dummy_notify;
    152	context->notify_page = NULL;
    153
    154	/*
    155	 * If we collide with an existing context we generate a new
    156	 * and use it instead. The VMX will determine if regeneration
    157	 * is okay. Since there isn't 4B - 16 VMs running on a given
    158	 * host, the below loop will terminate.
    159	 */
    160	spin_lock(&ctx_list.lock);
    161
    162	while (vmci_ctx_exists(cid)) {
    163		/* We reserve the lowest 16 ids for fixed contexts. */
    164		cid = max(cid, VMCI_RESERVED_CID_LIMIT - 1) + 1;
    165		if (cid == VMCI_INVALID_ID)
    166			cid = VMCI_RESERVED_CID_LIMIT;
    167	}
    168	context->cid = cid;
    169
    170	list_add_tail_rcu(&context->list_item, &ctx_list.head);
    171	spin_unlock(&ctx_list.lock);
    172
    173	return context;
    174
    175 err_free_db_array:
    176	vmci_handle_arr_destroy(context->doorbell_array);
    177 err_free_qp_array:
    178	vmci_handle_arr_destroy(context->queue_pair_array);
    179 err_free_ctx:
    180	kfree(context);
    181 err_out:
    182	return ERR_PTR(error);
    183}
    184
    185/*
    186 * Destroy VMCI context.
    187 */
    188void vmci_ctx_destroy(struct vmci_ctx *context)
    189{
    190	spin_lock(&ctx_list.lock);
    191	list_del_rcu(&context->list_item);
    192	spin_unlock(&ctx_list.lock);
    193	synchronize_rcu();
    194
    195	vmci_ctx_put(context);
    196}
    197
    198/*
    199 * Fire notification for all contexts interested in given cid.
    200 */
    201static int ctx_fire_notification(u32 context_id, u32 priv_flags)
    202{
    203	u32 i, array_size;
    204	struct vmci_ctx *sub_ctx;
    205	struct vmci_handle_arr *subscriber_array;
    206	struct vmci_handle context_handle =
    207		vmci_make_handle(context_id, VMCI_EVENT_HANDLER);
    208
    209	/*
    210	 * We create an array to hold the subscribers we find when
    211	 * scanning through all contexts.
    212	 */
    213	subscriber_array = vmci_handle_arr_create(0, VMCI_MAX_CONTEXTS);
    214	if (subscriber_array == NULL)
    215		return VMCI_ERROR_NO_MEM;
    216
    217	/*
    218	 * Scan all contexts to find who is interested in being
    219	 * notified about given contextID.
    220	 */
    221	rcu_read_lock();
    222	list_for_each_entry_rcu(sub_ctx, &ctx_list.head, list_item) {
    223		struct vmci_handle_list *node;
    224
    225		/*
    226		 * We only deliver notifications of the removal of
    227		 * contexts, if the two contexts are allowed to
    228		 * interact.
    229		 */
    230		if (vmci_deny_interaction(priv_flags, sub_ctx->priv_flags))
    231			continue;
    232
    233		list_for_each_entry_rcu(node, &sub_ctx->notifier_list, node) {
    234			if (!vmci_handle_is_equal(node->handle, context_handle))
    235				continue;
    236
    237			vmci_handle_arr_append_entry(&subscriber_array,
    238					vmci_make_handle(sub_ctx->cid,
    239							 VMCI_EVENT_HANDLER));
    240		}
    241	}
    242	rcu_read_unlock();
    243
    244	/* Fire event to all subscribers. */
    245	array_size = vmci_handle_arr_get_size(subscriber_array);
    246	for (i = 0; i < array_size; i++) {
    247		int result;
    248		struct vmci_event_ctx ev;
    249
    250		ev.msg.hdr.dst = vmci_handle_arr_get_entry(subscriber_array, i);
    251		ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
    252						  VMCI_CONTEXT_RESOURCE_ID);
    253		ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr);
    254		ev.msg.event_data.event = VMCI_EVENT_CTX_REMOVED;
    255		ev.payload.context_id = context_id;
    256
    257		result = vmci_datagram_dispatch(VMCI_HYPERVISOR_CONTEXT_ID,
    258						&ev.msg.hdr, false);
    259		if (result < VMCI_SUCCESS) {
    260			pr_devel("Failed to enqueue event datagram (type=%d) for context (ID=0x%x)\n",
    261				 ev.msg.event_data.event,
    262				 ev.msg.hdr.dst.context);
    263			/* We continue to enqueue on next subscriber. */
    264		}
    265	}
    266	vmci_handle_arr_destroy(subscriber_array);
    267
    268	return VMCI_SUCCESS;
    269}
    270
    271/*
    272 * Returns the current number of pending datagrams. The call may
    273 * also serve as a synchronization point for the datagram queue,
    274 * as no enqueue operations can occur concurrently.
    275 */
    276int vmci_ctx_pending_datagrams(u32 cid, u32 *pending)
    277{
    278	struct vmci_ctx *context;
    279
    280	context = vmci_ctx_get(cid);
    281	if (context == NULL)
    282		return VMCI_ERROR_INVALID_ARGS;
    283
    284	spin_lock(&context->lock);
    285	if (pending)
    286		*pending = context->pending_datagrams;
    287	spin_unlock(&context->lock);
    288	vmci_ctx_put(context);
    289
    290	return VMCI_SUCCESS;
    291}
    292
    293/*
    294 * Queues a VMCI datagram for the appropriate target VM context.
    295 */
    296int vmci_ctx_enqueue_datagram(u32 cid, struct vmci_datagram *dg)
    297{
    298	struct vmci_datagram_queue_entry *dq_entry;
    299	struct vmci_ctx *context;
    300	struct vmci_handle dg_src;
    301	size_t vmci_dg_size;
    302
    303	vmci_dg_size = VMCI_DG_SIZE(dg);
    304	if (vmci_dg_size > VMCI_MAX_DG_SIZE) {
    305		pr_devel("Datagram too large (bytes=%zu)\n", vmci_dg_size);
    306		return VMCI_ERROR_INVALID_ARGS;
    307	}
    308
    309	/* Get the target VM's VMCI context. */
    310	context = vmci_ctx_get(cid);
    311	if (!context) {
    312		pr_devel("Invalid context (ID=0x%x)\n", cid);
    313		return VMCI_ERROR_INVALID_ARGS;
    314	}
    315
    316	/* Allocate guest call entry and add it to the target VM's queue. */
    317	dq_entry = kmalloc(sizeof(*dq_entry), GFP_KERNEL);
    318	if (dq_entry == NULL) {
    319		pr_warn("Failed to allocate memory for datagram\n");
    320		vmci_ctx_put(context);
    321		return VMCI_ERROR_NO_MEM;
    322	}
    323	dq_entry->dg = dg;
    324	dq_entry->dg_size = vmci_dg_size;
    325	dg_src = dg->src;
    326	INIT_LIST_HEAD(&dq_entry->list_item);
    327
    328	spin_lock(&context->lock);
    329
    330	/*
    331	 * We put a higher limit on datagrams from the hypervisor.  If
    332	 * the pending datagram is not from hypervisor, then we check
    333	 * if enqueueing it would exceed the
    334	 * VMCI_MAX_DATAGRAM_QUEUE_SIZE limit on the destination.  If
    335	 * the pending datagram is from hypervisor, we allow it to be
    336	 * queued at the destination side provided we don't reach the
    337	 * VMCI_MAX_DATAGRAM_AND_EVENT_QUEUE_SIZE limit.
    338	 */
    339	if (context->datagram_queue_size + vmci_dg_size >=
    340	    VMCI_MAX_DATAGRAM_QUEUE_SIZE &&
    341	    (!vmci_handle_is_equal(dg_src,
    342				vmci_make_handle
    343				(VMCI_HYPERVISOR_CONTEXT_ID,
    344				 VMCI_CONTEXT_RESOURCE_ID)) ||
    345	     context->datagram_queue_size + vmci_dg_size >=
    346	     VMCI_MAX_DATAGRAM_AND_EVENT_QUEUE_SIZE)) {
    347		spin_unlock(&context->lock);
    348		vmci_ctx_put(context);
    349		kfree(dq_entry);
    350		pr_devel("Context (ID=0x%x) receive queue is full\n", cid);
    351		return VMCI_ERROR_NO_RESOURCES;
    352	}
    353
    354	list_add(&dq_entry->list_item, &context->datagram_queue);
    355	context->pending_datagrams++;
    356	context->datagram_queue_size += vmci_dg_size;
    357	ctx_signal_notify(context);
    358	wake_up(&context->host_context.wait_queue);
    359	spin_unlock(&context->lock);
    360	vmci_ctx_put(context);
    361
    362	return vmci_dg_size;
    363}
    364
    365/*
    366 * Verifies whether a context with the specified context ID exists.
    367 * FIXME: utility is dubious as no decisions can be reliably made
    368 * using this data as context can appear and disappear at any time.
    369 */
    370bool vmci_ctx_exists(u32 cid)
    371{
    372	struct vmci_ctx *context;
    373	bool exists = false;
    374
    375	rcu_read_lock();
    376
    377	list_for_each_entry_rcu(context, &ctx_list.head, list_item) {
    378		if (context->cid == cid) {
    379			exists = true;
    380			break;
    381		}
    382	}
    383
    384	rcu_read_unlock();
    385	return exists;
    386}
    387
    388/*
    389 * Retrieves VMCI context corresponding to the given cid.
    390 */
    391struct vmci_ctx *vmci_ctx_get(u32 cid)
    392{
    393	struct vmci_ctx *c, *context = NULL;
    394
    395	if (cid == VMCI_INVALID_ID)
    396		return NULL;
    397
    398	rcu_read_lock();
    399	list_for_each_entry_rcu(c, &ctx_list.head, list_item) {
    400		if (c->cid == cid) {
    401			/*
    402			 * The context owner drops its own reference to the
    403			 * context only after removing it from the list and
    404			 * waiting for RCU grace period to expire. This
    405			 * means that we are not about to increase the
    406			 * reference count of something that is in the
    407			 * process of being destroyed.
    408			 */
    409			context = c;
    410			kref_get(&context->kref);
    411			break;
    412		}
    413	}
    414	rcu_read_unlock();
    415
    416	return context;
    417}
    418
    419/*
    420 * Deallocates all parts of a context data structure. This
    421 * function doesn't lock the context, because it assumes that
    422 * the caller was holding the last reference to context.
    423 */
    424static void ctx_free_ctx(struct kref *kref)
    425{
    426	struct vmci_ctx *context = container_of(kref, struct vmci_ctx, kref);
    427	struct vmci_datagram_queue_entry *dq_entry, *dq_entry_tmp;
    428	struct vmci_handle temp_handle;
    429	struct vmci_handle_list *notifier, *tmp;
    430
    431	/*
    432	 * Fire event to all contexts interested in knowing this
    433	 * context is dying.
    434	 */
    435	ctx_fire_notification(context->cid, context->priv_flags);
    436
    437	/*
    438	 * Cleanup all queue pair resources attached to context.  If
    439	 * the VM dies without cleaning up, this code will make sure
    440	 * that no resources are leaked.
    441	 */
    442	temp_handle = vmci_handle_arr_get_entry(context->queue_pair_array, 0);
    443	while (!vmci_handle_is_equal(temp_handle, VMCI_INVALID_HANDLE)) {
    444		if (vmci_qp_broker_detach(temp_handle,
    445					  context) < VMCI_SUCCESS) {
    446			/*
    447			 * When vmci_qp_broker_detach() succeeds it
    448			 * removes the handle from the array.  If
    449			 * detach fails, we must remove the handle
    450			 * ourselves.
    451			 */
    452			vmci_handle_arr_remove_entry(context->queue_pair_array,
    453						     temp_handle);
    454		}
    455		temp_handle =
    456		    vmci_handle_arr_get_entry(context->queue_pair_array, 0);
    457	}
    458
    459	/*
    460	 * It is fine to destroy this without locking the callQueue, as
    461	 * this is the only thread having a reference to the context.
    462	 */
    463	list_for_each_entry_safe(dq_entry, dq_entry_tmp,
    464				 &context->datagram_queue, list_item) {
    465		WARN_ON(dq_entry->dg_size != VMCI_DG_SIZE(dq_entry->dg));
    466		list_del(&dq_entry->list_item);
    467		kfree(dq_entry->dg);
    468		kfree(dq_entry);
    469	}
    470
    471	list_for_each_entry_safe(notifier, tmp,
    472				 &context->notifier_list, node) {
    473		list_del(&notifier->node);
    474		kfree(notifier);
    475	}
    476
    477	vmci_handle_arr_destroy(context->queue_pair_array);
    478	vmci_handle_arr_destroy(context->doorbell_array);
    479	vmci_handle_arr_destroy(context->pending_doorbell_array);
    480	vmci_ctx_unset_notify(context);
    481	if (context->cred)
    482		put_cred(context->cred);
    483	kfree(context);
    484}
    485
    486/*
    487 * Drops reference to VMCI context. If this is the last reference to
    488 * the context it will be deallocated. A context is created with
    489 * a reference count of one, and on destroy, it is removed from
    490 * the context list before its reference count is decremented. Thus,
    491 * if we reach zero, we are sure that nobody else are about to increment
    492 * it (they need the entry in the context list for that), and so there
    493 * is no need for locking.
    494 */
    495void vmci_ctx_put(struct vmci_ctx *context)
    496{
    497	kref_put(&context->kref, ctx_free_ctx);
    498}
    499
    500/*
    501 * Dequeues the next datagram and returns it to caller.
    502 * The caller passes in a pointer to the max size datagram
    503 * it can handle and the datagram is only unqueued if the
    504 * size is less than max_size. If larger max_size is set to
    505 * the size of the datagram to give the caller a chance to
    506 * set up a larger buffer for the guestcall.
    507 */
    508int vmci_ctx_dequeue_datagram(struct vmci_ctx *context,
    509			      size_t *max_size,
    510			      struct vmci_datagram **dg)
    511{
    512	struct vmci_datagram_queue_entry *dq_entry;
    513	struct list_head *list_item;
    514	int rv;
    515
    516	/* Dequeue the next datagram entry. */
    517	spin_lock(&context->lock);
    518	if (context->pending_datagrams == 0) {
    519		ctx_clear_notify_call(context);
    520		spin_unlock(&context->lock);
    521		pr_devel("No datagrams pending\n");
    522		return VMCI_ERROR_NO_MORE_DATAGRAMS;
    523	}
    524
    525	list_item = context->datagram_queue.next;
    526
    527	dq_entry =
    528	    list_entry(list_item, struct vmci_datagram_queue_entry, list_item);
    529
    530	/* Check size of caller's buffer. */
    531	if (*max_size < dq_entry->dg_size) {
    532		*max_size = dq_entry->dg_size;
    533		spin_unlock(&context->lock);
    534		pr_devel("Caller's buffer should be at least (size=%u bytes)\n",
    535			 (u32) *max_size);
    536		return VMCI_ERROR_NO_MEM;
    537	}
    538
    539	list_del(list_item);
    540	context->pending_datagrams--;
    541	context->datagram_queue_size -= dq_entry->dg_size;
    542	if (context->pending_datagrams == 0) {
    543		ctx_clear_notify_call(context);
    544		rv = VMCI_SUCCESS;
    545	} else {
    546		/*
    547		 * Return the size of the next datagram.
    548		 */
    549		struct vmci_datagram_queue_entry *next_entry;
    550
    551		list_item = context->datagram_queue.next;
    552		next_entry =
    553		    list_entry(list_item, struct vmci_datagram_queue_entry,
    554			       list_item);
    555
    556		/*
    557		 * The following size_t -> int truncation is fine as
    558		 * the maximum size of a (routable) datagram is 68KB.
    559		 */
    560		rv = (int)next_entry->dg_size;
    561	}
    562	spin_unlock(&context->lock);
    563
    564	/* Caller must free datagram. */
    565	*dg = dq_entry->dg;
    566	dq_entry->dg = NULL;
    567	kfree(dq_entry);
    568
    569	return rv;
    570}
    571
    572/*
    573 * Reverts actions set up by vmci_setup_notify().  Unmaps and unlocks the
    574 * page mapped/locked by vmci_setup_notify().
    575 */
    576void vmci_ctx_unset_notify(struct vmci_ctx *context)
    577{
    578	struct page *notify_page;
    579
    580	spin_lock(&context->lock);
    581
    582	notify_page = context->notify_page;
    583	context->notify = &ctx_dummy_notify;
    584	context->notify_page = NULL;
    585
    586	spin_unlock(&context->lock);
    587
    588	if (notify_page) {
    589		kunmap(notify_page);
    590		put_page(notify_page);
    591	}
    592}
    593
    594/*
    595 * Add remote_cid to list of contexts current contexts wants
    596 * notifications from/about.
    597 */
    598int vmci_ctx_add_notification(u32 context_id, u32 remote_cid)
    599{
    600	struct vmci_ctx *context;
    601	struct vmci_handle_list *notifier, *n;
    602	int result;
    603	bool exists = false;
    604
    605	context = vmci_ctx_get(context_id);
    606	if (!context)
    607		return VMCI_ERROR_NOT_FOUND;
    608
    609	if (VMCI_CONTEXT_IS_VM(context_id) && VMCI_CONTEXT_IS_VM(remote_cid)) {
    610		pr_devel("Context removed notifications for other VMs not supported (src=0x%x, remote=0x%x)\n",
    611			 context_id, remote_cid);
    612		result = VMCI_ERROR_DST_UNREACHABLE;
    613		goto out;
    614	}
    615
    616	if (context->priv_flags & VMCI_PRIVILEGE_FLAG_RESTRICTED) {
    617		result = VMCI_ERROR_NO_ACCESS;
    618		goto out;
    619	}
    620
    621	notifier = kmalloc(sizeof(struct vmci_handle_list), GFP_KERNEL);
    622	if (!notifier) {
    623		result = VMCI_ERROR_NO_MEM;
    624		goto out;
    625	}
    626
    627	INIT_LIST_HEAD(&notifier->node);
    628	notifier->handle = vmci_make_handle(remote_cid, VMCI_EVENT_HANDLER);
    629
    630	spin_lock(&context->lock);
    631
    632	if (context->n_notifiers < VMCI_MAX_CONTEXTS) {
    633		list_for_each_entry(n, &context->notifier_list, node) {
    634			if (vmci_handle_is_equal(n->handle, notifier->handle)) {
    635				exists = true;
    636				break;
    637			}
    638		}
    639
    640		if (exists) {
    641			kfree(notifier);
    642			result = VMCI_ERROR_ALREADY_EXISTS;
    643		} else {
    644			list_add_tail_rcu(&notifier->node,
    645					  &context->notifier_list);
    646			context->n_notifiers++;
    647			result = VMCI_SUCCESS;
    648		}
    649	} else {
    650		kfree(notifier);
    651		result = VMCI_ERROR_NO_MEM;
    652	}
    653
    654	spin_unlock(&context->lock);
    655
    656 out:
    657	vmci_ctx_put(context);
    658	return result;
    659}
    660
    661/*
    662 * Remove remote_cid from current context's list of contexts it is
    663 * interested in getting notifications from/about.
    664 */
    665int vmci_ctx_remove_notification(u32 context_id, u32 remote_cid)
    666{
    667	struct vmci_ctx *context;
    668	struct vmci_handle_list *notifier = NULL, *iter, *tmp;
    669	struct vmci_handle handle;
    670
    671	context = vmci_ctx_get(context_id);
    672	if (!context)
    673		return VMCI_ERROR_NOT_FOUND;
    674
    675	handle = vmci_make_handle(remote_cid, VMCI_EVENT_HANDLER);
    676
    677	spin_lock(&context->lock);
    678	list_for_each_entry_safe(iter, tmp,
    679				 &context->notifier_list, node) {
    680		if (vmci_handle_is_equal(iter->handle, handle)) {
    681			list_del_rcu(&iter->node);
    682			context->n_notifiers--;
    683			notifier = iter;
    684			break;
    685		}
    686	}
    687	spin_unlock(&context->lock);
    688
    689	if (notifier)
    690		kvfree_rcu(notifier);
    691
    692	vmci_ctx_put(context);
    693
    694	return notifier ? VMCI_SUCCESS : VMCI_ERROR_NOT_FOUND;
    695}
    696
    697static int vmci_ctx_get_chkpt_notifiers(struct vmci_ctx *context,
    698					u32 *buf_size, void **pbuf)
    699{
    700	u32 *notifiers;
    701	size_t data_size;
    702	struct vmci_handle_list *entry;
    703	int i = 0;
    704
    705	if (context->n_notifiers == 0) {
    706		*buf_size = 0;
    707		*pbuf = NULL;
    708		return VMCI_SUCCESS;
    709	}
    710
    711	data_size = context->n_notifiers * sizeof(*notifiers);
    712	if (*buf_size < data_size) {
    713		*buf_size = data_size;
    714		return VMCI_ERROR_MORE_DATA;
    715	}
    716
    717	notifiers = kmalloc(data_size, GFP_ATOMIC); /* FIXME: want GFP_KERNEL */
    718	if (!notifiers)
    719		return VMCI_ERROR_NO_MEM;
    720
    721	list_for_each_entry(entry, &context->notifier_list, node)
    722		notifiers[i++] = entry->handle.context;
    723
    724	*buf_size = data_size;
    725	*pbuf = notifiers;
    726	return VMCI_SUCCESS;
    727}
    728
    729static int vmci_ctx_get_chkpt_doorbells(struct vmci_ctx *context,
    730					u32 *buf_size, void **pbuf)
    731{
    732	struct dbell_cpt_state *dbells;
    733	u32 i, n_doorbells;
    734
    735	n_doorbells = vmci_handle_arr_get_size(context->doorbell_array);
    736	if (n_doorbells > 0) {
    737		size_t data_size = n_doorbells * sizeof(*dbells);
    738		if (*buf_size < data_size) {
    739			*buf_size = data_size;
    740			return VMCI_ERROR_MORE_DATA;
    741		}
    742
    743		dbells = kzalloc(data_size, GFP_ATOMIC);
    744		if (!dbells)
    745			return VMCI_ERROR_NO_MEM;
    746
    747		for (i = 0; i < n_doorbells; i++)
    748			dbells[i].handle = vmci_handle_arr_get_entry(
    749						context->doorbell_array, i);
    750
    751		*buf_size = data_size;
    752		*pbuf = dbells;
    753	} else {
    754		*buf_size = 0;
    755		*pbuf = NULL;
    756	}
    757
    758	return VMCI_SUCCESS;
    759}
    760
    761/*
    762 * Get current context's checkpoint state of given type.
    763 */
    764int vmci_ctx_get_chkpt_state(u32 context_id,
    765			     u32 cpt_type,
    766			     u32 *buf_size,
    767			     void **pbuf)
    768{
    769	struct vmci_ctx *context;
    770	int result;
    771
    772	context = vmci_ctx_get(context_id);
    773	if (!context)
    774		return VMCI_ERROR_NOT_FOUND;
    775
    776	spin_lock(&context->lock);
    777
    778	switch (cpt_type) {
    779	case VMCI_NOTIFICATION_CPT_STATE:
    780		result = vmci_ctx_get_chkpt_notifiers(context, buf_size, pbuf);
    781		break;
    782
    783	case VMCI_WELLKNOWN_CPT_STATE:
    784		/*
    785		 * For compatibility with VMX'en with VM to VM communication, we
    786		 * always return zero wellknown handles.
    787		 */
    788
    789		*buf_size = 0;
    790		*pbuf = NULL;
    791		result = VMCI_SUCCESS;
    792		break;
    793
    794	case VMCI_DOORBELL_CPT_STATE:
    795		result = vmci_ctx_get_chkpt_doorbells(context, buf_size, pbuf);
    796		break;
    797
    798	default:
    799		pr_devel("Invalid cpt state (type=%d)\n", cpt_type);
    800		result = VMCI_ERROR_INVALID_ARGS;
    801		break;
    802	}
    803
    804	spin_unlock(&context->lock);
    805	vmci_ctx_put(context);
    806
    807	return result;
    808}
    809
    810/*
    811 * Set current context's checkpoint state of given type.
    812 */
    813int vmci_ctx_set_chkpt_state(u32 context_id,
    814			     u32 cpt_type,
    815			     u32 buf_size,
    816			     void *cpt_buf)
    817{
    818	u32 i;
    819	u32 current_id;
    820	int result = VMCI_SUCCESS;
    821	u32 num_ids = buf_size / sizeof(u32);
    822
    823	if (cpt_type == VMCI_WELLKNOWN_CPT_STATE && num_ids > 0) {
    824		/*
    825		 * We would end up here if VMX with VM to VM communication
    826		 * attempts to restore a checkpoint with wellknown handles.
    827		 */
    828		pr_warn("Attempt to restore checkpoint with obsolete wellknown handles\n");
    829		return VMCI_ERROR_OBSOLETE;
    830	}
    831
    832	if (cpt_type != VMCI_NOTIFICATION_CPT_STATE) {
    833		pr_devel("Invalid cpt state (type=%d)\n", cpt_type);
    834		return VMCI_ERROR_INVALID_ARGS;
    835	}
    836
    837	for (i = 0; i < num_ids && result == VMCI_SUCCESS; i++) {
    838		current_id = ((u32 *)cpt_buf)[i];
    839		result = vmci_ctx_add_notification(context_id, current_id);
    840		if (result != VMCI_SUCCESS)
    841			break;
    842	}
    843	if (result != VMCI_SUCCESS)
    844		pr_devel("Failed to set cpt state (type=%d) (error=%d)\n",
    845			 cpt_type, result);
    846
    847	return result;
    848}
    849
    850/*
    851 * Retrieves the specified context's pending notifications in the
    852 * form of a handle array. The handle arrays returned are the
    853 * actual data - not a copy and should not be modified by the
    854 * caller. They must be released using
    855 * vmci_ctx_rcv_notifications_release.
    856 */
    857int vmci_ctx_rcv_notifications_get(u32 context_id,
    858				   struct vmci_handle_arr **db_handle_array,
    859				   struct vmci_handle_arr **qp_handle_array)
    860{
    861	struct vmci_ctx *context;
    862	int result = VMCI_SUCCESS;
    863
    864	context = vmci_ctx_get(context_id);
    865	if (context == NULL)
    866		return VMCI_ERROR_NOT_FOUND;
    867
    868	spin_lock(&context->lock);
    869
    870	*db_handle_array = context->pending_doorbell_array;
    871	context->pending_doorbell_array =
    872		vmci_handle_arr_create(0, VMCI_MAX_GUEST_DOORBELL_COUNT);
    873	if (!context->pending_doorbell_array) {
    874		context->pending_doorbell_array = *db_handle_array;
    875		*db_handle_array = NULL;
    876		result = VMCI_ERROR_NO_MEM;
    877	}
    878	*qp_handle_array = NULL;
    879
    880	spin_unlock(&context->lock);
    881	vmci_ctx_put(context);
    882
    883	return result;
    884}
    885
    886/*
    887 * Releases handle arrays with pending notifications previously
    888 * retrieved using vmci_ctx_rcv_notifications_get. If the
    889 * notifications were not successfully handed over to the guest,
    890 * success must be false.
    891 */
    892void vmci_ctx_rcv_notifications_release(u32 context_id,
    893					struct vmci_handle_arr *db_handle_array,
    894					struct vmci_handle_arr *qp_handle_array,
    895					bool success)
    896{
    897	struct vmci_ctx *context = vmci_ctx_get(context_id);
    898
    899	spin_lock(&context->lock);
    900	if (!success) {
    901		struct vmci_handle handle;
    902
    903		/*
    904		 * New notifications may have been added while we were not
    905		 * holding the context lock, so we transfer any new pending
    906		 * doorbell notifications to the old array, and reinstate the
    907		 * old array.
    908		 */
    909
    910		handle = vmci_handle_arr_remove_tail(
    911					context->pending_doorbell_array);
    912		while (!vmci_handle_is_invalid(handle)) {
    913			if (!vmci_handle_arr_has_entry(db_handle_array,
    914						       handle)) {
    915				vmci_handle_arr_append_entry(
    916						&db_handle_array, handle);
    917			}
    918			handle = vmci_handle_arr_remove_tail(
    919					context->pending_doorbell_array);
    920		}
    921		vmci_handle_arr_destroy(context->pending_doorbell_array);
    922		context->pending_doorbell_array = db_handle_array;
    923		db_handle_array = NULL;
    924	} else {
    925		ctx_clear_notify_call(context);
    926	}
    927	spin_unlock(&context->lock);
    928	vmci_ctx_put(context);
    929
    930	if (db_handle_array)
    931		vmci_handle_arr_destroy(db_handle_array);
    932
    933	if (qp_handle_array)
    934		vmci_handle_arr_destroy(qp_handle_array);
    935}
    936
    937/*
    938 * Registers that a new doorbell handle has been allocated by the
    939 * context. Only doorbell handles registered can be notified.
    940 */
    941int vmci_ctx_dbell_create(u32 context_id, struct vmci_handle handle)
    942{
    943	struct vmci_ctx *context;
    944	int result;
    945
    946	if (context_id == VMCI_INVALID_ID || vmci_handle_is_invalid(handle))
    947		return VMCI_ERROR_INVALID_ARGS;
    948
    949	context = vmci_ctx_get(context_id);
    950	if (context == NULL)
    951		return VMCI_ERROR_NOT_FOUND;
    952
    953	spin_lock(&context->lock);
    954	if (!vmci_handle_arr_has_entry(context->doorbell_array, handle))
    955		result = vmci_handle_arr_append_entry(&context->doorbell_array,
    956						      handle);
    957	else
    958		result = VMCI_ERROR_DUPLICATE_ENTRY;
    959
    960	spin_unlock(&context->lock);
    961	vmci_ctx_put(context);
    962
    963	return result;
    964}
    965
    966/*
    967 * Unregisters a doorbell handle that was previously registered
    968 * with vmci_ctx_dbell_create.
    969 */
    970int vmci_ctx_dbell_destroy(u32 context_id, struct vmci_handle handle)
    971{
    972	struct vmci_ctx *context;
    973	struct vmci_handle removed_handle;
    974
    975	if (context_id == VMCI_INVALID_ID || vmci_handle_is_invalid(handle))
    976		return VMCI_ERROR_INVALID_ARGS;
    977
    978	context = vmci_ctx_get(context_id);
    979	if (context == NULL)
    980		return VMCI_ERROR_NOT_FOUND;
    981
    982	spin_lock(&context->lock);
    983	removed_handle =
    984	    vmci_handle_arr_remove_entry(context->doorbell_array, handle);
    985	vmci_handle_arr_remove_entry(context->pending_doorbell_array, handle);
    986	spin_unlock(&context->lock);
    987
    988	vmci_ctx_put(context);
    989
    990	return vmci_handle_is_invalid(removed_handle) ?
    991	    VMCI_ERROR_NOT_FOUND : VMCI_SUCCESS;
    992}
    993
    994/*
    995 * Unregisters all doorbell handles that were previously
    996 * registered with vmci_ctx_dbell_create.
    997 */
    998int vmci_ctx_dbell_destroy_all(u32 context_id)
    999{
   1000	struct vmci_ctx *context;
   1001	struct vmci_handle handle;
   1002
   1003	if (context_id == VMCI_INVALID_ID)
   1004		return VMCI_ERROR_INVALID_ARGS;
   1005
   1006	context = vmci_ctx_get(context_id);
   1007	if (context == NULL)
   1008		return VMCI_ERROR_NOT_FOUND;
   1009
   1010	spin_lock(&context->lock);
   1011	do {
   1012		struct vmci_handle_arr *arr = context->doorbell_array;
   1013		handle = vmci_handle_arr_remove_tail(arr);
   1014	} while (!vmci_handle_is_invalid(handle));
   1015	do {
   1016		struct vmci_handle_arr *arr = context->pending_doorbell_array;
   1017		handle = vmci_handle_arr_remove_tail(arr);
   1018	} while (!vmci_handle_is_invalid(handle));
   1019	spin_unlock(&context->lock);
   1020
   1021	vmci_ctx_put(context);
   1022
   1023	return VMCI_SUCCESS;
   1024}
   1025
   1026/*
   1027 * Registers a notification of a doorbell handle initiated by the
   1028 * specified source context. The notification of doorbells are
   1029 * subject to the same isolation rules as datagram delivery. To
   1030 * allow host side senders of notifications a finer granularity
   1031 * of sender rights than those assigned to the sending context
   1032 * itself, the host context is required to specify a different
   1033 * set of privilege flags that will override the privileges of
   1034 * the source context.
   1035 */
   1036int vmci_ctx_notify_dbell(u32 src_cid,
   1037			  struct vmci_handle handle,
   1038			  u32 src_priv_flags)
   1039{
   1040	struct vmci_ctx *dst_context;
   1041	int result;
   1042
   1043	if (vmci_handle_is_invalid(handle))
   1044		return VMCI_ERROR_INVALID_ARGS;
   1045
   1046	/* Get the target VM's VMCI context. */
   1047	dst_context = vmci_ctx_get(handle.context);
   1048	if (!dst_context) {
   1049		pr_devel("Invalid context (ID=0x%x)\n", handle.context);
   1050		return VMCI_ERROR_NOT_FOUND;
   1051	}
   1052
   1053	if (src_cid != handle.context) {
   1054		u32 dst_priv_flags;
   1055
   1056		if (VMCI_CONTEXT_IS_VM(src_cid) &&
   1057		    VMCI_CONTEXT_IS_VM(handle.context)) {
   1058			pr_devel("Doorbell notification from VM to VM not supported (src=0x%x, dst=0x%x)\n",
   1059				 src_cid, handle.context);
   1060			result = VMCI_ERROR_DST_UNREACHABLE;
   1061			goto out;
   1062		}
   1063
   1064		result = vmci_dbell_get_priv_flags(handle, &dst_priv_flags);
   1065		if (result < VMCI_SUCCESS) {
   1066			pr_warn("Failed to get privilege flags for destination (handle=0x%x:0x%x)\n",
   1067				handle.context, handle.resource);
   1068			goto out;
   1069		}
   1070
   1071		if (src_cid != VMCI_HOST_CONTEXT_ID ||
   1072		    src_priv_flags == VMCI_NO_PRIVILEGE_FLAGS) {
   1073			src_priv_flags = vmci_context_get_priv_flags(src_cid);
   1074		}
   1075
   1076		if (vmci_deny_interaction(src_priv_flags, dst_priv_flags)) {
   1077			result = VMCI_ERROR_NO_ACCESS;
   1078			goto out;
   1079		}
   1080	}
   1081
   1082	if (handle.context == VMCI_HOST_CONTEXT_ID) {
   1083		result = vmci_dbell_host_context_notify(src_cid, handle);
   1084	} else {
   1085		spin_lock(&dst_context->lock);
   1086
   1087		if (!vmci_handle_arr_has_entry(dst_context->doorbell_array,
   1088					       handle)) {
   1089			result = VMCI_ERROR_NOT_FOUND;
   1090		} else {
   1091			if (!vmci_handle_arr_has_entry(
   1092					dst_context->pending_doorbell_array,
   1093					handle)) {
   1094				result = vmci_handle_arr_append_entry(
   1095					&dst_context->pending_doorbell_array,
   1096					handle);
   1097				if (result == VMCI_SUCCESS) {
   1098					ctx_signal_notify(dst_context);
   1099					wake_up(&dst_context->host_context.wait_queue);
   1100				}
   1101			} else {
   1102				result = VMCI_SUCCESS;
   1103			}
   1104		}
   1105		spin_unlock(&dst_context->lock);
   1106	}
   1107
   1108 out:
   1109	vmci_ctx_put(dst_context);
   1110
   1111	return result;
   1112}
   1113
   1114bool vmci_ctx_supports_host_qp(struct vmci_ctx *context)
   1115{
   1116	return context && context->user_version >= VMCI_VERSION_HOSTQP;
   1117}
   1118
   1119/*
   1120 * Registers that a new queue pair handle has been allocated by
   1121 * the context.
   1122 */
   1123int vmci_ctx_qp_create(struct vmci_ctx *context, struct vmci_handle handle)
   1124{
   1125	int result;
   1126
   1127	if (context == NULL || vmci_handle_is_invalid(handle))
   1128		return VMCI_ERROR_INVALID_ARGS;
   1129
   1130	if (!vmci_handle_arr_has_entry(context->queue_pair_array, handle))
   1131		result = vmci_handle_arr_append_entry(
   1132			&context->queue_pair_array, handle);
   1133	else
   1134		result = VMCI_ERROR_DUPLICATE_ENTRY;
   1135
   1136	return result;
   1137}
   1138
   1139/*
   1140 * Unregisters a queue pair handle that was previously registered
   1141 * with vmci_ctx_qp_create.
   1142 */
   1143int vmci_ctx_qp_destroy(struct vmci_ctx *context, struct vmci_handle handle)
   1144{
   1145	struct vmci_handle hndl;
   1146
   1147	if (context == NULL || vmci_handle_is_invalid(handle))
   1148		return VMCI_ERROR_INVALID_ARGS;
   1149
   1150	hndl = vmci_handle_arr_remove_entry(context->queue_pair_array, handle);
   1151
   1152	return vmci_handle_is_invalid(hndl) ?
   1153		VMCI_ERROR_NOT_FOUND : VMCI_SUCCESS;
   1154}
   1155
   1156/*
   1157 * Determines whether a given queue pair handle is registered
   1158 * with the given context.
   1159 */
   1160bool vmci_ctx_qp_exists(struct vmci_ctx *context, struct vmci_handle handle)
   1161{
   1162	if (context == NULL || vmci_handle_is_invalid(handle))
   1163		return false;
   1164
   1165	return vmci_handle_arr_has_entry(context->queue_pair_array, handle);
   1166}
   1167
   1168/*
   1169 * vmci_context_get_priv_flags() - Retrieve privilege flags.
   1170 * @context_id: The context ID of the VMCI context.
   1171 *
   1172 * Retrieves privilege flags of the given VMCI context ID.
   1173 */
   1174u32 vmci_context_get_priv_flags(u32 context_id)
   1175{
   1176	if (vmci_host_code_active()) {
   1177		u32 flags;
   1178		struct vmci_ctx *context;
   1179
   1180		context = vmci_ctx_get(context_id);
   1181		if (!context)
   1182			return VMCI_LEAST_PRIVILEGE_FLAGS;
   1183
   1184		flags = context->priv_flags;
   1185		vmci_ctx_put(context);
   1186		return flags;
   1187	}
   1188	return VMCI_NO_PRIVILEGE_FLAGS;
   1189}
   1190EXPORT_SYMBOL_GPL(vmci_context_get_priv_flags);
   1191
   1192/*
   1193 * vmci_is_context_owner() - Determimnes if user is the context owner
   1194 * @context_id: The context ID of the VMCI context.
   1195 * @uid:        The host user id (real kernel value).
   1196 *
   1197 * Determines whether a given UID is the owner of given VMCI context.
   1198 */
   1199bool vmci_is_context_owner(u32 context_id, kuid_t uid)
   1200{
   1201	bool is_owner = false;
   1202
   1203	if (vmci_host_code_active()) {
   1204		struct vmci_ctx *context = vmci_ctx_get(context_id);
   1205		if (context) {
   1206			if (context->cred)
   1207				is_owner = uid_eq(context->cred->uid, uid);
   1208			vmci_ctx_put(context);
   1209		}
   1210	}
   1211
   1212	return is_owner;
   1213}
   1214EXPORT_SYMBOL_GPL(vmci_is_context_owner);