cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

hv.c (11991B)


// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/hyperv.h>
#include <linux/random.h>
#include <linux/clockchips.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <clocksource/hyperv_timer.h>
#include <asm/mshyperv.h>
#include "hyperv_vmbus.h"

/* The one and only */
struct hv_context hv_context;

/*
 * hv_init - Main initialization routine.
 *
 * This routine must be called before any other routines in here are called
 */
int hv_init(void)
{
	hv_context.cpu_context = alloc_percpu(struct hv_per_cpu_context);
	if (!hv_context.cpu_context)
		return -ENOMEM;
	return 0;
}

/*
 * Functions for allocating and freeing memory with size and
 * alignment HV_HYP_PAGE_SIZE. These functions are needed because
 * the guest page size may not be the same as the Hyper-V page
 * size. We depend upon kmalloc() aligning power-of-two size
 * allocations to the allocation size boundary, so that the
 * allocated memory appears to Hyper-V as a page of the size
 * it expects.
 */

void *hv_alloc_hyperv_page(void)
{
	BUILD_BUG_ON(PAGE_SIZE < HV_HYP_PAGE_SIZE);

	if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
		return (void *)__get_free_page(GFP_KERNEL);
	else
		return kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
}

void *hv_alloc_hyperv_zeroed_page(void)
{
	if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
		return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
	else
		return kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
}

void hv_free_hyperv_page(unsigned long addr)
{
	if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
		free_page(addr);
	else
		kfree((void *)addr);
}

/*
 * hv_post_message - Post a message using the hypervisor message IPC.
 *
 * This involves a hypercall.
 */
int hv_post_message(union hv_connection_id connection_id,
		  enum hv_message_type message_type,
		  void *payload, size_t payload_size)
{
	struct hv_input_post_message *aligned_msg;
	struct hv_per_cpu_context *hv_cpu;
	u64 status;

	if (payload_size > HV_MESSAGE_PAYLOAD_BYTE_COUNT)
		return -EMSGSIZE;

	hv_cpu = get_cpu_ptr(hv_context.cpu_context);
	aligned_msg = hv_cpu->post_msg_page;
	aligned_msg->connectionid = connection_id;
	aligned_msg->reserved = 0;
	aligned_msg->message_type = message_type;
	aligned_msg->payload_size = payload_size;
	memcpy((void *)aligned_msg->payload, payload, payload_size);

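	/*
	 * In an SEV-SNP isolation VM the hypervisor cannot read encrypted
	 * guest memory, so the message is passed through the shared GHCB
	 * page (hv_ghcb_hypercall()) instead of the regular hypercall path.
	 */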
	if (hv_isolation_type_snp())
		status = hv_ghcb_hypercall(HVCALL_POST_MESSAGE,
				(void *)aligned_msg, NULL,
				sizeof(*aligned_msg));
	else
		status = hv_do_hypercall(HVCALL_POST_MESSAGE,
				aligned_msg, NULL);

	/* Preemption must remain disabled until after the hypercall
	 * so some other thread can't get scheduled onto this cpu and
	 * corrupt the per-cpu post_msg_page
	 */
	put_cpu_ptr(hv_cpu);

	return hv_result(status);
}

int hv_synic_alloc(void)
{
	int cpu;
	struct hv_per_cpu_context *hv_cpu;

	/*
	 * First, zero all per-cpu memory areas so hv_synic_free() can
	 * detect what memory has been allocated and cleanup properly
	 * after any failures.
	 */
	for_each_present_cpu(cpu) {
		hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu);
		memset(hv_cpu, 0, sizeof(*hv_cpu));
	}

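	/*
	 * One cpumask per NUMA node; the channel code uses this map to
	 * spread channel interrupts across the CPUs of each node.
	 */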
	hv_context.hv_numa_map = kcalloc(nr_node_ids, sizeof(struct cpumask),
					 GFP_KERNEL);
	if (hv_context.hv_numa_map == NULL) {
		pr_err("Unable to allocate NUMA map\n");
		goto err;
	}

	for_each_present_cpu(cpu) {
		hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu);

		tasklet_init(&hv_cpu->msg_dpc,
			     vmbus_on_msg_dpc, (unsigned long) hv_cpu);

		/*
		 * SynIC message and event pages are allocated by the
		 * paravisor. Skip allocating these pages here.
		 */
		if (!hv_isolation_type_snp()) {
			hv_cpu->synic_message_page =
				(void *)get_zeroed_page(GFP_ATOMIC);
			if (hv_cpu->synic_message_page == NULL) {
				pr_err("Unable to allocate SYNIC message page\n");
				goto err;
			}

			hv_cpu->synic_event_page =
				(void *)get_zeroed_page(GFP_ATOMIC);
			if (hv_cpu->synic_event_page == NULL) {
				pr_err("Unable to allocate SYNIC event page\n");
				goto err;
			}
		}

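		/*
		 * The post_msg_page is always guest-allocated: it is the
		 * buffer that hv_post_message() passes to the
		 * HVCALL_POST_MESSAGE hypercall.
		 */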
		hv_cpu->post_msg_page = (void *)get_zeroed_page(GFP_ATOMIC);
		if (hv_cpu->post_msg_page == NULL) {
			pr_err("Unable to allocate post msg page\n");
			goto err;
		}
	}

	return 0;
err:
	/*
	 * Any memory allocations that succeeded will be freed when
	 * the caller cleans up by calling hv_synic_free()
	 */
	return -ENOMEM;
}


void hv_synic_free(void)
{
	int cpu;

	for_each_present_cpu(cpu) {
		struct hv_per_cpu_context *hv_cpu
			= per_cpu_ptr(hv_context.cpu_context, cpu);

		free_page((unsigned long)hv_cpu->synic_event_page);
		free_page((unsigned long)hv_cpu->synic_message_page);
		free_page((unsigned long)hv_cpu->post_msg_page);
	}

	kfree(hv_context.hv_numa_map);
}

/*
 * hv_synic_init - Initialize the Synthetic Interrupt Controller.
 *
 * If it is already initialized by another entity (i.e., the x2v shim), we
 * need to retrieve the initialized message and event pages.  Otherwise, we
 * create and initialize the message and event pages.
 */
void hv_synic_enable_regs(unsigned int cpu)
{
	struct hv_per_cpu_context *hv_cpu
		= per_cpu_ptr(hv_context.cpu_context, cpu);
	union hv_synic_simp simp;
	union hv_synic_siefp siefp;
	union hv_synic_sint shared_sint;
	union hv_synic_scontrol sctrl;

	/* Setup the Synic's message page */
	simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
	simp.simp_enabled = 1;

	if (hv_isolation_type_snp()) {
		hv_cpu->synic_message_page
			= memremap(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT,
				   HV_HYP_PAGE_SIZE, MEMREMAP_WB);
		if (!hv_cpu->synic_message_page)
			pr_err("Failed to map SynIC message page.\n");
	} else {
		simp.base_simp_gpa = virt_to_phys(hv_cpu->synic_message_page)
			>> HV_HYP_PAGE_SHIFT;
	}

	hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);

	/* Setup the Synic's event page */
	siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
	siefp.siefp_enabled = 1;

	if (hv_isolation_type_snp()) {
		hv_cpu->synic_event_page =
			memremap(siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT,
				 HV_HYP_PAGE_SIZE, MEMREMAP_WB);

		if (!hv_cpu->synic_event_page)
			pr_err("Failed to map SynIC event page.\n");
	} else {
		siefp.base_siefp_gpa = virt_to_phys(hv_cpu->synic_event_page)
			>> HV_HYP_PAGE_SHIFT;
	}

	hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);

	/* Setup the shared SINT. */
	if (vmbus_irq != -1)
		enable_percpu_irq(vmbus_irq, 0);
	shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
					VMBUS_MESSAGE_SINT);

	shared_sint.vector = vmbus_interrupt;
	shared_sint.masked = false;

	/*
	 * On architectures where Hyper-V doesn't support AEOI (e.g., ARM64),
	 * it doesn't provide a recommendation flag and AEOI must be disabled.
	 */
#ifdef HV_DEPRECATING_AEOI_RECOMMENDED
	shared_sint.auto_eoi =
			!(ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED);
#else
	shared_sint.auto_eoi = 0;
#endif
	hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
				shared_sint.as_uint64);

	/* Enable the global synic bit */
	sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
	sctrl.enable = 1;

	hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
}

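/*
 * Called for each CPU as it comes online; the VMBus driver registers this
 * as a CPU hotplug (cpuhp) callback, with hv_synic_cleanup() as the
 * corresponding teardown hook.
 */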
int hv_synic_init(unsigned int cpu)
{
	hv_synic_enable_regs(cpu);

	hv_stimer_legacy_init(cpu, VMBUS_MESSAGE_SINT);

	return 0;
}

/*
 * hv_synic_cleanup - Cleanup routine for hv_synic_init().
 */
void hv_synic_disable_regs(unsigned int cpu)
{
	struct hv_per_cpu_context *hv_cpu
		= per_cpu_ptr(hv_context.cpu_context, cpu);
	union hv_synic_sint shared_sint;
	union hv_synic_simp simp;
	union hv_synic_siefp siefp;
	union hv_synic_scontrol sctrl;

	shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
					VMBUS_MESSAGE_SINT);

	shared_sint.masked = 1;

	/* Need to correctly cleanup in the case of SMP!!! */
	/* Disable the interrupt */
	hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
				shared_sint.as_uint64);

	simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
	/*
	 * In an Isolation VM, the SynIC message and event flags pages are
	 * allocated by the paravisor. These pages will also be used by the
	 * kdump kernel, so just reset the enable bits here and keep the
	 * page addresses.
	 */
	simp.simp_enabled = 0;
	if (hv_isolation_type_snp())
		memunmap(hv_cpu->synic_message_page);
	else
		simp.base_simp_gpa = 0;

	hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);

	siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
	siefp.siefp_enabled = 0;

	if (hv_isolation_type_snp())
		memunmap(hv_cpu->synic_event_page);
	else
		siefp.base_siefp_gpa = 0;

	hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);

	/* Disable the global synic bit */
	sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
	sctrl.enable = 0;
	hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);

	if (vmbus_irq != -1)
		disable_percpu_irq(vmbus_irq);
}

#define HV_MAX_TRIES 3
/*
 * Scan the event flags page of 'this' CPU looking for any bit that is set.  If we find one
 * bit set, then wait for a few milliseconds.  Repeat these steps for a maximum of 3 times.
 * Return 'true', if there is still any set bit after this operation; 'false', otherwise.
 *
 * If a bit is set, that means there is a pending channel interrupt.  The expectation is
 * that the normal interrupt handling mechanism will find and process the channel interrupt
 * "very soon", and in the process clear the bit.
 */
static bool hv_synic_event_pending(void)
{
	struct hv_per_cpu_context *hv_cpu = this_cpu_ptr(hv_context.cpu_context);
	union hv_synic_event_flags *event =
		(union hv_synic_event_flags *)hv_cpu->synic_event_page + VMBUS_MESSAGE_SINT;
	unsigned long *recv_int_page = event->flags; /* assumes VMBus version >= VERSION_WIN8 */
	bool pending;
	u32 relid;
	int tries = 0;

retry:
	pending = false;
	for_each_set_bit(relid, recv_int_page, HV_EVENT_FLAGS_COUNT) {
		/* Special case - VMBus channel protocol messages */
		if (relid == 0)
			continue;
		pending = true;
		break;
	}
	if (pending && tries++ < HV_MAX_TRIES) {
		usleep_range(10000, 20000);
		goto retry;
	}
	return pending;
}

int hv_synic_cleanup(unsigned int cpu)
{
	struct vmbus_channel *channel, *sc;
	bool channel_found = false;

	if (vmbus_connection.conn_state != CONNECTED)
		goto always_cleanup;

	/*
	 * Hyper-V does not provide a way to change the connect CPU once
	 * it is set; we must prevent the connect CPU from going offline
	 * while the VM is running normally. But in the panic or kexec()
	 * path where the vmbus is already disconnected, the CPU must be
	 * allowed to shut down.
	 */
	if (cpu == VMBUS_CONNECT_CPU)
		return -EBUSY;

	/*
	 * Search for channels which are bound to the CPU we're about to
	 * cleanup.  In case we find one and vmbus is still connected, we
	 * fail; this will effectively prevent CPU offlining.
	 *
	 * TODO: Re-bind the channels to different CPUs.
	 */
	mutex_lock(&vmbus_connection.channel_mutex);
	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
		if (channel->target_cpu == cpu) {
			channel_found = true;
			break;
		}
		list_for_each_entry(sc, &channel->sc_list, sc_list) {
			if (sc->target_cpu == cpu) {
				channel_found = true;
				break;
			}
		}
		if (channel_found)
			break;
	}
	mutex_unlock(&vmbus_connection.channel_mutex);

	if (channel_found)
		return -EBUSY;

	/*
	 * channel_found == false means that any channels that were previously
	 * assigned to the CPU have been reassigned elsewhere with a call to
	 * vmbus_send_modifychannel().  Scan the event flags page looking for
	 * bits that are set, and wait with a timeout for vmbus_chan_sched()
	 * to process such bits.  If bits are still set after this operation
	 * and VMBus is connected, fail the CPU offlining operation.
	 */
	if (vmbus_proto_version >= VERSION_WIN10_V4_1 && hv_synic_event_pending())
		return -EBUSY;

always_cleanup:
	hv_stimer_legacy_cleanup(cpu);

	hv_synic_disable_regs(cpu);

	return 0;
}