cachepc-linux

Fork of AMDESE/linux with modifications for the CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux

vmware.c (14141B)


/*
 * VMware Detection code.
 *
 * Copyright (C) 2008, VMware, Inc.
 * Author : Alok N Kataria <akataria@vmware.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 */

#include <linux/dmi.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/clocksource.h>
#include <linux/cpu.h>
#include <linux/reboot.h>
#include <linux/static_call.h>
#include <asm/div64.h>
#include <asm/x86_init.h>
#include <asm/hypervisor.h>
#include <asm/timer.h>
#include <asm/apic.h>
#include <asm/vmware.h>
#include <asm/svm.h>

#undef pr_fmt
#define pr_fmt(fmt)	"vmware: " fmt

#define CPUID_VMWARE_INFO_LEAF               0x40000000
#define CPUID_VMWARE_FEATURES_LEAF           0x40000010
#define CPUID_VMWARE_FEATURES_ECX_VMMCALL    BIT(0)
#define CPUID_VMWARE_FEATURES_ECX_VMCALL     BIT(1)

#define VMWARE_HYPERVISOR_MAGIC	0x564D5868

#define VMWARE_CMD_GETVERSION    10
#define VMWARE_CMD_GETHZ         45
#define VMWARE_CMD_GETVCPU_INFO  68
#define VMWARE_CMD_LEGACY_X2APIC  3
#define VMWARE_CMD_VCPU_RESERVED 31
#define VMWARE_CMD_STEALCLOCK    91

#define STEALCLOCK_NOT_AVAILABLE (-1)
#define STEALCLOCK_DISABLED        0
#define STEALCLOCK_ENABLED         1

#define VMWARE_PORT(cmd, eax, ebx, ecx, edx)				\
	__asm__("inl (%%dx), %%eax" :					\
		"=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) :		\
		"a"(VMWARE_HYPERVISOR_MAGIC),				\
		"c"(VMWARE_CMD_##cmd),					\
		"d"(VMWARE_HYPERVISOR_PORT), "b"(UINT_MAX) :		\
		"memory")

#define VMWARE_VMCALL(cmd, eax, ebx, ecx, edx)				\
	__asm__("vmcall" :						\
		"=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) :		\
		"a"(VMWARE_HYPERVISOR_MAGIC),				\
		"c"(VMWARE_CMD_##cmd),					\
		"d"(0), "b"(UINT_MAX) :					\
		"memory")

#define VMWARE_VMMCALL(cmd, eax, ebx, ecx, edx)                         \
	__asm__("vmmcall" :						\
		"=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) :		\
		"a"(VMWARE_HYPERVISOR_MAGIC),				\
		"c"(VMWARE_CMD_##cmd),					\
		"d"(0), "b"(UINT_MAX) :					\
		"memory")

#define VMWARE_CMD(cmd, eax, ebx, ecx, edx) do {		\
	switch (vmware_hypercall_mode) {			\
	case CPUID_VMWARE_FEATURES_ECX_VMCALL:			\
		VMWARE_VMCALL(cmd, eax, ebx, ecx, edx);		\
		break;						\
	case CPUID_VMWARE_FEATURES_ECX_VMMCALL:			\
		VMWARE_VMMCALL(cmd, eax, ebx, ecx, edx);	\
		break;						\
	default:						\
		VMWARE_PORT(cmd, eax, ebx, ecx, edx);		\
		break;						\
	}							\
	} while (0)
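
/*
 * Editorial note, not part of upstream vmware.c: VMWARE_CMD() dispatches one
 * register-based protocol over three transports.  The legacy path is an "inl"
 * from the backdoor I/O port (VMWARE_HYPERVISOR_PORT, historically 0x5658)
 * with EAX holding the "VMXh" magic (0x564D5868) and ECX the command number;
 * on Intel VT and AMD SVM hosts the same register contract is carried by
 * VMCALL and VMMCALL, as advertised in ECX of CPUID leaf 0x40000010.  A
 * minimal caller sketch (equivalent to __vmware_platform() below):
 *
 *	uint32_t eax, ebx, ecx, edx;
 *
 *	VMWARE_CMD(GETVERSION, eax, ebx, ecx, edx);
 *	if (ebx == VMWARE_HYPERVISOR_MAGIC)
 *		;	// the hypervisor echoed the magic: VMware detected
 */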

struct vmware_steal_time {
	union {
		uint64_t clock;	/* stolen time counter in units of vtsc */
		struct {
			/* only for little-endian */
			uint32_t clock_low;
			uint32_t clock_high;
		};
	};
	uint64_t reserved[7];
};
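
/*
 * Editorial note, not part of upstream vmware.c: the 8-byte counter plus
 * 7 * 8 reserved bytes make the structure exactly 64 bytes, one cache line,
 * matching the __aligned(64) on the per-cpu instance declared below.  The
 * counter is expressed in virtual TSC cycles and is converted to nanoseconds
 * with the same cyc2ns parameters used for sched_clock.
 */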

static unsigned long vmware_tsc_khz __ro_after_init;
static u8 vmware_hypercall_mode     __ro_after_init;

static inline int __vmware_platform(void)
{
	uint32_t eax, ebx, ecx, edx;
	VMWARE_CMD(GETVERSION, eax, ebx, ecx, edx);
	return eax != (uint32_t)-1 && ebx == VMWARE_HYPERVISOR_MAGIC;
}

static unsigned long vmware_get_tsc_khz(void)
{
	return vmware_tsc_khz;
}

#ifdef CONFIG_PARAVIRT
static struct cyc2ns_data vmware_cyc2ns __ro_after_init;
static bool vmw_sched_clock __initdata = true;
static DEFINE_PER_CPU_DECRYPTED(struct vmware_steal_time, vmw_steal_time) __aligned(64);
static bool has_steal_clock;
static bool steal_acc __initdata = true; /* steal time accounting */

static __init int setup_vmw_sched_clock(char *s)
{
	vmw_sched_clock = false;
	return 0;
}
early_param("no-vmw-sched-clock", setup_vmw_sched_clock);

static __init int parse_no_stealacc(char *arg)
{
	steal_acc = false;
	return 0;
}
early_param("no-steal-acc", parse_no_stealacc);

static unsigned long long notrace vmware_sched_clock(void)
{
	unsigned long long ns;

	ns = mul_u64_u32_shr(rdtsc(), vmware_cyc2ns.cyc2ns_mul,
			     vmware_cyc2ns.cyc2ns_shift);
	ns -= vmware_cyc2ns.cyc2ns_offset;
	return ns;
}

static void __init vmware_cyc2ns_setup(void)
{
	struct cyc2ns_data *d = &vmware_cyc2ns;
	unsigned long long tsc_now = rdtsc();

	clocks_calc_mult_shift(&d->cyc2ns_mul, &d->cyc2ns_shift,
			       vmware_tsc_khz, NSEC_PER_MSEC, 0);
	d->cyc2ns_offset = mul_u64_u32_shr(tsc_now, d->cyc2ns_mul,
					   d->cyc2ns_shift);

	pr_info("using clock offset of %llu ns\n", d->cyc2ns_offset);
}
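
/*
 * Editorial note, not part of upstream vmware.c: the fixed-point conversion
 * used above and in vmware_sched_clock()/vmware_steal_clock() is
 *
 *	ns = (cycles * cyc2ns_mul) >> cyc2ns_shift
 *
 * with mul/shift chosen by clocks_calc_mult_shift() so that vmware_tsc_khz
 * cycles correspond to NSEC_PER_MSEC nanoseconds.  Illustrative numbers: for
 * a 2,000,000 kHz vTSC one cycle is 0.5 ns, so mul / 2^shift approximates
 * 0.5.  Subtracting cyc2ns_offset, the TSC value captured at setup, makes
 * sched_clock start near zero at that point.
 */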

static int vmware_cmd_stealclock(uint32_t arg1, uint32_t arg2)
{
	uint32_t result, info;

	asm volatile (VMWARE_HYPERCALL :
		"=a"(result),
		"=c"(info) :
		"a"(VMWARE_HYPERVISOR_MAGIC),
		"b"(0),
		"c"(VMWARE_CMD_STEALCLOCK),
		"d"(0),
		"S"(arg1),
		"D"(arg2) :
		"memory");
	return result;
}

static bool stealclock_enable(phys_addr_t pa)
{
	return vmware_cmd_stealclock(upper_32_bits(pa),
				     lower_32_bits(pa)) == STEALCLOCK_ENABLED;
}

static int __stealclock_disable(void)
{
	return vmware_cmd_stealclock(0, 1);
}

static void stealclock_disable(void)
{
	__stealclock_disable();
}

static bool vmware_is_stealclock_available(void)
{
	return __stealclock_disable() != STEALCLOCK_NOT_AVAILABLE;
}
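
/*
 * Editorial note, not part of upstream vmware.c: for the STEALCLOCK command
 * ESI/EDI carry the argument pair (arg1, arg2).  Enabling passes the high
 * and low halves of the physical address of the per-vCPU vmware_steal_time
 * page; the (0, 1) pair used by __stealclock_disable() turns accounting off
 * and doubles as the availability probe, since a hypervisor without the
 * feature answers STEALCLOCK_NOT_AVAILABLE (-1) rather than
 * STEALCLOCK_DISABLED or STEALCLOCK_ENABLED.
 */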

/**
 * vmware_steal_clock() - read the per-cpu steal clock
 * @cpu:            the cpu number whose steal clock we want to read
 *
 * The function reads the steal clock if we are on a 64-bit system, otherwise
 * reads it in parts, checking that the high part didn't change in the
 * meantime.
 *
 * Return:
 *      The steal clock reading in ns.
 */
static uint64_t vmware_steal_clock(int cpu)
{
	struct vmware_steal_time *steal = &per_cpu(vmw_steal_time, cpu);
	uint64_t clock;

	if (IS_ENABLED(CONFIG_64BIT))
		clock = READ_ONCE(steal->clock);
	else {
		uint32_t initial_high, low, high;

		do {
			initial_high = READ_ONCE(steal->clock_high);
			/* Do not reorder initial_high and high readings */
			virt_rmb();
			low = READ_ONCE(steal->clock_low);
			/* Keep low reading in between */
			virt_rmb();
			high = READ_ONCE(steal->clock_high);
		} while (initial_high != high);

		clock = ((uint64_t)high << 32) | low;
	}

	return mul_u64_u32_shr(clock, vmware_cyc2ns.cyc2ns_mul,
			     vmware_cyc2ns.cyc2ns_shift);
}

static void vmware_register_steal_time(void)
{
	int cpu = smp_processor_id();
	struct vmware_steal_time *st = &per_cpu(vmw_steal_time, cpu);

	if (!has_steal_clock)
		return;

	if (!stealclock_enable(slow_virt_to_phys(st))) {
		has_steal_clock = false;
		return;
	}

	pr_info("vmware-stealtime: cpu %d, pa %llx\n",
		cpu, (unsigned long long) slow_virt_to_phys(st));
}

static void vmware_disable_steal_time(void)
{
	if (!has_steal_clock)
		return;

	stealclock_disable();
}

static void vmware_guest_cpu_init(void)
{
	if (has_steal_clock)
		vmware_register_steal_time();
}

static void vmware_pv_guest_cpu_reboot(void *unused)
{
	vmware_disable_steal_time();
}

static int vmware_pv_reboot_notify(struct notifier_block *nb,
				unsigned long code, void *unused)
{
	if (code == SYS_RESTART)
		on_each_cpu(vmware_pv_guest_cpu_reboot, NULL, 1);
	return NOTIFY_DONE;
}

static struct notifier_block vmware_pv_reboot_nb = {
	.notifier_call = vmware_pv_reboot_notify,
};

#ifdef CONFIG_SMP
static void __init vmware_smp_prepare_boot_cpu(void)
{
	vmware_guest_cpu_init();
	native_smp_prepare_boot_cpu();
}

static int vmware_cpu_online(unsigned int cpu)
{
	local_irq_disable();
	vmware_guest_cpu_init();
	local_irq_enable();
	return 0;
}

static int vmware_cpu_down_prepare(unsigned int cpu)
{
	local_irq_disable();
	vmware_disable_steal_time();
	local_irq_enable();
	return 0;
}
#endif

static __init int activate_jump_labels(void)
{
	if (has_steal_clock) {
		static_key_slow_inc(&paravirt_steal_enabled);
		if (steal_acc)
			static_key_slow_inc(&paravirt_steal_rq_enabled);
	}

	return 0;
}
arch_initcall(activate_jump_labels);

static void __init vmware_paravirt_ops_setup(void)
{
	pv_info.name = "VMware hypervisor";
	pv_ops.cpu.io_delay = paravirt_nop;

	if (vmware_tsc_khz == 0)
		return;

	vmware_cyc2ns_setup();

	if (vmw_sched_clock)
		paravirt_set_sched_clock(vmware_sched_clock);

	if (vmware_is_stealclock_available()) {
		has_steal_clock = true;
		static_call_update(pv_steal_clock, vmware_steal_clock);

		/* We use reboot notifier only to disable steal clock */
		register_reboot_notifier(&vmware_pv_reboot_nb);

#ifdef CONFIG_SMP
		smp_ops.smp_prepare_boot_cpu =
			vmware_smp_prepare_boot_cpu;
		if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
					      "x86/vmware:online",
					      vmware_cpu_online,
					      vmware_cpu_down_prepare) < 0)
			pr_err("vmware_guest: Failed to install cpu hotplug callbacks\n");
#else
		vmware_guest_cpu_init();
#endif
	}
}
#else
#define vmware_paravirt_ops_setup() do {} while (0)
#endif

/*
 * VMware hypervisor takes care of exporting a reliable TSC to the guest.
 * Still, due to timing difference when running on virtual cpus, the TSC can
 * be marked as unstable in some cases. For example, the TSC sync check at
 * bootup can fail due to a marginal offset between vcpus' TSCs (though the
 * TSCs do not drift from each other).  Also, the ACPI PM timer clocksource
 * is not suitable as a watchdog when running on a hypervisor because the
 * kernel may miss a wrap of the counter if the vcpu is descheduled for a
 * long time. To skip these checks at runtime we set these capability bits,
 * so that the kernel could just trust the hypervisor with providing a
 * reliable virtual TSC that is suitable for timekeeping.
 */
static void __init vmware_set_capabilities(void)
{
	setup_force_cpu_cap(X86_FEATURE_CONSTANT_TSC);
	setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
	if (vmware_tsc_khz)
		setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
	if (vmware_hypercall_mode == CPUID_VMWARE_FEATURES_ECX_VMCALL)
		setup_force_cpu_cap(X86_FEATURE_VMCALL);
	else if (vmware_hypercall_mode == CPUID_VMWARE_FEATURES_ECX_VMMCALL)
		setup_force_cpu_cap(X86_FEATURE_VMW_VMMCALL);
}

static void __init vmware_platform_setup(void)
{
	uint32_t eax, ebx, ecx, edx;
	uint64_t lpj, tsc_khz;

	VMWARE_CMD(GETHZ, eax, ebx, ecx, edx);

	if (ebx != UINT_MAX) {
		lpj = tsc_khz = eax | (((uint64_t)ebx) << 32);
		do_div(tsc_khz, 1000);
		WARN_ON(tsc_khz >> 32);
		pr_info("TSC freq read from hypervisor : %lu.%03lu MHz\n",
			(unsigned long) tsc_khz / 1000,
			(unsigned long) tsc_khz % 1000);

		if (!preset_lpj) {
			do_div(lpj, HZ);
			preset_lpj = lpj;
		}

		vmware_tsc_khz = tsc_khz;
		x86_platform.calibrate_tsc = vmware_get_tsc_khz;
		x86_platform.calibrate_cpu = vmware_get_tsc_khz;

#ifdef CONFIG_X86_LOCAL_APIC
		/* Skip lapic calibration since we know the bus frequency. */
		lapic_timer_period = ecx / HZ;
		pr_info("Host bus clock speed read from hypervisor : %u Hz\n",
			ecx);
#endif
	} else {
		pr_warn("Failed to get TSC freq from the hypervisor\n");
	}

	vmware_paravirt_ops_setup();

#ifdef CONFIG_X86_IO_APIC
	no_timer_check = 1;
#endif

	vmware_set_capabilities();
}
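
/*
 * Editorial note, not part of upstream vmware.c, with illustrative numbers:
 * GETHZ returns the TSC frequency in Hz as a 64-bit value split across
 * EBX:EAX (EBX == UINT_MAX means "not available") and the APIC bus frequency
 * in Hz in ECX.  For a 2.4 GHz vTSC the hypervisor reports EAX = 0x8F0D1800,
 * EBX = 0, i.e. 2,400,000,000 Hz; the code above derives tsc_khz = 2,400,000
 * ("2400.000 MHz" in the log), preset_lpj = 2,400,000,000 / HZ, and
 * lapic_timer_period = ECX / HZ bus ticks per jiffy.
 */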

static u8 __init vmware_select_hypercall(void)
{
	int eax, ebx, ecx, edx;

	cpuid(CPUID_VMWARE_FEATURES_LEAF, &eax, &ebx, &ecx, &edx);
	return (ecx & (CPUID_VMWARE_FEATURES_ECX_VMMCALL |
		       CPUID_VMWARE_FEATURES_ECX_VMCALL));
}

/*
 * While checking the dmi string information, just checking the product
 * serial key should be enough, as this will always have a VMware
 * specific string when running under VMware hypervisor.
 * If !boot_cpu_has(X86_FEATURE_HYPERVISOR), vmware_hypercall_mode
 * intentionally defaults to 0.
 */
static uint32_t __init vmware_platform(void)
{
	if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
		unsigned int eax;
		unsigned int hyper_vendor_id[3];

		cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &hyper_vendor_id[0],
		      &hyper_vendor_id[1], &hyper_vendor_id[2]);
		if (!memcmp(hyper_vendor_id, "VMwareVMware", 12)) {
			if (eax >= CPUID_VMWARE_FEATURES_LEAF)
				vmware_hypercall_mode =
					vmware_select_hypercall();

			pr_info("hypercall mode: 0x%02x\n",
				(unsigned int) vmware_hypercall_mode);

			return CPUID_VMWARE_INFO_LEAF;
		}
	} else if (dmi_available && dmi_name_in_serial("VMware") &&
		   __vmware_platform())
		return 1;

	return 0;
}
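
/*
 * Editorial note, not part of upstream vmware.c: CPUID leaf 0x40000000 is
 * the generic hypervisor-vendor leaf.  EAX returns the highest supported
 * hypervisor leaf and EBX/ECX/EDX spell the 12-byte vendor signature, so the
 * memcmp above matches EBX = "VMwa", ECX = "reVM", EDX = "ware".  The
 * features leaf 0x40000010 is only queried when EAX indicates it exists.
 */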

/* Checks if hypervisor supports x2apic without VT-D interrupt remapping. */
static bool __init vmware_legacy_x2apic_available(void)
{
	uint32_t eax, ebx, ecx, edx;
	VMWARE_CMD(GETVCPU_INFO, eax, ebx, ecx, edx);
	return (eax & (1 << VMWARE_CMD_VCPU_RESERVED)) == 0 &&
	       (eax & (1 << VMWARE_CMD_LEGACY_X2APIC)) != 0;
}

#ifdef CONFIG_AMD_MEM_ENCRYPT
static void vmware_sev_es_hcall_prepare(struct ghcb *ghcb,
					struct pt_regs *regs)
{
	/* Copy VMWARE specific Hypercall parameters to the GHCB */
	ghcb_set_rip(ghcb, regs->ip);
	ghcb_set_rbx(ghcb, regs->bx);
	ghcb_set_rcx(ghcb, regs->cx);
	ghcb_set_rdx(ghcb, regs->dx);
	ghcb_set_rsi(ghcb, regs->si);
	ghcb_set_rdi(ghcb, regs->di);
	ghcb_set_rbp(ghcb, regs->bp);
}

static bool vmware_sev_es_hcall_finish(struct ghcb *ghcb, struct pt_regs *regs)
{
	if (!(ghcb_rbx_is_valid(ghcb) &&
	      ghcb_rcx_is_valid(ghcb) &&
	      ghcb_rdx_is_valid(ghcb) &&
	      ghcb_rsi_is_valid(ghcb) &&
	      ghcb_rdi_is_valid(ghcb) &&
	      ghcb_rbp_is_valid(ghcb)))
		return false;

	regs->bx = ghcb_get_rbx(ghcb);
	regs->cx = ghcb_get_rcx(ghcb);
	regs->dx = ghcb_get_rdx(ghcb);
	regs->si = ghcb_get_rsi(ghcb);
	regs->di = ghcb_get_rdi(ghcb);
	regs->bp = ghcb_get_rbp(ghcb);

	return true;
}
#endif
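
/*
 * Editorial note, not part of upstream vmware.c: under SEV-ES the hypervisor
 * cannot read guest registers directly, so a VMware hypercall instruction
 * raises a #VC exception in the guest.  The #VC handler invokes
 * .sev_es_hcall_prepare to copy the backdoor argument registers into the
 * shared GHCB before exiting to the host, and .sev_es_hcall_finish to
 * validate and copy the results back into pt_regs afterwards; returning
 * false rejects an incomplete GHCB as an error.
 */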

const __initconst struct hypervisor_x86 x86_hyper_vmware = {
	.name				= "VMware",
	.detect				= vmware_platform,
	.type				= X86_HYPER_VMWARE,
	.init.init_platform		= vmware_platform_setup,
	.init.x2apic_available		= vmware_legacy_x2apic_available,
#ifdef CONFIG_AMD_MEM_ENCRYPT
	.runtime.sev_es_hcall_prepare	= vmware_sev_es_hcall_prepare,
	.runtime.sev_es_hcall_finish	= vmware_sev_es_hcall_finish,
#endif
};