kvm64.c (48565B)
/*
 * ARM implementation of KVM hooks, 64 bit specific code
 *
 * Copyright Mian-M. Hamayun 2013, Virtual Open Systems
 * Copyright Alex Bennée 2014, Linaro
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include <sys/ioctl.h>
#include <sys/ptrace.h>

#include <linux/elf.h>
#include <linux/kvm.h>

#include "qemu-common.h"
#include "qapi/error.h"
#include "cpu.h"
#include "qemu/timer.h"
#include "qemu/error-report.h"
#include "qemu/host-utils.h"
#include "qemu/main-loop.h"
#include "exec/gdbstub.h"
#include "sysemu/runstate.h"
#include "sysemu/kvm.h"
#include "sysemu/kvm_int.h"
#include "kvm_arm.h"
#include "internals.h"
#include "hw/acpi/acpi.h"
#include "hw/acpi/ghes.h"
#include "hw/arm/virt.h"

static bool have_guest_debug;

/*
 * Although the ARM implementation of hardware assisted debugging
 * allows for different breakpoints per-core, the current GDB
 * interface treats them as a global pool of registers (which seems to
 * be the case for x86, ppc and s390). As a result we store one copy
 * of registers which is used for all active cores.
 *
 * Write access is serialised by virtue of the GDB protocol which
 * updates things. Read access (i.e. when the values are copied to the
 * vCPU) is also gated by GDB's run control.
 *
 * This is not unreasonable as most of the time debugging kernels you
 * never know which core will eventually execute your function.
 */

typedef struct {
    uint64_t bcr;
    uint64_t bvr;
} HWBreakpoint;

/* The watchpoint registers can cover more area than the requested
 * watchpoint so we need to store the additional information
 * somewhere. We also need to supply a CPUWatchpoint to the GDB stub
 * when the watchpoint is hit.
 */
typedef struct {
    uint64_t wcr;
    uint64_t wvr;
    CPUWatchpoint details;
} HWWatchpoint;

/* Maximum and current break/watch point counts */
int max_hw_bps, max_hw_wps;
GArray *hw_breakpoints, *hw_watchpoints;

#define cur_hw_wps   (hw_watchpoints->len)
#define cur_hw_bps   (hw_breakpoints->len)
#define get_hw_bp(i) (&g_array_index(hw_breakpoints, HWBreakpoint, i))
#define get_hw_wp(i) (&g_array_index(hw_watchpoints, HWWatchpoint, i))

/**
 * kvm_arm_init_debug() - check for guest debug capabilities
 * @cs: CPUState
 *
 * kvm_check_extension returns the number of debug registers we have
 * or 0 if we have none.
 *
 */
static void kvm_arm_init_debug(CPUState *cs)
{
    have_guest_debug = kvm_check_extension(cs->kvm_state,
                                           KVM_CAP_SET_GUEST_DEBUG);

    max_hw_wps = kvm_check_extension(cs->kvm_state, KVM_CAP_GUEST_DEBUG_HW_WPS);
    hw_watchpoints = g_array_sized_new(true, true,
                                       sizeof(HWWatchpoint), max_hw_wps);

    max_hw_bps = kvm_check_extension(cs->kvm_state, KVM_CAP_GUEST_DEBUG_HW_BPS);
    hw_breakpoints = g_array_sized_new(true, true,
                                       sizeof(HWBreakpoint), max_hw_bps);
    return;
}

/**
 * insert_hw_breakpoint()
 * @addr: address of breakpoint
 *
 * See ARM ARM D2.9.1 for details but here we are only going to create
 * simple un-linked breakpoints (i.e. we don't chain breakpoints
 * together to match address and context or vmid). The hardware is
 * capable of fancier matching but that will require exposing that
 * fanciness to GDB's interface
 *
 * DBGBCR<n>_EL1, Debug Breakpoint Control Registers
 *
 *  31  24 23  20 19   16 15 14  13  12   9 8   5 4    3 2   1  0
 * +------+------+-------+-----+----+------+-----+------+-----+---+
 * | RES0 |  BT  |  LBN  | SSC | HMC| RES0 | BAS | RES0 | PMC | E |
 * +------+------+-------+-----+----+------+-----+------+-----+---+
 *
 * BT: Breakpoint type (0 = unlinked address match)
 * LBN: Linked BP number (0 = unused)
 * SSC/HMC/PMC: Security, Higher and Priv access control (Table D-12)
 * BAS: Byte Address Select (RES1 for AArch64)
 * E: Enable bit
 *
 * DBGBVR<n>_EL1, Debug Breakpoint Value Registers
 *
 *  63  53 52       49 48       2  1 0
 * +------+-----------+----------+-----+
 * | RESS | VA[52:49] | VA[48:2] | 0 0 |
 * +------+-----------+----------+-----+
 *
 * Depending on the addressing mode bits the top bits of the register
 * are a sign extension of the highest applicable VA bit. Some
 * versions of GDB don't do it correctly so we ensure they are correct
 * here so future PC comparisons will work properly.
 */

static int insert_hw_breakpoint(target_ulong addr)
{
    HWBreakpoint brk = {
        .bcr = 0x1,                             /* BCR E=1, enable */
        .bvr = sextract64(addr, 0, 53)
    };

    if (cur_hw_bps >= max_hw_bps) {
        return -ENOBUFS;
    }

    brk.bcr = deposit32(brk.bcr, 1, 2, 0x3);    /* PMC = 11 */
    brk.bcr = deposit32(brk.bcr, 5, 4, 0xf);    /* BAS = RES1 */

    g_array_append_val(hw_breakpoints, brk);

    return 0;
}
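
/*
 * Worked example: with the deposits in insert_hw_breakpoint() above, a
 * simple unlinked breakpoint ends up with BCR == 0x1e7 (E=1, PMC=0b11,
 * BAS=0xf) and BVR holding the sign-extended breakpoint address.
 */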

/**
 * delete_hw_breakpoint()
 * @pc: address of breakpoint
 *
 * Delete a breakpoint and shuffle any above down
 */

static int delete_hw_breakpoint(target_ulong pc)
{
    int i;
    for (i = 0; i < hw_breakpoints->len; i++) {
        HWBreakpoint *brk = get_hw_bp(i);
        if (brk->bvr == pc) {
            g_array_remove_index(hw_breakpoints, i);
            return 0;
        }
    }
    return -ENOENT;
}

/**
 * insert_hw_watchpoint()
 * @addr: address of watch point
 * @len: size of area
 * @type: type of watch point
 *
 * See ARM ARM D2.10. As with the breakpoints we can do some advanced
 * stuff if we want to. The watch points can be linked with the break
 * points above to make them context aware. However for simplicity
 * currently we only deal with simple read/write watch points.
 *
 * D7.3.11 DBGWCR<n>_EL1, Debug Watchpoint Control Registers
 *
 *  31  29 28   24 23  21 20 19 16 15 14 13   12  5 4   3 2   1  0
 * +------+-------+------+----+-----+-----+-----+-----+-----+-----+---+
 * | RES0 |  MASK | RES0 | WT | LBN | SSC | HMC | BAS | LSC | PAC | E |
 * +------+-------+------+----+-----+-----+-----+-----+-----+-----+---+
 *
 * MASK: num bits addr mask (0=none,01/10=res,11=3 bits (8 bytes))
 * WT: 0 - unlinked, 1 - linked (not currently used)
 * LBN: Linked BP number (not currently used)
 * SSC/HMC/PAC: Security, Higher and Priv access control (Table D2-11)
 * BAS: Byte Address Select
 * LSC: Load/Store control (01: load, 10: store, 11: both)
 * E: Enable
 *
 * The bottom 2 bits of the value register are masked. Therefore to
 * break on any sizes smaller than an unaligned word you need to set
 * MASK=0, BAS=bit per byte in question. For larger regions (^2) you
 * need to ensure you mask the address as required and set BAS=0xff
 */

static int insert_hw_watchpoint(target_ulong addr,
                                target_ulong len, int type)
{
    HWWatchpoint wp = {
        .wcr = 1, /* E=1, enable */
        .wvr = addr & (~0x7ULL),
        .details = { .vaddr = addr, .len = len }
    };

    if (cur_hw_wps >= max_hw_wps) {
        return -ENOBUFS;
    }

    /*
     * HMC=0 SSC=0 PAC=3 will hit EL0 or EL1, any security state,
     * valid whether EL3 is implemented or not
     */
    wp.wcr = deposit32(wp.wcr, 1, 2, 3);

    switch (type) {
    case GDB_WATCHPOINT_READ:
        wp.wcr = deposit32(wp.wcr, 3, 2, 1);
        wp.details.flags = BP_MEM_READ;
        break;
    case GDB_WATCHPOINT_WRITE:
        wp.wcr = deposit32(wp.wcr, 3, 2, 2);
        wp.details.flags = BP_MEM_WRITE;
        break;
    case GDB_WATCHPOINT_ACCESS:
        wp.wcr = deposit32(wp.wcr, 3, 2, 3);
        wp.details.flags = BP_MEM_ACCESS;
        break;
    default:
        g_assert_not_reached();
        break;
    }
    if (len <= 8) {
        /* we align the address and set the bits in BAS */
        int off = addr & 0x7;
        int bas = (1 << len) - 1;

        wp.wcr = deposit32(wp.wcr, 5 + off, 8 - off, bas);
    } else {
        /* For ranges above 8 bytes we need to be a power of 2 */
        if (is_power_of_2(len)) {
            int bits = ctz64(len);

            wp.wvr &= ~((1 << bits) - 1);
            wp.wcr = deposit32(wp.wcr, 24, 4, bits);
            wp.wcr = deposit32(wp.wcr, 5, 8, 0xff);
        } else {
            return -ENOBUFS;
        }
    }

    g_array_append_val(hw_watchpoints, wp);
    return 0;
}
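
/*
 * Worked example: a 4 byte watchpoint at address 0x1002 is stored as
 * WVR = 0x1000 (addr & ~7) with off = 2 and bas = 0xf, so the
 * deposit32(wcr, 7, 6, 0xf) above leaves BAS = 0b00111100, i.e. bytes
 * 2-5 of the doubleword at 0x1000 (addresses 0x1002-0x1005) are watched.
 */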


static bool check_watchpoint_in_range(int i, target_ulong addr)
{
    HWWatchpoint *wp = get_hw_wp(i);
    uint64_t addr_top, addr_bottom = wp->wvr;
    int bas = extract32(wp->wcr, 5, 8);
    int mask = extract32(wp->wcr, 24, 4);

    if (mask) {
        addr_top = addr_bottom + (1 << mask);
    } else {
        /* BAS must be contiguous but can offset against the base
         * address in DBGWVR */
        addr_bottom = addr_bottom + ctz32(bas);
        addr_top = addr_bottom + clo32(bas);
    }

    if (addr >= addr_bottom && addr <= addr_top) {
        return true;
    }

    return false;
}

/**
 * delete_hw_watchpoint()
 * @addr: address of watch point
 *
 * Delete a watchpoint and shuffle any above down
 */

static int delete_hw_watchpoint(target_ulong addr,
                                target_ulong len, int type)
{
    int i;
    for (i = 0; i < cur_hw_wps; i++) {
        if (check_watchpoint_in_range(i, addr)) {
            g_array_remove_index(hw_watchpoints, i);
            return 0;
        }
    }
    return -ENOENT;
}


int kvm_arch_insert_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    switch (type) {
    case GDB_BREAKPOINT_HW:
        return insert_hw_breakpoint(addr);
        break;
    case GDB_WATCHPOINT_READ:
    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_ACCESS:
        return insert_hw_watchpoint(addr, len, type);
    default:
        return -ENOSYS;
    }
}

int kvm_arch_remove_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    switch (type) {
    case GDB_BREAKPOINT_HW:
        return delete_hw_breakpoint(addr);
    case GDB_WATCHPOINT_READ:
    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_ACCESS:
        return delete_hw_watchpoint(addr, len, type);
    default:
        return -ENOSYS;
    }
}


void kvm_arch_remove_all_hw_breakpoints(void)
{
    if (cur_hw_wps > 0) {
        g_array_remove_range(hw_watchpoints, 0, cur_hw_wps);
    }
    if (cur_hw_bps > 0) {
        g_array_remove_range(hw_breakpoints, 0, cur_hw_bps);
    }
}

void kvm_arm_copy_hw_debug_data(struct kvm_guest_debug_arch *ptr)
{
    int i;
    memset(ptr, 0, sizeof(struct kvm_guest_debug_arch));

    for (i = 0; i < max_hw_wps; i++) {
        HWWatchpoint *wp = get_hw_wp(i);
        ptr->dbg_wcr[i] = wp->wcr;
        ptr->dbg_wvr[i] = wp->wvr;
    }
    for (i = 0; i < max_hw_bps; i++) {
        HWBreakpoint *bp = get_hw_bp(i);
        ptr->dbg_bcr[i] = bp->bcr;
        ptr->dbg_bvr[i] = bp->bvr;
    }
}

bool kvm_arm_hw_debug_active(CPUState *cs)
{
    return ((cur_hw_wps > 0) || (cur_hw_bps > 0));
}

static bool find_hw_breakpoint(CPUState *cpu, target_ulong pc)
{
    int i;

    for (i = 0; i < cur_hw_bps; i++) {
        HWBreakpoint *bp = get_hw_bp(i);
        if (bp->bvr == pc) {
            return true;
        }
    }
    return false;
}

static CPUWatchpoint *find_hw_watchpoint(CPUState *cpu, target_ulong addr)
{
    int i;

    for (i = 0; i < cur_hw_wps; i++) {
        if (check_watchpoint_in_range(i, addr)) {
            return &get_hw_wp(i)->details;
        }
    }
    return NULL;
}

static bool kvm_arm_set_device_attr(CPUState *cs, struct kvm_device_attr *attr,
                                    const char *name)
{
    int err;

    err = kvm_vcpu_ioctl(cs, KVM_HAS_DEVICE_ATTR, attr);
    if (err != 0) {
        error_report("%s: KVM_HAS_DEVICE_ATTR: %s", name, strerror(-err));
        return false;
    }

    err = kvm_vcpu_ioctl(cs, KVM_SET_DEVICE_ATTR, attr);
    if (err != 0) {
        error_report("%s: KVM_SET_DEVICE_ATTR: %s", name, strerror(-err));
        return false;
    }

    return true;
}

void kvm_arm_pmu_init(CPUState *cs)
{
    struct kvm_device_attr attr = {
        .group = KVM_ARM_VCPU_PMU_V3_CTRL,
        .attr = KVM_ARM_VCPU_PMU_V3_INIT,
    };

    if (!ARM_CPU(cs)->has_pmu) {
        return;
    }
    if (!kvm_arm_set_device_attr(cs, &attr, "PMU")) {
        error_report("failed to init PMU");
        abort();
    }
}

void kvm_arm_pmu_set_irq(CPUState *cs, int irq)
{
    struct kvm_device_attr attr = {
        .group = KVM_ARM_VCPU_PMU_V3_CTRL,
        .addr = (intptr_t)&irq,
        .attr = KVM_ARM_VCPU_PMU_V3_IRQ,
    };

    if (!ARM_CPU(cs)->has_pmu) {
        return;
    }
    if (!kvm_arm_set_device_attr(cs, &attr, "PMU")) {
        error_report("failed to set irq for PMU");
        abort();
    }
}

void kvm_arm_pvtime_init(CPUState *cs, uint64_t ipa)
{
    struct kvm_device_attr attr = {
        .group = KVM_ARM_VCPU_PVTIME_CTRL,
        .attr = KVM_ARM_VCPU_PVTIME_IPA,
        .addr = (uint64_t)&ipa,
    };

    if (ARM_CPU(cs)->kvm_steal_time == ON_OFF_AUTO_OFF) {
        return;
    }
    if (!kvm_arm_set_device_attr(cs, &attr, "PVTIME IPA")) {
        error_report("failed to init PVTIME IPA");
        abort();
    }
}

static int read_sys_reg32(int fd, uint32_t *pret, uint64_t id)
{
    uint64_t ret;
    struct kvm_one_reg idreg = { .id = id, .addr = (uintptr_t)&ret };
    int err;

    assert((id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64);
    err = ioctl(fd, KVM_GET_ONE_REG, &idreg);
    if (err < 0) {
        return -1;
    }
    *pret = ret;
    return 0;
}

static int read_sys_reg64(int fd, uint64_t *pret, uint64_t id)
{
    struct kvm_one_reg idreg = { .id = id, .addr = (uintptr_t)pret };

    assert((id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64);
    return ioctl(fd, KVM_GET_ONE_REG, &idreg);
}
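
/*
 * The ARM64_SYS_REG() arguments used below are (op0, op1, crn, crm, op2);
 * for example ARM64_SYS_REG(3, 0, 0, 4, 0) encodes ID_AA64PFR0_EL1 and
 * ARM64_SYS_REG(3, 0, 0, 7, 0) encodes ID_AA64MMFR0_EL1.
 */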
bool kvm_arm_get_host_cpu_features(ARMHostCPUFeatures *ahcf)
{
    /* Identify the feature bits corresponding to the host CPU, and
     * fill out the ARMHostCPUFeatures fields accordingly. To do this
     * we have to create a scratch VM, create a single CPU inside it,
     * and then query that CPU for the relevant ID registers.
     */
    int fdarray[3];
    bool sve_supported;
    uint64_t features = 0;
    uint64_t t;
    int err;

    /* Old kernels may not know about the PREFERRED_TARGET ioctl: however
     * we know these will only support creating one kind of guest CPU,
     * which is its preferred CPU type. Fortunately these old kernels
     * support only a very limited number of CPUs.
     */
    static const uint32_t cpus_to_try[] = {
        KVM_ARM_TARGET_AEM_V8,
        KVM_ARM_TARGET_FOUNDATION_V8,
        KVM_ARM_TARGET_CORTEX_A57,
        QEMU_KVM_ARM_TARGET_NONE
    };
    /*
     * target = -1 informs kvm_arm_create_scratch_host_vcpu()
     * to use the preferred target
     */
    struct kvm_vcpu_init init = { .target = -1, };

    if (!kvm_arm_create_scratch_host_vcpu(cpus_to_try, fdarray, &init)) {
        return false;
    }

    ahcf->target = init.target;
    ahcf->dtb_compatible = "arm,arm-v8";

    err = read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr0,
                         ARM64_SYS_REG(3, 0, 0, 4, 0));
    if (unlikely(err < 0)) {
        /*
         * Before v4.15, the kernel only exposed a limited number of system
         * registers, not including any of the interesting AArch64 ID regs.
         * For the most part we could leave these fields as zero with minimal
         * effect, since this does not affect the values seen by the guest.
         *
         * However, it could cause problems down the line for QEMU,
         * so provide a minimal v8.0 default.
         *
         * ??? Could read MIDR and use knowledge from cpu64.c.
         * ??? Could map a page of memory into our temp guest and
         *     run the tiniest of hand-crafted kernels to extract
         *     the values seen by the guest.
         * ??? Either of these sounds like too much effort just
         *     to work around running a modern host kernel.
         */
        ahcf->isar.id_aa64pfr0 = 0x00000011; /* EL1&0, AArch64 only */
        err = 0;
    } else {
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64pfr1,
                              ARM64_SYS_REG(3, 0, 0, 4, 1));
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr0,
                              ARM64_SYS_REG(3, 0, 0, 5, 0));
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64dfr1,
                              ARM64_SYS_REG(3, 0, 0, 5, 1));
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar0,
                              ARM64_SYS_REG(3, 0, 0, 6, 0));
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64isar1,
                              ARM64_SYS_REG(3, 0, 0, 6, 1));
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr0,
                              ARM64_SYS_REG(3, 0, 0, 7, 0));
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr1,
                              ARM64_SYS_REG(3, 0, 0, 7, 1));
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64mmfr2,
                              ARM64_SYS_REG(3, 0, 0, 7, 2));

        /*
         * Note that if AArch32 support is not present in the host,
         * the AArch32 sysregs are present to be read, but will
         * return UNKNOWN values. This is neither better nor worse
         * than skipping the reads and leaving 0, as we must avoid
         * considering the values in every case.
         */
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_pfr0,
                              ARM64_SYS_REG(3, 0, 0, 1, 0));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_pfr1,
                              ARM64_SYS_REG(3, 0, 0, 1, 1));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_pfr2,
                              ARM64_SYS_REG(3, 0, 0, 3, 4));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_dfr0,
                              ARM64_SYS_REG(3, 0, 0, 1, 2));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr0,
                              ARM64_SYS_REG(3, 0, 0, 1, 4));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr1,
                              ARM64_SYS_REG(3, 0, 0, 1, 5));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr2,
                              ARM64_SYS_REG(3, 0, 0, 1, 6));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr3,
                              ARM64_SYS_REG(3, 0, 0, 1, 7));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar0,
                              ARM64_SYS_REG(3, 0, 0, 2, 0));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar1,
                              ARM64_SYS_REG(3, 0, 0, 2, 1));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar2,
                              ARM64_SYS_REG(3, 0, 0, 2, 2));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar3,
                              ARM64_SYS_REG(3, 0, 0, 2, 3));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar4,
                              ARM64_SYS_REG(3, 0, 0, 2, 4));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar5,
                              ARM64_SYS_REG(3, 0, 0, 2, 5));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_mmfr4,
                              ARM64_SYS_REG(3, 0, 0, 2, 6));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.id_isar6,
                              ARM64_SYS_REG(3, 0, 0, 2, 7));

        err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr0,
                              ARM64_SYS_REG(3, 0, 0, 3, 0));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr1,
                              ARM64_SYS_REG(3, 0, 0, 3, 1));
        err |= read_sys_reg32(fdarray[2], &ahcf->isar.mvfr2,
                              ARM64_SYS_REG(3, 0, 0, 3, 2));

        /*
         * DBGDIDR is a bit complicated because the kernel doesn't
         * provide an accessor for it in 64-bit mode, which is what this
         * scratch VM is in, and there's no architected "64-bit sysreg
         * which reads the same as the 32-bit register" the way there is
         * for other ID registers. Instead we synthesize a value from the
         * AArch64 ID_AA64DFR0, the same way the kernel code in
         * arch/arm64/kvm/sys_regs.c:trap_dbgidr() does.
         * We only do this if the CPU supports AArch32 at EL1.
         */
        if (FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL1) >= 2) {
            int wrps = FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, WRPS);
            int brps = FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, BRPS);
            int ctx_cmps =
                FIELD_EX64(ahcf->isar.id_aa64dfr0, ID_AA64DFR0, CTX_CMPS);
            int version = 6; /* ARMv8 debug architecture */
            bool has_el3 =
                !!FIELD_EX32(ahcf->isar.id_aa64pfr0, ID_AA64PFR0, EL3);
            uint32_t dbgdidr = 0;

            dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, WRPS, wrps);
            dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, BRPS, brps);
            dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, CTX_CMPS, ctx_cmps);
            dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, VERSION, version);
            dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, NSUHD_IMP, has_el3);
            dbgdidr = FIELD_DP32(dbgdidr, DBGDIDR, SE_IMP, has_el3);
            dbgdidr |= (1 << 15); /* RES1 bit */
            ahcf->isar.dbgdidr = dbgdidr;
        }
    }

    sve_supported = ioctl(fdarray[0], KVM_CHECK_EXTENSION, KVM_CAP_ARM_SVE) > 0;

    /* Add feature bits that can't appear until after VCPU init. */
    if (sve_supported) {
        t = ahcf->isar.id_aa64pfr0;
        t = FIELD_DP64(t, ID_AA64PFR0, SVE, 1);
        ahcf->isar.id_aa64pfr0 = t;

        /*
         * Before v5.1, KVM did not support SVE and did not expose
         * ID_AA64ZFR0_EL1 even as RAZ. After v5.1, KVM still does
         * not expose the register to "user" requests like this
         * unless the host supports SVE.
         */
        err |= read_sys_reg64(fdarray[2], &ahcf->isar.id_aa64zfr0,
                              ARM64_SYS_REG(3, 0, 0, 4, 4));
    }

    kvm_arm_destroy_scratch_host_vcpu(fdarray);

    if (err < 0) {
        return false;
    }

    /*
     * We can assume any KVM supporting CPU is at least a v8
     * with VFPv4+Neon; this in turn implies most of the other
     * feature bits.
     */
    features |= 1ULL << ARM_FEATURE_V8;
    features |= 1ULL << ARM_FEATURE_NEON;
    features |= 1ULL << ARM_FEATURE_AARCH64;
    features |= 1ULL << ARM_FEATURE_PMU;
    features |= 1ULL << ARM_FEATURE_GENERIC_TIMER;

    ahcf->features = features;

    return true;
}

void kvm_arm_steal_time_finalize(ARMCPU *cpu, Error **errp)
{
    bool has_steal_time = kvm_arm_steal_time_supported();

    if (cpu->kvm_steal_time == ON_OFF_AUTO_AUTO) {
        if (!has_steal_time || !arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
            cpu->kvm_steal_time = ON_OFF_AUTO_OFF;
        } else {
            cpu->kvm_steal_time = ON_OFF_AUTO_ON;
        }
    } else if (cpu->kvm_steal_time == ON_OFF_AUTO_ON) {
        if (!has_steal_time) {
            error_setg(errp, "'kvm-steal-time' cannot be enabled "
                             "on this host");
            return;
        } else if (!arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
            /*
             * DEN0057A chapter 2 says "This specification only covers
             * systems in which the Execution state of the hypervisor
             * as well as EL1 of virtual machines is AArch64.". And,
             * to ensure that, the smc/hvc calls are only specified as
             * smc64/hvc64.
             */
            error_setg(errp, "'kvm-steal-time' cannot be enabled "
                             "for AArch32 guests");
            return;
        }
    }
}

bool kvm_arm_aarch32_supported(void)
{
    return kvm_check_extension(kvm_state, KVM_CAP_ARM_EL1_32BIT);
}

bool kvm_arm_sve_supported(void)
{
    return kvm_check_extension(kvm_state, KVM_CAP_ARM_SVE);
}

bool kvm_arm_steal_time_supported(void)
{
    return kvm_check_extension(kvm_state, KVM_CAP_STEAL_TIME);
}

QEMU_BUILD_BUG_ON(KVM_ARM64_SVE_VQ_MIN != 1);

void kvm_arm_sve_get_vls(CPUState *cs, unsigned long *map)
{
    /* Only call this function if kvm_arm_sve_supported() returns true. */
    static uint64_t vls[KVM_ARM64_SVE_VLS_WORDS];
    static bool probed;
    uint32_t vq = 0;
    int i, j;

    bitmap_zero(map, ARM_MAX_VQ);

    /*
     * KVM ensures all host CPUs support the same set of vector lengths.
     * So we only need to create the scratch VCPUs once and then cache
     * the results.
     */
    if (!probed) {
        struct kvm_vcpu_init init = {
            .target = -1,
            .features[0] = (1 << KVM_ARM_VCPU_SVE),
        };
        struct kvm_one_reg reg = {
            .id = KVM_REG_ARM64_SVE_VLS,
            .addr = (uint64_t)&vls[0],
        };
        int fdarray[3], ret;

        probed = true;

        if (!kvm_arm_create_scratch_host_vcpu(NULL, fdarray, &init)) {
            error_report("failed to create scratch VCPU with SVE enabled");
            abort();
        }
        ret = ioctl(fdarray[2], KVM_GET_ONE_REG, &reg);
        kvm_arm_destroy_scratch_host_vcpu(fdarray);
        if (ret) {
            error_report("failed to get KVM_REG_ARM64_SVE_VLS: %s",
                         strerror(errno));
            abort();
        }

        for (i = KVM_ARM64_SVE_VLS_WORDS - 1; i >= 0; --i) {
            if (vls[i]) {
                vq = 64 - clz64(vls[i]) + i * 64;
                break;
            }
        }
        if (vq > ARM_MAX_VQ) {
            warn_report("KVM supports vector lengths larger than "
                        "QEMU can enable");
        }
    }

    for (i = 0; i < KVM_ARM64_SVE_VLS_WORDS; ++i) {
        if (!vls[i]) {
            continue;
        }
        for (j = 1; j <= 64; ++j) {
            vq = j + i * 64;
            if (vq > ARM_MAX_VQ) {
                return;
            }
            if (vls[i] & (1UL << (j - 1))) {
                set_bit(vq - 1, map);
            }
        }
    }
}
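
/*
 * In the VLS bitmap used above (and by kvm_arm_sve_set_vls() below),
 * bit (vq - 1) of vls[(vq - 1) / 64] being set means the host supports
 * vectors of vq quadwords, i.e. vq * 128 bits; bit 0 of vls[0] is the
 * minimum 128-bit length and bit 1 is 256 bits.
 */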

static int kvm_arm_sve_set_vls(CPUState *cs)
{
    uint64_t vls[KVM_ARM64_SVE_VLS_WORDS] = {0};
    struct kvm_one_reg reg = {
        .id = KVM_REG_ARM64_SVE_VLS,
        .addr = (uint64_t)&vls[0],
    };
    ARMCPU *cpu = ARM_CPU(cs);
    uint32_t vq;
    int i, j;

    assert(cpu->sve_max_vq <= KVM_ARM64_SVE_VQ_MAX);

    for (vq = 1; vq <= cpu->sve_max_vq; ++vq) {
        if (test_bit(vq - 1, cpu->sve_vq_map)) {
            i = (vq - 1) / 64;
            j = (vq - 1) % 64;
            vls[i] |= 1UL << j;
        }
    }

    return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
}

#define ARM_CPU_ID_MPIDR 3, 0, 0, 0, 5

int kvm_arch_init_vcpu(CPUState *cs)
{
    int ret;
    uint64_t mpidr;
    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;

    if (cpu->kvm_target == QEMU_KVM_ARM_TARGET_NONE ||
        !object_dynamic_cast(OBJECT(cpu), TYPE_AARCH64_CPU)) {
        error_report("KVM is not supported for this guest CPU type");
        return -EINVAL;
    }

    qemu_add_vm_change_state_handler(kvm_arm_vm_state_change, cs);

    /* Determine init features for this CPU */
    memset(cpu->kvm_init_features, 0, sizeof(cpu->kvm_init_features));
    if (cs->start_powered_off) {
        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_POWER_OFF;
    }
    if (kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PSCI_0_2)) {
        cpu->psci_version = 2;
        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PSCI_0_2;
    }
    if (!arm_feature(&cpu->env, ARM_FEATURE_AARCH64)) {
        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_EL1_32BIT;
    }
    if (!kvm_check_extension(cs->kvm_state, KVM_CAP_ARM_PMU_V3)) {
        cpu->has_pmu = false;
    }
    if (cpu->has_pmu) {
        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_PMU_V3;
    } else {
        env->features &= ~(1ULL << ARM_FEATURE_PMU);
    }
    if (cpu_isar_feature(aa64_sve, cpu)) {
        assert(kvm_arm_sve_supported());
        cpu->kvm_init_features[0] |= 1 << KVM_ARM_VCPU_SVE;
    }

    /* Do KVM_ARM_VCPU_INIT ioctl */
    ret = kvm_arm_vcpu_init(cs);
    if (ret) {
        return ret;
    }

    if (cpu_isar_feature(aa64_sve, cpu)) {
        ret = kvm_arm_sve_set_vls(cs);
        if (ret) {
            return ret;
        }
        ret = kvm_arm_vcpu_finalize(cs, KVM_ARM_VCPU_SVE);
        if (ret) {
            return ret;
        }
    }

    /*
     * When KVM is in use, PSCI is emulated in-kernel and not by qemu.
     * Currently KVM has its own idea about MPIDR assignment, so we
     * override our defaults with what we get from KVM.
     */
    ret = kvm_get_one_reg(cs, ARM64_SYS_REG(ARM_CPU_ID_MPIDR), &mpidr);
    if (ret) {
        return ret;
    }
    cpu->mp_affinity = mpidr & ARM64_AFFINITY_MASK;

    kvm_arm_init_debug(cs);

    /* Check whether user space can specify guest syndrome value */
    kvm_arm_init_serror_injection(cs);

    return kvm_arm_init_cpreg_list(cpu);
}

int kvm_arch_destroy_vcpu(CPUState *cs)
{
    return 0;
}

bool kvm_arm_reg_syncs_via_cpreg_list(uint64_t regidx)
{
    /* Return true if the regidx is a register we should synchronize
     * via the cpreg_tuples array (ie is not a core or sve reg that
     * we sync by hand in kvm_arch_get/put_registers())
     */
    switch (regidx & KVM_REG_ARM_COPROC_MASK) {
    case KVM_REG_ARM_CORE:
    case KVM_REG_ARM64_SVE:
        return false;
    default:
        return true;
    }
}

typedef struct CPRegStateLevel {
    uint64_t regidx;
    int level;
} CPRegStateLevel;

/* All system registers not listed in the following table are assumed to be
 * of the level KVM_PUT_RUNTIME_STATE. If a register should be written less
 * often, you must add it to this table with a state of either
 * KVM_PUT_RESET_STATE or KVM_PUT_FULL_STATE.
 */
static const CPRegStateLevel non_runtime_cpregs[] = {
    { KVM_REG_ARM_TIMER_CNT, KVM_PUT_FULL_STATE },
};

int kvm_arm_cpreg_level(uint64_t regidx)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(non_runtime_cpregs); i++) {
        const CPRegStateLevel *l = &non_runtime_cpregs[i];
        if (l->regidx == regidx) {
            return l->level;
        }
    }

    return KVM_PUT_RUNTIME_STATE;
}

/* Callers must hold the iothread mutex lock */
static void kvm_inject_arm_sea(CPUState *c)
{
    ARMCPU *cpu = ARM_CPU(c);
    CPUARMState *env = &cpu->env;
    uint32_t esr;
    bool same_el;

    c->exception_index = EXCP_DATA_ABORT;
    env->exception.target_el = 1;

    /*
     * Set the DFSC to synchronous external abort and set FnV to not valid,
     * this will tell guest the FAR_ELx is UNKNOWN for this abort.
     */
    same_el = arm_current_el(env) == env->exception.target_el;
    esr = syn_data_abort_no_iss(same_el, 1, 0, 0, 0, 0, 0x10);

    env->exception.syndrome = esr;

    arm_cpu_do_interrupt(c);
}

#define AARCH64_CORE_REG(x)   (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
                 KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))

#define AARCH64_SIMD_CORE_REG(x)   (KVM_REG_ARM64 | KVM_REG_SIZE_U128 | \
                 KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))

#define AARCH64_SIMD_CTRL_REG(x)   (KVM_REG_ARM64 | KVM_REG_SIZE_U32 | \
                 KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
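
/*
 * For example, AARCH64_CORE_REG(regs.pc) names the 64-bit PC slot of
 * struct kvm_regs in the KVM_REG_ARM_CORE space, while
 * AARCH64_SIMD_CORE_REG(fp_regs.vregs[0]) names the 128-bit V0 register.
 */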
static int kvm_arch_put_fpsimd(CPUState *cs)
{
    CPUARMState *env = &ARM_CPU(cs)->env;
    struct kvm_one_reg reg;
    int i, ret;

    for (i = 0; i < 32; i++) {
        uint64_t *q = aa64_vfp_qreg(env, i);
#ifdef HOST_WORDS_BIGENDIAN
        uint64_t fp_val[2] = { q[1], q[0] };
        reg.addr = (uintptr_t)fp_val;
#else
        reg.addr = (uintptr_t)q;
#endif
        reg.id = AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]);
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    return 0;
}

/*
 * KVM SVE registers come in slices where ZREGs have a slice size of 2048 bits
 * and PREGS and the FFR have a slice size of 256 bits. However we simply hard
 * code the slice index to zero for now as it's unlikely we'll need more than
 * one slice for quite some time.
 */
static int kvm_arch_put_sve(CPUState *cs)
{
    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;
    uint64_t tmp[ARM_MAX_VQ * 2];
    uint64_t *r;
    struct kvm_one_reg reg;
    int n, ret;

    for (n = 0; n < KVM_ARM64_SVE_NUM_ZREGS; ++n) {
        r = sve_bswap64(tmp, &env->vfp.zregs[n].d[0], cpu->sve_max_vq * 2);
        reg.addr = (uintptr_t)r;
        reg.id = KVM_REG_ARM64_SVE_ZREG(n, 0);
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    for (n = 0; n < KVM_ARM64_SVE_NUM_PREGS; ++n) {
        r = sve_bswap64(tmp, r = &env->vfp.pregs[n].p[0],
                        DIV_ROUND_UP(cpu->sve_max_vq * 2, 8));
        reg.addr = (uintptr_t)r;
        reg.id = KVM_REG_ARM64_SVE_PREG(n, 0);
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    r = sve_bswap64(tmp, &env->vfp.pregs[FFR_PRED_NUM].p[0],
                    DIV_ROUND_UP(cpu->sve_max_vq * 2, 8));
    reg.addr = (uintptr_t)r;
    reg.id = KVM_REG_ARM64_SVE_FFR(0);
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    return 0;
}

int kvm_arch_put_registers(CPUState *cs, int level)
{
    struct kvm_one_reg reg;
    uint64_t val;
    uint32_t fpr;
    int i, ret;
    unsigned int el;

    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;

    /* If we are in AArch32 mode then we need to copy the AArch32 regs to the
     * AArch64 registers before pushing them out to 64-bit KVM.
     */
    if (!is_a64(env)) {
        aarch64_sync_32_to_64(env);
    }

    for (i = 0; i < 31; i++) {
        reg.id = AARCH64_CORE_REG(regs.regs[i]);
        reg.addr = (uintptr_t) &env->xregs[i];
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    /* KVM puts SP_EL0 in regs.sp and SP_EL1 in regs.sp_el1. On the
     * QEMU side we keep the current SP in xregs[31] as well.
     */
    aarch64_save_sp(env, 1);

    reg.id = AARCH64_CORE_REG(regs.sp);
    reg.addr = (uintptr_t) &env->sp_el[0];
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    reg.id = AARCH64_CORE_REG(sp_el1);
    reg.addr = (uintptr_t) &env->sp_el[1];
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    /* Note that KVM thinks pstate is 64 bit but we use a uint32_t */
    if (is_a64(env)) {
        val = pstate_read(env);
    } else {
        val = cpsr_read(env);
    }
    reg.id = AARCH64_CORE_REG(regs.pstate);
    reg.addr = (uintptr_t) &val;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    reg.id = AARCH64_CORE_REG(regs.pc);
    reg.addr = (uintptr_t) &env->pc;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    reg.id = AARCH64_CORE_REG(elr_el1);
    reg.addr = (uintptr_t) &env->elr_el[1];
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    /* Saved Program State Registers
     *
     * Before we restore from the banked_spsr[] array we need to
     * ensure that any modifications to env->spsr are correctly
     * reflected in the banks.
     */
    el = arm_current_el(env);
    if (el > 0 && !is_a64(env)) {
        i = bank_number(env->uncached_cpsr & CPSR_M);
        env->banked_spsr[i] = env->spsr;
    }

    /* KVM 0-4 map to QEMU banks 1-5 */
    for (i = 0; i < KVM_NR_SPSR; i++) {
        reg.id = AARCH64_CORE_REG(spsr[i]);
        reg.addr = (uintptr_t) &env->banked_spsr[i + 1];
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    if (cpu_isar_feature(aa64_sve, cpu)) {
        ret = kvm_arch_put_sve(cs);
    } else {
        ret = kvm_arch_put_fpsimd(cs);
    }
    if (ret) {
        return ret;
    }

    reg.addr = (uintptr_t)(&fpr);
    fpr = vfp_get_fpsr(env);
    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr);
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    reg.addr = (uintptr_t)(&fpr);
    fpr = vfp_get_fpcr(env);
    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr);
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    write_cpustate_to_list(cpu, true);

    if (!write_list_to_kvmstate(cpu, level)) {
        return -EINVAL;
    }

    /*
     * Setting VCPU events should be triggered after syncing the registers
     * to avoid overwriting potential changes made by KVM upon calling
     * KVM_SET_VCPU_EVENTS ioctl
     */
    ret = kvm_put_vcpu_events(cpu);
    if (ret) {
        return ret;
    }

    kvm_arm_sync_mpstate_to_kvm(cpu);

    return ret;
}

static int kvm_arch_get_fpsimd(CPUState *cs)
{
    CPUARMState *env = &ARM_CPU(cs)->env;
    struct kvm_one_reg reg;
    int i, ret;

    for (i = 0; i < 32; i++) {
        uint64_t *q = aa64_vfp_qreg(env, i);
        reg.id = AARCH64_SIMD_CORE_REG(fp_regs.vregs[i]);
        reg.addr = (uintptr_t)q;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret) {
            return ret;
        } else {
#ifdef HOST_WORDS_BIGENDIAN
            uint64_t t;
            t = q[0], q[0] = q[1], q[1] = t;
#endif
        }
    }

    return 0;
}

/*
 * KVM SVE registers come in slices where ZREGs have a slice size of 2048 bits
 * and PREGS and the FFR have a slice size of 256 bits. However we simply hard
 * code the slice index to zero for now as it's unlikely we'll need more than
 * one slice for quite some time.
 */
static int kvm_arch_get_sve(CPUState *cs)
{
    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;
    struct kvm_one_reg reg;
    uint64_t *r;
    int n, ret;

    for (n = 0; n < KVM_ARM64_SVE_NUM_ZREGS; ++n) {
        r = &env->vfp.zregs[n].d[0];
        reg.addr = (uintptr_t)r;
        reg.id = KVM_REG_ARM64_SVE_ZREG(n, 0);
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
        sve_bswap64(r, r, cpu->sve_max_vq * 2);
    }

    for (n = 0; n < KVM_ARM64_SVE_NUM_PREGS; ++n) {
        r = &env->vfp.pregs[n].p[0];
        reg.addr = (uintptr_t)r;
        reg.id = KVM_REG_ARM64_SVE_PREG(n, 0);
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
        sve_bswap64(r, r, DIV_ROUND_UP(cpu->sve_max_vq * 2, 8));
    }

    r = &env->vfp.pregs[FFR_PRED_NUM].p[0];
    reg.addr = (uintptr_t)r;
    reg.id = KVM_REG_ARM64_SVE_FFR(0);
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }
    sve_bswap64(r, r, DIV_ROUND_UP(cpu->sve_max_vq * 2, 8));

    return 0;
}

int kvm_arch_get_registers(CPUState *cs)
{
    struct kvm_one_reg reg;
    uint64_t val;
    unsigned int el;
    uint32_t fpr;
    int i, ret;

    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;

    for (i = 0; i < 31; i++) {
        reg.id = AARCH64_CORE_REG(regs.regs[i]);
        reg.addr = (uintptr_t) &env->xregs[i];
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    reg.id = AARCH64_CORE_REG(regs.sp);
    reg.addr = (uintptr_t) &env->sp_el[0];
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    reg.id = AARCH64_CORE_REG(sp_el1);
    reg.addr = (uintptr_t) &env->sp_el[1];
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    reg.id = AARCH64_CORE_REG(regs.pstate);
    reg.addr = (uintptr_t) &val;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    env->aarch64 = ((val & PSTATE_nRW) == 0);
    if (is_a64(env)) {
        pstate_write(env, val);
    } else {
        cpsr_write(env, val, 0xffffffff, CPSRWriteRaw);
    }

    /* KVM puts SP_EL0 in regs.sp and SP_EL1 in regs.sp_el1. On the
     * QEMU side we keep the current SP in xregs[31] as well.
     */
    aarch64_restore_sp(env, 1);

    reg.id = AARCH64_CORE_REG(regs.pc);
    reg.addr = (uintptr_t) &env->pc;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    /* If we are in AArch32 mode then we need to sync the AArch32 regs with the
     * incoming AArch64 regs received from 64-bit KVM.
     * We must perform this after all of the registers have been acquired from
     * the kernel.
     */
    if (!is_a64(env)) {
        aarch64_sync_64_to_32(env);
    }

    reg.id = AARCH64_CORE_REG(elr_el1);
    reg.addr = (uintptr_t) &env->elr_el[1];
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }

    /* Fetch the SPSR registers
     *
     * KVM SPSRs 0-4 map to QEMU banks 1-5
     */
    for (i = 0; i < KVM_NR_SPSR; i++) {
        reg.id = AARCH64_CORE_REG(spsr[i]);
        reg.addr = (uintptr_t) &env->banked_spsr[i + 1];
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret) {
            return ret;
        }
    }

    el = arm_current_el(env);
    if (el > 0 && !is_a64(env)) {
        i = bank_number(env->uncached_cpsr & CPSR_M);
        env->spsr = env->banked_spsr[i];
    }

    if (cpu_isar_feature(aa64_sve, cpu)) {
        ret = kvm_arch_get_sve(cs);
    } else {
        ret = kvm_arch_get_fpsimd(cs);
    }
    if (ret) {
        return ret;
    }

    reg.addr = (uintptr_t)(&fpr);
    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpsr);
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }
    vfp_set_fpsr(env, fpr);

    reg.addr = (uintptr_t)(&fpr);
    reg.id = AARCH64_SIMD_CTRL_REG(fp_regs.fpcr);
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret) {
        return ret;
    }
    vfp_set_fpcr(env, fpr);

    ret = kvm_get_vcpu_events(cpu);
    if (ret) {
        return ret;
    }

    if (!write_kvmstate_to_list(cpu)) {
        return -EINVAL;
    }
    /* Note that it's OK to have registers which aren't in CPUState,
     * so we can ignore a failure return here.
     */
    write_list_to_cpustate(cpu);

    kvm_arm_sync_mpstate_to_qemu(cpu);

    /* TODO: other registers */
    return ret;
}

void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
{
    ram_addr_t ram_addr;
    hwaddr paddr;

    assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO);

    if (acpi_ghes_present() && addr) {
        ram_addr = qemu_ram_addr_from_host(addr);
        if (ram_addr != RAM_ADDR_INVALID &&
            kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
            kvm_hwpoison_page_add(ram_addr);
            /*
             * If this is a BUS_MCEERR_AR, we know we have been called
             * synchronously from the vCPU thread, so we can easily
             * synchronize the state and inject an error.
             *
             * TODO: we currently don't tell the guest at all about
             * BUS_MCEERR_AO. In that case we might either be being
             * called synchronously from the vCPU thread, or a bit
             * later from the main thread, so doing the injection of
             * the error would be more complicated.
             */
            if (code == BUS_MCEERR_AR) {
                kvm_cpu_synchronize_state(c);
                if (!acpi_ghes_record_errors(ACPI_HEST_SRC_ID_SEA, paddr)) {
                    kvm_inject_arm_sea(c);
                } else {
                    error_report("failed to record the error");
                    abort();
                }
            }
            return;
        }
        if (code == BUS_MCEERR_AO) {
            error_report("Hardware memory error at addr %p for memory used by "
                "QEMU itself instead of guest system!", addr);
        }
    }

    if (code == BUS_MCEERR_AR) {
        error_report("Hardware memory error!");
        exit(1);
    }
}

/* C6.6.29 BRK instruction */
static const uint32_t brk_insn = 0xd4200000;

int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
    if (have_guest_debug) {
        if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 0) ||
            cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&brk_insn, 4, 1)) {
            return -EINVAL;
        }
        return 0;
    } else {
        error_report("guest debug not supported on this kernel");
        return -EINVAL;
    }
}

int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
    static uint32_t brk;

    if (have_guest_debug) {
        if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&brk, 4, 0) ||
            brk != brk_insn ||
            cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 4, 1)) {
            return -EINVAL;
        }
        return 0;
    } else {
        error_report("guest debug not supported on this kernel");
        return -EINVAL;
    }
}

/* See v8 ARM ARM D7.2.27 ESR_ELx, Exception Syndrome Register
 *
 * To minimise translating between kernel and user-space the kernel
 * ABI just provides user-space with the full exception syndrome
 * register value to be decoded in QEMU.
 */

bool kvm_arm_handle_debug(CPUState *cs, struct kvm_debug_exit_arch *debug_exit)
{
    int hsr_ec = syn_get_ec(debug_exit->hsr);
    ARMCPU *cpu = ARM_CPU(cs);
    CPUARMState *env = &cpu->env;

    /* Ensure PC is synchronised */
    kvm_cpu_synchronize_state(cs);

    switch (hsr_ec) {
    case EC_SOFTWARESTEP:
        if (cs->singlestep_enabled) {
            return true;
        } else {
            /*
             * The kernel should have suppressed the guest's ability to
             * single step at this point so something has gone wrong.
             */
            error_report("%s: guest single-step while debugging unsupported"
                         " (%"PRIx64", %"PRIx32")",
                         __func__, env->pc, debug_exit->hsr);
            return false;
        }
        break;
    case EC_AA64_BKPT:
        if (kvm_find_sw_breakpoint(cs, env->pc)) {
            return true;
        }
        break;
    case EC_BREAKPOINT:
        if (find_hw_breakpoint(cs, env->pc)) {
            return true;
        }
        break;
    case EC_WATCHPOINT:
    {
        CPUWatchpoint *wp = find_hw_watchpoint(cs, debug_exit->far);
        if (wp) {
            cs->watchpoint_hit = wp;
            return true;
        }
        break;
    }
    default:
        error_report("%s: unhandled debug exit (%"PRIx32", %"PRIx64")",
                     __func__, debug_exit->hsr, env->pc);
    }

    /* If we are not handling the debug exception it must belong to
     * the guest. Let's re-use the existing TCG interrupt code to set
     * everything up properly.
     */
    cs->exception_index = EXCP_BKPT;
    env->exception.syndrome = debug_exit->hsr;
    env->exception.vaddress = debug_exit->far;
    env->exception.target_el = 1;
    qemu_mutex_lock_iothread();
    arm_cpu_do_interrupt(cs);
    qemu_mutex_unlock_iothread();

    return false;
}

#define ARM64_REG_ESR_EL1 ARM64_SYS_REG(3, 0, 5, 2, 0)
#define ARM64_REG_TCR_EL1 ARM64_SYS_REG(3, 0, 2, 0, 2)

/*
 * ESR_EL1
 * ISS encoding
 * AARCH64: DFSC,   bits [5:0]
 * AARCH32:
 *     TTBCR.EAE == 0
 *         FS[4]   - DFSR[10]
 *         FS[3:0] - DFSR[3:0]
 *     TTBCR.EAE == 1
 *         FS, bits [5:0]
 */
#define ESR_DFSC(aarch64, lpae, v)        \
    ((aarch64 || (lpae)) ? ((v) & 0x3F)   \
               : (((v) >> 6) | ((v) & 0x1F)))

#define ESR_DFSC_EXTABT(aarch64, lpae) \
    ((aarch64) ? 0x10 : (lpae) ? 0x10 : 0x8)
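
/*
 * For reference: in AArch64 state (and AArch32 with LPAE) DFSC 0x10 is
 * "synchronous external abort, not on translation table walk", while the
 * short-descriptor FS encoding of the same fault is 0b01000 (0x8), which
 * is what ESR_DFSC_EXTABT() evaluates to for the !aarch64 && !lpae case.
 */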

bool kvm_arm_verify_ext_dabt_pending(CPUState *cs)
{
    uint64_t dfsr_val;

    if (!kvm_get_one_reg(cs, ARM64_REG_ESR_EL1, &dfsr_val)) {
        ARMCPU *cpu = ARM_CPU(cs);
        CPUARMState *env = &cpu->env;
        int aarch64_mode = arm_feature(env, ARM_FEATURE_AARCH64);
        int lpae = 0;

        if (!aarch64_mode) {
            uint64_t ttbcr;

            if (!kvm_get_one_reg(cs, ARM64_REG_TCR_EL1, &ttbcr)) {
                lpae = arm_feature(env, ARM_FEATURE_LPAE)
                       && (ttbcr & TTBCR_EAE);
            }
        }
        /*
         * The verification here is based on the DFSC bits
         * of the ESR_EL1 reg only
         */
        return (ESR_DFSC(aarch64_mode, lpae, dfsr_val) ==
                ESR_DFSC_EXTABT(aarch64_mode, lpae));
    }
    return false;
}