cpus.c (18325B)
1/* 2 * QEMU System Emulator 3 * 4 * Copyright (c) 2003-2008 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 25#include "qemu/osdep.h" 26#include "qemu-common.h" 27#include "monitor/monitor.h" 28#include "qapi/error.h" 29#include "qapi/qapi-commands-machine.h" 30#include "qapi/qapi-commands-misc.h" 31#include "qapi/qapi-events-run-state.h" 32#include "qapi/qmp/qerror.h" 33#include "exec/gdbstub.h" 34#include "sysemu/hw_accel.h" 35#include "exec/exec-all.h" 36#include "qemu/thread.h" 37#include "qemu/plugin.h" 38#include "sysemu/cpus.h" 39#include "qemu/guest-random.h" 40#include "hw/nmi.h" 41#include "sysemu/replay.h" 42#include "sysemu/runstate.h" 43#include "sysemu/cpu-timers.h" 44#include "sysemu/whpx.h" 45#include "hw/boards.h" 46#include "hw/hw.h" 47#include "trace.h" 48 49#ifdef CONFIG_LINUX 50 51#include <sys/prctl.h> 52 53#ifndef PR_MCE_KILL 54#define PR_MCE_KILL 33 55#endif 56 57#ifndef PR_MCE_KILL_SET 58#define PR_MCE_KILL_SET 1 59#endif 60 61#ifndef PR_MCE_KILL_EARLY 62#define PR_MCE_KILL_EARLY 1 63#endif 64 65#endif /* CONFIG_LINUX */ 66 67static QemuMutex qemu_global_mutex; 68 69bool cpu_is_stopped(CPUState *cpu) 70{ 71 return cpu->stopped || !runstate_is_running(); 72} 73 74bool cpu_work_list_empty(CPUState *cpu) 75{ 76 bool ret; 77 78 qemu_mutex_lock(&cpu->work_mutex); 79 ret = QSIMPLEQ_EMPTY(&cpu->work_list); 80 qemu_mutex_unlock(&cpu->work_mutex); 81 return ret; 82} 83 84bool cpu_thread_is_idle(CPUState *cpu) 85{ 86 if (cpu->stop || !cpu_work_list_empty(cpu)) { 87 return false; 88 } 89 if (cpu_is_stopped(cpu)) { 90 return true; 91 } 92 if (!cpu->halted || cpu_has_work(cpu) || 93 kvm_halt_in_kernel() || whpx_apic_in_platform()) { 94 return false; 95 } 96 return true; 97} 98 99bool all_cpu_threads_idle(void) 100{ 101 CPUState *cpu; 102 103 CPU_FOREACH(cpu) { 104 if (!cpu_thread_is_idle(cpu)) { 105 return false; 106 } 107 } 108 return true; 109} 110 111/***********************************************************/ 112void hw_error(const char *fmt, ...) 113{ 114 va_list ap; 115 CPUState *cpu; 116 117 va_start(ap, fmt); 118 fprintf(stderr, "qemu: hardware error: "); 119 vfprintf(stderr, fmt, ap); 120 fprintf(stderr, "\n"); 121 CPU_FOREACH(cpu) { 122 fprintf(stderr, "CPU #%d:\n", cpu->cpu_index); 123 cpu_dump_state(cpu, stderr, CPU_DUMP_FPU); 124 } 125 va_end(ap); 126 abort(); 127} 128 129/* 130 * The chosen accelerator is supposed to register this. 131 */ 132static const AccelOpsClass *cpus_accel; 133 134void cpu_synchronize_all_states(void) 135{ 136 CPUState *cpu; 137 138 CPU_FOREACH(cpu) { 139 cpu_synchronize_state(cpu); 140 } 141} 142 143void cpu_synchronize_all_post_reset(void) 144{ 145 CPUState *cpu; 146 147 CPU_FOREACH(cpu) { 148 cpu_synchronize_post_reset(cpu); 149 } 150} 151 152void cpu_synchronize_all_post_init(void) 153{ 154 CPUState *cpu; 155 156 CPU_FOREACH(cpu) { 157 cpu_synchronize_post_init(cpu); 158 } 159} 160 161void cpu_synchronize_all_pre_loadvm(void) 162{ 163 CPUState *cpu; 164 165 CPU_FOREACH(cpu) { 166 cpu_synchronize_pre_loadvm(cpu); 167 } 168} 169 170void cpu_synchronize_state(CPUState *cpu) 171{ 172 if (cpus_accel->synchronize_state) { 173 cpus_accel->synchronize_state(cpu); 174 } 175} 176 177void cpu_synchronize_post_reset(CPUState *cpu) 178{ 179 if (cpus_accel->synchronize_post_reset) { 180 cpus_accel->synchronize_post_reset(cpu); 181 } 182} 183 184void cpu_synchronize_post_init(CPUState *cpu) 185{ 186 if (cpus_accel->synchronize_post_init) { 187 cpus_accel->synchronize_post_init(cpu); 188 } 189} 190 191void cpu_synchronize_pre_loadvm(CPUState *cpu) 192{ 193 if (cpus_accel->synchronize_pre_loadvm) { 194 cpus_accel->synchronize_pre_loadvm(cpu); 195 } 196} 197 198bool cpus_are_resettable(void) 199{ 200 return cpu_check_are_resettable(); 201} 202 203int64_t cpus_get_virtual_clock(void) 204{ 205 /* 206 * XXX 207 * 208 * need to check that cpus_accel is not NULL, because qcow2 calls 209 * qemu_get_clock_ns(CLOCK_VIRTUAL) without any accel initialized and 210 * with ticks disabled in some io-tests: 211 * 030 040 041 060 099 120 127 140 156 161 172 181 191 192 195 203 229 249 256 267 212 * 213 * is this expected? 214 * 215 * XXX 216 */ 217 if (cpus_accel && cpus_accel->get_virtual_clock) { 218 return cpus_accel->get_virtual_clock(); 219 } 220 return cpu_get_clock(); 221} 222 223/* 224 * return the time elapsed in VM between vm_start and vm_stop. Unless 225 * icount is active, cpus_get_elapsed_ticks() uses units of the host CPU cycle 226 * counter. 227 */ 228int64_t cpus_get_elapsed_ticks(void) 229{ 230 if (cpus_accel->get_elapsed_ticks) { 231 return cpus_accel->get_elapsed_ticks(); 232 } 233 return cpu_get_ticks(); 234} 235 236static void generic_handle_interrupt(CPUState *cpu, int mask) 237{ 238 cpu->interrupt_request |= mask; 239 240 if (!qemu_cpu_is_self(cpu)) { 241 qemu_cpu_kick(cpu); 242 } 243} 244 245void cpu_interrupt(CPUState *cpu, int mask) 246{ 247 if (cpus_accel->handle_interrupt) { 248 cpus_accel->handle_interrupt(cpu, mask); 249 } else { 250 generic_handle_interrupt(cpu, mask); 251 } 252} 253 254static int do_vm_stop(RunState state, bool send_stop) 255{ 256 int ret = 0; 257 258 if (runstate_is_running()) { 259 runstate_set(state); 260 cpu_disable_ticks(); 261 pause_all_vcpus(); 262 vm_state_notify(0, state); 263 if (send_stop) { 264 qapi_event_send_stop(); 265 } 266 } 267 268 bdrv_drain_all(); 269 ret = bdrv_flush_all(); 270 trace_vm_stop_flush_all(ret); 271 272 return ret; 273} 274 275/* Special vm_stop() variant for terminating the process. Historically clients 276 * did not expect a QMP STOP event and so we need to retain compatibility. 277 */ 278int vm_shutdown(void) 279{ 280 return do_vm_stop(RUN_STATE_SHUTDOWN, false); 281} 282 283bool cpu_can_run(CPUState *cpu) 284{ 285 if (cpu->stop) { 286 return false; 287 } 288 if (cpu_is_stopped(cpu)) { 289 return false; 290 } 291 return true; 292} 293 294void cpu_handle_guest_debug(CPUState *cpu) 295{ 296 if (replay_running_debug()) { 297 if (!cpu->singlestep_enabled) { 298 /* 299 * Report about the breakpoint and 300 * make a single step to skip it 301 */ 302 replay_breakpoint(); 303 cpu_single_step(cpu, SSTEP_ENABLE); 304 } else { 305 cpu_single_step(cpu, 0); 306 } 307 } else { 308 gdb_set_stop_cpu(cpu); 309 qemu_system_debug_request(); 310 cpu->stopped = true; 311 } 312} 313 314#ifdef CONFIG_LINUX 315static void sigbus_reraise(void) 316{ 317 sigset_t set; 318 struct sigaction action; 319 320 memset(&action, 0, sizeof(action)); 321 action.sa_handler = SIG_DFL; 322 if (!sigaction(SIGBUS, &action, NULL)) { 323 raise(SIGBUS); 324 sigemptyset(&set); 325 sigaddset(&set, SIGBUS); 326 pthread_sigmask(SIG_UNBLOCK, &set, NULL); 327 } 328 perror("Failed to re-raise SIGBUS!"); 329 abort(); 330} 331 332static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx) 333{ 334 if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) { 335 sigbus_reraise(); 336 } 337 338 if (current_cpu) { 339 /* Called asynchronously in VCPU thread. */ 340 if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) { 341 sigbus_reraise(); 342 } 343 } else { 344 /* Called synchronously (via signalfd) in main thread. */ 345 if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) { 346 sigbus_reraise(); 347 } 348 } 349} 350 351static void qemu_init_sigbus(void) 352{ 353 struct sigaction action; 354 355 memset(&action, 0, sizeof(action)); 356 action.sa_flags = SA_SIGINFO; 357 action.sa_sigaction = sigbus_handler; 358 sigaction(SIGBUS, &action, NULL); 359 360 prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0); 361} 362#else /* !CONFIG_LINUX */ 363static void qemu_init_sigbus(void) 364{ 365} 366#endif /* !CONFIG_LINUX */ 367 368static QemuThread io_thread; 369 370/* cpu creation */ 371static QemuCond qemu_cpu_cond; 372/* system init */ 373static QemuCond qemu_pause_cond; 374 375void qemu_init_cpu_loop(void) 376{ 377 qemu_init_sigbus(); 378 qemu_cond_init(&qemu_cpu_cond); 379 qemu_cond_init(&qemu_pause_cond); 380 qemu_mutex_init(&qemu_global_mutex); 381 382 qemu_thread_get_self(&io_thread); 383} 384 385void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data) 386{ 387 do_run_on_cpu(cpu, func, data, &qemu_global_mutex); 388} 389 390static void qemu_cpu_stop(CPUState *cpu, bool exit) 391{ 392 g_assert(qemu_cpu_is_self(cpu)); 393 cpu->stop = false; 394 cpu->stopped = true; 395 if (exit) { 396 cpu_exit(cpu); 397 } 398 qemu_cond_broadcast(&qemu_pause_cond); 399} 400 401void qemu_wait_io_event_common(CPUState *cpu) 402{ 403 qatomic_mb_set(&cpu->thread_kicked, false); 404 if (cpu->stop) { 405 qemu_cpu_stop(cpu, false); 406 } 407 process_queued_cpu_work(cpu); 408} 409 410void qemu_wait_io_event(CPUState *cpu) 411{ 412 bool slept = false; 413 414 while (cpu_thread_is_idle(cpu)) { 415 if (!slept) { 416 slept = true; 417 qemu_plugin_vcpu_idle_cb(cpu); 418 } 419 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex); 420 } 421 if (slept) { 422 qemu_plugin_vcpu_resume_cb(cpu); 423 } 424 425#ifdef _WIN32 426 /* Eat dummy APC queued by cpus_kick_thread. */ 427 if (hax_enabled()) { 428 SleepEx(0, TRUE); 429 } 430#endif 431 qemu_wait_io_event_common(cpu); 432} 433 434void cpus_kick_thread(CPUState *cpu) 435{ 436#ifndef _WIN32 437 int err; 438 439 if (cpu->thread_kicked) { 440 return; 441 } 442 cpu->thread_kicked = true; 443 err = pthread_kill(cpu->thread->thread, SIG_IPI); 444 if (err && err != ESRCH) { 445 fprintf(stderr, "qemu:%s: %s", __func__, strerror(err)); 446 exit(1); 447 } 448#endif 449} 450 451void qemu_cpu_kick(CPUState *cpu) 452{ 453 qemu_cond_broadcast(cpu->halt_cond); 454 if (cpus_accel->kick_vcpu_thread) { 455 cpus_accel->kick_vcpu_thread(cpu); 456 } else { /* default */ 457 cpus_kick_thread(cpu); 458 } 459} 460 461void qemu_cpu_kick_self(void) 462{ 463 assert(current_cpu); 464 cpus_kick_thread(current_cpu); 465} 466 467bool qemu_cpu_is_self(CPUState *cpu) 468{ 469 return qemu_thread_is_self(cpu->thread); 470} 471 472bool qemu_in_vcpu_thread(void) 473{ 474 return current_cpu && qemu_cpu_is_self(current_cpu); 475} 476 477static __thread bool iothread_locked = false; 478 479bool qemu_mutex_iothread_locked(void) 480{ 481 return iothread_locked; 482} 483 484/* 485 * The BQL is taken from so many places that it is worth profiling the 486 * callers directly, instead of funneling them all through a single function. 487 */ 488void qemu_mutex_lock_iothread_impl(const char *file, int line) 489{ 490 QemuMutexLockFunc bql_lock = qatomic_read(&qemu_bql_mutex_lock_func); 491 492 g_assert(!qemu_mutex_iothread_locked()); 493 bql_lock(&qemu_global_mutex, file, line); 494 iothread_locked = true; 495} 496 497void qemu_mutex_unlock_iothread(void) 498{ 499 g_assert(qemu_mutex_iothread_locked()); 500 iothread_locked = false; 501 qemu_mutex_unlock(&qemu_global_mutex); 502} 503 504void qemu_cond_wait_iothread(QemuCond *cond) 505{ 506 qemu_cond_wait(cond, &qemu_global_mutex); 507} 508 509void qemu_cond_timedwait_iothread(QemuCond *cond, int ms) 510{ 511 qemu_cond_timedwait(cond, &qemu_global_mutex, ms); 512} 513 514/* signal CPU creation */ 515void cpu_thread_signal_created(CPUState *cpu) 516{ 517 cpu->created = true; 518 qemu_cond_signal(&qemu_cpu_cond); 519} 520 521/* signal CPU destruction */ 522void cpu_thread_signal_destroyed(CPUState *cpu) 523{ 524 cpu->created = false; 525 qemu_cond_signal(&qemu_cpu_cond); 526} 527 528 529static bool all_vcpus_paused(void) 530{ 531 CPUState *cpu; 532 533 CPU_FOREACH(cpu) { 534 if (!cpu->stopped) { 535 return false; 536 } 537 } 538 539 return true; 540} 541 542void pause_all_vcpus(void) 543{ 544 CPUState *cpu; 545 546 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false); 547 CPU_FOREACH(cpu) { 548 if (qemu_cpu_is_self(cpu)) { 549 qemu_cpu_stop(cpu, true); 550 } else { 551 cpu->stop = true; 552 qemu_cpu_kick(cpu); 553 } 554 } 555 556 /* We need to drop the replay_lock so any vCPU threads woken up 557 * can finish their replay tasks 558 */ 559 replay_mutex_unlock(); 560 561 while (!all_vcpus_paused()) { 562 qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex); 563 CPU_FOREACH(cpu) { 564 qemu_cpu_kick(cpu); 565 } 566 } 567 568 qemu_mutex_unlock_iothread(); 569 replay_mutex_lock(); 570 qemu_mutex_lock_iothread(); 571} 572 573void cpu_resume(CPUState *cpu) 574{ 575 cpu->stop = false; 576 cpu->stopped = false; 577 qemu_cpu_kick(cpu); 578} 579 580void resume_all_vcpus(void) 581{ 582 CPUState *cpu; 583 584 if (!runstate_is_running()) { 585 return; 586 } 587 588 qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true); 589 CPU_FOREACH(cpu) { 590 cpu_resume(cpu); 591 } 592} 593 594void cpu_remove_sync(CPUState *cpu) 595{ 596 cpu->stop = true; 597 cpu->unplug = true; 598 qemu_cpu_kick(cpu); 599 qemu_mutex_unlock_iothread(); 600 qemu_thread_join(cpu->thread); 601 qemu_mutex_lock_iothread(); 602} 603 604void cpus_register_accel(const AccelOpsClass *ops) 605{ 606 assert(ops != NULL); 607 assert(ops->create_vcpu_thread != NULL); /* mandatory */ 608 cpus_accel = ops; 609} 610 611void qemu_init_vcpu(CPUState *cpu) 612{ 613 MachineState *ms = MACHINE(qdev_get_machine()); 614 615 cpu->nr_cores = ms->smp.cores; 616 cpu->nr_threads = ms->smp.threads; 617 cpu->stopped = true; 618 cpu->random_seed = qemu_guest_random_seed_thread_part1(); 619 620 if (!cpu->as) { 621 /* If the target cpu hasn't set up any address spaces itself, 622 * give it the default one. 623 */ 624 cpu->num_ases = 1; 625 cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory); 626 } 627 628 /* accelerators all implement the AccelOpsClass */ 629 g_assert(cpus_accel != NULL && cpus_accel->create_vcpu_thread != NULL); 630 cpus_accel->create_vcpu_thread(cpu); 631 632 while (!cpu->created) { 633 qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex); 634 } 635} 636 637void cpu_stop_current(void) 638{ 639 if (current_cpu) { 640 current_cpu->stop = true; 641 cpu_exit(current_cpu); 642 } 643} 644 645int vm_stop(RunState state) 646{ 647 if (qemu_in_vcpu_thread()) { 648 qemu_system_vmstop_request_prepare(); 649 qemu_system_vmstop_request(state); 650 /* 651 * FIXME: should not return to device code in case 652 * vm_stop() has been requested. 653 */ 654 cpu_stop_current(); 655 return 0; 656 } 657 658 return do_vm_stop(state, true); 659} 660 661/** 662 * Prepare for (re)starting the VM. 663 * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already 664 * running or in case of an error condition), 0 otherwise. 665 */ 666int vm_prepare_start(void) 667{ 668 RunState requested; 669 670 qemu_vmstop_requested(&requested); 671 if (runstate_is_running() && requested == RUN_STATE__MAX) { 672 return -1; 673 } 674 675 /* Ensure that a STOP/RESUME pair of events is emitted if a 676 * vmstop request was pending. The BLOCK_IO_ERROR event, for 677 * example, according to documentation is always followed by 678 * the STOP event. 679 */ 680 if (runstate_is_running()) { 681 qapi_event_send_stop(); 682 qapi_event_send_resume(); 683 return -1; 684 } 685 686 /* We are sending this now, but the CPUs will be resumed shortly later */ 687 qapi_event_send_resume(); 688 689 cpu_enable_ticks(); 690 runstate_set(RUN_STATE_RUNNING); 691 vm_state_notify(1, RUN_STATE_RUNNING); 692 return 0; 693} 694 695void vm_start(void) 696{ 697 if (!vm_prepare_start()) { 698 resume_all_vcpus(); 699 } 700} 701 702/* does a state transition even if the VM is already stopped, 703 current state is forgotten forever */ 704int vm_stop_force_state(RunState state) 705{ 706 if (runstate_is_running()) { 707 return vm_stop(state); 708 } else { 709 int ret; 710 runstate_set(state); 711 712 bdrv_drain_all(); 713 /* Make sure to return an error if the flush in a previous vm_stop() 714 * failed. */ 715 ret = bdrv_flush_all(); 716 trace_vm_stop_flush_all(ret); 717 return ret; 718 } 719} 720 721void list_cpus(const char *optarg) 722{ 723 /* XXX: implement xxx_cpu_list for targets that still miss it */ 724#if defined(cpu_list) 725 cpu_list(); 726#endif 727} 728 729void qmp_memsave(int64_t addr, int64_t size, const char *filename, 730 bool has_cpu, int64_t cpu_index, Error **errp) 731{ 732 FILE *f; 733 uint32_t l; 734 CPUState *cpu; 735 uint8_t buf[1024]; 736 int64_t orig_addr = addr, orig_size = size; 737 738 if (!has_cpu) { 739 cpu_index = 0; 740 } 741 742 cpu = qemu_get_cpu(cpu_index); 743 if (cpu == NULL) { 744 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index", 745 "a CPU number"); 746 return; 747 } 748 749 f = fopen(filename, "wb"); 750 if (!f) { 751 error_setg_file_open(errp, errno, filename); 752 return; 753 } 754 755 while (size != 0) { 756 l = sizeof(buf); 757 if (l > size) 758 l = size; 759 if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) { 760 error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64 761 " specified", orig_addr, orig_size); 762 goto exit; 763 } 764 if (fwrite(buf, 1, l, f) != l) { 765 error_setg(errp, QERR_IO_ERROR); 766 goto exit; 767 } 768 addr += l; 769 size -= l; 770 } 771 772exit: 773 fclose(f); 774} 775 776void qmp_pmemsave(int64_t addr, int64_t size, const char *filename, 777 Error **errp) 778{ 779 FILE *f; 780 uint32_t l; 781 uint8_t buf[1024]; 782 783 f = fopen(filename, "wb"); 784 if (!f) { 785 error_setg_file_open(errp, errno, filename); 786 return; 787 } 788 789 while (size != 0) { 790 l = sizeof(buf); 791 if (l > size) 792 l = size; 793 cpu_physical_memory_read(addr, buf, l); 794 if (fwrite(buf, 1, l, f) != l) { 795 error_setg(errp, QERR_IO_ERROR); 796 goto exit; 797 } 798 addr += l; 799 size -= l; 800 } 801 802exit: 803 fclose(f); 804} 805 806void qmp_inject_nmi(Error **errp) 807{ 808 nmi_monitor_handle(monitor_get_cpu_index(monitor_cur()), errp); 809} 810