icount.c (16092B)
1/* 2 * QEMU System Emulator 3 * 4 * Copyright (c) 2003-2008 Fabrice Bellard 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to deal 8 * in the Software without restriction, including without limitation the rights 9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 * copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 * THE SOFTWARE. 23 */ 24 25#include "qemu/osdep.h" 26#include "qemu-common.h" 27#include "qemu/cutils.h" 28#include "migration/vmstate.h" 29#include "qapi/error.h" 30#include "qemu/error-report.h" 31#include "exec/exec-all.h" 32#include "sysemu/cpus.h" 33#include "sysemu/qtest.h" 34#include "qemu/main-loop.h" 35#include "qemu/option.h" 36#include "qemu/seqlock.h" 37#include "sysemu/replay.h" 38#include "sysemu/runstate.h" 39#include "hw/core/cpu.h" 40#include "sysemu/cpu-timers.h" 41#include "sysemu/cpu-throttle.h" 42#include "timers-state.h" 43 44/* 45 * ICOUNT: Instruction Counter 46 * 47 * this module is split off from cpu-timers because the icount part 48 * is TCG-specific, and does not need to be built for other accels. 49 */ 50static bool icount_sleep = true; 51/* Arbitrarily pick 1MIPS as the minimum allowable speed. */ 52#define MAX_ICOUNT_SHIFT 10 53 54/* 55 * 0 = Do not count executed instructions. 56 * 1 = Fixed conversion of insn to ns via "shift" option 57 * 2 = Runtime adaptive algorithm to compute shift 58 */ 59int use_icount; 60 61static void icount_enable_precise(void) 62{ 63 use_icount = 1; 64} 65 66static void icount_enable_adaptive(void) 67{ 68 use_icount = 2; 69} 70 71/* 72 * The current number of executed instructions is based on what we 73 * originally budgeted minus the current state of the decrementing 74 * icount counters in extra/u16.low. 75 */ 76static int64_t icount_get_executed(CPUState *cpu) 77{ 78 return (cpu->icount_budget - 79 (cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra)); 80} 81 82/* 83 * Update the global shared timer_state.qemu_icount to take into 84 * account executed instructions. This is done by the TCG vCPU 85 * thread so the main-loop can see time has moved forward. 86 */ 87static void icount_update_locked(CPUState *cpu) 88{ 89 int64_t executed = icount_get_executed(cpu); 90 cpu->icount_budget -= executed; 91 92 qatomic_set_i64(&timers_state.qemu_icount, 93 timers_state.qemu_icount + executed); 94} 95 96/* 97 * Update the global shared timer_state.qemu_icount to take into 98 * account executed instructions. This is done by the TCG vCPU 99 * thread so the main-loop can see time has moved forward. 100 */ 101void icount_update(CPUState *cpu) 102{ 103 seqlock_write_lock(&timers_state.vm_clock_seqlock, 104 &timers_state.vm_clock_lock); 105 icount_update_locked(cpu); 106 seqlock_write_unlock(&timers_state.vm_clock_seqlock, 107 &timers_state.vm_clock_lock); 108} 109 110static int64_t icount_get_raw_locked(void) 111{ 112 CPUState *cpu = current_cpu; 113 114 if (cpu && cpu->running) { 115 if (!cpu->can_do_io) { 116 error_report("Bad icount read"); 117 exit(1); 118 } 119 /* Take into account what has run */ 120 icount_update_locked(cpu); 121 } 122 /* The read is protected by the seqlock, but needs atomic64 to avoid UB */ 123 return qatomic_read_i64(&timers_state.qemu_icount); 124} 125 126static int64_t icount_get_locked(void) 127{ 128 int64_t icount = icount_get_raw_locked(); 129 return qatomic_read_i64(&timers_state.qemu_icount_bias) + 130 icount_to_ns(icount); 131} 132 133int64_t icount_get_raw(void) 134{ 135 int64_t icount; 136 unsigned start; 137 138 do { 139 start = seqlock_read_begin(&timers_state.vm_clock_seqlock); 140 icount = icount_get_raw_locked(); 141 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start)); 142 143 return icount; 144} 145 146/* Return the virtual CPU time, based on the instruction counter. */ 147int64_t icount_get(void) 148{ 149 int64_t icount; 150 unsigned start; 151 152 do { 153 start = seqlock_read_begin(&timers_state.vm_clock_seqlock); 154 icount = icount_get_locked(); 155 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start)); 156 157 return icount; 158} 159 160int64_t icount_to_ns(int64_t icount) 161{ 162 return icount << qatomic_read(&timers_state.icount_time_shift); 163} 164 165/* 166 * Correlation between real and virtual time is always going to be 167 * fairly approximate, so ignore small variation. 168 * When the guest is idle real and virtual time will be aligned in 169 * the IO wait loop. 170 */ 171#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10) 172 173static void icount_adjust(void) 174{ 175 int64_t cur_time; 176 int64_t cur_icount; 177 int64_t delta; 178 179 /* If the VM is not running, then do nothing. */ 180 if (!runstate_is_running()) { 181 return; 182 } 183 184 seqlock_write_lock(&timers_state.vm_clock_seqlock, 185 &timers_state.vm_clock_lock); 186 cur_time = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT, 187 cpu_get_clock_locked()); 188 cur_icount = icount_get_locked(); 189 190 delta = cur_icount - cur_time; 191 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */ 192 if (delta > 0 193 && timers_state.last_delta + ICOUNT_WOBBLE < delta * 2 194 && timers_state.icount_time_shift > 0) { 195 /* The guest is getting too far ahead. Slow time down. */ 196 qatomic_set(&timers_state.icount_time_shift, 197 timers_state.icount_time_shift - 1); 198 } 199 if (delta < 0 200 && timers_state.last_delta - ICOUNT_WOBBLE > delta * 2 201 && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) { 202 /* The guest is getting too far behind. Speed time up. */ 203 qatomic_set(&timers_state.icount_time_shift, 204 timers_state.icount_time_shift + 1); 205 } 206 timers_state.last_delta = delta; 207 qatomic_set_i64(&timers_state.qemu_icount_bias, 208 cur_icount - (timers_state.qemu_icount 209 << timers_state.icount_time_shift)); 210 seqlock_write_unlock(&timers_state.vm_clock_seqlock, 211 &timers_state.vm_clock_lock); 212} 213 214static void icount_adjust_rt(void *opaque) 215{ 216 timer_mod(timers_state.icount_rt_timer, 217 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000); 218 icount_adjust(); 219} 220 221static void icount_adjust_vm(void *opaque) 222{ 223 timer_mod(timers_state.icount_vm_timer, 224 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 225 NANOSECONDS_PER_SECOND / 10); 226 icount_adjust(); 227} 228 229int64_t icount_round(int64_t count) 230{ 231 int shift = qatomic_read(&timers_state.icount_time_shift); 232 return (count + (1 << shift) - 1) >> shift; 233} 234 235static void icount_warp_rt(void) 236{ 237 unsigned seq; 238 int64_t warp_start; 239 240 /* 241 * The icount_warp_timer is rescheduled soon after vm_clock_warp_start 242 * changes from -1 to another value, so the race here is okay. 243 */ 244 do { 245 seq = seqlock_read_begin(&timers_state.vm_clock_seqlock); 246 warp_start = timers_state.vm_clock_warp_start; 247 } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq)); 248 249 if (warp_start == -1) { 250 return; 251 } 252 253 seqlock_write_lock(&timers_state.vm_clock_seqlock, 254 &timers_state.vm_clock_lock); 255 if (runstate_is_running()) { 256 int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT, 257 cpu_get_clock_locked()); 258 int64_t warp_delta; 259 260 warp_delta = clock - timers_state.vm_clock_warp_start; 261 if (icount_enabled() == 2) { 262 /* 263 * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too 264 * far ahead of real time. 265 */ 266 int64_t cur_icount = icount_get_locked(); 267 int64_t delta = clock - cur_icount; 268 warp_delta = MIN(warp_delta, delta); 269 } 270 qatomic_set_i64(&timers_state.qemu_icount_bias, 271 timers_state.qemu_icount_bias + warp_delta); 272 } 273 timers_state.vm_clock_warp_start = -1; 274 seqlock_write_unlock(&timers_state.vm_clock_seqlock, 275 &timers_state.vm_clock_lock); 276 277 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) { 278 qemu_clock_notify(QEMU_CLOCK_VIRTUAL); 279 } 280} 281 282static void icount_timer_cb(void *opaque) 283{ 284 /* 285 * No need for a checkpoint because the timer already synchronizes 286 * with CHECKPOINT_CLOCK_VIRTUAL_RT. 287 */ 288 icount_warp_rt(); 289} 290 291void icount_start_warp_timer(void) 292{ 293 int64_t clock; 294 int64_t deadline; 295 296 assert(icount_enabled()); 297 298 /* 299 * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers 300 * do not fire, so computing the deadline does not make sense. 301 */ 302 if (!runstate_is_running()) { 303 return; 304 } 305 306 if (replay_mode != REPLAY_MODE_PLAY) { 307 if (!all_cpu_threads_idle()) { 308 return; 309 } 310 311 if (qtest_enabled()) { 312 /* When testing, qtest commands advance icount. */ 313 return; 314 } 315 316 replay_checkpoint(CHECKPOINT_CLOCK_WARP_START); 317 } else { 318 /* warp clock deterministically in record/replay mode */ 319 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) { 320 /* 321 * vCPU is sleeping and warp can't be started. 322 * It is probably a race condition: notification sent 323 * to vCPU was processed in advance and vCPU went to sleep. 324 * Therefore we have to wake it up for doing someting. 325 */ 326 if (replay_has_checkpoint()) { 327 qemu_clock_notify(QEMU_CLOCK_VIRTUAL); 328 } 329 return; 330 } 331 } 332 333 /* We want to use the earliest deadline from ALL vm_clocks */ 334 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT); 335 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL, 336 ~QEMU_TIMER_ATTR_EXTERNAL); 337 if (deadline < 0) { 338 static bool notified; 339 if (!icount_sleep && !notified) { 340 warn_report("icount sleep disabled and no active timers"); 341 notified = true; 342 } 343 return; 344 } 345 346 if (deadline > 0) { 347 /* 348 * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to 349 * sleep. Otherwise, the CPU might be waiting for a future timer 350 * interrupt to wake it up, but the interrupt never comes because 351 * the vCPU isn't running any insns and thus doesn't advance the 352 * QEMU_CLOCK_VIRTUAL. 353 */ 354 if (!icount_sleep) { 355 /* 356 * We never let VCPUs sleep in no sleep icount mode. 357 * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance 358 * to the next QEMU_CLOCK_VIRTUAL event and notify it. 359 * It is useful when we want a deterministic execution time, 360 * isolated from host latencies. 361 */ 362 seqlock_write_lock(&timers_state.vm_clock_seqlock, 363 &timers_state.vm_clock_lock); 364 qatomic_set_i64(&timers_state.qemu_icount_bias, 365 timers_state.qemu_icount_bias + deadline); 366 seqlock_write_unlock(&timers_state.vm_clock_seqlock, 367 &timers_state.vm_clock_lock); 368 qemu_clock_notify(QEMU_CLOCK_VIRTUAL); 369 } else { 370 /* 371 * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some 372 * "real" time, (related to the time left until the next event) has 373 * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this. 374 * This avoids that the warps are visible externally; for example, 375 * you will not be sending network packets continuously instead of 376 * every 100ms. 377 */ 378 seqlock_write_lock(&timers_state.vm_clock_seqlock, 379 &timers_state.vm_clock_lock); 380 if (timers_state.vm_clock_warp_start == -1 381 || timers_state.vm_clock_warp_start > clock) { 382 timers_state.vm_clock_warp_start = clock; 383 } 384 seqlock_write_unlock(&timers_state.vm_clock_seqlock, 385 &timers_state.vm_clock_lock); 386 timer_mod_anticipate(timers_state.icount_warp_timer, 387 clock + deadline); 388 } 389 } else if (deadline == 0) { 390 qemu_clock_notify(QEMU_CLOCK_VIRTUAL); 391 } 392} 393 394void icount_account_warp_timer(void) 395{ 396 if (!icount_sleep) { 397 return; 398 } 399 400 /* 401 * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers 402 * do not fire, so computing the deadline does not make sense. 403 */ 404 if (!runstate_is_running()) { 405 return; 406 } 407 408 /* warp clock deterministically in record/replay mode */ 409 if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) { 410 return; 411 } 412 413 timer_del(timers_state.icount_warp_timer); 414 icount_warp_rt(); 415} 416 417void icount_configure(QemuOpts *opts, Error **errp) 418{ 419 const char *option = qemu_opt_get(opts, "shift"); 420 bool sleep = qemu_opt_get_bool(opts, "sleep", true); 421 bool align = qemu_opt_get_bool(opts, "align", false); 422 long time_shift = -1; 423 424 if (!option) { 425 if (qemu_opt_get(opts, "align") != NULL) { 426 error_setg(errp, "Please specify shift option when using align"); 427 } 428 return; 429 } 430 431 if (align && !sleep) { 432 error_setg(errp, "align=on and sleep=off are incompatible"); 433 return; 434 } 435 436 if (strcmp(option, "auto") != 0) { 437 if (qemu_strtol(option, NULL, 0, &time_shift) < 0 438 || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) { 439 error_setg(errp, "icount: Invalid shift value"); 440 return; 441 } 442 } else if (icount_align_option) { 443 error_setg(errp, "shift=auto and align=on are incompatible"); 444 return; 445 } else if (!icount_sleep) { 446 error_setg(errp, "shift=auto and sleep=off are incompatible"); 447 return; 448 } 449 450 icount_sleep = sleep; 451 if (icount_sleep) { 452 timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT, 453 icount_timer_cb, NULL); 454 } 455 456 icount_align_option = align; 457 458 if (time_shift >= 0) { 459 timers_state.icount_time_shift = time_shift; 460 icount_enable_precise(); 461 return; 462 } 463 464 icount_enable_adaptive(); 465 466 /* 467 * 125MIPS seems a reasonable initial guess at the guest speed. 468 * It will be corrected fairly quickly anyway. 469 */ 470 timers_state.icount_time_shift = 3; 471 472 /* 473 * Have both realtime and virtual time triggers for speed adjustment. 474 * The realtime trigger catches emulated time passing too slowly, 475 * the virtual time trigger catches emulated time passing too fast. 476 * Realtime triggers occur even when idle, so use them less frequently 477 * than VM triggers. 478 */ 479 timers_state.vm_clock_warp_start = -1; 480 timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT, 481 icount_adjust_rt, NULL); 482 timer_mod(timers_state.icount_rt_timer, 483 qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000); 484 timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, 485 icount_adjust_vm, NULL); 486 timer_mod(timers_state.icount_vm_timer, 487 qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + 488 NANOSECONDS_PER_SECOND / 10); 489}