|  | /* | 
|  | * QEMU System Emulator | 
|  | * | 
|  | * Copyright (c) 2003-2008 Fabrice Bellard | 
|  | * | 
|  | * Permission is hereby granted, free of charge, to any person obtaining a copy | 
|  | * of this software and associated documentation files (the "Software"), to deal | 
|  | * in the Software without restriction, including without limitation the rights | 
|  | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | 
|  | * copies of the Software, and to permit persons to whom the Software is | 
|  | * furnished to do so, subject to the following conditions: | 
|  | * | 
|  | * The above copyright notice and this permission notice shall be included in | 
|  | * all copies or substantial portions of the Software. | 
|  | * | 
|  | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | 
|  | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 
|  | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | 
|  | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | 
|  | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | 
|  | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | 
|  | * THE SOFTWARE. | 
|  | */ | 
|  |  | 
|  | #include "qemu/osdep.h" | 
|  | #include "qemu/cutils.h" | 
|  | #include "migration/vmstate.h" | 
|  | #include "qapi/error.h" | 
|  | #include "qemu/error-report.h" | 
|  | #include "system/cpus.h" | 
|  | #include "system/qtest.h" | 
|  | #include "qemu/main-loop.h" | 
|  | #include "qemu/option.h" | 
|  | #include "qemu/seqlock.h" | 
|  | #include "system/replay.h" | 
|  | #include "system/runstate.h" | 
|  | #include "hw/core/cpu.h" | 
|  | #include "system/cpu-timers.h" | 
|  | #include "system/cpu-timers-internal.h" | 
|  |  | 
|  | /* | 
|  | * ICOUNT: Instruction Counter | 
|  | * | 
|  | * this module is split off from cpu-timers because the icount part | 
|  | * is TCG-specific, and does not need to be built for other accels. | 
|  | */ | 
/*
 * Value of the "sleep" option (defaults to true); overwritten by
 * icount_configure().  When false, the warp code advances the virtual
 * clock immediately instead of arming the warp timer.
 */
static bool icount_sleep = true;
/* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
#define MAX_ICOUNT_SHIFT 10

/* Value of the "align" option, stored by icount_configure(). */
bool icount_align_option;

/* Current icount mode; the default is not to count executed instructions. */
ICountMode use_icount = ICOUNT_DISABLED;
|  |  | 
|  | static void icount_enable_precise(void) | 
|  | { | 
|  | /* Fixed conversion of insn to ns via "shift" option */ | 
|  | use_icount = ICOUNT_PRECISE; | 
|  | } | 
|  |  | 
|  | static void icount_enable_adaptive(void) | 
|  | { | 
|  | /* Runtime adaptive algorithm to compute shift */ | 
|  | use_icount = ICOUNT_ADAPTATIVE; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * The current number of executed instructions is based on what we | 
|  | * originally budgeted minus the current state of the decrementing | 
|  | * icount counters in extra/u16.low. | 
|  | */ | 
|  | static int64_t icount_get_executed(CPUState *cpu) | 
|  | { | 
|  | return (cpu->icount_budget - | 
|  | (cpu->neg.icount_decr.u16.low + cpu->icount_extra)); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Update the global shared timer_state.qemu_icount to take into | 
|  | * account executed instructions. This is done by the TCG vCPU | 
|  | * thread so the main-loop can see time has moved forward. | 
|  | */ | 
|  | static void icount_update_locked(CPUState *cpu) | 
|  | { | 
|  | int64_t executed = icount_get_executed(cpu); | 
|  | cpu->icount_budget -= executed; | 
|  |  | 
|  | qatomic_set_i64(&timers_state.qemu_icount, | 
|  | timers_state.qemu_icount + executed); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Update the global shared timer_state.qemu_icount to take into | 
|  | * account executed instructions. This is done by the TCG vCPU | 
|  | * thread so the main-loop can see time has moved forward. | 
|  | */ | 
|  | void icount_update(CPUState *cpu) | 
|  | { | 
|  | seqlock_write_lock(&timers_state.vm_clock_seqlock, | 
|  | &timers_state.vm_clock_lock); | 
|  | icount_update_locked(cpu); | 
|  | seqlock_write_unlock(&timers_state.vm_clock_seqlock, | 
|  | &timers_state.vm_clock_lock); | 
|  | } | 
|  |  | 
|  | static int64_t icount_get_raw_locked(void) | 
|  | { | 
|  | CPUState *cpu = current_cpu; | 
|  |  | 
|  | if (cpu && cpu->running) { | 
|  | if (!cpu->neg.can_do_io) { | 
|  | error_report("Bad icount read"); | 
|  | exit(1); | 
|  | } | 
|  | /* Take into account what has run */ | 
|  | icount_update_locked(cpu); | 
|  | } | 
|  | /* The read is protected by the seqlock, but needs atomic64 to avoid UB */ | 
|  | return qatomic_read_i64(&timers_state.qemu_icount); | 
|  | } | 
|  |  | 
|  | static int64_t icount_get_locked(void) | 
|  | { | 
|  | int64_t icount = icount_get_raw_locked(); | 
|  | return qatomic_read_i64(&timers_state.qemu_icount_bias) + | 
|  | icount_to_ns(icount); | 
|  | } | 
|  |  | 
|  | int64_t icount_get_raw(void) | 
|  | { | 
|  | int64_t icount; | 
|  | unsigned start; | 
|  |  | 
|  | do { | 
|  | start = seqlock_read_begin(&timers_state.vm_clock_seqlock); | 
|  | icount = icount_get_raw_locked(); | 
|  | } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start)); | 
|  |  | 
|  | return icount; | 
|  | } | 
|  |  | 
|  | /* Return the virtual CPU time, based on the instruction counter.  */ | 
|  | int64_t icount_get(void) | 
|  | { | 
|  | int64_t icount; | 
|  | unsigned start; | 
|  |  | 
|  | do { | 
|  | start = seqlock_read_begin(&timers_state.vm_clock_seqlock); | 
|  | icount = icount_get_locked(); | 
|  | } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start)); | 
|  |  | 
|  | return icount; | 
|  | } | 
|  |  | 
|  | int64_t icount_to_ns(int64_t icount) | 
|  | { | 
|  | return icount << qatomic_read(&timers_state.icount_time_shift); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Correlation between real and virtual time is always going to be | 
|  | * fairly approximate, so ignore small variation. | 
|  | * When the guest is idle real and virtual time will be aligned in | 
|  | * the IO wait loop. | 
|  | */ | 
|  | #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10) | 
|  |  | 
|  | static void icount_adjust(void) | 
|  | { | 
|  | int64_t cur_time; | 
|  | int64_t cur_icount; | 
|  | int64_t delta; | 
|  |  | 
|  | /* If the VM is not running, then do nothing.  */ | 
|  | if (!runstate_is_running()) { | 
|  | return; | 
|  | } | 
|  |  | 
|  | seqlock_write_lock(&timers_state.vm_clock_seqlock, | 
|  | &timers_state.vm_clock_lock); | 
|  | cur_time = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT, | 
|  | cpu_get_clock_locked()); | 
|  | cur_icount = icount_get_locked(); | 
|  |  | 
|  | delta = cur_icount - cur_time; | 
|  | /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  */ | 
|  | if (delta > 0 | 
|  | && timers_state.last_delta + ICOUNT_WOBBLE < delta * 2 | 
|  | && timers_state.icount_time_shift > 0) { | 
|  | /* The guest is getting too far ahead.  Slow time down.  */ | 
|  | qatomic_set(&timers_state.icount_time_shift, | 
|  | timers_state.icount_time_shift - 1); | 
|  | } | 
|  | if (delta < 0 | 
|  | && timers_state.last_delta - ICOUNT_WOBBLE > delta * 2 | 
|  | && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) { | 
|  | /* The guest is getting too far behind.  Speed time up.  */ | 
|  | qatomic_set(&timers_state.icount_time_shift, | 
|  | timers_state.icount_time_shift + 1); | 
|  | } | 
|  | timers_state.last_delta = delta; | 
|  | qatomic_set_i64(&timers_state.qemu_icount_bias, | 
|  | cur_icount - (timers_state.qemu_icount | 
|  | << timers_state.icount_time_shift)); | 
|  | seqlock_write_unlock(&timers_state.vm_clock_seqlock, | 
|  | &timers_state.vm_clock_lock); | 
|  | } | 
|  |  | 
|  | static void icount_adjust_rt(void *opaque) | 
|  | { | 
|  | timer_mod(timers_state.icount_rt_timer, | 
|  | qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000); | 
|  | icount_adjust(); | 
|  | } | 
|  |  | 
|  | static void icount_adjust_vm(void *opaque) | 
|  | { | 
|  | timer_mod(timers_state.icount_vm_timer, | 
|  | qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + | 
|  | NANOSECONDS_PER_SECOND / 10); | 
|  | icount_adjust(); | 
|  | } | 
|  |  | 
|  | int64_t icount_round(int64_t count) | 
|  | { | 
|  | int shift = qatomic_read(&timers_state.icount_time_shift); | 
|  | return (count + (1 << shift) - 1) >> shift; | 
|  | } | 
|  |  | 
|  | static void icount_warp_rt(void) | 
|  | { | 
|  | unsigned seq; | 
|  | int64_t warp_start; | 
|  |  | 
|  | /* | 
|  | * The icount_warp_timer is rescheduled soon after vm_clock_warp_start | 
|  | * changes from -1 to another value, so the race here is okay. | 
|  | */ | 
|  | do { | 
|  | seq = seqlock_read_begin(&timers_state.vm_clock_seqlock); | 
|  | warp_start = timers_state.vm_clock_warp_start; | 
|  | } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq)); | 
|  |  | 
|  | if (warp_start == -1) { | 
|  | return; | 
|  | } | 
|  |  | 
|  | seqlock_write_lock(&timers_state.vm_clock_seqlock, | 
|  | &timers_state.vm_clock_lock); | 
|  | if (runstate_is_running()) { | 
|  | int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT, | 
|  | cpu_get_clock_locked()); | 
|  | int64_t warp_delta; | 
|  |  | 
|  | warp_delta = clock - timers_state.vm_clock_warp_start; | 
|  | if (icount_enabled() == ICOUNT_ADAPTATIVE) { | 
|  | /* | 
|  | * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too far | 
|  | * ahead of real time (it might already be ahead so careful not | 
|  | * to go backwards). | 
|  | */ | 
|  | int64_t cur_icount = icount_get_locked(); | 
|  | int64_t delta = clock - cur_icount; | 
|  |  | 
|  | if (delta < 0) { | 
|  | delta = 0; | 
|  | } | 
|  | warp_delta = MIN(warp_delta, delta); | 
|  | } | 
|  | qatomic_set_i64(&timers_state.qemu_icount_bias, | 
|  | timers_state.qemu_icount_bias + warp_delta); | 
|  | } | 
|  | timers_state.vm_clock_warp_start = -1; | 
|  | seqlock_write_unlock(&timers_state.vm_clock_seqlock, | 
|  | &timers_state.vm_clock_lock); | 
|  |  | 
|  | if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) { | 
|  | qemu_clock_notify(QEMU_CLOCK_VIRTUAL); | 
|  | } | 
|  | } | 
|  |  | 
/*
 * Callback of icount_warp_timer: account the pending warp.
 *
 * No checkpoint is needed here because the timer already synchronizes
 * with CHECKPOINT_CLOCK_VIRTUAL_RT.  @opaque is unused.
 */
static void icount_timer_cb(void *opaque)
{
    icount_warp_rt();
}
|  |  | 
|  | void icount_start_warp_timer(void) | 
|  | { | 
|  | int64_t clock; | 
|  | int64_t deadline; | 
|  |  | 
|  | assert(icount_enabled()); | 
|  |  | 
|  | /* | 
|  | * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers | 
|  | * do not fire, so computing the deadline does not make sense. | 
|  | */ | 
|  | if (!runstate_is_running()) { | 
|  | return; | 
|  | } | 
|  |  | 
|  | if (replay_mode != REPLAY_MODE_PLAY) { | 
|  | if (!all_cpu_threads_idle()) { | 
|  | return; | 
|  | } | 
|  |  | 
|  | if (qtest_enabled()) { | 
|  | /* When testing, qtest commands advance icount.  */ | 
|  | return; | 
|  | } | 
|  |  | 
|  | replay_checkpoint(CHECKPOINT_CLOCK_WARP_START); | 
|  | } else { | 
|  | /* warp clock deterministically in record/replay mode */ | 
|  | if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) { | 
|  | /* | 
|  | * vCPU is sleeping and warp can't be started. | 
|  | * It is probably a race condition: notification sent | 
|  | * to vCPU was processed in advance and vCPU went to sleep. | 
|  | * Therefore we have to wake it up for doing something. | 
|  | */ | 
|  | if (replay_has_event()) { | 
|  | qemu_clock_notify(QEMU_CLOCK_VIRTUAL); | 
|  | } | 
|  | return; | 
|  | } | 
|  | } | 
|  |  | 
|  | /* We want to use the earliest deadline from ALL vm_clocks */ | 
|  | clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT); | 
|  | deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL, | 
|  | ~QEMU_TIMER_ATTR_EXTERNAL); | 
|  | if (deadline < 0) { | 
|  | if (!icount_sleep) { | 
|  | warn_report_once("icount sleep disabled and no active timers"); | 
|  | } | 
|  | return; | 
|  | } | 
|  |  | 
|  | if (deadline > 0) { | 
|  | /* | 
|  | * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to | 
|  | * sleep.  Otherwise, the CPU might be waiting for a future timer | 
|  | * interrupt to wake it up, but the interrupt never comes because | 
|  | * the vCPU isn't running any insns and thus doesn't advance the | 
|  | * QEMU_CLOCK_VIRTUAL. | 
|  | */ | 
|  | if (!icount_sleep) { | 
|  | /* | 
|  | * We never let VCPUs sleep in no sleep icount mode. | 
|  | * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance | 
|  | * to the next QEMU_CLOCK_VIRTUAL event and notify it. | 
|  | * It is useful when we want a deterministic execution time, | 
|  | * isolated from host latencies. | 
|  | */ | 
|  | seqlock_write_lock(&timers_state.vm_clock_seqlock, | 
|  | &timers_state.vm_clock_lock); | 
|  | qatomic_set_i64(&timers_state.qemu_icount_bias, | 
|  | timers_state.qemu_icount_bias + deadline); | 
|  | seqlock_write_unlock(&timers_state.vm_clock_seqlock, | 
|  | &timers_state.vm_clock_lock); | 
|  | qemu_clock_notify(QEMU_CLOCK_VIRTUAL); | 
|  | } else { | 
|  | /* | 
|  | * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some | 
|  | * "real" time, (related to the time left until the next event) has | 
|  | * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this. | 
|  | * This avoids that the warps are visible externally; for example, | 
|  | * you will not be sending network packets continuously instead of | 
|  | * every 100ms. | 
|  | */ | 
|  | seqlock_write_lock(&timers_state.vm_clock_seqlock, | 
|  | &timers_state.vm_clock_lock); | 
|  | if (timers_state.vm_clock_warp_start == -1 | 
|  | || timers_state.vm_clock_warp_start > clock) { | 
|  | timers_state.vm_clock_warp_start = clock; | 
|  | } | 
|  | seqlock_write_unlock(&timers_state.vm_clock_seqlock, | 
|  | &timers_state.vm_clock_lock); | 
|  | timer_mod_anticipate(timers_state.icount_warp_timer, | 
|  | clock + deadline); | 
|  | } | 
|  | } else if (deadline == 0) { | 
|  | qemu_clock_notify(QEMU_CLOCK_VIRTUAL); | 
|  | } | 
|  | } | 
|  |  | 
|  | void icount_account_warp_timer(void) | 
|  | { | 
|  | if (!icount_sleep) { | 
|  | return; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers | 
|  | * do not fire, so computing the deadline does not make sense. | 
|  | */ | 
|  | if (!runstate_is_running()) { | 
|  | return; | 
|  | } | 
|  |  | 
|  | replay_async_events(); | 
|  |  | 
|  | /* warp clock deterministically in record/replay mode */ | 
|  | if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) { | 
|  | return; | 
|  | } | 
|  |  | 
|  | timer_del(timers_state.icount_warp_timer); | 
|  | icount_warp_rt(); | 
|  | } | 
|  |  | 
|  | bool icount_configure(QemuOpts *opts, Error **errp) | 
|  | { | 
|  | const char *option = qemu_opt_get(opts, "shift"); | 
|  | bool sleep = qemu_opt_get_bool(opts, "sleep", true); | 
|  | bool align = qemu_opt_get_bool(opts, "align", false); | 
|  | long time_shift = -1; | 
|  |  | 
|  | if (!option) { | 
|  | if (qemu_opt_get(opts, "align") != NULL) { | 
|  | error_setg(errp, "Please specify shift option when using align"); | 
|  | return false; | 
|  | } | 
|  | return true; | 
|  | } | 
|  |  | 
|  | if (align && !sleep) { | 
|  | error_setg(errp, "align=on and sleep=off are incompatible"); | 
|  | return false; | 
|  | } | 
|  |  | 
|  | if (strcmp(option, "auto") != 0) { | 
|  | if (qemu_strtol(option, NULL, 0, &time_shift) < 0 | 
|  | || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) { | 
|  | error_setg(errp, "icount: Invalid shift value"); | 
|  | return false; | 
|  | } | 
|  | } else if (icount_align_option) { | 
|  | error_setg(errp, "shift=auto and align=on are incompatible"); | 
|  | return false; | 
|  | } else if (!icount_sleep) { | 
|  | error_setg(errp, "shift=auto and sleep=off are incompatible"); | 
|  | return false; | 
|  | } | 
|  |  | 
|  | icount_sleep = sleep; | 
|  | if (icount_sleep) { | 
|  | timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT, | 
|  | icount_timer_cb, NULL); | 
|  | } | 
|  |  | 
|  | icount_align_option = align; | 
|  |  | 
|  | if (time_shift >= 0) { | 
|  | timers_state.icount_time_shift = time_shift; | 
|  | icount_enable_precise(); | 
|  | return true; | 
|  | } | 
|  |  | 
|  | icount_enable_adaptive(); | 
|  |  | 
|  | /* | 
|  | * 125MIPS seems a reasonable initial guess at the guest speed. | 
|  | * It will be corrected fairly quickly anyway. | 
|  | */ | 
|  | timers_state.icount_time_shift = 3; | 
|  |  | 
|  | /* | 
|  | * Have both realtime and virtual time triggers for speed adjustment. | 
|  | * The realtime trigger catches emulated time passing too slowly, | 
|  | * the virtual time trigger catches emulated time passing too fast. | 
|  | * Realtime triggers occur even when idle, so use them less frequently | 
|  | * than VM triggers. | 
|  | */ | 
|  | timers_state.vm_clock_warp_start = -1; | 
|  | timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT, | 
|  | icount_adjust_rt, NULL); | 
|  | timer_mod(timers_state.icount_rt_timer, | 
|  | qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000); | 
|  | timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, | 
|  | icount_adjust_vm, NULL); | 
|  | timer_mod(timers_state.icount_vm_timer, | 
|  | qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + | 
|  | NANOSECONDS_PER_SECOND / 10); | 
|  | return true; | 
|  | } | 
|  |  | 
|  | void icount_notify_exit(void) | 
|  | { | 
|  | assert(icount_enabled()); | 
|  |  | 
|  | if (current_cpu) { | 
|  | qemu_cpu_kick(current_cpu); | 
|  | qemu_clock_notify(QEMU_CLOCK_VIRTUAL); | 
|  | } | 
|  | } |