| /* |
| * QEMU System Emulator |
| * |
| * Copyright (c) 2003-2008 Fabrice Bellard |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a copy |
| * of this software and associated documentation files (the "Software"), to deal |
| * in the Software without restriction, including without limitation the rights |
| * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| * copies of the Software, and to permit persons to whom the Software is |
| * furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice shall be included in |
| * all copies or substantial portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
| * THE SOFTWARE. |
| */ |
| |
| #include "qemu/osdep.h" |
| #include "qemu/cutils.h" |
| #include "migration/vmstate.h" |
| #include "qapi/error.h" |
| #include "qemu/error-report.h" |
| #include "exec/exec-all.h" |
| #include "sysemu/cpus.h" |
| #include "sysemu/qtest.h" |
| #include "qemu/main-loop.h" |
| #include "qemu/option.h" |
| #include "qemu/seqlock.h" |
| #include "sysemu/replay.h" |
| #include "sysemu/runstate.h" |
| #include "hw/core/cpu.h" |
| #include "sysemu/cpu-timers.h" |
| #include "sysemu/cpu-throttle.h" |
| #include "timers-state.h" |
| |
| /* |
| * ICOUNT: Instruction Counter |
| * |
| * this module is split off from cpu-timers because the icount part |
| * is TCG-specific, and does not need to be built for other accels. |
| */ |
| static bool icount_sleep = true; |
| /* Arbitrarily pick 1MIPS as the minimum allowable speed. */ |
| #define MAX_ICOUNT_SHIFT 10 |
| |
| /* |
| * 0 = Do not count executed instructions. |
| * 1 = Fixed conversion of insn to ns via "shift" option |
| * 2 = Runtime adaptive algorithm to compute shift |
| */ |
| int use_icount; |
| |
| static void icount_enable_precise(void) |
| { |
| use_icount = 1; |
| } |
| |
| static void icount_enable_adaptive(void) |
| { |
| use_icount = 2; |
| } |
| |
| /* |
| * The current number of executed instructions is based on what we |
| * originally budgeted minus the current state of the decrementing |
| * icount counters in extra/u16.low. |
| */ |
| static int64_t icount_get_executed(CPUState *cpu) |
| { |
| return (cpu->icount_budget - |
| (cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra)); |
| } |
| |
| /* |
| * Update the global shared timer_state.qemu_icount to take into |
| * account executed instructions. This is done by the TCG vCPU |
| * thread so the main-loop can see time has moved forward. |
| */ |
| static void icount_update_locked(CPUState *cpu) |
| { |
| int64_t executed = icount_get_executed(cpu); |
| cpu->icount_budget -= executed; |
| |
| qatomic_set_i64(&timers_state.qemu_icount, |
| timers_state.qemu_icount + executed); |
| } |
| |
| /* |
| * Update the global shared timer_state.qemu_icount to take into |
| * account executed instructions. This is done by the TCG vCPU |
| * thread so the main-loop can see time has moved forward. |
| */ |
| void icount_update(CPUState *cpu) |
| { |
| seqlock_write_lock(&timers_state.vm_clock_seqlock, |
| &timers_state.vm_clock_lock); |
| icount_update_locked(cpu); |
| seqlock_write_unlock(&timers_state.vm_clock_seqlock, |
| &timers_state.vm_clock_lock); |
| } |
| |
| static int64_t icount_get_raw_locked(void) |
| { |
| CPUState *cpu = current_cpu; |
| |
| if (cpu && cpu->running) { |
| if (!cpu->can_do_io) { |
| error_report("Bad icount read"); |
| exit(1); |
| } |
| /* Take into account what has run */ |
| icount_update_locked(cpu); |
| } |
| /* The read is protected by the seqlock, but needs atomic64 to avoid UB */ |
| return qatomic_read_i64(&timers_state.qemu_icount); |
| } |
| |
| static int64_t icount_get_locked(void) |
| { |
| int64_t icount = icount_get_raw_locked(); |
| return qatomic_read_i64(&timers_state.qemu_icount_bias) + |
| icount_to_ns(icount); |
| } |
| |
| int64_t icount_get_raw(void) |
| { |
| int64_t icount; |
| unsigned start; |
| |
| do { |
| start = seqlock_read_begin(&timers_state.vm_clock_seqlock); |
| icount = icount_get_raw_locked(); |
| } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start)); |
| |
| return icount; |
| } |
| |
| /* Return the virtual CPU time, based on the instruction counter. */ |
| int64_t icount_get(void) |
| { |
| int64_t icount; |
| unsigned start; |
| |
| do { |
| start = seqlock_read_begin(&timers_state.vm_clock_seqlock); |
| icount = icount_get_locked(); |
| } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start)); |
| |
| return icount; |
| } |
| |
| int64_t icount_to_ns(int64_t icount) |
| { |
| return icount << qatomic_read(&timers_state.icount_time_shift); |
| } |
| |
| /* |
| * Correlation between real and virtual time is always going to be |
| * fairly approximate, so ignore small variation. |
| * When the guest is idle real and virtual time will be aligned in |
| * the IO wait loop. |
| */ |
| #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10) |
| |
| static void icount_adjust(void) |
| { |
| int64_t cur_time; |
| int64_t cur_icount; |
| int64_t delta; |
| |
| /* If the VM is not running, then do nothing. */ |
| if (!runstate_is_running()) { |
| return; |
| } |
| |
| seqlock_write_lock(&timers_state.vm_clock_seqlock, |
| &timers_state.vm_clock_lock); |
| cur_time = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT, |
| cpu_get_clock_locked()); |
| cur_icount = icount_get_locked(); |
| |
| delta = cur_icount - cur_time; |
| /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */ |
| if (delta > 0 |
| && timers_state.last_delta + ICOUNT_WOBBLE < delta * 2 |
| && timers_state.icount_time_shift > 0) { |
| /* The guest is getting too far ahead. Slow time down. */ |
| qatomic_set(&timers_state.icount_time_shift, |
| timers_state.icount_time_shift - 1); |
| } |
| if (delta < 0 |
| && timers_state.last_delta - ICOUNT_WOBBLE > delta * 2 |
| && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) { |
| /* The guest is getting too far behind. Speed time up. */ |
| qatomic_set(&timers_state.icount_time_shift, |
| timers_state.icount_time_shift + 1); |
| } |
| timers_state.last_delta = delta; |
| qatomic_set_i64(&timers_state.qemu_icount_bias, |
| cur_icount - (timers_state.qemu_icount |
| << timers_state.icount_time_shift)); |
| seqlock_write_unlock(&timers_state.vm_clock_seqlock, |
| &timers_state.vm_clock_lock); |
| } |
| |
| static void icount_adjust_rt(void *opaque) |
| { |
| timer_mod(timers_state.icount_rt_timer, |
| qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000); |
| icount_adjust(); |
| } |
| |
| static void icount_adjust_vm(void *opaque) |
| { |
| timer_mod(timers_state.icount_vm_timer, |
| qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + |
| NANOSECONDS_PER_SECOND / 10); |
| icount_adjust(); |
| } |
| |
| int64_t icount_round(int64_t count) |
| { |
| int shift = qatomic_read(&timers_state.icount_time_shift); |
| return (count + (1 << shift) - 1) >> shift; |
| } |
| |
| static void icount_warp_rt(void) |
| { |
| unsigned seq; |
| int64_t warp_start; |
| |
| /* |
| * The icount_warp_timer is rescheduled soon after vm_clock_warp_start |
| * changes from -1 to another value, so the race here is okay. |
| */ |
| do { |
| seq = seqlock_read_begin(&timers_state.vm_clock_seqlock); |
| warp_start = timers_state.vm_clock_warp_start; |
| } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq)); |
| |
| if (warp_start == -1) { |
| return; |
| } |
| |
| seqlock_write_lock(&timers_state.vm_clock_seqlock, |
| &timers_state.vm_clock_lock); |
| if (runstate_is_running()) { |
| int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT, |
| cpu_get_clock_locked()); |
| int64_t warp_delta; |
| |
| warp_delta = clock - timers_state.vm_clock_warp_start; |
| if (icount_enabled() == 2) { |
| /* |
| * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too |
| * far ahead of real time. |
| */ |
| int64_t cur_icount = icount_get_locked(); |
| int64_t delta = clock - cur_icount; |
| warp_delta = MIN(warp_delta, delta); |
| } |
| qatomic_set_i64(&timers_state.qemu_icount_bias, |
| timers_state.qemu_icount_bias + warp_delta); |
| } |
| timers_state.vm_clock_warp_start = -1; |
| seqlock_write_unlock(&timers_state.vm_clock_seqlock, |
| &timers_state.vm_clock_lock); |
| |
| if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) { |
| qemu_clock_notify(QEMU_CLOCK_VIRTUAL); |
| } |
| } |
| |
| static void icount_timer_cb(void *opaque) |
| { |
| /* |
| * No need for a checkpoint because the timer already synchronizes |
| * with CHECKPOINT_CLOCK_VIRTUAL_RT. |
| */ |
| icount_warp_rt(); |
| } |
| |
| void icount_start_warp_timer(void) |
| { |
| int64_t clock; |
| int64_t deadline; |
| |
| assert(icount_enabled()); |
| |
| /* |
| * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers |
| * do not fire, so computing the deadline does not make sense. |
| */ |
| if (!runstate_is_running()) { |
| return; |
| } |
| |
| if (replay_mode != REPLAY_MODE_PLAY) { |
| if (!all_cpu_threads_idle()) { |
| return; |
| } |
| |
| if (qtest_enabled()) { |
| /* When testing, qtest commands advance icount. */ |
| return; |
| } |
| |
| replay_checkpoint(CHECKPOINT_CLOCK_WARP_START); |
| } else { |
| /* warp clock deterministically in record/replay mode */ |
| if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) { |
| /* |
| * vCPU is sleeping and warp can't be started. |
| * It is probably a race condition: notification sent |
| * to vCPU was processed in advance and vCPU went to sleep. |
| * Therefore we have to wake it up for doing someting. |
| */ |
| if (replay_has_checkpoint()) { |
| qemu_clock_notify(QEMU_CLOCK_VIRTUAL); |
| } |
| return; |
| } |
| } |
| |
| /* We want to use the earliest deadline from ALL vm_clocks */ |
| clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT); |
| deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL, |
| ~QEMU_TIMER_ATTR_EXTERNAL); |
| if (deadline < 0) { |
| static bool notified; |
| if (!icount_sleep && !notified) { |
| warn_report("icount sleep disabled and no active timers"); |
| notified = true; |
| } |
| return; |
| } |
| |
| if (deadline > 0) { |
| /* |
| * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to |
| * sleep. Otherwise, the CPU might be waiting for a future timer |
| * interrupt to wake it up, but the interrupt never comes because |
| * the vCPU isn't running any insns and thus doesn't advance the |
| * QEMU_CLOCK_VIRTUAL. |
| */ |
| if (!icount_sleep) { |
| /* |
| * We never let VCPUs sleep in no sleep icount mode. |
| * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance |
| * to the next QEMU_CLOCK_VIRTUAL event and notify it. |
| * It is useful when we want a deterministic execution time, |
| * isolated from host latencies. |
| */ |
| seqlock_write_lock(&timers_state.vm_clock_seqlock, |
| &timers_state.vm_clock_lock); |
| qatomic_set_i64(&timers_state.qemu_icount_bias, |
| timers_state.qemu_icount_bias + deadline); |
| seqlock_write_unlock(&timers_state.vm_clock_seqlock, |
| &timers_state.vm_clock_lock); |
| qemu_clock_notify(QEMU_CLOCK_VIRTUAL); |
| } else { |
| /* |
| * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some |
| * "real" time, (related to the time left until the next event) has |
| * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this. |
| * This avoids that the warps are visible externally; for example, |
| * you will not be sending network packets continuously instead of |
| * every 100ms. |
| */ |
| seqlock_write_lock(&timers_state.vm_clock_seqlock, |
| &timers_state.vm_clock_lock); |
| if (timers_state.vm_clock_warp_start == -1 |
| || timers_state.vm_clock_warp_start > clock) { |
| timers_state.vm_clock_warp_start = clock; |
| } |
| seqlock_write_unlock(&timers_state.vm_clock_seqlock, |
| &timers_state.vm_clock_lock); |
| timer_mod_anticipate(timers_state.icount_warp_timer, |
| clock + deadline); |
| } |
| } else if (deadline == 0) { |
| qemu_clock_notify(QEMU_CLOCK_VIRTUAL); |
| } |
| } |
| |
| void icount_account_warp_timer(void) |
| { |
| if (!icount_sleep) { |
| return; |
| } |
| |
| /* |
| * Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers |
| * do not fire, so computing the deadline does not make sense. |
| */ |
| if (!runstate_is_running()) { |
| return; |
| } |
| |
| /* warp clock deterministically in record/replay mode */ |
| if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) { |
| return; |
| } |
| |
| timer_del(timers_state.icount_warp_timer); |
| icount_warp_rt(); |
| } |
| |
| void icount_configure(QemuOpts *opts, Error **errp) |
| { |
| const char *option = qemu_opt_get(opts, "shift"); |
| bool sleep = qemu_opt_get_bool(opts, "sleep", true); |
| bool align = qemu_opt_get_bool(opts, "align", false); |
| long time_shift = -1; |
| |
| if (!option) { |
| if (qemu_opt_get(opts, "align") != NULL) { |
| error_setg(errp, "Please specify shift option when using align"); |
| } |
| return; |
| } |
| |
| if (align && !sleep) { |
| error_setg(errp, "align=on and sleep=off are incompatible"); |
| return; |
| } |
| |
| if (strcmp(option, "auto") != 0) { |
| if (qemu_strtol(option, NULL, 0, &time_shift) < 0 |
| || time_shift < 0 || time_shift > MAX_ICOUNT_SHIFT) { |
| error_setg(errp, "icount: Invalid shift value"); |
| return; |
| } |
| } else if (icount_align_option) { |
| error_setg(errp, "shift=auto and align=on are incompatible"); |
| return; |
| } else if (!icount_sleep) { |
| error_setg(errp, "shift=auto and sleep=off are incompatible"); |
| return; |
| } |
| |
| icount_sleep = sleep; |
| if (icount_sleep) { |
| timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT, |
| icount_timer_cb, NULL); |
| } |
| |
| icount_align_option = align; |
| |
| if (time_shift >= 0) { |
| timers_state.icount_time_shift = time_shift; |
| icount_enable_precise(); |
| return; |
| } |
| |
| icount_enable_adaptive(); |
| |
| /* |
| * 125MIPS seems a reasonable initial guess at the guest speed. |
| * It will be corrected fairly quickly anyway. |
| */ |
| timers_state.icount_time_shift = 3; |
| |
| /* |
| * Have both realtime and virtual time triggers for speed adjustment. |
| * The realtime trigger catches emulated time passing too slowly, |
| * the virtual time trigger catches emulated time passing too fast. |
| * Realtime triggers occur even when idle, so use them less frequently |
| * than VM triggers. |
| */ |
| timers_state.vm_clock_warp_start = -1; |
| timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT, |
| icount_adjust_rt, NULL); |
| timer_mod(timers_state.icount_rt_timer, |
| qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000); |
| timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, |
| icount_adjust_vm, NULL); |
| timer_mod(timers_state.icount_vm_timer, |
| qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + |
| NANOSECONDS_PER_SECOND / 10); |
| } |