| /* |
| * Host code generation |
| * |
| * Copyright (c) 2003 Fabrice Bellard |
| * |
| * This library is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU Lesser General Public |
| * License as published by the Free Software Foundation; either |
| * version 2.1 of the License, or (at your option) any later version. |
| * |
| * This library is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| * Lesser General Public License for more details. |
| * |
| * You should have received a copy of the GNU Lesser General Public |
| * License along with this library; if not, see <http://www.gnu.org/licenses/>. |
| */ |
| |
| #include "qemu/osdep.h" |
| |
| #define NO_CPU_IO_DEFS |
| #include "trace.h" |
| #include "disas/disas.h" |
| #include "exec/exec-all.h" |
| #include "tcg/tcg.h" |
| #if defined(CONFIG_USER_ONLY) |
| #include "qemu.h" |
| #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) |
| #include <sys/param.h> |
| #if __FreeBSD_version >= 700104 |
| #define HAVE_KINFO_GETVMMAP |
| #define sigqueue sigqueue_freebsd /* avoid redefinition */ |
| #include <sys/proc.h> |
| #include <machine/profile.h> |
| #define _KERNEL |
| #include <sys/user.h> |
| #undef _KERNEL |
| #undef sigqueue |
| #include <libutil.h> |
| #endif |
| #endif |
| #else |
| #include "exec/ram_addr.h" |
| #endif |
| |
| #include "exec/cputlb.h" |
| #include "exec/translate-all.h" |
| #include "exec/translator.h" |
| #include "exec/tb-flush.h" |
| #include "qemu/bitmap.h" |
| #include "qemu/qemu-print.h" |
| #include "qemu/main-loop.h" |
| #include "qemu/cacheinfo.h" |
| #include "qemu/timer.h" |
| #include "exec/log.h" |
| #include "sysemu/cpus.h" |
| #include "sysemu/cpu-timers.h" |
| #include "sysemu/tcg.h" |
| #include "qapi/error.h" |
| #include "hw/core/tcg-cpu-ops.h" |
| #include "tb-jmp-cache.h" |
| #include "tb-hash.h" |
| #include "tb-context.h" |
| #include "internal.h" |
| #include "perf.h" |
| |
| TBContext tb_ctx; |
| |
/*
 * Store VAL at P in signed LEB128 form and return the address just past
 * the last byte written.
 *
 * The value is emitted seven bits at a time, least-significant group
 * first.  Bit 7 (0x80) is set on every byte except the final one.
 */
static uint8_t *encode_sleb128(uint8_t *p, int64_t val)
{
    for (;;) {
        int group = val & 0x7f;
        int sign_bit = group & 0x40;

        val >>= 7;

        /*
         * We are done once what remains of VAL is nothing but the sign
         * extension of the group just extracted: all zeros with the
         * group's sign bit clear, or all ones with it set.
         */
        if ((val == 0 && !sign_bit) || (val == -1 && sign_bit)) {
            *p++ = group;
            return p;
        }

        /* More groups follow: flag this byte as a continuation. */
        *p++ = group | 0x80;
    }
}
| |
/*
 * Decode a signed leb128 sequence at *PP; increment *PP past the
 * decoded value.  Return the decoded value.
 *
 * The value is accumulated in 64 bits, so sign extension is applied
 * whenever fewer than 64 bits were consumed and the final group has its
 * sign bit (0x40) set.  This is deliberately independent of the guest
 * word size: the result is a full int64_t, and keying the test on a
 * narrower width would leave negative values that need five or more
 * bytes without their sign extension.
 *
 * The input is assumed to be well formed (at most ten bytes, which is
 * all a 64-bit value can need); no bounds checking is performed.
 */
static int64_t decode_sleb128(const uint8_t **pp)
{
    const uint8_t *p = *pp;
    uint64_t val = 0;
    int byte, shift = 0;

    /* Accumulate in unsigned arithmetic so that no shift can overflow. */
    do {
        byte = *p++;
        val |= (uint64_t)(byte & 0x7f) << shift;
        shift += 7;
    } while (byte & 0x80);

    /* The "shift < 64" test also keeps the shift count in range. */
    if (shift < 64 && (byte & 0x40)) {
        val |= ~(uint64_t)0 << shift;
    }

    *pp = p;
    return (int64_t)val;
}
| |
| /* Encode the data collected about the instructions while compiling TB. |
| Place the data at BLOCK, and return the number of bytes consumed. |
| |
| The logical table consists of TARGET_INSN_START_WORDS target_ulong's, |
| which come from the target's insn_start data, followed by a uintptr_t |
| which comes from the host pc of the end of the code implementing the insn. |
| |
| Each line of the table is encoded as sleb128 deltas from the previous |
| line. The seed for the first line is { tb->pc, 0..., tb->tc.ptr }. |
| That is, the first column is seeded with the guest pc, the last column |
| with the host pc, and the middle columns with zeros. */ |
| |
| static int encode_search(TranslationBlock *tb, uint8_t *block) |
| { |
| uint8_t *highwater = tcg_ctx->code_gen_highwater; |
| uint8_t *p = block; |
| int i, j, n; |
| |
| for (i = 0, n = tb->icount; i < n; ++i) { |
| uint64_t prev; |
| |
| for (j = 0; j < TARGET_INSN_START_WORDS; ++j) { |
| if (i == 0) { |
| prev = (!(tb_cflags(tb) & CF_PCREL) && j == 0 ? tb->pc : 0); |
| } else { |
| prev = tcg_ctx->gen_insn_data[i - 1][j]; |
| } |
| p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev); |
| } |
| prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]); |
| p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev); |
| |
| /* Test for (pending) buffer overflow. The assumption is that any |
| one row beginning below the high water mark cannot overrun |
| the buffer completely. Thus we can test for overflow after |
| encoding a row without having to check during encoding. */ |
| if (unlikely(p > highwater)) { |
| return -1; |
| } |
| } |
| |
| return p - block; |
| } |
| |
| static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc, |
| uint64_t *data) |
| { |
| uintptr_t iter_pc = (uintptr_t)tb->tc.ptr; |
| const uint8_t *p = tb->tc.ptr + tb->tc.size; |
| int i, j, num_insns = tb->icount; |
| |
| host_pc -= GETPC_ADJ; |
| |
| if (host_pc < iter_pc) { |
| return -1; |
| } |
| |
| memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS); |
| if (!(tb_cflags(tb) & CF_PCREL)) { |
| data[0] = tb->pc; |
| } |
| |
| /* |
| * Reconstruct the stored insn data while looking for the point |
| * at which the end of the insn exceeds host_pc. |
| */ |
| for (i = 0; i < num_insns; ++i) { |
| for (j = 0; j < TARGET_INSN_START_WORDS; ++j) { |
| data[j] += decode_sleb128(&p); |
| } |
| iter_pc += decode_sleb128(&p); |
| if (iter_pc > host_pc) { |
| return num_insns - i; |
| } |
| } |
| return -1; |
| } |
| |
| /* |
| * The cpu state corresponding to 'host_pc' is restored in |
| * preparation for exiting the TB. |
| */ |
| void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, |
| uintptr_t host_pc) |
| { |
| uint64_t data[TARGET_INSN_START_WORDS]; |
| #ifdef CONFIG_PROFILER |
| TCGProfile *prof = &tcg_ctx->prof; |
| int64_t ti = profile_getclock(); |
| #endif |
| int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data); |
| |
| if (insns_left < 0) { |
| return; |
| } |
| |
| if (tb_cflags(tb) & CF_USE_ICOUNT) { |
| assert(icount_enabled()); |
| /* |
| * Reset the cycle counter to the start of the block and |
| * shift if to the number of actually executed instructions. |
| */ |
| cpu_neg(cpu)->icount_decr.u16.low += insns_left; |
| } |
| |
| cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data); |
| |
| #ifdef CONFIG_PROFILER |
| qatomic_set(&prof->restore_time, |
| prof->restore_time + profile_getclock() - ti); |
| qatomic_set(&prof->restore_count, prof->restore_count + 1); |
| #endif |
| } |
| |
| bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc) |
| { |
| /* |
| * The host_pc has to be in the rx region of the code buffer. |
| * If it is not we will not be able to resolve it here. |
| * The two cases where host_pc will not be correct are: |
| * |
| * - fault during translation (instruction fetch) |
| * - fault from helper (not using GETPC() macro) |
| * |
| * Either way we need return early as we can't resolve it here. |
| */ |
| if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) { |
| TranslationBlock *tb = tcg_tb_lookup(host_pc); |
| if (tb) { |
| cpu_restore_state_from_tb(cpu, tb, host_pc); |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data) |
| { |
| if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) { |
| TranslationBlock *tb = tcg_tb_lookup(host_pc); |
| if (tb) { |
| return cpu_unwind_data_from_tb(tb, host_pc, data) >= 0; |
| } |
| } |
| return false; |
| } |
| |
/*
 * One-time page setup: runs page_size_init() and then
 * page_table_config_init(), in that order.
 */
void page_init(void)
{
    page_size_init();
    page_table_config_init();
}
| |
| /* |
| * Isolate the portion of code gen which can setjmp/longjmp. |
| * Return the size of the generated code, or negative on error. |
| */ |
| static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb, |
| target_ulong pc, void *host_pc, |
| int *max_insns, int64_t *ti) |
| { |
| int ret = sigsetjmp(tcg_ctx->jmp_trans, 0); |
| if (unlikely(ret != 0)) { |
| return ret; |
| } |
| |
| tcg_func_start(tcg_ctx); |
| |
| tcg_ctx->cpu = env_cpu(env); |
| gen_intermediate_code(env_cpu(env), tb, max_insns, pc, host_pc); |
| assert(tb->size != 0); |
| tcg_ctx->cpu = NULL; |
| *max_insns = tb->icount; |
| |
| #ifdef CONFIG_PROFILER |
| qatomic_set(&tcg_ctx->prof.tb_count, tcg_ctx->prof.tb_count + 1); |
| qatomic_set(&tcg_ctx->prof.interm_time, |
| tcg_ctx->prof.interm_time + profile_getclock() - *ti); |
| *ti = profile_getclock(); |
| #endif |
| |
| return tcg_gen_code(tcg_ctx, tb, pc); |
| } |
| |
| /* Called with mmap_lock held for user mode emulation. */ |
| TranslationBlock *tb_gen_code(CPUState *cpu, |
| target_ulong pc, target_ulong cs_base, |
| uint32_t flags, int cflags) |
| { |
| CPUArchState *env = cpu->env_ptr; |
| TranslationBlock *tb, *existing_tb; |
| tb_page_addr_t phys_pc; |
| tcg_insn_unit *gen_code_buf; |
| int gen_code_size, search_size, max_insns; |
| #ifdef CONFIG_PROFILER |
| TCGProfile *prof = &tcg_ctx->prof; |
| #endif |
| int64_t ti; |
| void *host_pc; |
| |
| assert_memory_lock(); |
| qemu_thread_jit_write(); |
| |
| phys_pc = get_page_addr_code_hostp(env, pc, &host_pc); |
| |
| if (phys_pc == -1) { |
| /* Generate a one-shot TB with 1 insn in it */ |
| cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1; |
| } |
| |
| max_insns = cflags & CF_COUNT_MASK; |
| if (max_insns == 0) { |
| max_insns = TCG_MAX_INSNS; |
| } |
| QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS); |
| |
| buffer_overflow: |
| tb = tcg_tb_alloc(tcg_ctx); |
| if (unlikely(!tb)) { |
| /* flush must be done */ |
| tb_flush(cpu); |
| mmap_unlock(); |
| /* Make the execution loop process the flush as soon as possible. */ |
| cpu->exception_index = EXCP_INTERRUPT; |
| cpu_loop_exit(cpu); |
| } |
| |
| gen_code_buf = tcg_ctx->code_gen_ptr; |
| tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf); |
| if (!(cflags & CF_PCREL)) { |
| tb->pc = pc; |
| } |
| tb->cs_base = cs_base; |
| tb->flags = flags; |
| tb->cflags = cflags; |
| tb_set_page_addr0(tb, phys_pc); |
| tb_set_page_addr1(tb, -1); |
| tcg_ctx->gen_tb = tb; |
| tcg_ctx->addr_type = TCG_TYPE_TL; |
| #ifdef CONFIG_SOFTMMU |
| tcg_ctx->page_bits = TARGET_PAGE_BITS; |
| tcg_ctx->page_mask = TARGET_PAGE_MASK; |
| tcg_ctx->tlb_dyn_max_bits = CPU_TLB_DYN_MAX_BITS; |
| #endif |
| |
| tb_overflow: |
| |
| #ifdef CONFIG_PROFILER |
| /* includes aborted translations because of exceptions */ |
| qatomic_set(&prof->tb_count1, prof->tb_count1 + 1); |
| ti = profile_getclock(); |
| #endif |
| |
| trace_translate_block(tb, pc, tb->tc.ptr); |
| |
| gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti); |
| if (unlikely(gen_code_size < 0)) { |
| switch (gen_code_size) { |
| case -1: |
| /* |
| * Overflow of code_gen_buffer, or the current slice of it. |
| * |
| * TODO: We don't need to re-do gen_intermediate_code, nor |
| * should we re-do the tcg optimization currently hidden |
| * inside tcg_gen_code. All that should be required is to |
| * flush the TBs, allocate a new TB, re-initialize it per |
| * above, and re-do the actual code generation. |
| */ |
| qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT, |
| "Restarting code generation for " |
| "code_gen_buffer overflow\n"); |
| goto buffer_overflow; |
| |
| case -2: |
| /* |
| * The code generated for the TranslationBlock is too large. |
| * The maximum size allowed by the unwind info is 64k. |
| * There may be stricter constraints from relocations |
| * in the tcg backend. |
| * |
| * Try again with half as many insns as we attempted this time. |
| * If a single insn overflows, there's a bug somewhere... |
| */ |
| assert(max_insns > 1); |
| max_insns /= 2; |
| qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT, |
| "Restarting code generation with " |
| "smaller translation block (max %d insns)\n", |
| max_insns); |
| goto tb_overflow; |
| |
| default: |
| g_assert_not_reached(); |
| } |
| } |
| search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size); |
| if (unlikely(search_size < 0)) { |
| goto buffer_overflow; |
| } |
| tb->tc.size = gen_code_size; |
| |
| /* |
| * For CF_PCREL, attribute all executions of the generated code |
| * to its first mapping. |
| */ |
| perf_report_code(pc, tb, tcg_splitwx_to_rx(gen_code_buf)); |
| |
| #ifdef CONFIG_PROFILER |
| qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti); |
| qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size); |
| qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size); |
| qatomic_set(&prof->search_out_len, prof->search_out_len + search_size); |
| #endif |
| |
| if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) && |
| qemu_log_in_addr_range(pc)) { |
| FILE *logfile = qemu_log_trylock(); |
| if (logfile) { |
| int code_size, data_size; |
| const tcg_target_ulong *rx_data_gen_ptr; |
| size_t chunk_start; |
| int insn = 0; |
| |
| if (tcg_ctx->data_gen_ptr) { |
| rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr); |
| code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr; |
| data_size = gen_code_size - code_size; |
| } else { |
| rx_data_gen_ptr = 0; |
| code_size = gen_code_size; |
| data_size = 0; |
| } |
| |
| /* Dump header and the first instruction */ |
| fprintf(logfile, "OUT: [size=%d]\n", gen_code_size); |
| fprintf(logfile, |
| " -- guest addr 0x%016" PRIx64 " + tb prologue\n", |
| tcg_ctx->gen_insn_data[insn][0]); |
| chunk_start = tcg_ctx->gen_insn_end_off[insn]; |
| disas(logfile, tb->tc.ptr, chunk_start); |
| |
| /* |
| * Dump each instruction chunk, wrapping up empty chunks into |
| * the next instruction. The whole array is offset so the |
| * first entry is the beginning of the 2nd instruction. |
| */ |
| while (insn < tb->icount) { |
| size_t chunk_end = tcg_ctx->gen_insn_end_off[insn]; |
| if (chunk_end > chunk_start) { |
| fprintf(logfile, " -- guest addr 0x%016" PRIx64 "\n", |
| tcg_ctx->gen_insn_data[insn][0]); |
| disas(logfile, tb->tc.ptr + chunk_start, |
| chunk_end - chunk_start); |
| chunk_start = chunk_end; |
| } |
| insn++; |
| } |
| |
| if (chunk_start < code_size) { |
| fprintf(logfile, " -- tb slow paths + alignment\n"); |
| disas(logfile, tb->tc.ptr + chunk_start, |
| code_size - chunk_start); |
| } |
| |
| /* Finally dump any data we may have after the block */ |
| if (data_size) { |
| int i; |
| fprintf(logfile, " data: [size=%d]\n", data_size); |
| for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) { |
| if (sizeof(tcg_target_ulong) == 8) { |
| fprintf(logfile, |
| "0x%08" PRIxPTR ": .quad 0x%016" TCG_PRIlx "\n", |
| (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]); |
| } else if (sizeof(tcg_target_ulong) == 4) { |
| fprintf(logfile, |
| "0x%08" PRIxPTR ": .long 0x%08" TCG_PRIlx "\n", |
| (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]); |
| } else { |
| qemu_build_not_reached(); |
| } |
| } |
| } |
| fprintf(logfile, "\n"); |
| qemu_log_unlock(logfile); |
| } |
| } |
| |
| qatomic_set(&tcg_ctx->code_gen_ptr, (void *) |
| ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size, |
| CODE_GEN_ALIGN)); |
| |
| /* init jump list */ |
| qemu_spin_init(&tb->jmp_lock); |
| tb->jmp_list_head = (uintptr_t)NULL; |
| tb->jmp_list_next[0] = (uintptr_t)NULL; |
| tb->jmp_list_next[1] = (uintptr_t)NULL; |
| tb->jmp_dest[0] = (uintptr_t)NULL; |
| tb->jmp_dest[1] = (uintptr_t)NULL; |
| |
| /* init original jump addresses which have been set during tcg_gen_code() */ |
| if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) { |
| tb_reset_jump(tb, 0); |
| } |
| if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) { |
| tb_reset_jump(tb, 1); |
| } |
| |
| /* |
| * If the TB is not associated with a physical RAM page then it must be |
| * a temporary one-insn TB, and we have nothing left to do. Return early |
| * before attempting to link to other TBs or add to the lookup table. |
| */ |
| if (tb_page_addr0(tb) == -1) { |
| return tb; |
| } |
| |
| /* |
| * Insert TB into the corresponding region tree before publishing it |
| * through QHT. Otherwise rewinding happened in the TB might fail to |
| * lookup itself using host PC. |
| */ |
| tcg_tb_insert(tb); |
| |
| /* |
| * No explicit memory barrier is required -- tb_link_page() makes the |
| * TB visible in a consistent state. |
| */ |
| existing_tb = tb_link_page(tb, tb_page_addr0(tb), tb_page_addr1(tb)); |
| /* if the TB already exists, discard what we just translated */ |
| if (unlikely(existing_tb != tb)) { |
| uintptr_t orig_aligned = (uintptr_t)gen_code_buf; |
| |
| orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize); |
| qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned); |
| tcg_tb_remove(tb); |
| return existing_tb; |
| } |
| return tb; |
| } |
| |
| /* user-mode: call with mmap_lock held */ |
| void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr) |
| { |
| TranslationBlock *tb; |
| |
| assert_memory_lock(); |
| |
| tb = tcg_tb_lookup(retaddr); |
| if (tb) { |
| /* We can use retranslation to find the PC. */ |
| cpu_restore_state_from_tb(cpu, tb, retaddr); |
| tb_phys_invalidate(tb, -1); |
| } else { |
| /* The exception probably happened in a helper. The CPU state should |
| have been saved before calling it. Fetch the PC from there. */ |
| CPUArchState *env = cpu->env_ptr; |
| target_ulong pc, cs_base; |
| tb_page_addr_t addr; |
| uint32_t flags; |
| |
| cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); |
| addr = get_page_addr_code(env, pc); |
| if (addr != -1) { |
| tb_invalidate_phys_range(addr, addr); |
| } |
| } |
| } |
| |
| #ifndef CONFIG_USER_ONLY |
| /* |
| * In deterministic execution mode, instructions doing device I/Os |
| * must be at the end of the TB. |
| * |
| * Called by softmmu_template.h, with iothread mutex not held. |
| */ |
| void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) |
| { |
| TranslationBlock *tb; |
| CPUClass *cc; |
| uint32_t n; |
| |
| tb = tcg_tb_lookup(retaddr); |
| if (!tb) { |
| cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p", |
| (void *)retaddr); |
| } |
| cpu_restore_state_from_tb(cpu, tb, retaddr); |
| |
| /* |
| * Some guests must re-execute the branch when re-executing a delay |
| * slot instruction. When this is the case, adjust icount and N |
| * to account for the re-execution of the branch. |
| */ |
| n = 1; |
| cc = CPU_GET_CLASS(cpu); |
| if (cc->tcg_ops->io_recompile_replay_branch && |
| cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) { |
| cpu_neg(cpu)->icount_decr.u16.low++; |
| n = 2; |
| } |
| |
| /* |
| * Exit the loop and potentially generate a new TB executing the |
| * just the I/O insns. We also limit instrumentation to memory |
| * operations only (which execute after completion) so we don't |
| * double instrument the instruction. |
| */ |
| cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n; |
| |
| if (qemu_loglevel_mask(CPU_LOG_EXEC)) { |
| target_ulong pc = log_pc(cpu, tb); |
| if (qemu_log_in_addr_range(pc)) { |
| qemu_log("cpu_io_recompile: rewound execution of TB to " |
| TARGET_FMT_lx "\n", pc); |
| } |
| } |
| |
| cpu_loop_exit_noexc(cpu); |
| } |
| |
| static void print_qht_statistics(struct qht_stats hst, GString *buf) |
| { |
| uint32_t hgram_opts; |
| size_t hgram_bins; |
| char *hgram; |
| |
| if (!hst.head_buckets) { |
| return; |
| } |
| g_string_append_printf(buf, "TB hash buckets %zu/%zu " |
| "(%0.2f%% head buckets used)\n", |
| hst.used_head_buckets, hst.head_buckets, |
| (double)hst.used_head_buckets / |
| hst.head_buckets * 100); |
| |
| hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS; |
| hgram_opts |= QDIST_PR_100X | QDIST_PR_PERCENT; |
| if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) { |
| hgram_opts |= QDIST_PR_NODECIMAL; |
| } |
| hgram = qdist_pr(&hst.occupancy, 10, hgram_opts); |
| g_string_append_printf(buf, "TB hash occupancy %0.2f%% avg chain occ. " |
| "Histogram: %s\n", |
| qdist_avg(&hst.occupancy) * 100, hgram); |
| g_free(hgram); |
| |
| hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS; |
| hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain); |
| if (hgram_bins > 10) { |
| hgram_bins = 10; |
| } else { |
| hgram_bins = 0; |
| hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE; |
| } |
| hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts); |
| g_string_append_printf(buf, "TB hash avg chain %0.3f buckets. " |
| "Histogram: %s\n", |
| qdist_avg(&hst.chain), hgram); |
| g_free(hgram); |
| } |
| |
/* Totals gathered by tb_tree_stats_iter() over every visited TB. */
struct tb_tree_stats {
    /* Number of TBs visited. */
    size_t nb_tbs;
    /* Sum of tb->tc.size. */
    size_t host_size;
    /* Sum of tb->size. */
    size_t target_size;
    /* Largest tb->size seen. */
    size_t max_target_size;
    /* TBs whose jmp_reset_offset[0] is valid. */
    size_t direct_jmp_count;
    /* TBs whose jmp_reset_offset[0] and [1] are both valid. */
    size_t direct_jmp2_count;
    /* TBs with a second page address (tb_page_addr1() != -1). */
    size_t cross_page;
};
| |
| static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data) |
| { |
| const TranslationBlock *tb = value; |
| struct tb_tree_stats *tst = data; |
| |
| tst->nb_tbs++; |
| tst->host_size += tb->tc.size; |
| tst->target_size += tb->size; |
| if (tb->size > tst->max_target_size) { |
| tst->max_target_size = tb->size; |
| } |
| if (tb_page_addr1(tb) != -1) { |
| tst->cross_page++; |
| } |
| if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) { |
| tst->direct_jmp_count++; |
| if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) { |
| tst->direct_jmp2_count++; |
| } |
| } |
| return false; |
| } |
| |
| void dump_exec_info(GString *buf) |
| { |
| struct tb_tree_stats tst = {}; |
| struct qht_stats hst; |
| size_t nb_tbs, flush_full, flush_part, flush_elide; |
| |
| tcg_tb_foreach(tb_tree_stats_iter, &tst); |
| nb_tbs = tst.nb_tbs; |
| /* XXX: avoid using doubles ? */ |
| g_string_append_printf(buf, "Translation buffer state:\n"); |
| /* |
| * Report total code size including the padding and TB structs; |
| * otherwise users might think "-accel tcg,tb-size" is not honoured. |
| * For avg host size we use the precise numbers from tb_tree_stats though. |
| */ |
| g_string_append_printf(buf, "gen code size %zu/%zu\n", |
| tcg_code_size(), tcg_code_capacity()); |
| g_string_append_printf(buf, "TB count %zu\n", nb_tbs); |
| g_string_append_printf(buf, "TB avg target size %zu max=%zu bytes\n", |
| nb_tbs ? tst.target_size / nb_tbs : 0, |
| tst.max_target_size); |
| g_string_append_printf(buf, "TB avg host size %zu bytes " |
| "(expansion ratio: %0.1f)\n", |
| nb_tbs ? tst.host_size / nb_tbs : 0, |
| tst.target_size ? |
| (double)tst.host_size / tst.target_size : 0); |
| g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n", |
| tst.cross_page, |
| nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0); |
| g_string_append_printf(buf, "direct jump count %zu (%zu%%) " |
| "(2 jumps=%zu %zu%%)\n", |
| tst.direct_jmp_count, |
| nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0, |
| tst.direct_jmp2_count, |
| nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0); |
| |
| qht_statistics_init(&tb_ctx.htable, &hst); |
| print_qht_statistics(hst, buf); |
| qht_statistics_destroy(&hst); |
| |
| g_string_append_printf(buf, "\nStatistics:\n"); |
| g_string_append_printf(buf, "TB flush count %u\n", |
| qatomic_read(&tb_ctx.tb_flush_count)); |
| g_string_append_printf(buf, "TB invalidate count %u\n", |
| qatomic_read(&tb_ctx.tb_phys_invalidate_count)); |
| |
| tlb_flush_counts(&flush_full, &flush_part, &flush_elide); |
| g_string_append_printf(buf, "TLB full flushes %zu\n", flush_full); |
| g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part); |
| g_string_append_printf(buf, "TLB elided flushes %zu\n", flush_elide); |
| tcg_dump_info(buf); |
| } |
| |
| #else /* CONFIG_USER_ONLY */ |
| |
| void cpu_interrupt(CPUState *cpu, int mask) |
| { |
| g_assert(qemu_mutex_iothread_locked()); |
| cpu->interrupt_request |= mask; |
| qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1); |
| } |
| |
| #endif /* CONFIG_USER_ONLY */ |
| |
| /* |
| * Called by generic code at e.g. cpu reset after cpu creation, |
| * therefore we must be prepared to allocate the jump cache. |
| */ |
| void tcg_flush_jmp_cache(CPUState *cpu) |
| { |
| CPUJumpCache *jc = cpu->tb_jmp_cache; |
| |
| /* During early initialization, the cache may not yet be allocated. */ |
| if (unlikely(jc == NULL)) { |
| return; |
| } |
| |
| for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) { |
| qatomic_set(&jc->array[i].tb, NULL); |
| } |
| } |
| |
| /* This is a wrapper for common code that can not use CONFIG_SOFTMMU */ |
| void tcg_flush_softmmu_tlb(CPUState *cs) |
| { |
| #ifdef CONFIG_SOFTMMU |
| tlb_flush(cs); |
| #endif |
| } |