|  | /* | 
|  | *  qemu user cpu loop | 
|  | * | 
|  | *  Copyright (c) 2003-2008 Fabrice Bellard | 
|  | * | 
|  | *  This program is free software; you can redistribute it and/or modify | 
|  | *  it under the terms of the GNU General Public License as published by | 
|  | *  the Free Software Foundation; either version 2 of the License, or | 
|  | *  (at your option) any later version. | 
|  | * | 
|  | *  This program is distributed in the hope that it will be useful, | 
|  | *  but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|  | *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
|  | *  GNU General Public License for more details. | 
|  | * | 
|  | *  You should have received a copy of the GNU General Public License | 
|  | *  along with this program; if not, see <http://www.gnu.org/licenses/>. | 
|  | */ | 
|  |  | 
|  | #include "qemu/osdep.h" | 
|  | #include "qemu.h" | 
|  | #include "qemu/timer.h" | 
|  | #include "user-internals.h" | 
|  | #include "cpu_loop-common.h" | 
|  | #include "signal-common.h" | 
|  | #include "user-mmap.h" | 
|  |  | 
|  | /***********************************************************/ | 
|  | /* CPUX86 core interface */ | 
|  |  | 
|  | uint64_t cpu_get_tsc(CPUX86State *env) | 
|  | { | 
|  | return cpu_get_host_ticks(); | 
|  | } | 
|  |  | 
/*
 * write_dt:
 * @ptr:   host address of the 8-byte descriptor slot to fill
 * @addr:  segment base address
 * @limit: segment limit (only bits 0..19 are used)
 * @flags: descriptor attribute bits, OR-ed verbatim into the high word
 *
 * Pack a segment descriptor into two 32-bit words and store them in
 * target byte order (via tswap32):
 *
 *   word 0: limit[15:0] in bits 0..15, addr[15:0] in bits 16..31
 *   word 1: addr[23:16] in bits 0..7, limit[19:16] in bits 16..19,
 *           addr[31:24] in bits 24..31, plus @flags
 */
static void write_dt(void *ptr, unsigned long addr, unsigned long limit,
                     int flags)
{
    uint32_t *desc = ptr;
    unsigned int low, high;

    low = (limit & 0xffff) | (addr << 16);

    high = flags;
    high |= limit & 0x000f0000;
    high |= addr & 0xff000000;
    high |= (addr >> 16) & 0xff;

    desc[0] = tswap32(low);
    desc[1] = tswap32(high);
}
|  |  | 
/*
 * Host pointer to the interrupt descriptor table.  It is assigned in
 * target_cpu_copy_regs() from env->idt.base via g2h_untagged().
 */
static uint64_t *idt_table;

/*
 * set_gate64:
 * @ptr:  host address of the 16-byte gate slot to fill
 * @type: gate type, placed at bit 8 of the second word
 * @dpl:  descriptor privilege level, placed at bit 13 of the second word
 * @addr: 64-bit handler address
 * @sel:  segment selector, placed in the upper half of the first word
 *
 * Write a 16-byte gate descriptor as four 32-bit words in target byte
 * order.  Bit 15 (0x8000) of the second word is always set; the fourth
 * word is always zero.
 */
static void set_gate64(void *ptr, unsigned int type, unsigned int dpl,
                       uint64_t addr, unsigned int sel)
{
    uint32_t *gate = ptr;
    uint32_t word0 = (sel << 16) | (addr & 0xffff);
    uint32_t word1 = (type << 8) | (dpl << 13) | 0x8000 | (addr & 0xffff0000);

    gate[0] = tswap32(word0);
    gate[1] = tswap32(word1);
    gate[2] = tswap32(addr >> 32);     /* upper 32 bits of the address */
    gate[3] = 0;
}
|  |  | 
|  | #ifdef TARGET_X86_64 | 
|  | /* only dpl matters as we do only user space emulation */ | 
|  | static void set_idt(int n, unsigned int dpl, bool is64) | 
|  | { | 
|  | set_gate64(idt_table + n * 2, 0, dpl, 0, 0); | 
|  | } | 
|  | #else | 
/*
 * set_gate:
 * @ptr:  host address of the 8-byte gate slot to fill
 * @type: gate type, placed at bit 8 of the second word
 * @dpl:  descriptor privilege level, placed at bit 13 of the second word
 * @addr: 32-bit handler address
 * @sel:  segment selector, placed in the upper half of the first word
 *
 * Write an 8-byte gate descriptor as two 32-bit words in target byte
 * order.  Bit 15 (0x8000) of the second word is always set.
 */
static void set_gate(void *ptr, unsigned int type, unsigned int dpl,
                     uint32_t addr, unsigned int sel)
{
    uint32_t *gate = ptr;
    uint32_t word0 = (sel << 16) | (addr & 0xffff);
    uint32_t word1 = (type << 8) | (dpl << 13) | 0x8000 | (addr & 0xffff0000);

    gate[0] = tswap32(word0);
    gate[1] = tswap32(word1);
}
|  |  | 
|  | /* only dpl matters as we do only user space emulation */ | 
|  | static void set_idt(int n, unsigned int dpl, bool is64) | 
|  | { | 
|  | if (is64) { | 
|  | set_gate64(idt_table + n * 2, 0, dpl, 0, 0); | 
|  | } else { | 
|  | set_gate(idt_table + n, 0, dpl, 0, 0); | 
|  | } | 
|  | } | 
|  | #endif | 
|  |  | 
|  | #ifdef TARGET_X86_64 | 
|  | static bool write_ok_or_segv(CPUX86State *env, abi_ptr addr, size_t len) | 
|  | { | 
|  | /* | 
|  | * For all the vsyscalls, NULL means "don't write anything" not | 
|  | * "write it at address 0". | 
|  | */ | 
|  | if (addr == 0 || access_ok(env_cpu(env), VERIFY_WRITE, addr, len)) { | 
|  | return true; | 
|  | } | 
|  |  | 
|  | env->error_code = PG_ERROR_W_MASK | PG_ERROR_U_MASK; | 
|  | force_sig_fault(TARGET_SIGSEGV, TARGET_SEGV_MAPERR, addr); | 
|  | return false; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Since v3.1, the kernel traps and emulates the vsyscall page. | 
|  | * Entry points other than the official generate SIGSEGV. | 
|  | */ | 
|  | static void emulate_vsyscall(CPUX86State *env) | 
|  | { | 
|  | int syscall; | 
|  | abi_ulong ret; | 
|  | uint64_t caller; | 
|  |  | 
|  | /* | 
|  | * Validate the entry point.  We have already validated the page | 
|  | * during translation to get here; now verify the offset. | 
|  | */ | 
|  | switch (env->eip & ~TARGET_PAGE_MASK) { | 
|  | case 0x000: | 
|  | syscall = TARGET_NR_gettimeofday; | 
|  | break; | 
|  | case 0x400: | 
|  | syscall = TARGET_NR_time; | 
|  | break; | 
|  | case 0x800: | 
|  | syscall = TARGET_NR_getcpu; | 
|  | break; | 
|  | default: | 
|  | goto sigsegv; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Validate the return address. | 
|  | * Note that the kernel treats this the same as an invalid entry point. | 
|  | */ | 
|  | if (get_user_u64(caller, env->regs[R_ESP])) { | 
|  | goto sigsegv; | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Validate the pointer arguments. | 
|  | */ | 
|  | switch (syscall) { | 
|  | case TARGET_NR_gettimeofday: | 
|  | if (!write_ok_or_segv(env, env->regs[R_EDI], | 
|  | sizeof(struct target_timeval)) || | 
|  | !write_ok_or_segv(env, env->regs[R_ESI], | 
|  | sizeof(struct target_timezone))) { | 
|  | return; | 
|  | } | 
|  | break; | 
|  | case TARGET_NR_time: | 
|  | if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(abi_long))) { | 
|  | return; | 
|  | } | 
|  | break; | 
|  | case TARGET_NR_getcpu: | 
|  | if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(uint32_t)) || | 
|  | !write_ok_or_segv(env, env->regs[R_ESI], sizeof(uint32_t))) { | 
|  | return; | 
|  | } | 
|  | break; | 
|  | default: | 
|  | g_assert_not_reached(); | 
|  | } | 
|  |  | 
|  | /* | 
|  | * Perform the syscall.  None of the vsyscalls should need restarting. | 
|  | */ | 
|  | ret = do_syscall(env, syscall, env->regs[R_EDI], env->regs[R_ESI], | 
|  | env->regs[R_EDX], env->regs[10], env->regs[8], | 
|  | env->regs[9], 0, 0); | 
|  | g_assert(ret != -QEMU_ERESTARTSYS); | 
|  | g_assert(ret != -QEMU_ESIGRETURN); | 
|  | if (ret == -TARGET_EFAULT) { | 
|  | goto sigsegv; | 
|  | } | 
|  | env->regs[R_EAX] = ret; | 
|  |  | 
|  | /* Emulate a ret instruction to leave the vsyscall page.  */ | 
|  | env->eip = caller; | 
|  | env->regs[R_ESP] += 8; | 
|  | return; | 
|  |  | 
|  | sigsegv: | 
|  | force_sig(TARGET_SIGSEGV); | 
|  | } | 
|  | #endif | 
|  |  | 
|  | static bool maybe_handle_vm86_trap(CPUX86State *env, int trapnr) | 
|  | { | 
|  | #ifndef TARGET_X86_64 | 
|  | if (env->eflags & VM_MASK) { | 
|  | handle_vm86_trap(env, trapnr); | 
|  | return true; | 
|  | } | 
|  | #endif | 
|  | return false; | 
|  | } | 
|  |  | 
|  | void cpu_loop(CPUX86State *env) | 
|  | { | 
|  | CPUState *cs = env_cpu(env); | 
|  | int trapnr; | 
|  | abi_ulong ret; | 
|  |  | 
|  | for(;;) { | 
|  | cpu_exec_start(cs); | 
|  | trapnr = cpu_exec(cs); | 
|  | cpu_exec_end(cs); | 
|  | process_queued_cpu_work(cs); | 
|  |  | 
|  | switch(trapnr) { | 
|  | case 0x80: | 
|  | #ifndef TARGET_X86_64 | 
|  | case EXCP_SYSCALL: | 
|  | #endif | 
|  | /* linux syscall from int $0x80 */ | 
|  | ret = do_syscall(env, | 
|  | env->regs[R_EAX], | 
|  | env->regs[R_EBX], | 
|  | env->regs[R_ECX], | 
|  | env->regs[R_EDX], | 
|  | env->regs[R_ESI], | 
|  | env->regs[R_EDI], | 
|  | env->regs[R_EBP], | 
|  | 0, 0); | 
|  | if (ret == -QEMU_ERESTARTSYS) { | 
|  | env->eip -= 2; | 
|  | } else if (ret != -QEMU_ESIGRETURN) { | 
|  | env->regs[R_EAX] = ret; | 
|  | } | 
|  | break; | 
|  | #ifdef TARGET_X86_64 | 
|  | case EXCP_SYSCALL: | 
|  | /* linux syscall from syscall instruction.  */ | 
|  | ret = do_syscall(env, | 
|  | env->regs[R_EAX], | 
|  | env->regs[R_EDI], | 
|  | env->regs[R_ESI], | 
|  | env->regs[R_EDX], | 
|  | env->regs[10], | 
|  | env->regs[8], | 
|  | env->regs[9], | 
|  | 0, 0); | 
|  | if (ret == -QEMU_ERESTARTSYS) { | 
|  | env->eip -= 2; | 
|  | } else if (ret != -QEMU_ESIGRETURN) { | 
|  | env->regs[R_EAX] = ret; | 
|  | } | 
|  | break; | 
|  | case EXCP_VSYSCALL: | 
|  | emulate_vsyscall(env); | 
|  | break; | 
|  | #endif | 
|  | case EXCP0B_NOSEG: | 
|  | case EXCP0C_STACK: | 
|  | force_sig(TARGET_SIGBUS); | 
|  | break; | 
|  | case EXCP0D_GPF: | 
|  | /* XXX: potential problem if ABI32 */ | 
|  | if (maybe_handle_vm86_trap(env, trapnr)) { | 
|  | break; | 
|  | } | 
|  | force_sig(TARGET_SIGSEGV); | 
|  | break; | 
|  | case EXCP0E_PAGE: | 
|  | force_sig_fault(TARGET_SIGSEGV, | 
|  | (env->error_code & PG_ERROR_P_MASK ? | 
|  | TARGET_SEGV_ACCERR : TARGET_SEGV_MAPERR), | 
|  | env->cr[2]); | 
|  | break; | 
|  | case EXCP00_DIVZ: | 
|  | if (maybe_handle_vm86_trap(env, trapnr)) { | 
|  | break; | 
|  | } | 
|  | force_sig_fault(TARGET_SIGFPE, TARGET_FPE_INTDIV, env->eip); | 
|  | break; | 
|  | case EXCP01_DB: | 
|  | if (maybe_handle_vm86_trap(env, trapnr)) { | 
|  | break; | 
|  | } | 
|  | force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip); | 
|  | break; | 
|  | case EXCP03_INT3: | 
|  | if (maybe_handle_vm86_trap(env, trapnr)) { | 
|  | break; | 
|  | } | 
|  | force_sig(TARGET_SIGTRAP); | 
|  | break; | 
|  | case EXCP04_INTO: | 
|  | case EXCP05_BOUND: | 
|  | if (maybe_handle_vm86_trap(env, trapnr)) { | 
|  | break; | 
|  | } | 
|  | force_sig(TARGET_SIGSEGV); | 
|  | break; | 
|  | case EXCP06_ILLOP: | 
|  | force_sig_fault(TARGET_SIGILL, TARGET_ILL_ILLOPN, env->eip); | 
|  | break; | 
|  | case EXCP_INTERRUPT: | 
|  | /* just indicate that signals should be handled asap */ | 
|  | break; | 
|  | case EXCP_DEBUG: | 
|  | force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip); | 
|  | break; | 
|  | case EXCP_ATOMIC: | 
|  | cpu_exec_step_atomic(cs); | 
|  | break; | 
|  | default: | 
|  | EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x - aborting\n", | 
|  | trapnr); | 
|  | abort(); | 
|  | } | 
|  | process_pending_signals(env); | 
|  | } | 
|  | } | 
|  |  | 
|  | static void target_cpu_free(void *obj) | 
|  | { | 
|  | CPUArchState *env = cpu_env(obj); | 
|  | target_munmap(env->gdt.base, sizeof(uint64_t) * TARGET_GDT_ENTRIES); | 
|  | g_free(obj); | 
|  | } | 
|  |  | 
|  | void target_cpu_copy_regs(CPUArchState *env, struct target_pt_regs *regs) | 
|  | { | 
|  | CPUState *cpu = env_cpu(env); | 
|  | bool is64 = (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) != 0; | 
|  | int i; | 
|  |  | 
|  | OBJECT(cpu)->free = target_cpu_free; | 
|  | env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK; | 
|  | env->hflags |= HF_PE_MASK | HF_CPL_MASK; | 
|  | if (env->features[FEAT_1_EDX] & CPUID_SSE) { | 
|  | env->cr[4] |= CR4_OSFXSR_MASK; | 
|  | env->hflags |= HF_OSFXSR_MASK; | 
|  | } | 
|  |  | 
|  | /* enable 64 bit mode if possible */ | 
|  | if (is64) { | 
|  | env->cr[4] |= CR4_PAE_MASK; | 
|  | env->efer |= MSR_EFER_LMA | MSR_EFER_LME; | 
|  | env->hflags |= HF_LMA_MASK; | 
|  | } | 
|  | #ifndef TARGET_ABI32 | 
|  | else { | 
|  | fprintf(stderr, "The selected x86 CPU does not support 64 bit mode\n"); | 
|  | exit(EXIT_FAILURE); | 
|  | } | 
|  | #endif | 
|  |  | 
|  | /* flags setup : we activate the IRQs by default as in user mode */ | 
|  | env->eflags |= IF_MASK; | 
|  |  | 
|  | /* linux register setup */ | 
|  | #ifndef TARGET_ABI32 | 
|  | env->regs[R_EAX] = regs->rax; | 
|  | env->regs[R_EBX] = regs->rbx; | 
|  | env->regs[R_ECX] = regs->rcx; | 
|  | env->regs[R_EDX] = regs->rdx; | 
|  | env->regs[R_ESI] = regs->rsi; | 
|  | env->regs[R_EDI] = regs->rdi; | 
|  | env->regs[R_EBP] = regs->rbp; | 
|  | env->regs[R_ESP] = regs->rsp; | 
|  | env->eip = regs->rip; | 
|  | #else | 
|  | env->regs[R_EAX] = regs->eax; | 
|  | env->regs[R_EBX] = regs->ebx; | 
|  | env->regs[R_ECX] = regs->ecx; | 
|  | env->regs[R_EDX] = regs->edx; | 
|  | env->regs[R_ESI] = regs->esi; | 
|  | env->regs[R_EDI] = regs->edi; | 
|  | env->regs[R_EBP] = regs->ebp; | 
|  | env->regs[R_ESP] = regs->esp; | 
|  | env->eip = regs->eip; | 
|  | #endif | 
|  |  | 
|  | /* linux interrupt setup */ | 
|  | #ifndef TARGET_ABI32 | 
|  | env->idt.limit = 511; | 
|  | #else | 
|  | env->idt.limit = 255; | 
|  | #endif | 
|  | env->idt.base = target_mmap(0, sizeof(uint64_t) * (env->idt.limit + 1), | 
|  | PROT_READ|PROT_WRITE, | 
|  | MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); | 
|  | idt_table = g2h_untagged(env->idt.base); | 
|  | for (i = 0; i < 20; i++) { | 
|  | set_idt(i, 0, is64); | 
|  | } | 
|  | set_idt(3, 3, is64); | 
|  | set_idt(4, 3, is64); | 
|  | set_idt(0x80, 3, is64); | 
|  |  | 
|  | /* linux segment setup */ | 
|  | { | 
|  | uint64_t *gdt_table; | 
|  | env->gdt.base = target_mmap(0, sizeof(uint64_t) * TARGET_GDT_ENTRIES, | 
|  | PROT_READ|PROT_WRITE, | 
|  | MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); | 
|  | env->gdt.limit = sizeof(uint64_t) * TARGET_GDT_ENTRIES - 1; | 
|  | gdt_table = g2h_untagged(env->gdt.base); | 
|  | #ifdef TARGET_ABI32 | 
|  | write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff, | 
|  | DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK | | 
|  | (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT)); | 
|  | #else | 
|  | /* 64 bit code segment */ | 
|  | write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff, | 
|  | DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK | | 
|  | DESC_L_MASK | | 
|  | (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT)); | 
|  | #endif | 
|  | write_dt(&gdt_table[__USER_DS >> 3], 0, 0xfffff, | 
|  | DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK | | 
|  | (3 << DESC_DPL_SHIFT) | (0x2 << DESC_TYPE_SHIFT)); | 
|  | } | 
|  | cpu_x86_load_seg(env, R_CS, __USER_CS); | 
|  | cpu_x86_load_seg(env, R_SS, __USER_DS); | 
|  | #ifdef TARGET_ABI32 | 
|  | cpu_x86_load_seg(env, R_DS, __USER_DS); | 
|  | cpu_x86_load_seg(env, R_ES, __USER_DS); | 
|  | cpu_x86_load_seg(env, R_FS, __USER_DS); | 
|  | cpu_x86_load_seg(env, R_GS, __USER_DS); | 
|  | /* This hack makes Wine work... */ | 
|  | env->segs[R_FS].selector = 0; | 
|  | #else | 
|  | cpu_x86_load_seg(env, R_DS, 0); | 
|  | cpu_x86_load_seg(env, R_ES, 0); | 
|  | cpu_x86_load_seg(env, R_FS, 0); | 
|  | cpu_x86_load_seg(env, R_GS, 0); | 
|  | #endif | 
|  | } |