/*
 * qemu user cpu loop
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
| |
| #include "qemu/osdep.h" |
| #include "qemu.h" |
| #include "qemu/timer.h" |
| #include "user-internals.h" |
| #include "cpu_loop-common.h" |
| #include "signal-common.h" |
| #include "user-mmap.h" |
| |
/***********************************************************/
/* CPUX86 core interface */
| |
| uint64_t cpu_get_tsc(CPUX86State *env) |
| { |
| return cpu_get_host_ticks(); |
| } |
| |
/*
 * Encode a descriptor-table entry as two 32-bit words at @ptr.
 *
 * Word 0 holds the low 16 bits of @limit with @addr shifted left by 16.
 * Word 1 holds bits 16-23 of @addr in its low byte, bits 24-31 of @addr
 * and bits 16-19 of @limit in place, OR-ed with @flags.
 * Both words go through tswap32() before being stored.
 */
static void write_dt(void *ptr, unsigned long addr, unsigned long limit,
                     int flags)
{
    uint32_t *entry = ptr;
    unsigned int low_word, high_word;

    low_word = (addr << 16) | (limit & 0xffff);

    high_word = ((addr >> 16) & 0xff) | (addr & 0xff000000) |
                (limit & 0x000f0000);
    high_word |= flags;

    entry[0] = tswap32(low_word);
    entry[1] = tswap32(high_word);
}
| |
/* Host pointer to the guest IDT; assigned in target_cpu_copy_regs(). */
static uint64_t *idt_table;

/*
 * Write a four-word (16-byte) gate descriptor at @ptr.
 *
 * Word 0: low 16 bits of @addr, @sel in the upper half.
 * Word 1: bits 16-31 of @addr, bit 15 set, @dpl at bit 13, @type at bit 8.
 * Word 2: upper 32 bits of @addr.
 * Word 3: zero.
 * The first three words go through tswap32() before being stored.
 */
static void set_gate64(void *ptr, unsigned int type, unsigned int dpl,
                       uint64_t addr, unsigned int sel)
{
    uint32_t *gate = ptr;
    uint32_t word0, word1;

    word0 = (addr & 0xffff) | (sel << 16);
    word1 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);

    gate[0] = tswap32(word0);
    gate[1] = tswap32(word1);
    gate[2] = tswap32(addr >> 32);
    gate[3] = 0;
}
| |
| #ifdef TARGET_X86_64 |
| /* only dpl matters as we do only user space emulation */ |
| static void set_idt(int n, unsigned int dpl, bool is64) |
| { |
| set_gate64(idt_table + n * 2, 0, dpl, 0, 0); |
| } |
| #else |
/*
 * Write a two-word (8-byte) gate descriptor at @ptr.
 *
 * Word 0: low 16 bits of @addr, @sel in the upper half.
 * Word 1: bits 16-31 of @addr, bit 15 set, @dpl at bit 13, @type at bit 8.
 * Both words go through tswap32() before being stored.
 */
static void set_gate(void *ptr, unsigned int type, unsigned int dpl,
                     uint32_t addr, unsigned int sel)
{
    uint32_t *gate = ptr;
    uint32_t word0, word1;

    word0 = (addr & 0xffff) | (sel << 16);
    word1 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);

    gate[0] = tswap32(word0);
    gate[1] = tswap32(word1);
}
| |
| /* only dpl matters as we do only user space emulation */ |
| static void set_idt(int n, unsigned int dpl, bool is64) |
| { |
| if (is64) { |
| set_gate64(idt_table + n * 2, 0, dpl, 0, 0); |
| } else { |
| set_gate(idt_table + n, 0, dpl, 0, 0); |
| } |
| } |
| #endif |
| |
| #ifdef TARGET_X86_64 |
| static bool write_ok_or_segv(CPUX86State *env, abi_ptr addr, size_t len) |
| { |
| /* |
| * For all the vsyscalls, NULL means "don't write anything" not |
| * "write it at address 0". |
| */ |
| if (addr == 0 || access_ok(env_cpu(env), VERIFY_WRITE, addr, len)) { |
| return true; |
| } |
| |
| env->error_code = PG_ERROR_W_MASK | PG_ERROR_U_MASK; |
| force_sig_fault(TARGET_SIGSEGV, TARGET_SEGV_MAPERR, addr); |
| return false; |
| } |
| |
| /* |
| * Since v3.1, the kernel traps and emulates the vsyscall page. |
| * Entry points other than the official generate SIGSEGV. |
| */ |
| static void emulate_vsyscall(CPUX86State *env) |
| { |
| int syscall; |
| abi_ulong ret; |
| uint64_t caller; |
| |
| /* |
| * Validate the entry point. We have already validated the page |
| * during translation to get here; now verify the offset. |
| */ |
| switch (env->eip & ~TARGET_PAGE_MASK) { |
| case 0x000: |
| syscall = TARGET_NR_gettimeofday; |
| break; |
| case 0x400: |
| syscall = TARGET_NR_time; |
| break; |
| case 0x800: |
| syscall = TARGET_NR_getcpu; |
| break; |
| default: |
| goto sigsegv; |
| } |
| |
| /* |
| * Validate the return address. |
| * Note that the kernel treats this the same as an invalid entry point. |
| */ |
| if (get_user_u64(caller, env->regs[R_ESP])) { |
| goto sigsegv; |
| } |
| |
| /* |
| * Validate the pointer arguments. |
| */ |
| switch (syscall) { |
| case TARGET_NR_gettimeofday: |
| if (!write_ok_or_segv(env, env->regs[R_EDI], |
| sizeof(struct target_timeval)) || |
| !write_ok_or_segv(env, env->regs[R_ESI], |
| sizeof(struct target_timezone))) { |
| return; |
| } |
| break; |
| case TARGET_NR_time: |
| if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(abi_long))) { |
| return; |
| } |
| break; |
| case TARGET_NR_getcpu: |
| if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(uint32_t)) || |
| !write_ok_or_segv(env, env->regs[R_ESI], sizeof(uint32_t))) { |
| return; |
| } |
| break; |
| default: |
| g_assert_not_reached(); |
| } |
| |
| /* |
| * Perform the syscall. None of the vsyscalls should need restarting. |
| */ |
| get_task_state(env_cpu(env))->orig_ax = syscall; |
| ret = do_syscall(env, syscall, env->regs[R_EDI], env->regs[R_ESI], |
| env->regs[R_EDX], env->regs[10], env->regs[8], |
| env->regs[9], 0, 0); |
| g_assert(ret != -QEMU_ERESTARTSYS); |
| g_assert(ret != -QEMU_ESIGRETURN); |
| if (ret == -TARGET_EFAULT) { |
| goto sigsegv; |
| } |
| env->regs[R_EAX] = ret; |
| |
| /* Emulate a ret instruction to leave the vsyscall page. */ |
| env->eip = caller; |
| env->regs[R_ESP] += 8; |
| return; |
| |
| sigsegv: |
| force_sig(TARGET_SIGSEGV); |
| } |
| #endif |
| |
| static bool maybe_handle_vm86_trap(CPUX86State *env, int trapnr) |
| { |
| #ifndef TARGET_X86_64 |
| if (env->eflags & VM_MASK) { |
| handle_vm86_trap(env, trapnr); |
| return true; |
| } |
| #endif |
| return false; |
| } |
| |
| void cpu_loop(CPUX86State *env) |
| { |
| CPUState *cs = env_cpu(env); |
| int trapnr; |
| abi_ulong ret; |
| |
| for(;;) { |
| cpu_exec_start(cs); |
| trapnr = cpu_exec(cs); |
| cpu_exec_end(cs); |
| process_queued_cpu_work(cs); |
| |
| switch(trapnr) { |
| case 0x80: |
| #ifndef TARGET_X86_64 |
| case EXCP_SYSCALL: |
| #endif |
| /* linux syscall from int $0x80 */ |
| get_task_state(cs)->orig_ax = env->regs[R_EAX]; |
| ret = do_syscall(env, |
| env->regs[R_EAX], |
| env->regs[R_EBX], |
| env->regs[R_ECX], |
| env->regs[R_EDX], |
| env->regs[R_ESI], |
| env->regs[R_EDI], |
| env->regs[R_EBP], |
| 0, 0); |
| if (ret == -QEMU_ERESTARTSYS) { |
| env->eip -= 2; |
| } else if (ret != -QEMU_ESIGRETURN) { |
| env->regs[R_EAX] = ret; |
| } |
| break; |
| #ifdef TARGET_X86_64 |
| case EXCP_SYSCALL: |
| /* linux syscall from syscall instruction. */ |
| get_task_state(cs)->orig_ax = env->regs[R_EAX]; |
| ret = do_syscall(env, |
| env->regs[R_EAX], |
| env->regs[R_EDI], |
| env->regs[R_ESI], |
| env->regs[R_EDX], |
| env->regs[10], |
| env->regs[8], |
| env->regs[9], |
| 0, 0); |
| if (ret == -QEMU_ERESTARTSYS) { |
| env->eip -= 2; |
| } else if (ret != -QEMU_ESIGRETURN) { |
| env->regs[R_EAX] = ret; |
| } |
| break; |
| case EXCP_VSYSCALL: |
| emulate_vsyscall(env); |
| break; |
| #endif |
| case EXCP0B_NOSEG: |
| case EXCP0C_STACK: |
| force_sig(TARGET_SIGBUS); |
| break; |
| case EXCP0D_GPF: |
| /* XXX: potential problem if ABI32 */ |
| if (maybe_handle_vm86_trap(env, trapnr)) { |
| break; |
| } |
| force_sig(TARGET_SIGSEGV); |
| break; |
| case EXCP0E_PAGE: |
| force_sig_fault(TARGET_SIGSEGV, |
| (env->error_code & PG_ERROR_P_MASK ? |
| TARGET_SEGV_ACCERR : TARGET_SEGV_MAPERR), |
| env->cr[2]); |
| break; |
| case EXCP00_DIVZ: |
| if (maybe_handle_vm86_trap(env, trapnr)) { |
| break; |
| } |
| force_sig_fault(TARGET_SIGFPE, TARGET_FPE_INTDIV, env->eip); |
| break; |
| case EXCP01_DB: |
| if (maybe_handle_vm86_trap(env, trapnr)) { |
| break; |
| } |
| force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip); |
| break; |
| case EXCP03_INT3: |
| if (maybe_handle_vm86_trap(env, trapnr)) { |
| break; |
| } |
| force_sig(TARGET_SIGTRAP); |
| break; |
| case EXCP04_INTO: |
| case EXCP05_BOUND: |
| if (maybe_handle_vm86_trap(env, trapnr)) { |
| break; |
| } |
| force_sig(TARGET_SIGSEGV); |
| break; |
| case EXCP06_ILLOP: |
| force_sig_fault(TARGET_SIGILL, TARGET_ILL_ILLOPN, env->eip); |
| break; |
| case EXCP_INTERRUPT: |
| /* just indicate that signals should be handled asap */ |
| break; |
| case EXCP_DEBUG: |
| force_sig_fault(TARGET_SIGTRAP, TARGET_TRAP_BRKPT, env->eip); |
| break; |
| case EXCP_ATOMIC: |
| cpu_exec_step_atomic(cs); |
| break; |
| default: |
| EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x - aborting\n", |
| trapnr); |
| abort(); |
| } |
| process_pending_signals(env); |
| } |
| } |
| |
| static void target_cpu_free(void *obj) |
| { |
| target_munmap(cpu_env(obj)->gdt.base, |
| sizeof(uint64_t) * TARGET_GDT_ENTRIES); |
| g_free(obj); |
| } |
| |
| void target_cpu_copy_regs(CPUArchState *env, struct target_pt_regs *regs) |
| { |
| CPUState *cpu = env_cpu(env); |
| bool is64 = (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) != 0; |
| int i; |
| |
| OBJECT(cpu)->free = target_cpu_free; |
| env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK; |
| env->hflags |= HF_PE_MASK | HF_CPL_MASK; |
| if (env->features[FEAT_1_EDX] & CPUID_SSE) { |
| env->cr[4] |= CR4_OSFXSR_MASK; |
| env->hflags |= HF_OSFXSR_MASK; |
| } |
| |
| /* enable 64 bit mode if possible */ |
| if (is64) { |
| env->cr[4] |= CR4_PAE_MASK; |
| env->efer |= MSR_EFER_LMA | MSR_EFER_LME; |
| env->hflags |= HF_LMA_MASK; |
| } |
| #ifndef TARGET_ABI32 |
| else { |
| fprintf(stderr, "The selected x86 CPU does not support 64 bit mode\n"); |
| exit(EXIT_FAILURE); |
| } |
| #endif |
| |
| /* flags setup : we activate the IRQs by default as in user mode */ |
| env->eflags |= IF_MASK; |
| |
| /* linux register setup */ |
| #ifndef TARGET_ABI32 |
| env->regs[R_EAX] = regs->rax; |
| env->regs[R_EBX] = regs->rbx; |
| env->regs[R_ECX] = regs->rcx; |
| env->regs[R_EDX] = regs->rdx; |
| env->regs[R_ESI] = regs->rsi; |
| env->regs[R_EDI] = regs->rdi; |
| env->regs[R_EBP] = regs->rbp; |
| env->regs[R_ESP] = regs->rsp; |
| env->eip = regs->rip; |
| #else |
| env->regs[R_EAX] = regs->eax; |
| env->regs[R_EBX] = regs->ebx; |
| env->regs[R_ECX] = regs->ecx; |
| env->regs[R_EDX] = regs->edx; |
| env->regs[R_ESI] = regs->esi; |
| env->regs[R_EDI] = regs->edi; |
| env->regs[R_EBP] = regs->ebp; |
| env->regs[R_ESP] = regs->esp; |
| env->eip = regs->eip; |
| #endif |
| |
| /* linux interrupt setup */ |
| #ifndef TARGET_ABI32 |
| env->idt.limit = 511; |
| #else |
| env->idt.limit = 255; |
| #endif |
| env->idt.base = target_mmap(0, sizeof(uint64_t) * (env->idt.limit + 1), |
| PROT_READ|PROT_WRITE, |
| MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); |
| idt_table = g2h_untagged(env->idt.base); |
| for (i = 0; i < 20; i++) { |
| set_idt(i, 0, is64); |
| } |
| set_idt(3, 3, is64); |
| set_idt(4, 3, is64); |
| set_idt(0x80, 3, is64); |
| |
| /* linux segment setup */ |
| { |
| uint64_t *gdt_table; |
| env->gdt.base = target_mmap(0, sizeof(uint64_t) * TARGET_GDT_ENTRIES, |
| PROT_READ|PROT_WRITE, |
| MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); |
| env->gdt.limit = sizeof(uint64_t) * TARGET_GDT_ENTRIES - 1; |
| gdt_table = g2h_untagged(env->gdt.base); |
| #ifdef TARGET_ABI32 |
| write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff, |
| DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK | |
| (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT)); |
| #else |
| /* 64 bit code segment */ |
| write_dt(&gdt_table[__USER_CS >> 3], 0, 0xfffff, |
| DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK | |
| DESC_L_MASK | |
| (3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT)); |
| #endif |
| write_dt(&gdt_table[__USER_DS >> 3], 0, 0xfffff, |
| DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK | |
| (3 << DESC_DPL_SHIFT) | (0x2 << DESC_TYPE_SHIFT)); |
| } |
| cpu_x86_load_seg(env, R_CS, __USER_CS); |
| cpu_x86_load_seg(env, R_SS, __USER_DS); |
| #ifdef TARGET_ABI32 |
| cpu_x86_load_seg(env, R_DS, __USER_DS); |
| cpu_x86_load_seg(env, R_ES, __USER_DS); |
| cpu_x86_load_seg(env, R_FS, __USER_DS); |
| cpu_x86_load_seg(env, R_GS, __USER_DS); |
| /* This hack makes Wine work... */ |
| env->segs[R_FS].selector = 0; |
| #else |
| cpu_x86_load_seg(env, R_DS, 0); |
| cpu_x86_load_seg(env, R_ES, 0); |
| cpu_x86_load_seg(env, R_FS, 0); |
| cpu_x86_load_seg(env, R_GS, 0); |
| #endif |
| } |