full softmmu support


git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@410 c046a42c-6fe2-441c-8c8c-71466251a162
diff --git a/cpu-all.h b/cpu-all.h
index 00b9399..c91813f 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -20,18 +20,19 @@
 #ifndef CPU_ALL_H
 #define CPU_ALL_H
 
-/* all CPU memory access use these macros */
-static inline int ldub(void *ptr)
+/* CPU memory access without any memory or io remapping */
+
+static inline int ldub_raw(void *ptr)
 {
     return *(uint8_t *)ptr;
 }
 
-static inline int ldsb(void *ptr)
+static inline int ldsb_raw(void *ptr)
 {
     return *(int8_t *)ptr;
 }
 
-static inline void stb(void *ptr, int v)
+static inline void stb_raw(void *ptr, int v)
 {
     *(uint8_t *)ptr = v;
 }
@@ -42,7 +43,7 @@
 #if defined(WORDS_BIGENDIAN) || defined(__arm__)
 
 /* conservative code for little endian unaligned accesses */
-static inline int lduw(void *ptr)
+static inline int lduw_raw(void *ptr)
 {
 #ifdef __powerpc__
     int val;
@@ -54,7 +55,7 @@
 #endif
 }
 
-static inline int ldsw(void *ptr)
+static inline int ldsw_raw(void *ptr)
 {
 #ifdef __powerpc__
     int val;
@@ -66,7 +67,7 @@
 #endif
 }
 
-static inline int ldl(void *ptr)
+static inline int ldl_raw(void *ptr)
 {
 #ifdef __powerpc__
     int val;
@@ -78,16 +79,16 @@
 #endif
 }
 
-static inline uint64_t ldq(void *ptr)
+static inline uint64_t ldq_raw(void *ptr)
 {
     uint8_t *p = ptr;
     uint32_t v1, v2;
-    v1 = ldl(p);
-    v2 = ldl(p + 4);
+    v1 = ldl_raw(p);
+    v2 = ldl_raw(p + 4);
     return v1 | ((uint64_t)v2 << 32);
 }
 
-static inline void stw(void *ptr, int v)
+static inline void stw_raw(void *ptr, int v)
 {
 #ifdef __powerpc__
     __asm__ __volatile__ ("sthbrx %1,0,%2" : "=m" (*(uint16_t *)ptr) : "r" (v), "r" (ptr));
@@ -98,7 +99,7 @@
 #endif
 }
 
-static inline void stl(void *ptr, int v)
+static inline void stl_raw(void *ptr, int v)
 {
 #ifdef __powerpc__
     __asm__ __volatile__ ("stwbrx %1,0,%2" : "=m" (*(uint32_t *)ptr) : "r" (v), "r" (ptr));
@@ -111,104 +112,104 @@
 #endif
 }
 
-static inline void stq(void *ptr, uint64_t v)
+static inline void stq_raw(void *ptr, uint64_t v)
 {
     uint8_t *p = ptr;
-    stl(p, (uint32_t)v);
-    stl(p + 4, v >> 32);
+    stl_raw(p, (uint32_t)v);
+    stl_raw(p + 4, v >> 32);
 }
 
 /* float access */
 
-static inline float ldfl(void *ptr)
+static inline float ldfl_raw(void *ptr)
 {
     union {
         float f;
         uint32_t i;
     } u;
-    u.i = ldl(ptr);
+    u.i = ldl_raw(ptr);
     return u.f;
 }
 
-static inline void stfl(void *ptr, float v)
+static inline void stfl_raw(void *ptr, float v)
 {
     union {
         float f;
         uint32_t i;
     } u;
     u.f = v;
-    stl(ptr, u.i);
+    stl_raw(ptr, u.i);
 }
 
 
 #if defined(__arm__) && !defined(WORDS_BIGENDIAN)
 
 /* NOTE: arm is horrible as double 32 bit words are stored in big endian ! */
-static inline double ldfq(void *ptr)
+static inline double ldfq_raw(void *ptr)
 {
     union {
         double d;
         uint32_t tab[2];
     } u;
-    u.tab[1] = ldl(ptr);
-    u.tab[0] = ldl(ptr + 4);
+    u.tab[1] = ldl_raw(ptr);
+    u.tab[0] = ldl_raw(ptr + 4);
     return u.d;
 }
 
-static inline void stfq(void *ptr, double v)
+static inline void stfq_raw(void *ptr, double v)
 {
     union {
         double d;
         uint32_t tab[2];
     } u;
     u.d = v;
-    stl(ptr, u.tab[1]);
-    stl(ptr + 4, u.tab[0]);
+    stl_raw(ptr, u.tab[1]);
+    stl_raw(ptr + 4, u.tab[0]);
 }
 
 #else
-static inline double ldfq(void *ptr)
+static inline double ldfq_raw(void *ptr)
 {
     union {
         double d;
         uint64_t i;
     } u;
-    u.i = ldq(ptr);
+    u.i = ldq_raw(ptr);
     return u.d;
 }
 
-static inline void stfq(void *ptr, double v)
+static inline void stfq_raw(void *ptr, double v)
 {
     union {
         double d;
         uint64_t i;
     } u;
     u.d = v;
-    stq(ptr, u.i);
+    stq_raw(ptr, u.i);
 }
 #endif
 
 #elif defined(TARGET_WORDS_BIGENDIAN) && !defined(WORDS_BIGENDIAN)
 
-static inline int lduw(void *ptr)
+static inline int lduw_raw(void *ptr)
 {
     uint8_t *b = (uint8_t *) ptr;
     return (b[0]<<8|b[1]);
 }
 
-static inline int ldsw(void *ptr)
+static inline int ldsw_raw(void *ptr)
 {
     int8_t *b = (int8_t *) ptr;
     return (b[0]<<8|b[1]);
 }
 
-static inline int ldl(void *ptr)
+static inline int ldl_raw(void *ptr)
 {
     uint8_t *b = (uint8_t *) ptr;
     return (b[0]<<24|b[1]<<16|b[2]<<8|b[3]);
 }
 
-static inline uint64_t ldq(void *ptr)
+static inline uint64_t ldq_raw(void *ptr)
 {
     uint32_t a,b;
     a = ldl (ptr);
@@ -216,14 +217,14 @@
     return (((uint64_t)a<<32)|b);
 }
 
-static inline void stw(void *ptr, int v)
+static inline void stw_raw(void *ptr, int v)
 {
     uint8_t *d = (uint8_t *) ptr;
     d[0] = v >> 8;
     d[1] = v;
 }
 
-static inline void stl(void *ptr, int v)
+static inline void stl_raw(void *ptr, int v)
 {
     uint8_t *d = (uint8_t *) ptr;
     d[0] = v >> 24;
@@ -232,7 +233,7 @@
     d[3] = v;
 }
 
-static inline void stq(void *ptr, uint64_t v)
+static inline void stq_raw(void *ptr, uint64_t v)
 {
     stl (ptr, v);
     stl (ptr+4, v >> 32);
@@ -240,64 +241,102 @@
 
 #else
 
-static inline int lduw(void *ptr)
+static inline int lduw_raw(void *ptr)
 {
     return *(uint16_t *)ptr;
 }
 
-static inline int ldsw(void *ptr)
+static inline int ldsw_raw(void *ptr)
 {
     return *(int16_t *)ptr;
 }
 
-static inline int ldl(void *ptr)
+static inline int ldl_raw(void *ptr)
 {
     return *(uint32_t *)ptr;
 }
 
-static inline uint64_t ldq(void *ptr)
+static inline uint64_t ldq_raw(void *ptr)
 {
     return *(uint64_t *)ptr;
 }
 
-static inline void stw(void *ptr, int v)
+static inline void stw_raw(void *ptr, int v)
 {
     *(uint16_t *)ptr = v;
 }
 
-static inline void stl(void *ptr, int v)
+static inline void stl_raw(void *ptr, int v)
 {
     *(uint32_t *)ptr = v;
 }
 
-static inline void stq(void *ptr, uint64_t v)
+static inline void stq_raw(void *ptr, uint64_t v)
 {
     *(uint64_t *)ptr = v;
 }
 
 /* float access */
 
-static inline float ldfl(void *ptr)
+static inline float ldfl_raw(void *ptr)
 {
     return *(float *)ptr;
 }
 
-static inline double ldfq(void *ptr)
+static inline double ldfq_raw(void *ptr)
 {
     return *(double *)ptr;
 }
 
-static inline void stfl(void *ptr, float v)
+static inline void stfl_raw(void *ptr, float v)
 {
     *(float *)ptr = v;
 }
 
-static inline void stfq(void *ptr, double v)
+static inline void stfq_raw(void *ptr, double v)
 {
     *(double *)ptr = v;
 }
 #endif
 
+/* MMU memory access macros */
+
+#if defined(CONFIG_USER_ONLY) 
+
+/* in user mode there are no other memory access functions: use the raw ones */
+#define ldub(p) ldub_raw(p)
+#define ldsb(p) ldsb_raw(p)
+#define lduw(p) lduw_raw(p)
+#define ldsw(p) ldsw_raw(p)
+#define ldl(p) ldl_raw(p)
+#define ldq(p) ldq_raw(p)
+#define ldfl(p) ldfl_raw(p)
+#define ldfq(p) ldfq_raw(p)
+#define stb(p, v) stb_raw(p, v)
+#define stw(p, v) stw_raw(p, v)
+#define stl(p, v) stl_raw(p, v)
+#define stq(p, v) stq_raw(p, v)
+#define stfl(p, v) stfl_raw(p, v)
+#define stfq(p, v) stfq_raw(p, v)
+
+#define ldub_code(p) ldub_raw(p)
+#define ldsb_code(p) ldsb_raw(p)
+#define lduw_code(p) lduw_raw(p)
+#define ldsw_code(p) ldsw_raw(p)
+#define ldl_code(p) ldl_raw(p)
+
+#define ldub_kernel(p) ldub_raw(p)
+#define ldsb_kernel(p) ldsb_raw(p)
+#define lduw_kernel(p) lduw_raw(p)
+#define ldsw_kernel(p) ldsw_raw(p)
+#define ldl_kernel(p) ldl_raw(p)
+#define stb_kernel(p, v) stb_raw(p, v)
+#define stw_kernel(p, v) stw_raw(p, v)
+#define stl_kernel(p, v) stl_raw(p, v)
+#define stq_kernel(p, v) stq_raw(p, v)
+
+#endif /* defined(CONFIG_USER_ONLY) */
+
 /* page related stuff */
 
 #define TARGET_PAGE_SIZE (1 << TARGET_PAGE_BITS)
diff --git a/exec.c b/exec.c
index f4f6a9b..ca767e2 100644
--- a/exec.c
+++ b/exec.c
@@ -444,16 +444,20 @@
         prot = 0;
         for(addr = host_start; addr < host_end; addr += TARGET_PAGE_SIZE)
             prot |= page_get_flags(addr);
+#if !defined(CONFIG_SOFTMMU)
         mprotect((void *)host_start, host_page_size, 
                  (prot & PAGE_BITS) & ~PAGE_WRITE);
+#endif
+#if !defined(CONFIG_USER_ONLY)
+        /* remove the soft TLB write entry for this page */
+        /* XXX: must flush on all processors sharing the same address space */
+        tlb_flush_page_write(cpu_single_env, host_start);
+#endif
 #ifdef DEBUG_TB_INVALIDATE
         printf("protecting code page: 0x%08lx\n", 
                host_start);
 #endif
         p->flags &= ~PAGE_WRITE;
-#ifdef DEBUG_TB_CHECK
-        tb_page_check();
-#endif
     }
 }
 
@@ -483,6 +487,9 @@
     if (page_index2 != page_index1) {
         tb_alloc_page(tb, page_index2);
     }
+#ifdef DEBUG_TB_CHECK
+    tb_page_check();
+#endif
     tb->jmp_first = (TranslationBlock *)((long)tb | 2);
     tb->jmp_next[0] = NULL;
     tb->jmp_next[1] = NULL;
@@ -517,20 +524,23 @@
     /* if the page was really writable, then we change its
        protection back to writable */
     if (prot & PAGE_WRITE_ORG) {
-        mprotect((void *)host_start, host_page_size, 
-                 (prot & PAGE_BITS) | PAGE_WRITE);
         pindex = (address - host_start) >> TARGET_PAGE_BITS;
-        p1[pindex].flags |= PAGE_WRITE;
-        /* and since the content will be modified, we must invalidate
-           the corresponding translated code. */
-        tb_invalidate_page(address);
-#ifdef DEBUG_TB_CHECK
-        tb_invalidate_check(address);
+        if (!(p1[pindex].flags & PAGE_WRITE)) {
+#if !defined(CONFIG_SOFTMMU)
+            mprotect((void *)host_start, host_page_size, 
+                     (prot & PAGE_BITS) | PAGE_WRITE);
 #endif
-        return 1;
-    } else {
-        return 0;
+            p1[pindex].flags |= PAGE_WRITE;
+            /* and since the content will be modified, we must invalidate
+               the corresponding translated code. */
+            tb_invalidate_page(address);
+#ifdef DEBUG_TB_CHECK
+            tb_invalidate_check(address);
+#endif
+            return 1;
+        }
     }
+    return 0;
 }
 
 /* call this function when system calls directly modify a memory area */
@@ -734,13 +744,17 @@
 /* unmap all maped pages and flush all associated code */
 void page_unmap(void)
 {
-    PageDesc *p, *pmap;
-    unsigned long addr;
-    int i, j, ret, j1;
+    PageDesc *pmap;
+    int i;
 
     for(i = 0; i < L1_SIZE; i++) {
         pmap = l1_map[i];
         if (pmap) {
+#if !defined(CONFIG_SOFTMMU)
+            PageDesc *p;
+            unsigned long addr;
+            int j, ret, j1;
+            
             p = pmap;
             for(j = 0;j < L2_SIZE;) {
                 if (p->flags & PAGE_VALID) {
@@ -763,6 +777,7 @@
                     j++;
                 }
             }
+#endif
             free(pmap);
             l1_map[i] = NULL;
         }
@@ -773,7 +788,7 @@
 
 void tlb_flush(CPUState *env)
 {
-#if defined(TARGET_I386)
+#if !defined(CONFIG_USER_ONLY)
     int i;
     for(i = 0; i < CPU_TLB_SIZE; i++) {
         env->tlb_read[0][i].address = -1;
@@ -784,16 +799,38 @@
 #endif
 }
 
+static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, uint32_t addr)
+{
+    if (addr == (tlb_entry->address & 
+                 (TARGET_PAGE_MASK | TLB_INVALID_MASK)))
+        tlb_entry->address = -1;
+}
+
 void tlb_flush_page(CPUState *env, uint32_t addr)
 {
-#if defined(TARGET_I386)
+#if !defined(CONFIG_USER_ONLY)
     int i;
 
+    addr &= TARGET_PAGE_MASK;
     i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
-    env->tlb_read[0][i].address = -1;
-    env->tlb_write[0][i].address = -1;
-    env->tlb_read[1][i].address = -1;
-    env->tlb_write[1][i].address = -1;
+    tlb_flush_entry(&env->tlb_read[0][i], addr);
+    tlb_flush_entry(&env->tlb_write[0][i], addr);
+    tlb_flush_entry(&env->tlb_read[1][i], addr);
+    tlb_flush_entry(&env->tlb_write[1][i], addr);
+#endif
+}
+
+/* make all writes to page 'addr' trigger a TLB exception to detect
+   self-modifying code */
+void tlb_flush_page_write(CPUState *env, uint32_t addr)
+{
+#if !defined(CONFIG_USER_ONLY)
+    int i;
+
+    addr &= TARGET_PAGE_MASK;
+    i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+    tlb_flush_entry(&env->tlb_write[0][i], addr);
+    tlb_flush_entry(&env->tlb_write[1][i], addr);
 #endif
 }
 
@@ -900,3 +937,25 @@
     }
     return io_index << IO_MEM_SHIFT;
 }
+
+#if !defined(CONFIG_USER_ONLY) 
+
+#define MMUSUFFIX _cmmu
+#define GETPC() NULL
+#define env cpu_single_env
+
+#define SHIFT 0
+#include "softmmu_template.h"
+
+#define SHIFT 1
+#include "softmmu_template.h"
+
+#define SHIFT 2
+#include "softmmu_template.h"
+
+#define SHIFT 3
+#include "softmmu_template.h"
+
+#undef env
+
+#endif
diff --git a/hw/vga_template.h b/hw/vga_template.h
index 0d1d5ce..cc4df95 100644
--- a/hw/vga_template.h
+++ b/hw/vga_template.h
@@ -354,7 +354,7 @@
 
     w = width;
     do {
-        v = lduw((void *)s);
+        v = lduw_raw((void *)s);
         r = (v >> 7) & 0xf8;
         g = (v >> 2) & 0xf8;
         b = (v << 3) & 0xf8;
@@ -379,7 +379,7 @@
 
     w = width;
     do {
-        v = lduw((void *)s);
+        v = lduw_raw((void *)s);
         r = (v >> 8) & 0xf8;
         g = (v >> 3) & 0xfc;
         b = (v << 3) & 0xf8;
diff --git a/softmmu_header.h b/softmmu_header.h
index 36cf9f0..26b4f2c 100644
--- a/softmmu_header.h
+++ b/softmmu_header.h
@@ -19,26 +19,48 @@
  */
 #if DATA_SIZE == 8
 #define SUFFIX q
+#define USUFFIX q
 #define DATA_TYPE uint64_t
 #elif DATA_SIZE == 4
 #define SUFFIX l
+#define USUFFIX l
 #define DATA_TYPE uint32_t
 #elif DATA_SIZE == 2
 #define SUFFIX w
+#define USUFFIX uw
 #define DATA_TYPE uint16_t
 #define DATA_STYPE int16_t
 #elif DATA_SIZE == 1
 #define SUFFIX b
+#define USUFFIX ub
 #define DATA_TYPE uint8_t
 #define DATA_STYPE int8_t
 #else
 #error unsupported data size
 #endif
 
-#if MEMUSER == 0
-#define MEMSUFFIX _kernel
+#if ACCESS_TYPE == 0
+
+#define CPU_MEM_INDEX 0
+#define MMUSUFFIX _mmu
+
+#elif ACCESS_TYPE == 1
+
+#define CPU_MEM_INDEX 1
+#define MMUSUFFIX _mmu
+
+#elif ACCESS_TYPE == 2
+
+#define CPU_MEM_INDEX ((env->hflags & HF_CPL_MASK) == 3)
+#define MMUSUFFIX _mmu
+
+#elif ACCESS_TYPE == 3
+
+#define CPU_MEM_INDEX ((env->hflags & HF_CPL_MASK) == 3)
+#define MMUSUFFIX _cmmu
+
 #else
-#define MEMSUFFIX _user
+#error invalid ACCESS_TYPE
 #endif
 
 #if DATA_SIZE == 8
@@ -48,24 +70,26 @@
 #endif
 
 
-#if MEMUSER == 0
-DATA_TYPE REGPARM(1) glue(glue(__ld, SUFFIX), _mmu)(unsigned long addr);
-void REGPARM(2) glue(glue(__st, SUFFIX), _mmu)(unsigned long addr, DATA_TYPE v);
-#endif
+DATA_TYPE REGPARM(1) glue(glue(__ld, SUFFIX), MMUSUFFIX)(unsigned long addr,
+                                                         int is_user);
+void REGPARM(2) glue(glue(__st, SUFFIX), MMUSUFFIX)(unsigned long addr, DATA_TYPE v, int is_user);
 
-static inline int glue(glue(ldu, SUFFIX), MEMSUFFIX)(void *ptr)
+static inline int glue(glue(ld, USUFFIX), MEMSUFFIX)(void *ptr)
 {
     int index;
     RES_TYPE res;
     unsigned long addr, physaddr;
+    int is_user;
+
     addr = (unsigned long)ptr;
     index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
-    if (__builtin_expect(env->tlb_read[MEMUSER][index].address != 
+    is_user = CPU_MEM_INDEX;
+    if (__builtin_expect(env->tlb_read[is_user][index].address != 
                          (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))), 0)) {
-        res = glue(glue(__ld, SUFFIX), _mmu)(addr);
+        res = glue(glue(__ld, SUFFIX), MMUSUFFIX)(addr, is_user);
     } else {
-        physaddr = addr + env->tlb_read[MEMUSER][index].addend;
-        res = glue(glue(ldu, SUFFIX), _raw)((uint8_t *)physaddr);
+        physaddr = addr + env->tlb_read[is_user][index].addend;
+        res = glue(glue(ld, USUFFIX), _raw)((uint8_t *)physaddr);
     }
     return res;
 }
@@ -75,13 +99,16 @@
 {
     int res, index;
     unsigned long addr, physaddr;
+    int is_user;
+
     addr = (unsigned long)ptr;
     index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
-    if (__builtin_expect(env->tlb_read[MEMUSER][index].address != 
+    is_user = CPU_MEM_INDEX;
+    if (__builtin_expect(env->tlb_read[is_user][index].address != 
                          (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))), 0)) {
-        res = (DATA_STYPE)glue(glue(__ld, SUFFIX), _mmu)(addr);
+        res = (DATA_STYPE)glue(glue(__ld, SUFFIX), MMUSUFFIX)(addr, is_user);
     } else {
-        physaddr = addr + env->tlb_read[MEMUSER][index].addend;
+        physaddr = addr + env->tlb_read[is_user][index].addend;
         res = glue(glue(lds, SUFFIX), _raw)((uint8_t *)physaddr);
     }
     return res;
@@ -92,13 +119,16 @@
 {
     int index;
     unsigned long addr, physaddr;
+    int is_user;
+
     addr = (unsigned long)ptr;
     index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
-    if (__builtin_expect(env->tlb_write[MEMUSER][index].address != 
+    is_user = CPU_MEM_INDEX;
+    if (__builtin_expect(env->tlb_write[is_user][index].address != 
                          (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))), 0)) {
-        glue(glue(__st, SUFFIX), _mmu)(addr, v);
+        glue(glue(__st, SUFFIX), MMUSUFFIX)(addr, v, is_user);
     } else {
-        physaddr = addr + env->tlb_write[MEMUSER][index].addend;
+        physaddr = addr + env->tlb_write[is_user][index].addend;
         glue(glue(st, SUFFIX), _raw)((uint8_t *)physaddr, v);
     }
 }
@@ -107,5 +137,7 @@
 #undef DATA_TYPE
 #undef DATA_STYPE
 #undef SUFFIX
+#undef USUFFIX
 #undef DATA_SIZE
-#undef MEMSUFFIX
+#undef CPU_MEM_INDEX
+#undef MMUSUFFIX
diff --git a/softmmu_template.h b/softmmu_template.h
index 765e913..4f4f2f4 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -21,23 +21,31 @@
 
 #if DATA_SIZE == 8
 #define SUFFIX q
+#define USUFFIX q
 #define DATA_TYPE uint64_t
 #elif DATA_SIZE == 4
 #define SUFFIX l
+#define USUFFIX l
 #define DATA_TYPE uint32_t
 #elif DATA_SIZE == 2
 #define SUFFIX w
+#define USUFFIX uw
 #define DATA_TYPE uint16_t
 #elif DATA_SIZE == 1
 #define SUFFIX b
+#define USUFFIX ub
 #define DATA_TYPE uint8_t
 #else
 #error unsupported data size
 #endif
 
-static DATA_TYPE glue(slow_ld, SUFFIX)(unsigned long addr, void *retaddr);
-static void glue(slow_st, SUFFIX)(unsigned long addr, DATA_TYPE val,
-                                  void *retaddr);
+static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(unsigned long addr, 
+                                                        int is_user,
+                                                        void *retaddr);
+static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(unsigned long addr, 
+                                                   DATA_TYPE val, 
+                                                   int is_user,
+                                                   void *retaddr);
 
 static inline DATA_TYPE glue(io_read, SUFFIX)(unsigned long physaddr, 
                                               unsigned long tlb_addr)
@@ -81,16 +89,16 @@
 }
 
 /* handle all cases except unaligned access which span two pages */
-DATA_TYPE REGPARM(1) glue(glue(__ld, SUFFIX), _mmu)(unsigned long addr)
+DATA_TYPE REGPARM(1) glue(glue(__ld, SUFFIX), MMUSUFFIX)(unsigned long addr,
+                                                         int is_user)
 {
     DATA_TYPE res;
-    int is_user, index;
+    int index;
     unsigned long physaddr, tlb_addr;
     void *retaddr;
     
     /* test if there is match for unaligned or IO access */
     /* XXX: could done more in memory macro in a non portable way */
-    is_user = ((env->hflags & HF_CPL_MASK) == 3);
     index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
  redo:
     tlb_addr = env->tlb_read[is_user][index].address;
@@ -104,29 +112,31 @@
         } else if (((addr & 0xfff) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
             /* slow unaligned access (it spans two pages or IO) */
         do_unaligned_access:
-            retaddr = __builtin_return_address(0);
-            res = glue(slow_ld, SUFFIX)(addr, retaddr);
+            retaddr = GETPC();
+            res = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr, 
+                                                         is_user, retaddr);
         } else {
             /* unaligned access in the same page */
-            res = glue(glue(ldu, SUFFIX), _raw)((uint8_t *)physaddr);
+            res = glue(glue(ld, USUFFIX), _raw)((uint8_t *)physaddr);
         }
     } else {
         /* the page is not in the TLB : fill it */
-        retaddr = __builtin_return_address(0);
-        tlb_fill(addr, 0, retaddr);
+        retaddr = GETPC();
+        tlb_fill(addr, 0, is_user, retaddr);
         goto redo;
     }
     return res;
 }
 
 /* handle all unaligned cases */
-static DATA_TYPE glue(slow_ld, SUFFIX)(unsigned long addr, void *retaddr)
+static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(unsigned long addr, 
+                                                        int is_user,
+                                                        void *retaddr)
 {
     DATA_TYPE res, res1, res2;
-    int is_user, index, shift;
+    int index, shift;
     unsigned long physaddr, tlb_addr, addr1, addr2;
 
-    is_user = ((env->hflags & HF_CPL_MASK) == 3);
     index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
  redo:
     tlb_addr = env->tlb_read[is_user][index].address;
@@ -142,8 +152,10 @@
             /* slow unaligned access (it spans two pages) */
             addr1 = addr & ~(DATA_SIZE - 1);
             addr2 = addr1 + DATA_SIZE;
-            res1 = glue(slow_ld, SUFFIX)(addr1, retaddr);
-            res2 = glue(slow_ld, SUFFIX)(addr2, retaddr);
+            res1 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr1, 
+                                                          is_user, retaddr);
+            res2 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(addr2, 
+                                                          is_user, retaddr);
             shift = (addr & (DATA_SIZE - 1)) * 8;
 #ifdef TARGET_WORDS_BIGENDIAN
             res = (res1 << shift) | (res2 >> ((DATA_SIZE * 8) - shift));
@@ -152,24 +164,25 @@
 #endif
         } else {
             /* unaligned/aligned access in the same page */
-            res = glue(glue(ldu, SUFFIX), _raw)((uint8_t *)physaddr);
+            res = glue(glue(ld, USUFFIX), _raw)((uint8_t *)physaddr);
         }
     } else {
         /* the page is not in the TLB : fill it */
-        tlb_fill(addr, 0, retaddr);
+        tlb_fill(addr, 0, is_user, retaddr);
         goto redo;
     }
     return res;
 }
 
 
-void REGPARM(2) glue(glue(__st, SUFFIX), _mmu)(unsigned long addr, DATA_TYPE val)
+void REGPARM(2) glue(glue(__st, SUFFIX), MMUSUFFIX)(unsigned long addr, 
+                                                    DATA_TYPE val,
+                                                    int is_user)
 {
     unsigned long physaddr, tlb_addr;
     void *retaddr;
-    int is_user, index;
+    int index;
     
-    is_user = ((env->hflags & HF_CPL_MASK) == 3);
     index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
  redo:
     tlb_addr = env->tlb_write[is_user][index].address;
@@ -182,28 +195,30 @@
             glue(io_write, SUFFIX)(physaddr, val, tlb_addr);
         } else if (((addr & 0xfff) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
         do_unaligned_access:
-            retaddr = __builtin_return_address(0);
-            glue(slow_st, SUFFIX)(addr, val, retaddr);
+            retaddr = GETPC();
+            glue(glue(slow_st, SUFFIX), MMUSUFFIX)(addr, val, 
+                                                   is_user, retaddr);
         } else {
             /* aligned/unaligned access in the same page */
             glue(glue(st, SUFFIX), _raw)((uint8_t *)physaddr, val);
         }
     } else {
         /* the page is not in the TLB : fill it */
-        retaddr = __builtin_return_address(0);
-        tlb_fill(addr, 1, retaddr);
+        retaddr = GETPC();
+        tlb_fill(addr, 1, is_user, retaddr);
         goto redo;
     }
 }
 
 /* handles all unaligned cases */
-static void glue(slow_st, SUFFIX)(unsigned long addr, DATA_TYPE val,
-                                  void *retaddr)
+static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(unsigned long addr, 
+                                                   DATA_TYPE val,
+                                                   int is_user,
+                                                   void *retaddr)
 {
     unsigned long physaddr, tlb_addr;
-    int is_user, index, i;
+    int index, i;
 
-    is_user = ((env->hflags & HF_CPL_MASK) == 3);
     index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
  redo:
     tlb_addr = env->tlb_write[is_user][index].address;
@@ -219,9 +234,11 @@
             /* XXX: not efficient, but simple */
             for(i = 0;i < DATA_SIZE; i++) {
 #ifdef TARGET_WORDS_BIGENDIAN
-                slow_stb(addr + i, val >> (((DATA_SIZE - 1) * 8) - (i * 8)), retaddr);
+                glue(slow_stb, MMUSUFFIX)(addr + i, val >> (((DATA_SIZE - 1) * 8) - (i * 8)), 
+                                          is_user, retaddr);
 #else
-                slow_stb(addr + i, val >> (i * 8), retaddr);
+                glue(slow_stb, MMUSUFFIX)(addr + i, val >> (i * 8), 
+                                          is_user, retaddr);
 #endif
             }
         } else {
@@ -230,7 +247,7 @@
         }
     } else {
         /* the page is not in the TLB : fill it */
-        tlb_fill(addr, 1, retaddr);
+        tlb_fill(addr, 1, is_user, retaddr);
         goto redo;
     }
 }
@@ -238,4 +255,5 @@
 #undef SHIFT
 #undef DATA_TYPE
 #undef SUFFIX
+#undef USUFFIX
 #undef DATA_SIZE
diff --git a/target-i386/exec.h b/target-i386/exec.h
index b53928c..eb13186 100644
--- a/target-i386/exec.h
+++ b/target-i386/exec.h
@@ -137,8 +137,10 @@
 void cpu_x86_update_cr0(CPUX86State *env);
 void cpu_x86_update_cr3(CPUX86State *env);
 void cpu_x86_flush_tlb(CPUX86State *env, uint32_t addr);
-int cpu_x86_handle_mmu_fault(CPUX86State *env, uint32_t addr, int is_write);
-void tlb_fill(unsigned long addr, int is_write, void *retaddr);
+int cpu_x86_handle_mmu_fault(CPUX86State *env, uint32_t addr, 
+                             int is_write, int is_user, int is_softmmu);
+void tlb_fill(unsigned long addr, int is_write, int is_user, 
+              void *retaddr);
 void __hidden cpu_lock(void);
 void __hidden cpu_unlock(void);
 void do_interrupt(int intno, int is_int, int error_code, 
@@ -366,26 +368,14 @@
         (eflags & update_mask);
 }
 
-/* memory access macros */
+/* XXX: move this to a generic header */
+#if !defined(CONFIG_USER_ONLY)
 
-#define ldul ldl
-#define lduq ldq
 #define ldul_user ldl_user
 #define ldul_kernel ldl_kernel
 
-#define ldub_raw ldub
-#define ldsb_raw ldsb
-#define lduw_raw lduw
-#define ldsw_raw ldsw
-#define ldl_raw ldl
-#define ldq_raw ldq
-
-#define stb_raw stb
-#define stw_raw stw
-#define stl_raw stl
-#define stq_raw stq
-
-#define MEMUSER 0
+#define ACCESS_TYPE 0
+#define MEMSUFFIX _kernel
 #define DATA_SIZE 1
 #include "softmmu_header.h"
 
@@ -397,9 +387,11 @@
 
 #define DATA_SIZE 8
 #include "softmmu_header.h"
+#undef ACCESS_TYPE
+#undef MEMSUFFIX
 
-#undef MEMUSER
-#define MEMUSER 1
+#define ACCESS_TYPE 1
+#define MEMSUFFIX _user
 #define DATA_SIZE 1
 #include "softmmu_header.h"
 
@@ -411,6 +403,76 @@
 
 #define DATA_SIZE 8
 #include "softmmu_header.h"
+#undef ACCESS_TYPE
+#undef MEMSUFFIX
 
-#undef MEMUSER
+/* these accesses are slower; they must be as rare as possible */
+#define ACCESS_TYPE 2
+#define MEMSUFFIX _data
+#define DATA_SIZE 1
+#include "softmmu_header.h"
 
+#define DATA_SIZE 2
+#include "softmmu_header.h"
+
+#define DATA_SIZE 4
+#include "softmmu_header.h"
+
+#define DATA_SIZE 8
+#include "softmmu_header.h"
+#undef ACCESS_TYPE
+#undef MEMSUFFIX
+
+#define ldub(p) ldub_data(p)
+#define ldsb(p) ldsb_data(p)
+#define lduw(p) lduw_data(p)
+#define ldsw(p) ldsw_data(p)
+#define ldl(p) ldl_data(p)
+#define ldq(p) ldq_data(p)
+
+#define stb(p, v) stb_data(p, v)
+#define stw(p, v) stw_data(p, v)
+#define stl(p, v) stl_data(p, v)
+#define stq(p, v) stq_data(p, v)
+
+static inline double ldfq(void *ptr)
+{
+    union {
+        double d;
+        uint64_t i;
+    } u;
+    u.i = ldq(ptr);
+    return u.d;
+}
+
+static inline void stfq(void *ptr, double v)
+{
+    union {
+        double d;
+        uint64_t i;
+    } u;
+    u.d = v;
+    stq(ptr, u.i);
+}
+
+static inline float ldfl(void *ptr)
+{
+    union {
+        float f;
+        uint32_t i;
+    } u;
+    u.i = ldl(ptr);
+    return u.f;
+}
+
+static inline void stfl(void *ptr, float v)
+{
+    union {
+        float f;
+        uint32_t i;
+    } u;
+    u.f = v;
+    stl(ptr, u.i);
+}
+
+#endif /* !defined(CONFIG_USER_ONLY) */
diff --git a/target-i386/helper.c b/target-i386/helper.c
index 075a99f..43b8168 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -153,11 +153,11 @@
     if (index + (4 << shift) - 1 > env->tr.limit)
         raise_exception_err(EXCP0A_TSS, env->tr.selector & 0xfffc);
     if (shift == 0) {
-        *esp_ptr = lduw(env->tr.base + index);
-        *ss_ptr = lduw(env->tr.base + index + 2);
+        *esp_ptr = lduw_kernel(env->tr.base + index);
+        *ss_ptr = lduw_kernel(env->tr.base + index + 2);
     } else {
-        *esp_ptr = ldl(env->tr.base + index);
-        *ss_ptr = lduw(env->tr.base + index + 4);
+        *esp_ptr = ldl_kernel(env->tr.base + index);
+        *ss_ptr = lduw_kernel(env->tr.base + index + 4);
     }
 }
 
@@ -177,8 +177,8 @@
     if ((index + 7) > dt->limit)
         return -1;
     ptr = dt->base + index;
-    *e1_ptr = ldl(ptr);
-    *e2_ptr = ldl(ptr + 4);
+    *e1_ptr = ldl_kernel(ptr);
+    *e2_ptr = ldl_kernel(ptr + 4);
     return 0;
 }
                                      
@@ -226,8 +226,8 @@
     if (intno * 8 + 7 > dt->limit)
         raise_exception_err(EXCP0D_GPF, intno * 8 + 2);
     ptr = dt->base + intno * 8;
-    e1 = ldl(ptr);
-    e2 = ldl(ptr + 4);
+    e1 = ldl_kernel(ptr);
+    e2 = ldl_kernel(ptr + 4);
     /* check gate type */
     type = (e2 >> DESC_TYPE_SHIFT) & 0x1f;
     switch(type) {
@@ -344,47 +344,47 @@
         int old_eflags;
         if (env->eflags & VM_MASK) {
             ssp -= 4;
-            stl(ssp, env->segs[R_GS].selector);
+            stl_kernel(ssp, env->segs[R_GS].selector);
             ssp -= 4;
-            stl(ssp, env->segs[R_FS].selector);
+            stl_kernel(ssp, env->segs[R_FS].selector);
             ssp -= 4;
-            stl(ssp, env->segs[R_DS].selector);
+            stl_kernel(ssp, env->segs[R_DS].selector);
             ssp -= 4;
-            stl(ssp, env->segs[R_ES].selector);
+            stl_kernel(ssp, env->segs[R_ES].selector);
         }
         if (new_stack) {
             ssp -= 4;
-            stl(ssp, old_ss);
+            stl_kernel(ssp, old_ss);
             ssp -= 4;
-            stl(ssp, old_esp);
+            stl_kernel(ssp, old_esp);
         }
         ssp -= 4;
         old_eflags = compute_eflags();
-        stl(ssp, old_eflags);
+        stl_kernel(ssp, old_eflags);
         ssp -= 4;
-        stl(ssp, old_cs);
+        stl_kernel(ssp, old_cs);
         ssp -= 4;
-        stl(ssp, old_eip);
+        stl_kernel(ssp, old_eip);
         if (has_error_code) {
             ssp -= 4;
-            stl(ssp, error_code);
+            stl_kernel(ssp, error_code);
         }
     } else {
         if (new_stack) {
             ssp -= 2;
-            stw(ssp, old_ss);
+            stw_kernel(ssp, old_ss);
             ssp -= 2;
-            stw(ssp, old_esp);
+            stw_kernel(ssp, old_esp);
         }
         ssp -= 2;
-        stw(ssp, compute_eflags());
+        stw_kernel(ssp, compute_eflags());
         ssp -= 2;
-        stw(ssp, old_cs);
+        stw_kernel(ssp, old_cs);
         ssp -= 2;
-        stw(ssp, old_eip);
+        stw_kernel(ssp, old_eip);
         if (has_error_code) {
             ssp -= 2;
-            stw(ssp, error_code);
+            stw_kernel(ssp, error_code);
         }
     }
     
@@ -410,8 +410,8 @@
     if (intno * 4 + 3 > dt->limit)
         raise_exception_err(EXCP0D_GPF, intno * 8 + 2);
     ptr = dt->base + intno * 4;
-    offset = lduw(ptr);
-    selector = lduw(ptr + 2);
+    offset = lduw_kernel(ptr);
+    selector = lduw_kernel(ptr + 2);
     esp = ESP;
     ssp = env->segs[R_SS].base;
     if (is_int)
@@ -420,11 +420,11 @@
         old_eip = env->eip;
     old_cs = env->segs[R_CS].selector;
     esp -= 2;
-    stw(ssp + (esp & 0xffff), compute_eflags());
+    stw_kernel(ssp + (esp & 0xffff), compute_eflags());
     esp -= 2;
-    stw(ssp + (esp & 0xffff), old_cs);
+    stw_kernel(ssp + (esp & 0xffff), old_cs);
     esp -= 2;
-    stw(ssp + (esp & 0xffff), old_eip);
+    stw_kernel(ssp + (esp & 0xffff), old_eip);
     
     /* update processor state */
     ESP = (ESP & ~0xffff) | (esp & 0xffff);
@@ -445,7 +445,7 @@
 
     dt = &env->idt;
     ptr = dt->base + (intno * 8);
-    e2 = ldl(ptr + 4);
+    e2 = ldl_kernel(ptr + 4);
     
     dpl = (e2 >> DESC_DPL_SHIFT) & 3;
     cpl = env->hflags & HF_CPL_MASK;
@@ -651,8 +651,8 @@
         if ((index + 7) > dt->limit)
             raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
         ptr = dt->base + index;
-        e1 = ldl(ptr);
-        e2 = ldl(ptr + 4);
+        e1 = ldl_kernel(ptr);
+        e2 = ldl_kernel(ptr + 4);
         if ((e2 & DESC_S_MASK) || ((e2 >> DESC_TYPE_SHIFT) & 0xf) != 2)
             raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
         if (!(e2 & DESC_P_MASK))
@@ -684,8 +684,8 @@
         if ((index + 7) > dt->limit)
             raise_exception_err(EXCP0D_GPF, selector & 0xfffc);
         ptr = dt->base + index;
-        e1 = ldl(ptr);
-        e2 = ldl(ptr + 4);
+        e1 = ldl_kernel(ptr);
+        e2 = ldl_kernel(ptr + 4);
         type = (e2 >> DESC_TYPE_SHIFT) & 0xf;
         if ((e2 & DESC_S_MASK) || 
             (type != 2 && type != 9))
@@ -694,7 +694,7 @@
             raise_exception_err(EXCP0B_NOSEG, selector & 0xfffc);
         load_seg_cache_raw_dt(&env->tr, e1, e2);
         e2 |= 0x00000200; /* set the busy bit */
-        stl(ptr + 4, e2);
+        stl_kernel(ptr + 4, e2);
     }
     env->tr.selector = selector;
 }
@@ -813,14 +813,14 @@
     ssp = env->segs[R_SS].base;
     if (shift) {
         esp -= 4;
-        stl(ssp + (esp & esp_mask), env->segs[R_CS].selector);
+        stl_kernel(ssp + (esp & esp_mask), env->segs[R_CS].selector);
         esp -= 4;
-        stl(ssp + (esp & esp_mask), next_eip);
+        stl_kernel(ssp + (esp & esp_mask), next_eip);
     } else {
         esp -= 2;
-        stw(ssp + (esp & esp_mask), env->segs[R_CS].selector);
+        stw_kernel(ssp + (esp & esp_mask), env->segs[R_CS].selector);
         esp -= 2;
-        stw(ssp + (esp & esp_mask), next_eip);
+        stw_kernel(ssp + (esp & esp_mask), next_eip);
     }
 
     if (!(env->segs[R_SS].flags & DESC_B_MASK))
@@ -873,14 +873,14 @@
         ssp = env->segs[R_SS].base + sp;
         if (shift) {
             ssp -= 4;
-            stl(ssp, env->segs[R_CS].selector);
+            stl_kernel(ssp, env->segs[R_CS].selector);
             ssp -= 4;
-            stl(ssp, next_eip);
+            stl_kernel(ssp, next_eip);
         } else {
             ssp -= 2;
-            stw(ssp, env->segs[R_CS].selector);
+            stw_kernel(ssp, env->segs[R_CS].selector);
             ssp -= 2;
-            stw(ssp, next_eip);
+            stw_kernel(ssp, next_eip);
         }
         sp -= (4 << shift);
         
@@ -975,23 +975,23 @@
             ssp = env->segs[R_SS].base + sp;
             if (shift) {
                 ssp -= 4;
-                stl(ssp, old_ss);
+                stl_kernel(ssp, old_ss);
                 ssp -= 4;
-                stl(ssp, old_esp);
+                stl_kernel(ssp, old_esp);
                 ssp -= 4 * param_count;
                 for(i = 0; i < param_count; i++) {
-                    val = ldl(old_ssp + i * 4);
-                    stl(ssp + i * 4, val);
+                    val = ldl_kernel(old_ssp + i * 4);
+                    stl_kernel(ssp + i * 4, val);
                 }
             } else {
                 ssp -= 2;
-                stw(ssp, old_ss);
+                stw_kernel(ssp, old_ss);
                 ssp -= 2;
-                stw(ssp, old_esp);
+                stw_kernel(ssp, old_esp);
                 ssp -= 2 * param_count;
                 for(i = 0; i < param_count; i++) {
-                    val = lduw(old_ssp + i * 2);
-                    stw(ssp + i * 2, val);
+                    val = lduw_kernel(old_ssp + i * 2);
+                    stw_kernel(ssp + i * 2, val);
                 }
             }
         } else {
@@ -1004,14 +1004,14 @@
 
         if (shift) {
             ssp -= 4;
-            stl(ssp, env->segs[R_CS].selector);
+            stl_kernel(ssp, env->segs[R_CS].selector);
             ssp -= 4;
-            stl(ssp, next_eip);
+            stl_kernel(ssp, next_eip);
         } else {
             ssp -= 2;
-            stw(ssp, env->segs[R_CS].selector);
+            stw_kernel(ssp, env->segs[R_CS].selector);
             ssp -= 2;
-            stw(ssp, next_eip);
+            stw_kernel(ssp, next_eip);
         }
 
         sp -= push_size;
@@ -1042,14 +1042,14 @@
     ssp = env->segs[R_SS].base + sp;
     if (shift == 1) {
         /* 32 bits */
-        new_eflags = ldl(ssp + 8);
-        new_cs = ldl(ssp + 4) & 0xffff;
-        new_eip = ldl(ssp) & 0xffff;
+        new_eflags = ldl_kernel(ssp + 8);
+        new_cs = ldl_kernel(ssp + 4) & 0xffff;
+        new_eip = ldl_kernel(ssp) & 0xffff;
     } else {
         /* 16 bits */
-        new_eflags = lduw(ssp + 4);
-        new_cs = lduw(ssp + 2);
-        new_eip = lduw(ssp);
+        new_eflags = lduw_kernel(ssp + 4);
+        new_cs = lduw_kernel(ssp + 2);
+        new_eip = lduw_kernel(ssp);
     }
     new_esp = sp + (6 << shift);
     ESP = (ESP & 0xffff0000) | 
@@ -1078,17 +1078,17 @@
     if (shift == 1) {
         /* 32 bits */
         if (is_iret)
-            new_eflags = ldl(ssp + 8);
-        new_cs = ldl(ssp + 4) & 0xffff;
-        new_eip = ldl(ssp);
+            new_eflags = ldl_kernel(ssp + 8);
+        new_cs = ldl_kernel(ssp + 4) & 0xffff;
+        new_eip = ldl_kernel(ssp);
         if (is_iret && (new_eflags & VM_MASK))
             goto return_to_vm86;
     } else {
         /* 16 bits */
         if (is_iret)
-            new_eflags = lduw(ssp + 4);
-        new_cs = lduw(ssp + 2);
-        new_eip = lduw(ssp);
+            new_eflags = lduw_kernel(ssp + 4);
+        new_cs = lduw_kernel(ssp + 2);
+        new_eip = lduw_kernel(ssp);
     }
     if ((new_cs & 0xfffc) == 0)
         raise_exception_err(EXCP0D_GPF, new_cs & 0xfffc);
@@ -1124,12 +1124,12 @@
         ssp += (4 << shift) + ((2 * is_iret) << shift) + addend;
         if (shift == 1) {
             /* 32 bits */
-            new_esp = ldl(ssp);
-            new_ss = ldl(ssp + 4) & 0xffff;
+            new_esp = ldl_kernel(ssp);
+            new_ss = ldl_kernel(ssp + 4) & 0xffff;
         } else {
             /* 16 bits */
-            new_esp = lduw(ssp);
-            new_ss = lduw(ssp + 2);
+            new_esp = lduw_kernel(ssp);
+            new_ss = lduw_kernel(ssp + 2);
         }
         
         if ((new_ss & 3) != rpl)
@@ -1175,12 +1175,12 @@
     return;
 
  return_to_vm86:
-    new_esp = ldl(ssp + 12);
-    new_ss = ldl(ssp + 16);
-    new_es = ldl(ssp + 20);
-    new_ds = ldl(ssp + 24);
-    new_fs = ldl(ssp + 28);
-    new_gs = ldl(ssp + 32);
+    new_esp = ldl_kernel(ssp + 12);
+    new_ss = ldl_kernel(ssp + 16);
+    new_es = ldl_kernel(ssp + 20);
+    new_ds = ldl_kernel(ssp + 24);
+    new_fs = ldl_kernel(ssp + 28);
+    new_gs = ldl_kernel(ssp + 32);
     
     /* modify processor state */
     load_eflags(new_eflags, FL_UPDATE_CPL0_MASK | VM_MASK | VIF_MASK | VIP_MASK);
@@ -1770,6 +1770,11 @@
     }
 }
 
+#if !defined(CONFIG_USER_ONLY) 
+
+#define MMUSUFFIX _mmu
+#define GETPC() (__builtin_return_address(0))
+
 #define SHIFT 0
 #include "softmmu_template.h"
 
@@ -1782,22 +1787,41 @@
 #define SHIFT 3
 #include "softmmu_template.h"
 
-/* try to fill the TLB and return an exception if error */
-void tlb_fill(unsigned long addr, int is_write, void *retaddr)
+#endif
+
+/* Try to fill the TLB and raise a page fault exception on error. If
+   retaddr is NULL, the function was called from C code (i.e. not
+   from generated code or from helper.c). */
+/* XXX: fix it to restore all registers */
+void tlb_fill(unsigned long addr, int is_write, int is_user, void *retaddr)
 {
     TranslationBlock *tb;
     int ret;
     unsigned long pc;
-    ret = cpu_x86_handle_mmu_fault(env, addr, is_write);
+    CPUX86State *saved_env;
+
+    /* XXX: hack to restore env in all cases, even if not called from
+       generated code */
+    saved_env = env;
+    env = cpu_single_env;
+    if (is_write && page_unprotect(addr)) {
+        /* the page was write protected because there was code in it and
+           page_unprotect() flushed that code; the MMU fault is still handled below. */
+    }
+
+    ret = cpu_x86_handle_mmu_fault(env, addr, is_write, is_user, 1);
     if (ret) {
-        /* now we have a real cpu fault */
-        pc = (unsigned long)retaddr;
-        tb = tb_find_pc(pc);
-        if (tb) {
-            /* the PC is inside the translated code. It means that we have
-               a virtual CPU fault */
-            cpu_restore_state(tb, env, pc);
+        if (retaddr) {
+            /* now we have a real cpu fault */
+            pc = (unsigned long)retaddr;
+            tb = tb_find_pc(pc);
+            if (tb) {
+                /* the PC is inside the translated code. It means that we have
+                   a virtual CPU fault */
+                cpu_restore_state(tb, env, pc);
+            }
         }
         raise_exception_err(EXCP0E_PAGE, env->error_code);
     }
+    env = saved_env;
 }
diff --git a/target-i386/helper2.c b/target-i386/helper2.c
index bc0d426..1bec820 100644
--- a/target-i386/helper2.c
+++ b/target-i386/helper2.c
@@ -210,7 +210,9 @@
     flags = page_get_flags(addr);
     if (flags & PAGE_VALID) {
         virt_addr = addr & ~0xfff;
+#if !defined(CONFIG_SOFTMMU)
         munmap((void *)virt_addr, 4096);
+#endif
         page_set_flags(virt_addr, virt_addr + 4096, 0);
     }
 }
@@ -221,16 +223,14 @@
    1  = generate PF fault
    2  = soft MMU activation required for this block
 */
-int cpu_x86_handle_mmu_fault(CPUX86State *env, uint32_t addr, int is_write)
+int cpu_x86_handle_mmu_fault(CPUX86State *env, uint32_t addr, 
+                             int is_write, int is_user, int is_softmmu)
 {
     uint8_t *pde_ptr, *pte_ptr;
     uint32_t pde, pte, virt_addr;
-    int cpl, error_code, is_dirty, is_user, prot, page_size, ret;
+    int error_code, is_dirty, prot, page_size, ret;
     unsigned long pd;
     
-    cpl = env->hflags & HF_CPL_MASK;
-    is_user = (cpl == 3);
-    
 #ifdef DEBUG_MMU
     printf("MMU fault: addr=0x%08x w=%d u=%d eip=%08x\n", 
            addr, is_write, is_user, env->eip);
@@ -252,7 +252,7 @@
 
     /* page directory entry */
     pde_ptr = phys_ram_base + ((env->cr[3] & ~0xfff) + ((addr >> 20) & ~3));
-    pde = ldl(pde_ptr);
+    pde = ldl_raw(pde_ptr);
     if (!(pde & PG_PRESENT_MASK)) {
         error_code = 0;
         goto do_fault;
@@ -274,7 +274,7 @@
             pde |= PG_ACCESSED_MASK;
             if (is_dirty)
                 pde |= PG_DIRTY_MASK;
-            stl(pde_ptr, pde);
+            stl_raw(pde_ptr, pde);
         }
         
         pte = pde & ~0x003ff000; /* align to 4MB */
@@ -283,12 +283,12 @@
     } else {
         if (!(pde & PG_ACCESSED_MASK)) {
             pde |= PG_ACCESSED_MASK;
-            stl(pde_ptr, pde);
+            stl_raw(pde_ptr, pde);
         }
 
         /* page directory entry */
         pte_ptr = phys_ram_base + ((pde & ~0xfff) + ((addr >> 10) & 0xffc));
-        pte = ldl(pte_ptr);
+        pte = ldl_raw(pte_ptr);
         if (!(pte & PG_PRESENT_MASK)) {
             error_code = 0;
             goto do_fault;
@@ -308,7 +308,7 @@
             pte |= PG_ACCESSED_MASK;
             if (is_dirty)
                 pte |= PG_DIRTY_MASK;
-            stl(pte_ptr, pte);
+            stl_raw(pte_ptr, pte);
         }
         page_size = 4096;
         virt_addr = addr & ~0xfff;
@@ -325,7 +325,10 @@
     }
     
  do_mapping:
-    if (env->hflags & HF_SOFTMMU_MASK) {
+#if !defined(CONFIG_SOFTMMU)
+    if (is_softmmu) 
+#endif
+    {
         unsigned long paddr, vaddr, address, addend, page_offset;
         int index;
 
@@ -352,32 +355,39 @@
             env->tlb_write[is_user][index].address = address;
             env->tlb_write[is_user][index].addend = addend;
         }
-    }
-    ret = 0;
-    /* XXX: incorrect for 4MB pages */
-    pd = physpage_find(pte & ~0xfff);
-    if ((pd & 0xfff) != 0) {
-        /* IO access: no mapping is done as it will be handled by the
-           soft MMU */
-        if (!(env->hflags & HF_SOFTMMU_MASK))
-            ret = 2;
-    } else {
-        void *map_addr;
-        map_addr = mmap((void *)virt_addr, page_size, prot, 
-                        MAP_SHARED | MAP_FIXED, phys_ram_fd, pd);
-        if (map_addr == MAP_FAILED) {
-            fprintf(stderr, 
-                    "mmap failed when mapped physical address 0x%08x to virtual address 0x%08x\n",
-                    pte & ~0xfff, virt_addr);
-            exit(1);
-        }
-#ifdef DEBUG_MMU
-        printf("mmaping 0x%08x to virt 0x%08x pse=%d\n", 
-               pte & ~0xfff, virt_addr, (page_size != 4096));
-#endif
-        page_set_flags(virt_addr, virt_addr + page_size, 
+        page_set_flags(vaddr, vaddr + TARGET_PAGE_SIZE, 
                        PAGE_VALID | PAGE_EXEC | prot);
+        ret = 0;
     }
+#if !defined(CONFIG_SOFTMMU)
+    else {
+        ret = 0;
+        /* XXX: incorrect for 4MB pages */
+        pd = physpage_find(pte & ~0xfff);
+        if ((pd & 0xfff) != 0) {
+            /* IO access: no mapping is done as it will be handled by the
+               soft MMU */
+            if (!(env->hflags & HF_SOFTMMU_MASK))
+                ret = 2;
+        } else {
+            void *map_addr;
+            map_addr = mmap((void *)virt_addr, page_size, prot, 
+                            MAP_SHARED | MAP_FIXED, phys_ram_fd, pd);
+            if (map_addr == MAP_FAILED) {
+                fprintf(stderr, 
+                        "mmap failed when mapped physical address 0x%08x to virtual address 0x%08x\n",
+                        pte & ~0xfff, virt_addr);
+                exit(1);
+            }
+#ifdef DEBUG_MMU
+            printf("mmaping 0x%08x to virt 0x%08x pse=%d\n", 
+                   pte & ~0xfff, virt_addr, (page_size != 4096));
+#endif
+            page_set_flags(virt_addr, virt_addr + page_size, 
+                           PAGE_VALID | PAGE_EXEC | prot);
+        }
+    }
+#endif
     return ret;
  do_fault_protect:
     error_code = PG_ERROR_P_MASK;
diff --git a/target-i386/op.c b/target-i386/op.c
index 68191f9..0f4dbd1 100644
--- a/target-i386/op.c
+++ b/target-i386/op.c
@@ -376,14 +376,16 @@
 
 /* memory access */
 
-#define MEMSUFFIX
+#define MEMSUFFIX _raw
 #include "ops_mem.h"
 
+#if !defined(CONFIG_USER_ONLY)
 #define MEMSUFFIX _user
 #include "ops_mem.h"
 
 #define MEMSUFFIX _kernel
 #include "ops_mem.h"
+#endif
 
 /* used for bit operations */
 
diff --git a/target-i386/translate.c b/target-i386/translate.c
index 43d3bbc..a64c58c 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -570,10 +570,10 @@
 };
 
 static GenOpFunc *gen_op_lds_T0_A0[3 * 3] = {
-    gen_op_ldsb_T0_A0,
-    gen_op_ldsw_T0_A0,
+    gen_op_ldsb_raw_T0_A0,
+    gen_op_ldsw_raw_T0_A0,
     NULL,
-
+#ifndef CONFIG_USER_ONLY
     gen_op_ldsb_kernel_T0_A0,
     gen_op_ldsw_kernel_T0_A0,
     NULL,
@@ -581,13 +581,15 @@
     gen_op_ldsb_user_T0_A0,
     gen_op_ldsw_user_T0_A0,
     NULL,
+#endif
 };
 
 static GenOpFunc *gen_op_ldu_T0_A0[3 * 3] = {
-    gen_op_ldub_T0_A0,
-    gen_op_lduw_T0_A0,
+    gen_op_ldub_raw_T0_A0,
+    gen_op_lduw_raw_T0_A0,
     NULL,
 
+#ifndef CONFIG_USER_ONLY
     gen_op_ldub_kernel_T0_A0,
     gen_op_lduw_kernel_T0_A0,
     NULL,
@@ -595,14 +597,16 @@
     gen_op_ldub_user_T0_A0,
     gen_op_lduw_user_T0_A0,
     NULL,
+#endif
 };
 
 /* sign does not matter, except for lidt/lgdt call (TODO: fix it) */
 static GenOpFunc *gen_op_ld_T0_A0[3 * 3] = {
-    gen_op_ldub_T0_A0,
-    gen_op_lduw_T0_A0,
-    gen_op_ldl_T0_A0,
+    gen_op_ldub_raw_T0_A0,
+    gen_op_lduw_raw_T0_A0,
+    gen_op_ldl_raw_T0_A0,
 
+#ifndef CONFIG_USER_ONLY
     gen_op_ldub_kernel_T0_A0,
     gen_op_lduw_kernel_T0_A0,
     gen_op_ldl_kernel_T0_A0,
@@ -610,13 +614,15 @@
     gen_op_ldub_user_T0_A0,
     gen_op_lduw_user_T0_A0,
     gen_op_ldl_user_T0_A0,
+#endif
 };
 
 static GenOpFunc *gen_op_ld_T1_A0[3 * 3] = {
-    gen_op_ldub_T1_A0,
-    gen_op_lduw_T1_A0,
-    gen_op_ldl_T1_A0,
+    gen_op_ldub_raw_T1_A0,
+    gen_op_lduw_raw_T1_A0,
+    gen_op_ldl_raw_T1_A0,
 
+#ifndef CONFIG_USER_ONLY
     gen_op_ldub_kernel_T1_A0,
     gen_op_lduw_kernel_T1_A0,
     gen_op_ldl_kernel_T1_A0,
@@ -624,13 +630,15 @@
     gen_op_ldub_user_T1_A0,
     gen_op_lduw_user_T1_A0,
     gen_op_ldl_user_T1_A0,
+#endif
 };
 
 static GenOpFunc *gen_op_st_T0_A0[3 * 3] = {
-    gen_op_stb_T0_A0,
-    gen_op_stw_T0_A0,
-    gen_op_stl_T0_A0,
+    gen_op_stb_raw_T0_A0,
+    gen_op_stw_raw_T0_A0,
+    gen_op_stl_raw_T0_A0,
 
+#ifndef CONFIG_USER_ONLY
     gen_op_stb_kernel_T0_A0,
     gen_op_stw_kernel_T0_A0,
     gen_op_stl_kernel_T0_A0,
@@ -638,6 +646,7 @@
     gen_op_stb_user_T0_A0,
     gen_op_stw_user_T0_A0,
     gen_op_stl_user_T0_A0,
+#endif
 };
 
 static inline void gen_string_movl_A0_ESI(DisasContext *s)
@@ -1176,7 +1185,7 @@
         
         if (base == 4) {
             havesib = 1;
-            code = ldub(s->pc++);
+            code = ldub_code(s->pc++);
             scale = (code >> 6) & 3;
             index = (code >> 3) & 7;
             base = code & 7;
@@ -1186,18 +1195,18 @@
         case 0:
             if (base == 5) {
                 base = -1;
-                disp = ldl(s->pc);
+                disp = ldl_code(s->pc);
                 s->pc += 4;
             } else {
                 disp = 0;
             }
             break;
         case 1:
-            disp = (int8_t)ldub(s->pc++);
+            disp = (int8_t)ldub_code(s->pc++);
             break;
         default:
         case 2:
-            disp = ldl(s->pc);
+            disp = ldl_code(s->pc);
             s->pc += 4;
             break;
         }
@@ -1229,7 +1238,7 @@
         switch (mod) {
         case 0:
             if (rm == 6) {
-                disp = lduw(s->pc);
+                disp = lduw_code(s->pc);
                 s->pc += 2;
                 gen_op_movl_A0_im(disp);
                 rm = 0; /* avoid SS override */
@@ -1239,11 +1248,11 @@
             }
             break;
         case 1:
-            disp = (int8_t)ldub(s->pc++);
+            disp = (int8_t)ldub_code(s->pc++);
             break;
         default:
         case 2:
-            disp = lduw(s->pc);
+            disp = lduw_code(s->pc);
             s->pc += 2;
             break;
         }
@@ -1337,16 +1346,16 @@
 
     switch(ot) {
     case OT_BYTE:
-        ret = ldub(s->pc);
+        ret = ldub_code(s->pc);
         s->pc++;
         break;
     case OT_WORD:
-        ret = lduw(s->pc);
+        ret = lduw_code(s->pc);
         s->pc += 2;
         break;
     default:
     case OT_LONG:
-        ret = ldl(s->pc);
+        ret = ldl_code(s->pc);
         s->pc += 4;
         break;
     }
@@ -1756,7 +1765,7 @@
     dflag = s->code32;
     s->override = -1;
  next_byte:
-    b = ldub(s->pc);
+    b = ldub_code(s->pc);
     s->pc++;
     /* check prefixes */
     switch (b) {
@@ -1814,7 +1823,7 @@
     case 0x0f:
         /**************************/
         /* extended op code */
-        b = ldub(s->pc++) | 0x100;
+        b = ldub_code(s->pc++) | 0x100;
         goto reswitch;
         
         /**************************/
@@ -1839,7 +1848,7 @@
             
             switch(f) {
             case 0: /* OP Ev, Gv */
-                modrm = ldub(s->pc++);
+                modrm = ldub_code(s->pc++);
                 reg = ((modrm >> 3) & 7);
                 mod = (modrm >> 6) & 3;
                 rm = modrm & 7;
@@ -1861,7 +1870,7 @@
                 gen_op(s, op, ot, opreg);
                 break;
             case 1: /* OP Gv, Ev */
-                modrm = ldub(s->pc++);
+                modrm = ldub_code(s->pc++);
                 mod = (modrm >> 6) & 3;
                 reg = ((modrm >> 3) & 7);
                 rm = modrm & 7;
@@ -1895,7 +1904,7 @@
             else
                 ot = dflag ? OT_LONG : OT_WORD;
             
-            modrm = ldub(s->pc++);
+            modrm = ldub_code(s->pc++);
             mod = (modrm >> 6) & 3;
             rm = modrm & 7;
             op = (modrm >> 3) & 7;
@@ -1939,7 +1948,7 @@
         else
             ot = dflag ? OT_LONG : OT_WORD;
 
-        modrm = ldub(s->pc++);
+        modrm = ldub_code(s->pc++);
         mod = (modrm >> 6) & 3;
         rm = modrm & 7;
         op = (modrm >> 3) & 7;
@@ -2045,7 +2054,7 @@
         else
             ot = dflag ? OT_LONG : OT_WORD;
 
-        modrm = ldub(s->pc++);
+        modrm = ldub_code(s->pc++);
         mod = (modrm >> 6) & 3;
         rm = modrm & 7;
         op = (modrm >> 3) & 7;
@@ -2085,10 +2094,10 @@
             gen_push_T0(s);
             gen_eob(s);
             break;
-        case 3: /*< lcall Ev */
+        case 3: /* lcall Ev */
             gen_op_ld_T1_A0[ot + s->mem_index]();
             gen_op_addl_A0_im(1 << (ot - OT_WORD + 1));
-            gen_op_ld_T0_A0[OT_WORD + s->mem_index]();
+            gen_op_ldu_T0_A0[OT_WORD + s->mem_index]();
         do_lcall:
             if (s->pe && !s->vm86) {
                 if (s->cc_op != CC_OP_DYNAMIC)
@@ -2109,7 +2118,7 @@
         case 5: /* ljmp Ev */
             gen_op_ld_T1_A0[ot + s->mem_index]();
             gen_op_addl_A0_im(1 << (ot - OT_WORD + 1));
-            gen_op_lduw_T0_A0();
+            gen_op_ldu_T0_A0[OT_WORD + s->mem_index]();
         do_ljmp:
             if (s->pe && !s->vm86) {
                 if (s->cc_op != CC_OP_DYNAMIC)
@@ -2138,7 +2147,7 @@
         else
             ot = dflag ? OT_LONG : OT_WORD;
 
-        modrm = ldub(s->pc++);
+        modrm = ldub_code(s->pc++);
         mod = (modrm >> 6) & 3;
         rm = modrm & 7;
         reg = (modrm >> 3) & 7;
@@ -2179,7 +2188,7 @@
     case 0x69: /* imul Gv, Ev, I */
     case 0x6b:
         ot = dflag ? OT_LONG : OT_WORD;
-        modrm = ldub(s->pc++);
+        modrm = ldub_code(s->pc++);
         reg = ((modrm >> 3) & 7) + OR_EAX;
         gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
         if (b == 0x69) {
@@ -2206,7 +2215,7 @@
             ot = OT_BYTE;
         else
             ot = dflag ? OT_LONG : OT_WORD;
-        modrm = ldub(s->pc++);
+        modrm = ldub_code(s->pc++);
         reg = (modrm >> 3) & 7;
         mod = (modrm >> 6) & 3;
         if (mod == 3) {
@@ -2233,7 +2242,7 @@
             ot = OT_BYTE;
         else
             ot = dflag ? OT_LONG : OT_WORD;
-        modrm = ldub(s->pc++);
+        modrm = ldub_code(s->pc++);
         reg = (modrm >> 3) & 7;
         mod = (modrm >> 6) & 3;
         gen_op_mov_TN_reg[ot][1][reg]();
@@ -2250,7 +2259,7 @@
         s->cc_op = CC_OP_SUBB + ot;
         break;
     case 0x1c7: /* cmpxchg8b */
-        modrm = ldub(s->pc++);
+        modrm = ldub_code(s->pc++);
         mod = (modrm >> 6) & 3;
         if (mod == 3)
             goto illegal_op;
@@ -2291,7 +2300,7 @@
         break;
     case 0x8f: /* pop Ev */
         ot = dflag ? OT_LONG : OT_WORD;
-        modrm = ldub(s->pc++);
+        modrm = ldub_code(s->pc++);
         gen_pop_T0(s);
         s->popl_esp_hack = 2 << dflag;
         gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1);
@@ -2301,9 +2310,9 @@
     case 0xc8: /* enter */
         {
             int level;
-            val = lduw(s->pc);
+            val = lduw_code(s->pc);
             s->pc += 2;
-            level = ldub(s->pc++);
+            level = ldub_code(s->pc++);
             gen_enter(s, val, level);
         }
         break;
@@ -2369,7 +2378,7 @@
             ot = OT_BYTE;
         else
             ot = dflag ? OT_LONG : OT_WORD;
-        modrm = ldub(s->pc++);
+        modrm = ldub_code(s->pc++);
         reg = (modrm >> 3) & 7;
         
         /* generate a generic store */
@@ -2381,7 +2390,7 @@
             ot = OT_BYTE;
         else
             ot = dflag ? OT_LONG : OT_WORD;
-        modrm = ldub(s->pc++);
+        modrm = ldub_code(s->pc++);
         mod = (modrm >> 6) & 3;
         if (mod != 3)
             gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
@@ -2398,14 +2407,14 @@
             ot = OT_BYTE;
         else
             ot = dflag ? OT_LONG : OT_WORD;
-        modrm = ldub(s->pc++);
+        modrm = ldub_code(s->pc++);
         reg = (modrm >> 3) & 7;
         
         gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
         gen_op_mov_reg_T0[ot][reg]();
         break;
     case 0x8e: /* mov seg, Gv */
-        modrm = ldub(s->pc++);
+        modrm = ldub_code(s->pc++);
         reg = (modrm >> 3) & 7;
         if (reg >= 6 || reg == R_CS)
             goto illegal_op;
@@ -2422,7 +2431,7 @@
         }
         break;
     case 0x8c: /* mov Gv, seg */
-        modrm = ldub(s->pc++);
+        modrm = ldub_code(s->pc++);
         reg = (modrm >> 3) & 7;
         mod = (modrm >> 6) & 3;
         if (reg >= 6)
@@ -2444,7 +2453,7 @@
             d_ot = dflag + OT_WORD;
             /* ot is the size of source */
             ot = (b & 1) + OT_BYTE;
-            modrm = ldub(s->pc++);
+            modrm = ldub_code(s->pc++);
             reg = ((modrm >> 3) & 7) + OR_EAX;
             mod = (modrm >> 6) & 3;
             rm = modrm & 7;
@@ -2481,7 +2490,7 @@
 
     case 0x8d: /* lea */
         ot = dflag ? OT_LONG : OT_WORD;
-        modrm = ldub(s->pc++);
+        modrm = ldub_code(s->pc++);
         reg = (modrm >> 3) & 7;
         /* we must ensure that no segment is added */
         s->override = -1;
@@ -2574,7 +2583,7 @@
             ot = OT_BYTE;
         else
             ot = dflag ? OT_LONG : OT_WORD;
-        modrm = ldub(s->pc++);
+        modrm = ldub_code(s->pc++);
         reg = (modrm >> 3) & 7;
         mod = (modrm >> 6) & 3;
         if (mod == 3) {
@@ -2613,7 +2622,7 @@
         op = R_GS;
     do_lxx:
         ot = dflag ? OT_LONG : OT_WORD;
-        modrm = ldub(s->pc++);
+        modrm = ldub_code(s->pc++);
         reg = (modrm >> 3) & 7;
         mod = (modrm >> 6) & 3;
         if (mod == 3)
@@ -2622,7 +2631,7 @@
         gen_op_ld_T1_A0[ot + s->mem_index]();
         gen_op_addl_A0_im(1 << (ot - OT_WORD + 1));
         /* load the segment first to handle exceptions properly */
-        gen_op_lduw_T0_A0();
+        gen_op_ldu_T0_A0[OT_WORD + s->mem_index]();
         gen_movl_seg_T0(s, op, pc_start - s->cs_base);
         /* then put the data */
         gen_op_mov_reg_T1[ot][reg]();
@@ -2645,7 +2654,7 @@
             else
                 ot = dflag ? OT_LONG : OT_WORD;
             
-            modrm = ldub(s->pc++);
+            modrm = ldub_code(s->pc++);
             mod = (modrm >> 6) & 3;
             rm = modrm & 7;
             op = (modrm >> 3) & 7;
@@ -2662,7 +2671,7 @@
                 gen_shift(s, op, ot, opreg, OR_ECX);
             } else {
                 if (shift == 2) {
-                    shift = ldub(s->pc++);
+                    shift = ldub_code(s->pc++);
                 }
                 gen_shifti(s, op, ot, opreg, shift);
             }
@@ -2696,7 +2705,7 @@
         shift = 0;
     do_shiftd:
         ot = dflag ? OT_LONG : OT_WORD;
-        modrm = ldub(s->pc++);
+        modrm = ldub_code(s->pc++);
         mod = (modrm >> 6) & 3;
         rm = modrm & 7;
         reg = (modrm >> 3) & 7;
@@ -2710,7 +2719,7 @@
         gen_op_mov_TN_reg[ot][1][reg]();
         
         if (shift) {
-            val = ldub(s->pc++);
+            val = ldub_code(s->pc++);
             val &= 0x1f;
             if (val) {
                 if (mod == 3)
@@ -2739,7 +2748,7 @@
         /************************/
         /* floats */
     case 0xd8 ... 0xdf: 
-        modrm = ldub(s->pc++);
+        modrm = ldub_code(s->pc++);
         mod = (modrm >> 6) & 3;
         rm = modrm & 7;
         op = ((b & 7) << 3) | ((modrm >> 3) & 7);
@@ -3256,7 +3265,7 @@
                 ot = OT_BYTE;
             else
                 ot = dflag ? OT_LONG : OT_WORD;
-            val = ldub(s->pc++);
+            val = ldub_code(s->pc++);
             gen_op_movl_T0_im(val);
             gen_op_in[ot]();
             gen_op_mov_reg_T1[ot][R_EAX]();
@@ -3271,7 +3280,7 @@
                 ot = OT_BYTE;
             else
                 ot = dflag ? OT_LONG : OT_WORD;
-            val = ldub(s->pc++);
+            val = ldub_code(s->pc++);
             gen_op_movl_T0_im(val);
             gen_op_mov_TN_reg[ot][1][R_EAX]();
             gen_op_out[ot]();
@@ -3309,7 +3318,7 @@
         /************************/
         /* control */
     case 0xc2: /* ret im */
-        val = ldsw(s->pc);
+        val = ldsw_code(s->pc);
         s->pc += 2;
         gen_pop_T0(s);
         gen_stack_update(s, val + (2 << s->dflag));
@@ -3327,7 +3336,7 @@
         gen_eob(s);
         break;
     case 0xca: /* lret im */
-        val = ldsw(s->pc);
+        val = ldsw_code(s->pc);
         s->pc += 2;
     do_lret:
         if (s->pe && !s->vm86) {
@@ -3443,13 +3452,13 @@
         break;
 
     case 0x190 ... 0x19f: /* setcc Gv */
-        modrm = ldub(s->pc++);
+        modrm = ldub_code(s->pc++);
         gen_setcc(s, b);
         gen_ldst_modrm(s, modrm, OT_BYTE, OR_TMP0, 1);
         break;
     case 0x140 ... 0x14f: /* cmov Gv, Ev */
         ot = dflag ? OT_LONG : OT_WORD;
-        modrm = ldub(s->pc++);
+        modrm = ldub_code(s->pc++);
         reg = (modrm >> 3) & 7;
         mod = (modrm >> 6) & 3;
         gen_setcc(s, b);
@@ -3542,7 +3551,7 @@
         /* bit operations */
     case 0x1ba: /* bt/bts/btr/btc Gv, im */
         ot = dflag ? OT_LONG : OT_WORD;
-        modrm = ldub(s->pc++);
+        modrm = ldub_code(s->pc++);
         op = (modrm >> 3) & 7;
         mod = (modrm >> 6) & 3;
         rm = modrm & 7;
@@ -3553,7 +3562,7 @@
             gen_op_mov_TN_reg[ot][0][rm]();
         }
         /* load shift */
-        val = ldub(s->pc++);
+        val = ldub_code(s->pc++);
         gen_op_movl_T1_im(val);
         if (op < 4)
             goto illegal_op;
@@ -3581,7 +3590,7 @@
         op = 3;
     do_btx:
         ot = dflag ? OT_LONG : OT_WORD;
-        modrm = ldub(s->pc++);
+        modrm = ldub_code(s->pc++);
         reg = (modrm >> 3) & 7;
         mod = (modrm >> 6) & 3;
         rm = modrm & 7;
@@ -3610,7 +3619,7 @@
     case 0x1bc: /* bsf */
     case 0x1bd: /* bsr */
         ot = dflag ? OT_LONG : OT_WORD;
-        modrm = ldub(s->pc++);
+        modrm = ldub_code(s->pc++);
         reg = (modrm >> 3) & 7;
         gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
         gen_op_bsx_T0_cc[ot - OT_WORD][b & 1]();
@@ -3646,12 +3655,12 @@
         s->cc_op = CC_OP_EFLAGS;
         break;
     case 0xd4: /* aam */
-        val = ldub(s->pc++);
+        val = ldub_code(s->pc++);
         gen_op_aam(val);
         s->cc_op = CC_OP_LOGICB;
         break;
     case 0xd5: /* aad */
-        val = ldub(s->pc++);
+        val = ldub_code(s->pc++);
         gen_op_aad(val);
         s->cc_op = CC_OP_LOGICB;
         break;
@@ -3665,7 +3674,7 @@
         gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base);
         break;
     case 0xcd: /* int N */
-        val = ldub(s->pc++);
+        val = ldub_code(s->pc++);
         /* XXX: add error code for vm86 GPF */
         if (!s->vm86)
             gen_interrupt(s, val, pc_start - s->cs_base, s->pc - s->cs_base);
@@ -3718,7 +3727,7 @@
         break;
     case 0x62: /* bound */
         ot = dflag ? OT_LONG : OT_WORD;
-        modrm = ldub(s->pc++);
+        modrm = ldub_code(s->pc++);
         reg = (modrm >> 3) & 7;
         mod = (modrm >> 6) & 3;
         if (mod == 3)
@@ -3785,7 +3794,7 @@
         }
         break;
     case 0x100:
-        modrm = ldub(s->pc++);
+        modrm = ldub_code(s->pc++);
         mod = (modrm >> 6) & 3;
         op = (modrm >> 3) & 7;
         switch(op) {
@@ -3828,7 +3837,7 @@
         }
         break;
     case 0x101:
-        modrm = ldub(s->pc++);
+        modrm = ldub_code(s->pc++);
         mod = (modrm >> 6) & 3;
         op = (modrm >> 3) & 7;
         switch(op) {
@@ -3904,7 +3913,7 @@
         if (!s->pe || s->vm86)
             goto illegal_op;
         ot = dflag ? OT_LONG : OT_WORD;
-        modrm = ldub(s->pc++);
+        modrm = ldub_code(s->pc++);
         reg = (modrm >> 3) & 7;
         gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
         gen_op_mov_TN_reg[ot][1][reg]();
@@ -3918,7 +3927,7 @@
         gen_op_mov_reg_T1[ot][reg]();
         break;
     case 0x118:
-        modrm = ldub(s->pc++);
+        modrm = ldub_code(s->pc++);
         mod = (modrm >> 6) & 3;
         op = (modrm >> 3) & 7;
         switch(op) {
@@ -3940,7 +3949,7 @@
         if (s->cpl != 0) {
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
-            modrm = ldub(s->pc++);
+            modrm = ldub_code(s->pc++);
             if ((modrm & 0xc0) != 0xc0)
                 goto illegal_op;
             rm = modrm & 7;
@@ -3970,7 +3979,7 @@
         if (s->cpl != 0) {
             gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base);
         } else {
-            modrm = ldub(s->pc++);
+            modrm = ldub_code(s->pc++);
             if ((modrm & 0xc0) != 0xc0)
                 goto illegal_op;
             rm = modrm & 7;