ARM VFP support (Paul Brook)


git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@1309 c046a42c-6fe2-441c-8c8c-71466251a162
diff --git a/Changelog b/Changelog
index 2d1c067..8898eba 100644
--- a/Changelog
+++ b/Changelog
@@ -15,6 +15,7 @@
   - PC parallel port support (Mark Jonckheere)
   - initial SPARC64 support (Blue Swirl)
   - armv5te user mode support (Paul Brook)
+  - ARM VFP support (Paul Brook)
 
 version 0.6.1:
 
diff --git a/Makefile.target b/Makefile.target
index 5ed2b92..25f4719 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -259,6 +259,10 @@
 LIBOBJS+= op_helper.o helper.o
 endif
 
+ifeq ($(TARGET_BASE_ARCH), arm)
+LIBOBJS+= op_helper.o
+endif
+
 # NOTE: the disassembler code is only needed for debugging
 LIBOBJS+=disas.o 
 ifeq ($(findstring i386, $(TARGET_ARCH) $(ARCH)),i386)
diff --git a/cpu-exec.c b/cpu-exec.c
index 835ca42..94c35e3 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -346,7 +346,8 @@
                 cs_base = env->segs[R_CS].base;
                 pc = cs_base + env->eip;
 #elif defined(TARGET_ARM)
-                flags = env->thumb;
+                flags = env->thumb | (env->vfp.vec_len << 1)
+                        | (env->vfp.vec_stride << 4);
                 cs_base = 0;
                 pc = env->regs[15];
 #elif defined(TARGET_SPARC)
@@ -619,6 +620,7 @@
 #endif
 #elif defined(TARGET_ARM)
     env->cpsr = compute_cpsr();
+    /* XXX: Save/restore host fpu exception state?.  */
 #elif defined(TARGET_SPARC)
 #elif defined(TARGET_PPC)
 #else
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index c0994c0..a5f4f94 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -29,6 +29,14 @@
 #define EXCP_PREFETCH_ABORT  3
 #define EXCP_DATA_ABORT      4
 
+/* We currently assume float and double are IEEE single and double
+   precision respectively.
+   Doing runtime conversions is tricky because VFP registers may contain
+   integer values (eg. as the result of a FTOSI instruction).
+   A double precision register load/store must also load/store the
+   corresponding single precision pair, although it is undefined how
+   these overlap.  */
+
 typedef struct CPUARMState {
     uint32_t regs[16];
     uint32_t cpsr;
@@ -50,6 +58,7 @@
     int interrupt_request;
     struct TranslationBlock *current_tb;
     int user_mode_only;
+    uint32_t address;
 
     /* in order to avoid passing too many arguments to the memory
        write helpers, we store some rarely used information in the CPU
@@ -58,6 +67,25 @@
                                    written */
     unsigned long mem_write_vaddr; /* target virtual addr at which the
                                       memory was written */
+    /* VFP coprocessor state.  */
+    struct {
+        union {
+            float s[32];
+            double d[16];
+        } regs;
+
+        /* We store these fpcsr fields separately for convenience.  */
+        int vec_len;
+        int vec_stride;
+
+        uint32_t fpscr;
+
+        /* Temporary variables if we don't have spare fp regs.  */
+        float tmp0s, tmp1s;
+        double tmp0d, tmp1d;
+
+    } vfp;
+
     /* user data */
     void *opaque;
 } CPUARMState;
diff --git a/target-arm/exec.h b/target-arm/exec.h
index e17302e..64fce71 100644
--- a/target-arm/exec.h
+++ b/target-arm/exec.h
@@ -24,13 +24,16 @@
 register uint32_t T1 asm(AREG2);
 register uint32_t T2 asm(AREG3);
 
+/* TODO: Put these in FP regs on targets that have such things.  */
+/* It is ok for FT0s and FT0d to overlap.  Likewise FT1s and FT1d.  */
+#define FT0s env->vfp.tmp0s
+#define FT1s env->vfp.tmp1s
+#define FT0d env->vfp.tmp0d
+#define FT1d env->vfp.tmp1d
+
 #include "cpu.h"
 #include "exec-all.h"
 
-void cpu_lock(void);
-void cpu_unlock(void);
-void cpu_loop_exit(void);
-
 /* Implemented CPSR bits.  */
 #define CACHED_CPSR_BITS 0xf8000000
 static inline int compute_cpsr(void)
@@ -51,3 +54,24 @@
 
 int cpu_arm_handle_mmu_fault (CPUState *env, target_ulong address, int rw,
                               int is_user, int is_softmmu);
+
+/* In op_helper.c */
+
+void cpu_lock(void);
+void cpu_unlock(void);
+void cpu_loop_exit(void);
+
+void raise_exception(int);
+
+void do_vfp_abss(void);
+void do_vfp_absd(void);
+void do_vfp_negs(void);
+void do_vfp_negd(void);
+void do_vfp_sqrts(void);
+void do_vfp_sqrtd(void);
+void do_vfp_cmps(void);
+void do_vfp_cmpd(void);
+void do_vfp_cmpes(void);
+void do_vfp_cmped(void);
+void do_vfp_set_fpscr(void);
+void do_vfp_get_fpscr(void);
diff --git a/target-arm/op.c b/target-arm/op.c
index 5618834..6c608f1 100644
--- a/target-arm/op.c
+++ b/target-arm/op.c
@@ -2,6 +2,7 @@
  *  ARM micro operations
  * 
  *  Copyright (c) 2003 Fabrice Bellard
+ *  Copyright (c) 2005 CodeSourcery, LLC
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -857,17 +858,259 @@
     cpu_loop_exit();
 }
 
-/* thread support */
+/* VFP support.  We follow the convention used for VFP instrunctions:
+   Single precition routines have a "s" suffix, double precision a
+   "d" suffix.  */
 
-spinlock_t global_cpu_lock = SPIN_LOCK_UNLOCKED;
+#define VFP_OP(name, p) void OPPROTO op_vfp_##name##p(void)
 
-void cpu_lock(void)
+#define VFP_BINOP(name, op) \
+VFP_OP(name, s)             \
+{                           \
+    FT0s = FT0s op FT1s;    \
+}                           \
+VFP_OP(name, d)             \
+{                           \
+    FT0d = FT0d op FT1d;    \
+}
+VFP_BINOP(add, +)
+VFP_BINOP(sub, -)
+VFP_BINOP(mul, *)
+VFP_BINOP(div, /)
+#undef VFP_BINOP
+
+#define VFP_HELPER(name)  \
+VFP_OP(name, s)           \
+{                         \
+    do_vfp_##name##s();    \
+}                         \
+VFP_OP(name, d)           \
+{                         \
+    do_vfp_##name##d();    \
+}
+VFP_HELPER(abs)
+VFP_HELPER(sqrt)
+VFP_HELPER(cmp)
+VFP_HELPER(cmpe)
+#undef VFP_HELPER
+
+/* XXX: Will this do the right thing for NANs.  Should invert the signbit
+   without looking at the rest of the value.  */
+VFP_OP(neg, s)
 {
-    spin_lock(&global_cpu_lock);
+    FT0s = -FT0s;
 }
 
-void cpu_unlock(void)
+VFP_OP(neg, d)
 {
-    spin_unlock(&global_cpu_lock);
+    FT0d = -FT0d;
 }
 
+VFP_OP(F1_ld0, s)
+{
+    FT1s = 0.0f;
+}
+
+VFP_OP(F1_ld0, d)
+{
+    FT1d = 0.0;
+}
+
+/* Helper routines to perform bitwise copies between float and int.  */
+static inline float vfp_itos(uint32_t i)
+{
+    union {
+        uint32_t i;
+        float s;
+    } v;
+
+    v.i = i;
+    return v.s;
+}
+
+static inline uint32_t vfp_stoi(float s)
+{
+    union {
+        uint32_t i;
+        float s;
+    } v;
+
+    v.s = s;
+    return v.i;
+}
+
+/* Integer to float conversion.  */
+VFP_OP(uito, s)
+{
+    FT0s = (float)(uint32_t)vfp_stoi(FT0s);
+}
+
+VFP_OP(uito, d)
+{
+    FT0d = (double)(uint32_t)vfp_stoi(FT0s);
+}
+
+VFP_OP(sito, s)
+{
+    FT0s = (float)(int32_t)vfp_stoi(FT0s);
+}
+
+VFP_OP(sito, d)
+{
+    FT0d = (double)(int32_t)vfp_stoi(FT0s);
+}
+
+/* Float to integer conversion.  */
+VFP_OP(toui, s)
+{
+    FT0s = vfp_itos((uint32_t)FT0s);
+}
+
+VFP_OP(toui, d)
+{
+    FT0s = vfp_itos((uint32_t)FT0d);
+}
+
+VFP_OP(tosi, s)
+{
+    FT0s = vfp_itos((int32_t)FT0s);
+}
+
+VFP_OP(tosi, d)
+{
+    FT0s = vfp_itos((int32_t)FT0d);
+}
+
+/* TODO: Set rounding mode properly.  */
+VFP_OP(touiz, s)
+{
+    FT0s = vfp_itos((uint32_t)FT0s);
+}
+
+VFP_OP(touiz, d)
+{
+    FT0s = vfp_itos((uint32_t)FT0d);
+}
+
+VFP_OP(tosiz, s)
+{
+    FT0s = vfp_itos((int32_t)FT0s);
+}
+
+VFP_OP(tosiz, d)
+{
+    FT0s = vfp_itos((int32_t)FT0d);
+}
+
+/* floating point conversion */
+VFP_OP(fcvtd, s)
+{
+    FT0d = (double)FT0s;
+}
+
+VFP_OP(fcvts, d)
+{
+    FT0s = (float)FT0d;
+}
+
+/* Get and Put values from registers.  */
+VFP_OP(getreg_F0, d)
+{
+  FT0d = *(double *)((char *) env + PARAM1);
+}
+
+VFP_OP(getreg_F0, s)
+{
+  FT0s = *(float *)((char *) env + PARAM1);
+}
+
+VFP_OP(getreg_F1, d)
+{
+  FT1d = *(double *)((char *) env + PARAM1);
+}
+
+VFP_OP(getreg_F1, s)
+{
+  FT1s = *(float *)((char *) env + PARAM1);
+}
+
+VFP_OP(setreg_F0, d)
+{
+  *(double *)((char *) env + PARAM1) = FT0d;
+}
+
+VFP_OP(setreg_F0, s)
+{
+  *(float *)((char *) env + PARAM1) = FT0s;
+}
+
+VFP_OP(foobar, d)
+{
+  FT0d = env->vfp.regs.s[3];
+}
+
+void OPPROTO op_vfp_movl_T0_fpscr(void)
+{
+    do_vfp_get_fpscr ();
+}
+
+void OPPROTO op_vfp_movl_T0_fpscr_flags(void)
+{
+    T0 = env->vfp.fpscr & (0xf << 28);
+}
+
+void OPPROTO op_vfp_movl_fpscr_T0(void)
+{
+    do_vfp_set_fpscr();
+}
+
+/* Move between FT0s to T0  */
+void OPPROTO op_vfp_mrs(void)
+{
+    T0 = vfp_stoi(FT0s);
+}
+
+void OPPROTO op_vfp_msr(void)
+{
+    FT0s = vfp_itos(T0);
+}
+
+/* Move between FT0d and {T0,T1} */
+void OPPROTO op_vfp_mrrd(void)
+{
+    CPU_DoubleU u;
+    
+    u.d = FT0d;
+    T0 = u.l.lower;
+    T1 = u.l.upper;
+}
+
+void OPPROTO op_vfp_mdrr(void)
+{
+    CPU_DoubleU u;
+    
+    u.l.lower = T0;
+    u.l.upper = T1;
+    FT0d = u.d;
+}
+
+/* Floating point load/store.  Address is in T1 */
+void OPPROTO op_vfp_lds(void)
+{
+    FT0s = ldfl((void *)T1);
+}
+
+void OPPROTO op_vfp_ldd(void)
+{
+    FT0d = ldfq((void *)T1);
+}
+
+void OPPROTO op_vfp_sts(void)
+{
+    stfl((void *)T1, FT0s);
+}
+
+void OPPROTO op_vfp_std(void)
+{
+    stfq((void *)T1, FT0d);
+}
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
new file mode 100644
index 0000000..38c97fe
--- /dev/null
+++ b/target-arm/op_helper.c
@@ -0,0 +1,229 @@
+/*
+ *  ARM helper routines
+ * 
+ *  Copyright (c) 2005 CodeSourcery, LLC
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <math.h>
+#include <fenv.h>
+#include "exec.h"
+
+/* If the host doesn't define C99 math intrinsics then use the normal
+   operators.  This may generate excess exceptions, but it's probably
+   near enough for most things.  */
+#ifndef isless
+#define isless(x, y) (x < y)
+#endif
+#ifndef isgreater
+#define isgreater(x, y) (x > y)
+#endif
+#ifndef isunordered
+#define isunordered(x, y) (!((x < y) || (x >= y)))
+#endif
+
+void raise_exception(int tt)
+{
+    env->exception_index = tt;
+    cpu_loop_exit();
+}
+
+/* thread support */
+
+spinlock_t global_cpu_lock = SPIN_LOCK_UNLOCKED;
+
+void cpu_lock(void)
+{
+    spin_lock(&global_cpu_lock);
+}
+
+void cpu_unlock(void)
+{
+    spin_unlock(&global_cpu_lock);
+}
+
+/* VFP support.  */
+
+void do_vfp_abss(void)
+{
+  FT0s = fabsf(FT0s);
+}
+
+void do_vfp_absd(void)
+{
+  FT0d = fabs(FT0d);
+}
+
+void do_vfp_sqrts(void)
+{
+  FT0s = sqrtf(FT0s);
+}
+
+void do_vfp_sqrtd(void)
+{
+  FT0d = sqrt(FT0d);
+}
+
+/* We use an == operator first to generate teh correct floating point
+   exception.  Subsequent comparisons use the exception-safe macros.  */
+#define DO_VFP_cmp(p)                     \
+void do_vfp_cmp##p(void)                  \
+{                                         \
+    uint32_t flags;                       \
+    if (FT0##p == FT1##p)                 \
+        flags = 0xc;                      \
+    else if (isless (FT0##p, FT1##p))     \
+        flags = 0x8;                      \
+    else if (isgreater (FT0##p, FT1##p))  \
+        flags = 0x2;                      \
+    else /* unordered */                  \
+        flags = 0x3;                      \
+    env->vfp.fpscr = (flags << 28) | (env->vfp.fpscr & 0x0fffffff); \
+    FORCE_RET();                          \
+}
+DO_VFP_cmp(s)
+DO_VFP_cmp(d)
+#undef DO_VFP_cmp
+
+/* We use a > operator first to get FP exceptions right.  */
+#define DO_VFP_cmpe(p)                      \
+void do_vfp_cmpe##p(void)                   \
+{                                           \
+    uint32_t flags;                         \
+    if (FT0##p > FT1##p)                    \
+        flags = 0x2;                        \
+    else if (isless (FT0##p, FT1##p))       \
+        flags = 0x8;                        \
+    else if (isunordered (FT0##p, FT1##p))  \
+        flags = 0x3;                        \
+    else /* equal */                        \
+        flags = 0xc;                        \
+    env->vfp.fpscr = (flags << 28) | (env->vfp.fpscr & 0x0fffffff); \
+    FORCE_RET();                            \
+}
+DO_VFP_cmpe(s)
+DO_VFP_cmpe(d)
+#undef DO_VFP_cmpe
+
+/* Convert host exception flags to vfp form.  */
+int vfp_exceptbits_from_host(int host_bits)
+{
+    int target_bits = 0;
+
+#ifdef FE_INVALID
+    if (host_bits & FE_INVALID)
+        target_bits |= 1;
+#endif
+#ifdef FE_DIVBYZERO
+    if (host_bits & FE_DIVBYZERO)
+        target_bits |= 2;
+#endif
+#ifdef FE_OVERFLOW
+    if (host_bits & FE_OVERFLOW)
+        target_bits |= 4;
+#endif
+#ifdef FE_UNDERFLOW
+    if (host_bits & FE_UNDERFLOW)
+        target_bits |= 8;
+#endif
+#ifdef FE_INEXACT
+    if (host_bits & FE_INEXACT)
+        target_bits |= 0x10;
+#endif
+    /* C doesn't define an inexact exception.  */
+    return target_bits;
+}
+
+/* Convert vfp exception flags to target form.  */
+int vfp_host_exceptbits_to_host(int target_bits)
+{
+    int host_bits = 0;
+
+#ifdef FE_INVALID
+    if (target_bits & 1)
+        host_bits |= FE_INVALID;
+#endif
+#ifdef FE_DIVBYZERO
+    if (target_bits & 2)
+        host_bits |= FE_DIVBYZERO;
+#endif
+#ifdef FE_OVERFLOW
+    if (target_bits & 4)
+        host_bits |= FE_OVERFLOW;
+#endif
+#ifdef FE_UNDERFLOW
+    if (target_bits & 8)
+        host_bits |= FE_UNDERFLOW;
+#endif
+#ifdef FE_INEXACT
+    if (target_bits & 0x10)
+        host_bits |= FE_INEXACT;
+#endif
+    return host_bits;
+}
+
+void do_vfp_set_fpscr(void)
+{
+    int i;
+    uint32_t changed;
+
+    changed = env->vfp.fpscr;
+    env->vfp.fpscr = (T0 & 0xffc8ffff);
+    env->vfp.vec_len = (T0 >> 16) & 7;
+    env->vfp.vec_stride = (T0 >> 20) & 3;
+
+    changed ^= T0;
+    if (changed & (3 << 22)) {
+        i = (T0 >> 22) & 3;
+        switch (i) {
+        case 0:
+            i = FE_TONEAREST;
+            break;
+        case 1:
+            i = FE_UPWARD;
+            break;
+        case 2:
+            i = FE_DOWNWARD;
+            break;
+        case 3:
+            i = FE_TOWARDZERO;
+            break;
+        }
+        fesetround (i);
+    }
+
+    /* Clear host exception flags.  */
+    feclearexcept(FE_ALL_EXCEPT);
+
+#ifdef feenableexcept
+    if (changed & 0x1f00) {
+        i = vfp_exceptbits_to_host((T0 >> 8) & 0x1f);
+        feenableexcept (i);
+        fedisableexcept (FE_ALL_EXCEPT & ~i);
+    }
+#endif
+    /* XXX: FZ and DN are not implemented.  */
+}
+
+void do_vfp_get_fpscr(void)
+{
+    int i;
+
+    T0 = (env->vfp.fpscr & 0xffc8ffff) | (env->vfp.vec_len << 16)
+          | (env->vfp.vec_stride << 20);
+    i = fetestexcept(FE_ALL_EXCEPT);
+    T0 |= vfp_exceptbits_from_host(i);
+}
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 2eb325e..db38e33 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -2,6 +2,7 @@
  *  ARM translation
  * 
  *  Copyright (c) 2003 Fabrice Bellard
+ *  Copyright (c) 2005 CodeSourcery, LLC
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -354,7 +355,527 @@
     }
 }
 
-static void disas_arm_insn(DisasContext *s)
+#define VFP_OP(name)                      \
+static inline void gen_vfp_##name(int dp) \
+{                                         \
+    if (dp)                               \
+        gen_op_vfp_##name##d();           \
+    else                                  \
+        gen_op_vfp_##name##s();           \
+}
+
+VFP_OP(add)
+VFP_OP(sub)
+VFP_OP(mul)
+VFP_OP(div)
+VFP_OP(neg)
+VFP_OP(abs)
+VFP_OP(sqrt)
+VFP_OP(cmp)
+VFP_OP(cmpe)
+VFP_OP(F1_ld0)
+VFP_OP(uito)
+VFP_OP(sito)
+VFP_OP(toui)
+VFP_OP(touiz)
+VFP_OP(tosi)
+VFP_OP(tosiz)
+VFP_OP(ld)
+VFP_OP(st)
+
+#undef VFP_OP
+
+static inline void gen_mov_F0_vreg(int dp, int reg)
+{
+    if (dp)
+        gen_op_vfp_getreg_F0d(offsetof(CPUARMState, vfp.regs.d[reg]));
+    else
+        gen_op_vfp_getreg_F0s(offsetof(CPUARMState, vfp.regs.s[reg]));
+}
+
+static inline void gen_mov_F1_vreg(int dp, int reg)
+{
+    if (dp)
+        gen_op_vfp_getreg_F1d(offsetof(CPUARMState, vfp.regs.d[reg]));
+    else
+        gen_op_vfp_getreg_F1s(offsetof(CPUARMState, vfp.regs.s[reg]));
+}
+
+static inline void gen_mov_vreg_F0(int dp, int reg)
+{
+    if (dp)
+        gen_op_vfp_setreg_F0d(offsetof(CPUARMState, vfp.regs.d[reg]));
+    else
+        gen_op_vfp_setreg_F0s(offsetof(CPUARMState, vfp.regs.s[reg]));
+}
+
+/* Disassemble a VFP instruction.  Returns nonzero if an error occured
+   (ie. an undefined instruction).  */
+static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
+{
+    uint32_t rd, rn, rm, op, i, n, offset, delta_d, delta_m, bank_mask;
+    int dp, veclen;
+
+    dp = ((insn & 0xf00) == 0xb00);
+    switch ((insn >> 24) & 0xf) {
+    case 0xe:
+        if (insn & (1 << 4)) {
+            /* single register transfer */
+            if ((insn & 0x6f) != 0x00)
+                return 1;
+            rd = (insn >> 12) & 0xf;
+            if (dp) {
+                if (insn & 0x80)
+                    return 1;
+                rn = (insn >> 16) & 0xf;
+                /* Get the existing value even for arm->vfp moves because
+                   we only set half the register.  */
+                gen_mov_F0_vreg(1, rn);
+                gen_op_vfp_mrrd();
+                if (insn & (1 << 20)) {
+                    /* vfp->arm */
+                    if (insn & (1 << 21))
+                        gen_movl_reg_T1(s, rd);
+                    else
+                        gen_movl_reg_T0(s, rd);
+                } else {
+                    /* arm->vfp */
+                    if (insn & (1 << 21))
+                        gen_movl_T1_reg(s, rd);
+                    else
+                        gen_movl_T0_reg(s, rd);
+                    gen_op_vfp_mdrr();
+                    gen_mov_vreg_F0(dp, rn);
+                }
+            } else {
+                rn = ((insn >> 15) & 0x1e) | ((insn >> 7) & 1);
+                if (insn & (1 << 20)) {
+                    /* vfp->arm */
+                    if (insn & (1 << 21)) {
+                        /* system register */
+                        switch (rn) {
+                        case 0: /* fpsid */
+                            n = 0x0091A0000;
+                            break;
+                        case 2: /* fpscr */
+			    if (rd == 15)
+				gen_op_vfp_movl_T0_fpscr_flags();
+			    else
+				gen_op_vfp_movl_T0_fpscr();
+                            break;
+                        default:
+                            return 1;
+                        }
+                    } else {
+                        gen_mov_F0_vreg(0, rn);
+                        gen_op_vfp_mrs();
+                    }
+                    if (rd == 15) {
+                        /* This will only set the 4 flag bits */
+                        gen_op_movl_psr_T0();
+                    } else
+                        gen_movl_reg_T0(s, rd);
+                } else {
+                    /* arm->vfp */
+                    gen_movl_T0_reg(s, rd);
+                    if (insn & (1 << 21)) {
+                        /* system register */
+                        switch (rn) {
+                        case 0: /* fpsid */
+                            /* Writes are ignored.  */
+                            break;
+                        case 2: /* fpscr */
+                            gen_op_vfp_movl_fpscr_T0();
+                            /* This could change vector settings, so jump to
+                               the next instuction.  */
+                            gen_op_movl_T0_im(s->pc);
+                            gen_movl_reg_T0(s, 15);
+                            s->is_jmp = DISAS_UPDATE;
+                            break;
+                        default:
+                            return 1;
+                        }
+                    } else {
+                        gen_op_vfp_msr();
+                        gen_mov_vreg_F0(0, rn);
+                    }
+                }
+            }
+        } else {
+            /* data processing */
+            /* The opcode is in bits 23, 21, 20 and 6.  */
+            op = ((insn >> 20) & 8) | ((insn >> 19) & 6) | ((insn >> 6) & 1);
+            if (dp) {
+                if (op == 15) {
+                    /* rn is opcode */
+                    rn = ((insn >> 15) & 0x1e) | ((insn >> 7) & 1);
+                } else {
+                    /* rn is register number */
+                    if (insn & (1 << 7))
+                        return 1;
+                    rn = (insn >> 16) & 0xf;
+                }
+
+                if (op == 15 && (rn == 15 || rn > 17)) {
+                    /* Integer or single precision destination.  */
+                    rd = ((insn >> 11) & 0x1e) | ((insn >> 22) & 1);
+                } else {
+                    if (insn & (1 << 22))
+                        return 1;
+                    rd = (insn >> 12) & 0xf;
+                }
+
+                if (op == 15 && (rn == 16 || rn == 17)) {
+                    /* Integer source.  */
+                    rm = ((insn << 1) & 0x1e) | ((insn >> 5) & 1);
+                } else {
+                    if (insn & (1 << 5))
+                        return 1;
+                    rm = insn & 0xf;
+                }
+            } else {
+                rn = ((insn >> 15) & 0x1e) | ((insn >> 7) & 1);
+                if (op == 15 && rn == 15) {
+                    /* Double precision destination.  */
+                    if (insn & (1 << 22))
+                        return 1;
+                    rd = (insn >> 12) & 0xf;
+                } else
+                    rd = ((insn >> 11) & 0x1e) | ((insn >> 22) & 1);
+                rm = ((insn << 1) & 0x1e) | ((insn >> 5) & 1);
+            }
+
+            veclen = env->vfp.vec_len;
+            if (op == 15 && rn > 3)
+                veclen = 0;
+
+            /* Shut up compiler warnings.  */
+            delta_m = 0;
+            delta_d = 0;
+            bank_mask = 0;
+            
+            if (veclen > 0) {
+                if (dp)
+                    bank_mask = 0xc;
+                else
+                    bank_mask = 0x18;
+
+                /* Figure out what type of vector operation this is.  */
+                if ((rd & bank_mask) == 0) {
+                    /* scalar */
+                    veclen = 0;
+                } else {
+                    if (dp)
+                        delta_d = (env->vfp.vec_stride >> 1) + 1;
+                    else
+                        delta_d = env->vfp.vec_stride + 1;
+
+                    if ((rm & bank_mask) == 0) {
+                        /* mixed scalar/vector */
+                        delta_m = 0;
+                    } else {
+                        /* vector */
+                        delta_m = delta_d;
+                    }
+                }
+            }
+
+            /* Load the initial operands.  */
+            if (op == 15) {
+                switch (rn) {
+                case 16:
+                case 17:
+                    /* Integer source */
+                    gen_mov_F0_vreg(0, rm);
+                    break;
+                case 8:
+                case 9:
+                    /* Compare */
+                    gen_mov_F0_vreg(dp, rd);
+                    gen_mov_F1_vreg(dp, rm);
+                    break;
+                case 10:
+                case 11:
+                    /* Compare with zero */
+                    gen_mov_F0_vreg(dp, rd);
+                    gen_vfp_F1_ld0(dp);
+                    break;
+                default:
+                    /* One source operand.  */
+                    gen_mov_F0_vreg(dp, rm);
+                }
+            } else {
+                /* Two source operands.  */
+                gen_mov_F0_vreg(dp, rn);
+                gen_mov_F1_vreg(dp, rm);
+            }
+
+            for (;;) {
+                /* Perform the calculation.  */
+                switch (op) {
+                case 0: /* mac: fd + (fn * fm) */
+                    gen_vfp_mul(dp);
+                    gen_mov_F1_vreg(dp, rd);
+                    gen_vfp_add(dp);
+                    break;
+                case 1: /* nmac: fd - (fn * fm) */
+                    gen_vfp_mul(dp);
+                    gen_vfp_neg(dp);
+                    gen_mov_F1_vreg(dp, rd);
+                    gen_vfp_add(dp);
+                    break;
+                case 2: /* msc: -fd + (fn * fm) */
+                    gen_vfp_mul(dp);
+                    gen_mov_F1_vreg(dp, rd);
+                    gen_vfp_sub(dp);
+                    break;
+                case 3: /* nmsc: -fd - (fn * fm)  */
+                    gen_vfp_mul(dp);
+                    gen_mov_F1_vreg(dp, rd);
+                    gen_vfp_add(dp);
+                    gen_vfp_neg(dp);
+                    break;
+                case 4: /* mul: fn * fm */
+                    gen_vfp_mul(dp);
+                    break;
+                case 5: /* nmul: -(fn * fm) */
+                    gen_vfp_mul(dp);
+                    gen_vfp_neg(dp);
+                    break;
+                case 6: /* add: fn + fm */
+                    gen_vfp_add(dp);
+                    break;
+                case 7: /* sub: fn - fm */
+                    gen_vfp_sub(dp);
+                    break;
+                case 8: /* div: fn / fm */
+                    gen_vfp_div(dp);
+                    break;
+                case 15: /* extension space */
+                    switch (rn) {
+                    case 0: /* cpy */
+                        /* no-op */
+                        break;
+                    case 1: /* abs */
+                        gen_vfp_abs(dp);
+                        break;
+                    case 2: /* neg */
+                        gen_vfp_neg(dp);
+                        break;
+                    case 3: /* sqrt */
+                        gen_vfp_sqrt(dp);
+                        break;
+                    case 8: /* cmp */
+                        gen_vfp_cmp(dp);
+                        break;
+                    case 9: /* cmpe */
+                        gen_vfp_cmpe(dp);
+                        break;
+                    case 10: /* cmpz */
+                        gen_vfp_cmp(dp);
+                        break;
+                    case 11: /* cmpez */
+                        gen_vfp_F1_ld0(dp);
+                        gen_vfp_cmpe(dp);
+                        break;
+                    case 15: /* single<->double conversion */
+                        if (dp)
+                            gen_op_vfp_fcvtsd();
+                        else
+                            gen_op_vfp_fcvtds();
+                        break;
+                    case 16: /* fuito */
+                        gen_vfp_uito(dp);
+                        break;
+                    case 17: /* fsito */
+                        gen_vfp_sito(dp);
+                        break;
+                    case 24: /* ftoui */
+                        gen_vfp_toui(dp);
+                        break;
+                    case 25: /* ftouiz */
+                        gen_vfp_touiz(dp);
+                        break;
+                    case 26: /* ftosi */
+                        gen_vfp_tosi(dp);
+                        break;
+                    case 27: /* ftosiz */
+                        gen_vfp_tosiz(dp);
+                        break;
+                    default: /* undefined */
+                        printf ("rn:%d\n", rn);
+                        return 1;
+                    }
+                    break;
+                default: /* undefined */
+                    printf ("op:%d\n", op);
+                    return 1;
+                }
+
+                /* Write back the result.  */
+                if (op == 15 && (rn >= 8 && rn <= 11))
+                    ; /* Comparison, do nothing.  */
+                else if (op == 15 && rn > 17)
+                    /* Integer result.  */
+                    gen_mov_vreg_F0(0, rd);
+                else if (op == 15 && rn == 15)
+                    /* conversion */
+                    gen_mov_vreg_F0(!dp, rd);
+                else
+                    gen_mov_vreg_F0(dp, rd);
+
+                /* break out of the loop if we have finished  */
+                if (veclen == 0)
+                    break;
+
+                if (op == 15 && delta_m == 0) {
+                    /* single source one-many */
+                    while (veclen--) {
+                        rd = ((rd + delta_d) & (bank_mask - 1))
+                             | (rd & bank_mask);
+                        gen_mov_vreg_F0(dp, rd);
+                    }
+                    break;
+                }
+                /* Setup the next operands.  */
+                veclen--;
+                rd = ((rd + delta_d) & (bank_mask - 1))
+                     | (rd & bank_mask);
+
+                if (op == 15) {
+                    /* One source operand.  */
+                    rm = ((rm + delta_m) & (bank_mask - 1))
+                         | (rm & bank_mask);
+                    gen_mov_F0_vreg(dp, rm);
+                } else {
+                    /* Two source operands.  */
+                    rn = ((rn + delta_d) & (bank_mask - 1))
+                         | (rn & bank_mask);
+                    gen_mov_F0_vreg(dp, rn);
+                    if (delta_m) {
+                        rm = ((rm + delta_m) & (bank_mask - 1))
+                             | (rm & bank_mask);
+                        gen_mov_F1_vreg(dp, rm);
+                    }
+                }
+            }
+        }
+        break;
+    case 0xc:
+    case 0xd:
+        if (dp && (insn & (1 << 22))) {
+            /* two-register transfer */
+            rn = (insn >> 16) & 0xf;
+            rd = (insn >> 12) & 0xf;
+            if (dp) {
+                if (insn & (1 << 5))
+                    return 1;
+                rm = insn & 0xf;
+            } else
+                rm = ((insn << 1) & 0x1e) | ((insn >> 5) & 1);
+
+            if (insn & (1 << 20)) {
+                /* vfp->arm */
+                if (dp) {
+                    gen_mov_F0_vreg(1, rm);
+                    gen_op_vfp_mrrd();
+                    gen_movl_reg_T0(s, rd);
+                    gen_movl_reg_T1(s, rn);
+                } else {
+                    gen_mov_F0_vreg(0, rm);
+                    gen_op_vfp_mrs();
+                    gen_movl_reg_T0(s, rn);
+                    gen_mov_F0_vreg(0, rm + 1);
+                    gen_op_vfp_mrs();
+                    gen_movl_reg_T0(s, rd);
+                }
+            } else {
+                /* arm->vfp */
+                if (dp) {
+                    gen_movl_T0_reg(s, rd);
+                    gen_movl_T1_reg(s, rn);
+                    gen_op_vfp_mdrr();
+                    gen_mov_vreg_F0(1, rm);
+                } else {
+                    gen_movl_T0_reg(s, rn);
+                    gen_op_vfp_msr();
+                    gen_mov_vreg_F0(0, rm);
+                    gen_movl_T0_reg(s, rd);
+                    gen_op_vfp_msr();
+                    gen_mov_vreg_F0(0, rm + 1);
+                }
+            }
+        } else {
+            /* Load/store */
+            rn = (insn >> 16) & 0xf;
+            if (dp)
+                rd = (insn >> 12) & 0xf;
+            else
+                rd = ((insn >> 11) & 0x1e) | ((insn >> 22) & 1);
+            gen_movl_T1_reg(s, rn);
+            if ((insn & 0x01200000) == 0x01000000) {
+                /* Single load/store */
+                offset = (insn & 0xff) << 2;
+                if ((insn & (1 << 23)) == 0)
+                    offset = -offset;
+                gen_op_addl_T1_im(offset);
+                if (insn & (1 << 20)) {
+                    gen_vfp_ld(dp);
+                    gen_mov_vreg_F0(dp, rd);
+                } else {
+                    gen_mov_F0_vreg(dp, rd);
+                    gen_vfp_st(dp);
+                }
+            } else {
+                /* load/store multiple */
+                if (dp)
+                    n = (insn >> 1) & 0x7f;
+                else
+                    n = insn & 0xff;
+
+                if (insn & (1 << 24)) /* pre-decrement */
+                    gen_op_addl_T1_im(-((insn & 0xff) << 2));
+
+                if (dp)
+                    offset = 8;
+                else
+                    offset = 4;
+                for (i = 0; i < n; i++) {
+                    if (insn & (1 << 20)) {
+                        /* load */
+                        gen_vfp_ld(dp);
+                        gen_mov_vreg_F0(dp, rd + i);
+                    } else {
+                        /* store */
+                        gen_mov_F0_vreg(dp, rd + i);
+                        gen_vfp_st(dp);
+                    }
+                    gen_op_addl_T1_im(offset);
+                }
+                if (insn & (1 << 21)) {
+                    /* writeback */
+                    if (insn & (1 << 24))
+                        offset = -offset * n;
+                    else if (dp && (insn & 1))
+                        offset = 4;
+                    else
+                        offset = 0;
+
+                    if (offset != 0)
+                        gen_op_addl_T1_im(offset);
+                    gen_movl_reg_T1(s, rn);
+                }
+            }
+        }
+        break;
+    default:
+        /* Should never happen.  */
+        return 1;
+    }
+    return 0;
+}
+
+static void disas_arm_insn(CPUState * env, DisasContext *s)
 {
     unsigned int cond, insn, val, op1, i, shift, rm, rs, rn, rd, sh;
     
@@ -363,6 +884,7 @@
     
     cond = insn >> 28;
     if (cond == 0xf){
+        /* Unconditional instructions.  */
         if ((insn & 0x0d70f000) == 0x0550f000)
             return; /* PLD */
         else if ((insn & 0x0e000000) == 0x0a000000) {
@@ -381,6 +903,10 @@
             gen_op_movl_T0_im(val);
             gen_bx(s);
             return;
+        } else if ((insn & 0x0fe00000) == 0x0c400000) {
+            /* Coprocessor double register transfer.  */
+        } else if ((insn & 0x0f000010) == 0x0e000010) {
+            /* Additional coprocessor register transfer.  */
         }
         goto illegal_op;
     }
@@ -934,6 +1460,22 @@
                 s->is_jmp = DISAS_TB_JUMP;
             }
             break;
+        case 0xc:
+        case 0xd:
+        case 0xe:
+            /* Coprocessor.  */
+            op1 = (insn >> 8) & 0xf;
+            switch (op1) {
+            case 10:
+            case 11:
+                if (disas_vfp_insn (env, s, insn))
+                    goto illegal_op;
+                break;
+            default:
+                /* unknown coprocessor.  */
+                goto illegal_op;
+            }
+            break;
         case 0xf:
             /* swi */
             gen_op_movl_T0_im((long)s->pc);
@@ -1484,7 +2026,7 @@
         if (env->thumb)
           disas_thumb_insn(dc);
         else
-          disas_arm_insn(dc);
+          disas_arm_insn(env, dc);
     } while (!dc->is_jmp && gen_opc_ptr < gen_opc_end && 
              (dc->pc - pc_start) < (TARGET_PAGE_SIZE - 32));
     switch(dc->is_jmp) {
@@ -1557,6 +2099,11 @@
                     int flags)
 {
     int i;
+    struct {
+        uint32_t i;
+        float s;
+    } s0, s1;
+    CPU_DoubleU d;
 
     for(i=0;i<16;i++) {
         cpu_fprintf(f, "R%02d=%08x", i, env->regs[i]);
@@ -1566,11 +2113,23 @@
             cpu_fprintf(f, " ");
     }
     cpu_fprintf(f, "PSR=%08x %c%c%c%c\n", 
-            env->cpsr, 
+             env->cpsr, 
             env->cpsr & (1 << 31) ? 'N' : '-',
             env->cpsr & (1 << 30) ? 'Z' : '-',
             env->cpsr & (1 << 29) ? 'C' : '-',
             env->cpsr & (1 << 28) ? 'V' : '-');
+
+    for (i = 0; i < 16; i++) {
+        s0.s = env->vfp.regs.s[i * 2];
+        s1.s = env->vfp.regs.s[i * 2 + 1];
+        d.d = env->vfp.regs.d[i];
+        cpu_fprintf(f, "s%02d=%08x(%8f) s%02d=%08x(%8f) d%02d=%08x%08x(%8f)\n",
+                    i * 2, (int)s0.i, s0.s,
+                    i * 2 + 1, (int)s0.i, s0.s,
+                    i, (int)(uint32_t)d.l.upper, (int)(uint32_t)d.l.lower,
+                    d.d);
+        cpu_fprintf(f, "FPSCR: %08x\n", (int)env->vfp.fpscr);
+    }
 }
 
 target_ulong cpu_get_phys_page_debug(CPUState *env, target_ulong addr)