Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20140501' into staging

target-arm queue:
 * implement XScale cache lockdown cp15 ops
 * fix v7M CPUID base register
 * implement WFE and YIELD as yields for A64
 * fix A64 "BLR LR"
 * support Cortex-A57 in virt machine model
 * a few other minor AArch64 bugfixes

# gpg: Signature made Thu 01 May 2014 15:42:17 BST using RSA key ID 14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"

* remotes/pmaydell/tags/pull-target-arm-20140501:
  hw/arm/virt: Add support for Cortex-A57
  hw/arm/virt: Put GIC register banks on 64K boundaries
  hw/arm/virt: Create the GIC ourselves rather than (ab)using a15mpcore_priv
  target-arm: Correct a comment refering to EL0
  target-arm: A64: Fix a typo when declaring TLBI ops
  target-arm: A64: Handle blr lr
  target-arm: Make vbar_write 64bit friendly on 32bit hosts
  target-arm: implement WFE/YIELD as a yield for AArch64
  armv7m_nvic: fix CPUID Base Register
  target-arm: Implement XScale cache lockdown operations as NOPs

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 2bbc931..ea4f02d 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -75,8 +75,6 @@
 typedef struct VirtBoardInfo {
     struct arm_boot_info bootinfo;
     const char *cpu_model;
-    const char *qdevname;
-    const char *gic_compatible;
     const MemMapEntry *memmap;
     const int *irqmap;
     int smp_cpus;
@@ -98,10 +96,10 @@
 static const MemMapEntry a15memmap[] = {
     /* Space up to 0x8000000 is reserved for a boot ROM */
     [VIRT_FLASH] = { 0, 0x8000000 },
-    [VIRT_CPUPERIPHS] = { 0x8000000, 0x8000 },
+    [VIRT_CPUPERIPHS] = { 0x8000000, 0x20000 },
     /* GIC distributor and CPU interfaces sit inside the CPU peripheral space */
-    [VIRT_GIC_DIST] = { 0x8001000, 0x1000 },
-    [VIRT_GIC_CPU] = { 0x8002000, 0x1000 },
+    [VIRT_GIC_DIST] = { 0x8000000, 0x10000 },
+    [VIRT_GIC_CPU] = { 0x8010000, 0x10000 },
     [VIRT_UART] = { 0x9000000, 0x1000 },
     [VIRT_MMIO] = { 0xa000000, 0x200 },
     /* ...repeating for a total of NUM_VIRTIO_TRANSPORTS, each of that size */
@@ -117,16 +115,16 @@
 static VirtBoardInfo machines[] = {
     {
         .cpu_model = "cortex-a15",
-        .qdevname = "a15mpcore_priv",
-        .gic_compatible = "arm,cortex-a15-gic",
+        .memmap = a15memmap,
+        .irqmap = a15irqmap,
+    },
+    {
+        .cpu_model = "cortex-a57",
         .memmap = a15memmap,
         .irqmap = a15irqmap,
     },
     {
         .cpu_model = "host",
-        /* We use the A15 private peripheral model to get a V2 GIC */
-        .qdevname = "a15mpcore_priv",
-        .gic_compatible = "arm,cortex-a15-gic",
         .memmap = a15memmap,
         .irqmap = a15irqmap,
     },
@@ -251,8 +249,9 @@
     qemu_fdt_setprop_cell(vbi->fdt, "/", "interrupt-parent", gic_phandle);
 
     qemu_fdt_add_subnode(vbi->fdt, "/intc");
+    /* 'cortex-a15-gic' means 'GIC v2' */
     qemu_fdt_setprop_string(vbi->fdt, "/intc", "compatible",
-                                vbi->gic_compatible);
+                            "arm,cortex-a15-gic");
     qemu_fdt_setprop_cell(vbi->fdt, "/intc", "#interrupt-cells", 3);
     qemu_fdt_setprop(vbi->fdt, "/intc", "interrupt-controller", NULL, 0);
     qemu_fdt_setprop_sized_cells(vbi->fdt, "/intc", "reg",
@@ -263,6 +262,56 @@
     qemu_fdt_setprop_cell(vbi->fdt, "/intc", "phandle", gic_phandle);
 }
 
+static void create_gic(const VirtBoardInfo *vbi, qemu_irq *pic)
+{
+    /* We create a standalone GIC v2 */
+    DeviceState *gicdev;
+    SysBusDevice *gicbusdev;
+    const char *gictype = "arm_gic";
+    int i;
+
+    if (kvm_irqchip_in_kernel()) {
+        gictype = "kvm-arm-gic";
+    }
+
+    gicdev = qdev_create(NULL, gictype);
+    qdev_prop_set_uint32(gicdev, "revision", 2);
+    qdev_prop_set_uint32(gicdev, "num-cpu", smp_cpus);
+    /* Note that the num-irq property counts both internal and external
+     * interrupts; there are always 32 of the former (mandated by GIC spec).
+     */
+    qdev_prop_set_uint32(gicdev, "num-irq", NUM_IRQS + 32);
+    qdev_init_nofail(gicdev);
+    gicbusdev = SYS_BUS_DEVICE(gicdev);
+    sysbus_mmio_map(gicbusdev, 0, vbi->memmap[VIRT_GIC_DIST].base);
+    sysbus_mmio_map(gicbusdev, 1, vbi->memmap[VIRT_GIC_CPU].base);
+
+    /* Wire the outputs from each CPU's generic timer to the
+     * appropriate GIC PPI inputs, and the GIC's IRQ output to
+     * the CPU's IRQ input.
+     */
+    for (i = 0; i < smp_cpus; i++) {
+        DeviceState *cpudev = DEVICE(qemu_get_cpu(i));
+        int ppibase = NUM_IRQS + i * 32;
+        /* physical timer; we wire it up to the non-secure timer's ID,
+         * since a real A15 always has TrustZone but QEMU doesn't.
+         */
+        qdev_connect_gpio_out(cpudev, 0,
+                              qdev_get_gpio_in(gicdev, ppibase + 30));
+        /* virtual timer */
+        qdev_connect_gpio_out(cpudev, 1,
+                              qdev_get_gpio_in(gicdev, ppibase + 27));
+
+        sysbus_connect_irq(gicbusdev, i, qdev_get_gpio_in(cpudev, ARM_CPU_IRQ));
+    }
+
+    for (i = 0; i < NUM_IRQS; i++) {
+        pic[i] = qdev_get_gpio_in(gicdev, i);
+    }
+
+    fdt_add_gic_node(vbi);
+}
+
 static void create_uart(const VirtBoardInfo *vbi, qemu_irq *pic)
 {
     char *nodename;
@@ -340,8 +389,6 @@
     MemoryRegion *sysmem = get_system_memory();
     int n;
     MemoryRegion *ram = g_new(MemoryRegion, 1);
-    DeviceState *dev;
-    SysBusDevice *busdev;
     const char *cpu_model = args->cpu_model;
     VirtBoardInfo *vbi;
 
@@ -404,25 +451,7 @@
     vmstate_register_ram_global(ram);
     memory_region_add_subregion(sysmem, vbi->memmap[VIRT_MEM].base, ram);
 
-    dev = qdev_create(NULL, vbi->qdevname);
-    qdev_prop_set_uint32(dev, "num-cpu", smp_cpus);
-    /* Note that the num-irq property counts both internal and external
-     * interrupts; there are always 32 of the former (mandated by GIC spec).
-     */
-    qdev_prop_set_uint32(dev, "num-irq", NUM_IRQS + 32);
-    qdev_init_nofail(dev);
-    busdev = SYS_BUS_DEVICE(dev);
-    sysbus_mmio_map(busdev, 0, vbi->memmap[VIRT_CPUPERIPHS].base);
-    fdt_add_gic_node(vbi);
-    for (n = 0; n < smp_cpus; n++) {
-        DeviceState *cpudev = DEVICE(qemu_get_cpu(n));
-
-        sysbus_connect_irq(busdev, n, qdev_get_gpio_in(cpudev, ARM_CPU_IRQ));
-    }
-
-    for (n = 0; n < NUM_IRQS; n++) {
-        pic[n] = qdev_get_gpio_in(dev, n);
-    }
+    create_gic(vbi, pic);
 
     create_uart(vbi, pic);
 
diff --git a/hw/intc/armv7m_nvic.c b/hw/intc/armv7m_nvic.c
index 6066fa6..f5b0c3b 100644
--- a/hw/intc/armv7m_nvic.c
+++ b/hw/intc/armv7m_nvic.c
@@ -173,7 +173,7 @@
         return 10000;
     case 0xd00: /* CPUID Base.  */
         cpu = ARM_CPU(current_cpu);
-        return cpu->env.cp15.c0_cpuid;
+        return cpu->midr;
     case 0xd04: /* Interrupt Control State.  */
         /* VECTACTIVE */
         val = s->gic.running_irq[0];
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 43c1b4f..3be917c 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -657,7 +657,7 @@
      * contexts. (ARMv8 would permit us to do no masking at all, but ARMv7
      * requires the bottom five bits to be RAZ/WI because they're UNK/SBZP.)
      */
-    env->cp15.c12_vbar = value & ~0x1Ful;
+    env->cp15.c12_vbar = value & ~0x1FULL;
 }
 
 static uint64_t ccsidr_read(CPUARMState *env, const ARMCPRegInfo *ri)
@@ -1578,6 +1578,21 @@
       .cp = 15, .crn = 1, .crm = 0, .opc1 = 0, .opc2 = 1, .access = PL1_RW,
       .fieldoffset = offsetof(CPUARMState, cp15.c1_xscaleauxcr),
       .resetvalue = 0, },
+    /* XScale specific cache-lockdown: since we have no cache we NOP these
+     * and hope the guest does not really rely on cache behaviour.
+     */
+    { .name = "XSCALE_LOCK_ICACHE_LINE",
+      .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 0,
+      .access = PL1_W, .type = ARM_CP_NOP },
+    { .name = "XSCALE_UNLOCK_ICACHE",
+      .cp = 15, .opc1 = 0, .crn = 9, .crm = 1, .opc2 = 1,
+      .access = PL1_W, .type = ARM_CP_NOP },
+    { .name = "XSCALE_DCACHE_LOCK",
+      .cp = 15, .opc1 = 0, .crn = 9, .crm = 2, .opc2 = 0,
+      .access = PL1_RW, .type = ARM_CP_NOP },
+    { .name = "XSCALE_UNLOCK_DCACHE",
+      .cp = 15, .opc1 = 0, .crn = 9, .crm = 2, .opc2 = 1,
+      .access = PL1_W, .type = ARM_CP_NOP },
     REGINFO_SENTINEL
 };
 
@@ -1893,51 +1908,51 @@
       .access = PL1_W, .type = ARM_CP_NOP },
     /* TLBI operations */
     { .name = "TLBI_VMALLE1IS", .state = ARM_CP_STATE_AA64,
-      .opc0 = 1, .opc2 = 0, .crn = 8, .crm = 3, .opc2 = 0,
+      .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 0,
       .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
       .writefn = tlbiall_write },
     { .name = "TLBI_VAE1IS", .state = ARM_CP_STATE_AA64,
-      .opc0 = 1, .opc2 = 0, .crn = 8, .crm = 3, .opc2 = 1,
+      .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 1,
       .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
       .writefn = tlbi_aa64_va_write },
     { .name = "TLBI_ASIDE1IS", .state = ARM_CP_STATE_AA64,
-      .opc0 = 1, .opc2 = 0, .crn = 8, .crm = 3, .opc2 = 2,
+      .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 2,
       .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
       .writefn = tlbi_aa64_asid_write },
     { .name = "TLBI_VAAE1IS", .state = ARM_CP_STATE_AA64,
-      .opc0 = 1, .opc2 = 0, .crn = 8, .crm = 3, .opc2 = 3,
+      .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 3,
       .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
       .writefn = tlbi_aa64_vaa_write },
     { .name = "TLBI_VALE1IS", .state = ARM_CP_STATE_AA64,
-      .opc0 = 1, .opc2 = 0, .crn = 8, .crm = 3, .opc2 = 5,
+      .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 5,
       .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
       .writefn = tlbi_aa64_va_write },
     { .name = "TLBI_VAALE1IS", .state = ARM_CP_STATE_AA64,
-      .opc0 = 1, .opc2 = 0, .crn = 8, .crm = 3, .opc2 = 7,
+      .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 3, .opc2 = 7,
       .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
       .writefn = tlbi_aa64_vaa_write },
     { .name = "TLBI_VMALLE1", .state = ARM_CP_STATE_AA64,
-      .opc0 = 1, .opc2 = 0, .crn = 8, .crm = 7, .opc2 = 0,
+      .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 0,
       .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
       .writefn = tlbiall_write },
     { .name = "TLBI_VAE1", .state = ARM_CP_STATE_AA64,
-      .opc0 = 1, .opc2 = 0, .crn = 8, .crm = 7, .opc2 = 1,
+      .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 1,
       .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
       .writefn = tlbi_aa64_va_write },
     { .name = "TLBI_ASIDE1", .state = ARM_CP_STATE_AA64,
-      .opc0 = 1, .opc2 = 0, .crn = 8, .crm = 7, .opc2 = 2,
+      .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 2,
       .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
       .writefn = tlbi_aa64_asid_write },
     { .name = "TLBI_VAAE1", .state = ARM_CP_STATE_AA64,
-      .opc0 = 1, .opc2 = 0, .crn = 8, .crm = 7, .opc2 = 3,
+      .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 3,
       .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
       .writefn = tlbi_aa64_vaa_write },
     { .name = "TLBI_VALE1", .state = ARM_CP_STATE_AA64,
-      .opc0 = 1, .opc2 = 0, .crn = 8, .crm = 7, .opc2 = 5,
+      .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 5,
       .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
       .writefn = tlbi_aa64_va_write },
     { .name = "TLBI_VAALE1", .state = ARM_CP_STATE_AA64,
-      .opc0 = 1, .opc2 = 0, .crn = 8, .crm = 7, .opc2 = 7,
+      .opc0 = 1, .opc1 = 0, .crn = 8, .crm = 7, .opc2 = 7,
       .access = PL1_W, .type = ARM_CP_NO_MIGRATE,
       .writefn = tlbi_aa64_vaa_write },
 #ifndef CONFIG_USER_ONLY
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index 57e7d9c..fb90676 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -418,7 +418,7 @@
             goto illegal_return;
         }
         if (new_el == 0 && (spsr & PSTATE_SP)) {
-            /* Return to EL1 with M[0] bit set */
+            /* Return to EL0 with M[0] bit set */
             goto illegal_return;
         }
         env->aarch64 = 1;
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index d86b8ff..b62db4d 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -1151,6 +1151,8 @@
         return;
     case 1: /* YIELD */
     case 2: /* WFE */
+        s->is_jmp = DISAS_WFE;
+        return;
     case 4: /* SEV */
     case 5: /* SEVL */
         /* we treat all as NOP at least for now */
@@ -1507,8 +1509,10 @@
     switch (opc) {
     case 0: /* BR */
     case 2: /* RET */
+        tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));
         break;
     case 1: /* BLR */
+        tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));
         tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
         break;
     case 4: /* ERET */
@@ -1527,7 +1531,6 @@
         return;
     }
 
-    tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));
     s->is_jmp = DISAS_JUMP;
 }
 
@@ -10765,6 +10768,10 @@
         case DISAS_EXC:
         case DISAS_SWI:
             break;
+        case DISAS_WFE:
+            gen_a64_set_pc_im(dc->pc);
+            gen_helper_wfe(cpu_env);
+            break;
         case DISAS_WFI:
             /* This is a special case because we don't want to just halt the CPU
              * if trying to debug across a WFI.