Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20150914' into staging

target-arm queue:
 * fix GIC region size in xlnx-zynqmp
 * xlnx-zynqmp: Remove unnecessary brackets
 * improve A64 generated TCG code
 * add GPIO devices to i.MX25 and i.MX31
 * more missing pieces for EL2 support

# gpg: Signature made Mon 14 Sep 2015 14:51:12 BST using RSA key ID 14360CDE
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>"
# gpg:                 aka "Peter Maydell <pmaydell@gmail.com>"
# gpg:                 aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>"

* remotes/pmaydell/tags/pull-target-arm-20150914: (24 commits)
  target-arm: Add VMPIDR_EL2
  target-arm: Break out mpidr_read_val()
  target-arm: Add VPIDR_EL2
  target-arm: Suppress EPD for S2, EL2 and EL3 translations
  target-arm: Suppress TBI for S2 translations
  target-arm: Add VTTBR_EL2
  target-arm: Add VTCR_EL2
  hw/cpu/{a15mpcore, a9mpcore}: Handle missing has_el3 CPU props gracefully
  i.MX: Add GPIO devices to i.MX25 SOC
  i.MX: Add GPIO devices to i.MX31 SOC
  i.MX: Add GPIO device
  target-arm: Use tcg_gen_extrh_i64_i32
  target-arm: Recognize ROR
  target-arm: Eliminate unnecessary zero-extend in disas_bitfield
  target-arm: Recognize UXTB, UXTH, LSR, LSL
  target-arm: Recognize SXTB, SXTH, SXTW, ASR
  target-arm: Implement fcsel with movcond
  target-arm: Implement ccmp branchless
  target-arm: Use setcond and movcond for csel
  target-arm: Handle always condition codes within arm_test_cc
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/hw/arm/fsl-imx25.c b/hw/arm/fsl-imx25.c
index 6d157c9..86fde42 100644
--- a/hw/arm/fsl-imx25.c
+++ b/hw/arm/fsl-imx25.c
@@ -63,6 +63,11 @@
         object_initialize(&s->i2c[i], sizeof(s->i2c[i]), TYPE_IMX_I2C);
         qdev_set_parent_bus(DEVICE(&s->i2c[i]), sysbus_get_default());
     }
+
+    for (i = 0; i < FSL_IMX25_NUM_GPIOS; i++) {
+        object_initialize(&s->gpio[i], sizeof(s->gpio[i]), TYPE_IMX_GPIO);
+        qdev_set_parent_bus(DEVICE(&s->gpio[i]), sysbus_get_default());
+    }
 }
 
 static void fsl_imx25_realize(DeviceState *dev, Error **errp)
@@ -214,6 +219,30 @@
                                             i2c_table[i].irq));
     }
 
+    /* Initialize all GPIOs */
+    for (i = 0; i < FSL_IMX25_NUM_GPIOS; i++) {
+        static const struct {
+            hwaddr addr;
+            unsigned int irq;
+        } gpio_table[FSL_IMX25_NUM_GPIOS] = {
+            { FSL_IMX25_GPIO1_ADDR, FSL_IMX25_GPIO1_IRQ },
+            { FSL_IMX25_GPIO2_ADDR, FSL_IMX25_GPIO2_IRQ },
+            { FSL_IMX25_GPIO3_ADDR, FSL_IMX25_GPIO3_IRQ },
+            { FSL_IMX25_GPIO4_ADDR, FSL_IMX25_GPIO4_IRQ }
+        };
+
+        object_property_set_bool(OBJECT(&s->gpio[i]), true, "realized", &err);
+        if (err) {
+            error_propagate(errp, err);
+            return;
+        }
+        sysbus_mmio_map(SYS_BUS_DEVICE(&s->gpio[i]), 0, gpio_table[i].addr);
+        /* Connect GPIO IRQ to PIC */
+        sysbus_connect_irq(SYS_BUS_DEVICE(&s->gpio[i]), 0,
+                           qdev_get_gpio_in(DEVICE(&s->avic),
+                                            gpio_table[i].irq));
+    }
+
     /* initialize 2 x 16 KB ROM */
     memory_region_init_rom_device(&s->rom[0], NULL, NULL, NULL,
                                   "imx25.rom0", FSL_IMX25_ROM0_SIZE, &err);
diff --git a/hw/arm/fsl-imx31.c b/hw/arm/fsl-imx31.c
index 87548c8..8e1ed48 100644
--- a/hw/arm/fsl-imx31.c
+++ b/hw/arm/fsl-imx31.c
@@ -55,6 +55,11 @@
         object_initialize(&s->i2c[i], sizeof(s->i2c[i]), TYPE_IMX_I2C);
         qdev_set_parent_bus(DEVICE(&s->i2c[i]), sysbus_get_default());
     }
+
+    for (i = 0; i < FSL_IMX31_NUM_GPIOS; i++) {
+        object_initialize(&s->gpio[i], sizeof(s->gpio[i]), TYPE_IMX_GPIO);
+        qdev_set_parent_bus(DEVICE(&s->gpio[i]), sysbus_get_default());
+    }
 }
 
 static void fsl_imx31_realize(DeviceState *dev, Error **errp)
@@ -184,6 +189,31 @@
                                             i2c_table[i].irq));
     }
 
+    /* Initialize all GPIOs */
+    for (i = 0; i < FSL_IMX31_NUM_GPIOS; i++) {
+        static const struct {
+            hwaddr addr;
+            unsigned int irq;
+        } gpio_table[FSL_IMX31_NUM_GPIOS] = {
+            { FSL_IMX31_GPIO1_ADDR, FSL_IMX31_GPIO1_IRQ },
+            { FSL_IMX31_GPIO2_ADDR, FSL_IMX31_GPIO2_IRQ },
+            { FSL_IMX31_GPIO3_ADDR, FSL_IMX31_GPIO3_IRQ }
+        };
+
+        object_property_set_bool(OBJECT(&s->gpio[i]), false, "has-edge-sel",
+                                 &error_abort);
+        object_property_set_bool(OBJECT(&s->gpio[i]), true, "realized", &err);
+        if (err) {
+            error_propagate(errp, err);
+            return;
+        }
+        sysbus_mmio_map(SYS_BUS_DEVICE(&s->gpio[i]), 0, gpio_table[i].addr);
+        /* Connect GPIO IRQ to PIC */
+        sysbus_connect_irq(SYS_BUS_DEVICE(&s->gpio[i]), 0,
+                           qdev_get_gpio_in(DEVICE(&s->avic),
+                                            gpio_table[i].irq));
+    }
+
     /* On a real system, the first 16k is a `secure boot rom' */
     memory_region_init_rom_device(&s->secure_rom, NULL, NULL, NULL,
                                   "imx31.secure_rom",
diff --git a/hw/arm/xlnx-zynqmp.c b/hw/arm/xlnx-zynqmp.c
index 2955f3b..2185542 100644
--- a/hw/arm/xlnx-zynqmp.c
+++ b/hw/arm/xlnx-zynqmp.c
@@ -128,7 +128,7 @@
     qdev_prop_set_uint32(DEVICE(&s->gic), "num-cpu", XLNX_ZYNQMP_NUM_APU_CPUS);
     object_property_set_bool(OBJECT(&s->gic), true, "realized", &err);
     if (err) {
-        error_propagate((errp), (err));
+        error_propagate(errp, err);
         return;
     }
     assert(ARRAY_SIZE(xlnx_zynqmp_gic_regions) == XLNX_ZYNQMP_GIC_REGIONS);
@@ -173,7 +173,7 @@
         object_property_set_bool(OBJECT(&s->apu_cpu[i]), true, "realized",
                                  &err);
         if (err) {
-            error_propagate((errp), (err));
+            error_propagate(errp, err);
             return;
         }
 
@@ -206,7 +206,7 @@
         object_property_set_bool(OBJECT(&s->rpu_cpu[i]), true, "realized",
                                  &err);
         if (err) {
-            error_propagate((errp), (err));
+            error_propagate(errp, err);
             return;
         }
     }
@@ -229,7 +229,7 @@
         }
         object_property_set_bool(OBJECT(&s->gem[i]), true, "realized", &err);
         if (err) {
-            error_propagate((errp), (err));
+            error_propagate(errp, err);
             return;
         }
         sysbus_mmio_map(SYS_BUS_DEVICE(&s->gem[i]), 0, gem_addr[i]);
@@ -240,7 +240,7 @@
     for (i = 0; i < XLNX_ZYNQMP_NUM_UARTS; i++) {
         object_property_set_bool(OBJECT(&s->uart[i]), true, "realized", &err);
         if (err) {
-            error_propagate((errp), (err));
+            error_propagate(errp, err);
             return;
         }
         sysbus_mmio_map(SYS_BUS_DEVICE(&s->uart[i]), 0, uart_addr[i]);
diff --git a/hw/cpu/a15mpcore.c b/hw/cpu/a15mpcore.c
index 4ef8db1..94e8cc1 100644
--- a/hw/cpu/a15mpcore.c
+++ b/hw/cpu/a15mpcore.c
@@ -64,7 +64,7 @@
          * either all the CPUs have TZ, or none do.
          */
         cpuobj = OBJECT(qemu_get_cpu(0));
-        has_el3 = object_property_find(cpuobj, "has_el3", &error_abort) &&
+        has_el3 = object_property_find(cpuobj, "has_el3", NULL) &&
             object_property_get_bool(cpuobj, "has_el3", &error_abort);
         qdev_prop_set_bit(gicdev, "has-security-extensions", has_el3);
     }
diff --git a/hw/cpu/a9mpcore.c b/hw/cpu/a9mpcore.c
index 7046246..869818c 100644
--- a/hw/cpu/a9mpcore.c
+++ b/hw/cpu/a9mpcore.c
@@ -69,7 +69,7 @@
      * either all the CPUs have TZ, or none do.
      */
     cpuobj = OBJECT(qemu_get_cpu(0));
-    has_el3 = object_property_find(cpuobj, "has_el3", &error_abort) &&
+    has_el3 = object_property_find(cpuobj, "has_el3", NULL) &&
         object_property_get_bool(cpuobj, "has_el3", &error_abort);
     qdev_prop_set_bit(gicdev, "has-security-extensions", has_el3);
 
diff --git a/hw/gpio/Makefile.objs b/hw/gpio/Makefile.objs
index 1abcf17..52233f7 100644
--- a/hw/gpio/Makefile.objs
+++ b/hw/gpio/Makefile.objs
@@ -5,3 +5,4 @@
 common-obj-$(CONFIG_E500) += mpc8xxx.o
 
 obj-$(CONFIG_OMAP) += omap_gpio.o
+obj-$(CONFIG_IMX) += imx_gpio.o
diff --git a/hw/gpio/imx_gpio.c b/hw/gpio/imx_gpio.c
new file mode 100644
index 0000000..d56ffcd
--- /dev/null
+++ b/hw/gpio/imx_gpio.c
@@ -0,0 +1,340 @@
+/*
+ * i.MX processors GPIO emulation.
+ *
+ * Copyright (C) 2015 Jean-Christophe Dubois <jcd@tribudubois.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw/gpio/imx_gpio.h"
+
+#ifndef DEBUG_IMX_GPIO
+#define DEBUG_IMX_GPIO 0
+#endif
+
+typedef enum IMXGPIOLevel {
+    IMX_GPIO_LEVEL_LOW = 0,  /* values match the 1-bit PSR/ICR fields */
+    IMX_GPIO_LEVEL_HIGH = 1,
+} IMXGPIOLevel;
+
+#define DPRINTF(fmt, args...) \
+          do { \
+              if (DEBUG_IMX_GPIO) { \
+                  fprintf(stderr, "%s: " fmt , __func__, ##args); \
+              } \
+          } while (0) /* prints nothing unless DEBUG_IMX_GPIO is non-zero */
+
+static const char *imx_gpio_reg_name(uint32_t reg)
+{
+    switch (reg) { /* register offset -> printable name, for DPRINTF */
+    case DR_ADDR:
+        return "DR";
+    case GDIR_ADDR:
+        return "GDIR";
+    case PSR_ADDR:
+        return "PSR";
+    case ICR1_ADDR:
+        return "ICR1";
+    case ICR2_ADDR:
+        return "ICR2";
+    case IMR_ADDR:
+        return "IMR";
+    case ISR_ADDR:
+        return "ISR";
+    case EDGE_SEL_ADDR:
+        return "EDGE_SEL";
+    default:
+        return "[?]"; /* offset does not match any known register */
+    }
+}
+
+static void imx_gpio_update_int(IMXGPIOState *s)
+{
+    qemu_set_irq(s->irq, (s->isr & s->imr) ? 1 : 0); /* pending & enabled */
+}
+
+static void imx_gpio_set_int_line(IMXGPIOState *s, int line, IMXGPIOLevel level)
+{
+    /* a set GDIR bit means output; only input lines can latch an interrupt */
+    if (extract32(s->gdir, line, 1)) {
+        return;
+    }
+
+    /* When set, EDGE_SEL overrides the ICR config */
+    if (extract32(s->edge_sel, line, 1)) {
+        /* we detect interrupt on rising and falling edge */
+        if (extract32(s->psr, line, 1) != level) {
+            /* level changed: PSR still holds the previous level here */
+            s->isr = deposit32(s->isr, line, 1, 1);
+        }
+    } else if (extract64(s->icr, 2*line + 1, 1)) {
+        /* interrupt is edge sensitive (ICR bit 2n+1 set) */
+        if (extract32(s->psr, line, 1) != level) {
+            /* level changed; ICR bit 2n: 0 = rising edge, 1 = falling edge */
+            if (extract64(s->icr, 2*line, 1) != level) {
+                s->isr = deposit32(s->isr, line, 1, 1);
+            }
+        }
+    } else {
+        /* interrupt is level sensitive; ICR bit 2n is the active level */
+        if (extract64(s->icr, 2*line, 1) == level) {
+            s->isr = deposit32(s->isr, line, 1, 1);
+        }
+    }
+}
+
+static void imx_gpio_set(void *opaque, int line, int level)
+{
+    IMXGPIOState *s = IMX_GPIO(opaque);
+    IMXGPIOLevel imx_level = level ? IMX_GPIO_LEVEL_HIGH : IMX_GPIO_LEVEL_LOW;
+
+    imx_gpio_set_int_line(s, line, imx_level); /* must see the old PSR bit */
+
+    /* this is an input signal, so set PSR */
+    s->psr = deposit32(s->psr, line, 1, imx_level);
+
+    imx_gpio_update_int(s);
+}
+
+static void imx_gpio_set_all_int_lines(IMXGPIOState *s)
+{
+    int i;
+
+    for (i = 0; i < IMX_GPIO_PIN_COUNT; i++) {
+        IMXGPIOLevel imx_level = extract32(s->psr, i, 1); /* current level */
+        imx_gpio_set_int_line(s, i, imx_level);
+    }
+
+    imx_gpio_update_int(s); /* ISR may have changed: refresh the IRQ line */
+}
+
+static inline void imx_gpio_set_all_output_lines(IMXGPIOState *s)
+{
+    int i;
+
+    for (i = 0; i < IMX_GPIO_PIN_COUNT; i++) {
+        /*
+         * if the line is set as output, then forward the line
+         * level to its user.
+         */
+        if (extract32(s->gdir, i, 1) && s->output[i]) { /* skip unconnected */
+            qemu_set_irq(s->output[i], extract32(s->dr, i, 1));
+        }
+    }
+}
+
+static uint64_t imx_gpio_read(void *opaque, hwaddr offset, unsigned size)
+{
+    IMXGPIOState *s = IMX_GPIO(opaque);
+    uint32_t reg_value = 0; /* unknown or absent registers read as zero */
+
+    switch (offset) {
+    case DR_ADDR:
+        /*
+         * depending on the "line" configuration, the bit values
+         * are coming either from DR or PSR
+         */
+        reg_value = (s->dr & s->gdir) | (s->psr & ~s->gdir);
+        break;
+
+    case GDIR_ADDR:
+        reg_value = s->gdir;
+        break;
+
+    case PSR_ADDR:
+        reg_value = s->psr & ~s->gdir; /* bits with GDIR set read as zero */
+        break;
+
+    case ICR1_ADDR:
+        reg_value = extract64(s->icr, 0, 32); /* low half of the 64-bit ICR */
+        break;
+
+    case ICR2_ADDR:
+        reg_value = extract64(s->icr, 32, 32); /* high half of the ICR */
+        break;
+
+    case IMR_ADDR:
+        reg_value = s->imr;
+        break;
+
+    case ISR_ADDR:
+        reg_value = s->isr;
+        break;
+
+    case EDGE_SEL_ADDR:
+        if (s->has_edge_sel) {
+            reg_value = s->edge_sel;
+        } else {
+            qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: EDGE_SEL register not "
+                          "present on this version of GPIO device\n",
+                          TYPE_IMX_GPIO, __func__);
+        }
+        break;
+
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Bad register at offset %d\n",
+                      TYPE_IMX_GPIO, __func__, (int)offset);
+        break;
+    }
+
+    DPRINTF("(%s) = 0x%"PRIx32"\n", imx_gpio_reg_name(offset), reg_value);
+
+    return reg_value;
+}
+
+static void imx_gpio_write(void *opaque, hwaddr offset, uint64_t value,
+                           unsigned size)
+{
+    IMXGPIOState *s = IMX_GPIO(opaque);
+
+    DPRINTF("(%s, value = 0x%"PRIx32")\n", imx_gpio_reg_name(offset),
+            (uint32_t)value);
+
+    switch (offset) { /* PSR has no case: writes to it are logged below */
+    case DR_ADDR:
+        s->dr = value;
+        imx_gpio_set_all_output_lines(s);
+        break;
+
+    case GDIR_ADDR:
+        s->gdir = value; /* direction change: refresh outputs and inputs */
+        imx_gpio_set_all_output_lines(s);
+        imx_gpio_set_all_int_lines(s);
+        break;
+
+    case ICR1_ADDR:
+        s->icr = deposit64(s->icr, 0, 32, value); /* low half of the ICR */
+        imx_gpio_set_all_int_lines(s);
+        break;
+
+    case ICR2_ADDR:
+        s->icr = deposit64(s->icr, 32, 32, value); /* high half of the ICR */
+        imx_gpio_set_all_int_lines(s);
+        break;
+
+    case IMR_ADDR:
+        s->imr = value;
+        imx_gpio_update_int(s);
+        break;
+
+    case ISR_ADDR:
+        s->isr &= ~value; /* write-1-to-clear; level IRQs re-latch below */
+        imx_gpio_set_all_int_lines(s);
+        break;
+
+    case EDGE_SEL_ADDR:
+        if (s->has_edge_sel) {
+            s->edge_sel = value;
+            imx_gpio_set_all_int_lines(s);
+        } else {
+            qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: EDGE_SEL register not "
+                          "present on this version of GPIO device\n",
+                          TYPE_IMX_GPIO, __func__);
+        }
+        break;
+
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR, "%s[%s]: Bad register at offset %d\n",
+                      TYPE_IMX_GPIO, __func__, (int)offset);
+        break;
+    }
+
+    return;
+}
+
+static const MemoryRegionOps imx_gpio_ops = {
+    .read = imx_gpio_read,
+    .write = imx_gpio_write,
+    .valid.min_access_size = 4, /* min == max: 4-byte accesses only */
+    .valid.max_access_size = 4,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+};
+
+static const VMStateDescription vmstate_imx_gpio = {
+    .name = TYPE_IMX_GPIO,
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(dr, IMXGPIOState),
+        VMSTATE_UINT32(gdir, IMXGPIOState),
+        VMSTATE_UINT32(psr, IMXGPIOState),
+        VMSTATE_UINT64(icr, IMXGPIOState), /* ICR1 + ICR2 combined */
+        VMSTATE_UINT32(imr, IMXGPIOState),
+        VMSTATE_UINT32(isr, IMXGPIOState),
+        VMSTATE_BOOL(has_edge_sel, IMXGPIOState),
+        VMSTATE_UINT32(edge_sel, IMXGPIOState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static Property imx_gpio_properties[] = { /* EDGE_SEL present by default */
+    DEFINE_PROP_BOOL("has-edge-sel", IMXGPIOState, has_edge_sel, true),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void imx_gpio_reset(DeviceState *dev)
+{
+    IMXGPIOState *s = IMX_GPIO(dev);
+
+    s->dr       = 0;
+    s->gdir     = 0; /* no line is configured as an output */
+    s->psr      = 0;
+    s->icr      = 0;
+    s->imr      = 0; /* every interrupt is masked */
+    s->isr      = 0;
+    s->edge_sel = 0;
+
+    imx_gpio_set_all_output_lines(s); /* gdir == 0: drives nothing */
+    imx_gpio_update_int(s); /* isr & imr == 0: lowers the IRQ line */
+}
+
+static void imx_gpio_realize(DeviceState *dev, Error **errp)
+{ /* sets up the MMIO region, the GPIO in/out lines and the IRQ; errp unused */
+    IMXGPIOState *s = IMX_GPIO(dev);
+
+    memory_region_init_io(&s->iomem, OBJECT(s), &imx_gpio_ops, s,
+                          TYPE_IMX_GPIO, IMX_GPIO_MEM_SIZE);
+
+    qdev_init_gpio_in(DEVICE(s), imx_gpio_set, IMX_GPIO_PIN_COUNT);
+    qdev_init_gpio_out(DEVICE(s), s->output, IMX_GPIO_PIN_COUNT);
+
+    sysbus_init_irq(SYS_BUS_DEVICE(dev), &s->irq);
+    sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->iomem);
+}
+
+static void imx_gpio_class_init(ObjectClass *klass, void *data)
+{ /* installs realize/reset hooks, properties and migration state */
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->realize = imx_gpio_realize;
+    dc->reset = imx_gpio_reset;
+    dc->props = imx_gpio_properties;
+    dc->vmsd = &vmstate_imx_gpio;
+    dc->desc = "i.MX GPIO controller";
+}
+
+static const TypeInfo imx_gpio_info = {
+    .name = TYPE_IMX_GPIO,
+    .parent = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(IMXGPIOState),
+    .class_init = imx_gpio_class_init,
+};
+
+static void imx_gpio_register_types(void)
+{
+    type_register_static(&imx_gpio_info); /* TYPE_IMX_GPIO, a sysbus device */
+}
+
+type_init(imx_gpio_register_types)
diff --git a/include/hw/arm/fsl-imx25.h b/include/hw/arm/fsl-imx25.h
index 7f6bb64..73f50c6 100644
--- a/include/hw/arm/fsl-imx25.h
+++ b/include/hw/arm/fsl-imx25.h
@@ -25,6 +25,7 @@
 #include "hw/timer/imx_epit.h"
 #include "hw/net/imx_fec.h"
 #include "hw/i2c/imx_i2c.h"
+#include "hw/gpio/imx_gpio.h"
 #include "exec/memory.h"
 
 #define TYPE_FSL_IMX25 "fsl,imx25"
@@ -34,6 +35,7 @@
 #define FSL_IMX25_NUM_GPTS 4
 #define FSL_IMX25_NUM_EPITS 2
 #define FSL_IMX25_NUM_I2CS 3
+#define FSL_IMX25_NUM_GPIOS 4
 
 typedef struct FslIMX25State {
     /*< private >*/
@@ -48,6 +50,7 @@
     IMXEPITState   epit[FSL_IMX25_NUM_EPITS];
     IMXFECState    fec;
     IMXI2CState    i2c[FSL_IMX25_NUM_I2CS];
+    IMXGPIOState   gpio[FSL_IMX25_NUM_GPIOS];
     MemoryRegion   rom[2];
     MemoryRegion   iram;
     MemoryRegion   iram_alias;
@@ -204,6 +207,14 @@
 #define FSL_IMX25_EPIT1_SIZE    0x4000
 #define FSL_IMX25_EPIT2_ADDR    0x53F98000
 #define FSL_IMX25_EPIT2_SIZE    0x4000
+#define FSL_IMX25_GPIO4_ADDR    0x53F9C000
+#define FSL_IMX25_GPIO4_SIZE    0x4000
+#define FSL_IMX25_GPIO3_ADDR    0x53FA4000
+#define FSL_IMX25_GPIO3_SIZE    0x4000
+#define FSL_IMX25_GPIO1_ADDR    0x53FCC000
+#define FSL_IMX25_GPIO1_SIZE    0x4000
+#define FSL_IMX25_GPIO2_ADDR    0x53FD0000
+#define FSL_IMX25_GPIO2_SIZE    0x4000
 #define FSL_IMX25_AVIC_ADDR     0x68000000
 #define FSL_IMX25_AVIC_SIZE     0x4000
 #define FSL_IMX25_IRAM_ADDR     0x78000000
@@ -230,5 +241,9 @@
 #define FSL_IMX25_I2C1_IRQ      3
 #define FSL_IMX25_I2C2_IRQ      4
 #define FSL_IMX25_I2C3_IRQ      10
+#define FSL_IMX25_GPIO1_IRQ     52
+#define FSL_IMX25_GPIO2_IRQ     51
+#define FSL_IMX25_GPIO3_IRQ     16
+#define FSL_IMX25_GPIO4_IRQ     23
 
 #endif /* FSL_IMX25_H */
diff --git a/include/hw/arm/fsl-imx31.h b/include/hw/arm/fsl-imx31.h
index 891166f..5e8f795 100644
--- a/include/hw/arm/fsl-imx31.h
+++ b/include/hw/arm/fsl-imx31.h
@@ -24,6 +24,7 @@
 #include "hw/timer/imx_gpt.h"
 #include "hw/timer/imx_epit.h"
 #include "hw/i2c/imx_i2c.h"
+#include "hw/gpio/imx_gpio.h"
 #include "exec/memory.h"
 
 #define TYPE_FSL_IMX31 "fsl,imx31"
@@ -32,6 +33,7 @@
 #define FSL_IMX31_NUM_UARTS 2
 #define FSL_IMX31_NUM_EPITS 2
 #define FSL_IMX31_NUM_I2CS 3
+#define FSL_IMX31_NUM_GPIOS 3
 
 typedef struct FslIMX31State {
     /*< private >*/
@@ -45,6 +47,7 @@
     IMXGPTState    gpt;
     IMXEPITState   epit[FSL_IMX31_NUM_EPITS];
     IMXI2CState    i2c[FSL_IMX31_NUM_I2CS];
+    IMXGPIOState   gpio[FSL_IMX31_NUM_GPIOS];
     MemoryRegion   secure_rom;
     MemoryRegion   rom;
     MemoryRegion   iram;
@@ -77,6 +80,12 @@
 #define FSL_IMX31_EPIT1_SIZE            0x4000
 #define FSL_IMX31_EPIT2_ADDR            0x53F98000
 #define FSL_IMX31_EPIT2_SIZE            0x4000
+#define FSL_IMX31_GPIO3_ADDR            0x53FA4000
+#define FSL_IMX31_GPIO3_SIZE            0x4000
+#define FSL_IMX31_GPIO1_ADDR            0x53FCC000
+#define FSL_IMX31_GPIO1_SIZE            0x4000
+#define FSL_IMX31_GPIO2_ADDR            0x53FD0000
+#define FSL_IMX31_GPIO2_SIZE            0x4000
 #define FSL_IMX31_AVIC_ADDR             0x68000000
 #define FSL_IMX31_AVIC_SIZE             0x100
 #define FSL_IMX31_SDRAM0_ADDR           0x80000000
@@ -106,5 +115,8 @@
 #define FSL_IMX31_I2C1_IRQ              10
 #define FSL_IMX31_I2C2_IRQ              4
 #define FSL_IMX31_I2C3_IRQ              3
+#define FSL_IMX31_GPIO1_IRQ             52
+#define FSL_IMX31_GPIO2_IRQ             51
+#define FSL_IMX31_GPIO3_IRQ             56
 
 #endif /* FSL_IMX31_H */
diff --git a/include/hw/arm/xlnx-zynqmp.h b/include/hw/arm/xlnx-zynqmp.h
index 97622ec..4005a99 100644
--- a/include/hw/arm/xlnx-zynqmp.h
+++ b/include/hw/arm/xlnx-zynqmp.h
@@ -46,7 +46,7 @@
  * number of memory region aliases.
  */
 
-#define XLNX_ZYNQMP_GIC_REGION_SIZE 0x4000
+#define XLNX_ZYNQMP_GIC_REGION_SIZE 0x1000
 #define XLNX_ZYNQMP_GIC_ALIASES     (0x10000 / XLNX_ZYNQMP_GIC_REGION_SIZE - 1)
 
 typedef struct XlnxZynqMPState {
diff --git a/include/hw/gpio/imx_gpio.h b/include/hw/gpio/imx_gpio.h
new file mode 100644
index 0000000..517b261
--- /dev/null
+++ b/include/hw/gpio/imx_gpio.h
@@ -0,0 +1,62 @@
+/*
+ * i.MX processors GPIO registers definition.
+ *
+ * Copyright (C) 2015 Jean-Christophe Dubois <jcd@tribudubois.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __IMX_GPIO_H_
+#define __IMX_GPIO_H_
+
+#include <hw/sysbus.h>
+
+#define TYPE_IMX_GPIO "imx.gpio"
+#define IMX_GPIO(obj) OBJECT_CHECK(IMXGPIOState, (obj), TYPE_IMX_GPIO)
+
+#define IMX_GPIO_MEM_SIZE 0x20
+
+/* i.MX GPIO memory map */
+#define DR_ADDR             0x00 /* DATA REGISTER */
+#define GDIR_ADDR           0x04 /* DIRECTION REGISTER */
+#define PSR_ADDR            0x08 /* PAD STATUS REGISTER */
+#define ICR1_ADDR           0x0c /* INTERRUPT CONFIGURATION REGISTER 1 */
+#define ICR2_ADDR           0x10 /* INTERRUPT CONFIGURATION REGISTER 2 */
+#define IMR_ADDR            0x14 /* INTERRUPT MASK REGISTER */
+#define ISR_ADDR            0x18 /* INTERRUPT STATUS REGISTER */
+#define EDGE_SEL_ADDR       0x1c /* EDGE SEL REGISTER */
+
+#define IMX_GPIO_PIN_COUNT 32
+
+typedef struct IMXGPIOState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion iomem;   /* register window, IMX_GPIO_MEM_SIZE bytes */
+
+    uint32_t dr;          /* DR: data register */
+    uint32_t gdir;        /* GDIR: direction register */
+    uint32_t psr;         /* PSR: pad status register */
+    uint64_t icr;         /* ICR1 in bits 0-31, ICR2 in bits 32-63 */
+    uint32_t imr;         /* IMR: interrupt mask register */
+    uint32_t isr;         /* ISR: interrupt status register */
+    bool has_edge_sel;    /* whether the EDGE_SEL register exists */
+    uint32_t edge_sel;    /* EDGE_SEL: edge select register */
+
+    qemu_irq irq;         /* interrupt line of the whole controller */
+    qemu_irq output[IMX_GPIO_PIN_COUNT]; /* one outgoing line per pin */
+} IMXGPIOState;
+
+#endif /* __IMX_GPIO_H_ */
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index b9068c9..1b80516 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -222,8 +222,10 @@
             };
             uint64_t ttbr1_el[4];
         };
+        uint64_t vttbr_el2; /* Virtualization Translation Table Base.  */
         /* MMU translation table base control. */
         TCR tcr_el[4];
+        TCR vtcr_el2; /* Virtualization Translation Control.  */
         uint32_t c2_data; /* MPU data cacheable bits.  */
         uint32_t c2_insn; /* MPU instruction cacheable bits.  */
         union { /* MMU domain access control register
@@ -383,6 +385,8 @@
          */
         uint64_t c15_ccnt;
         uint64_t pmccfiltr_el0; /* Performance Monitor Filter Register */
+        uint64_t vpidr_el2; /* Virtualization Processor ID Register */
+        uint64_t vmpidr_el2; /* Virtualization Multiprocessor ID Register */
     } cp15;
 
     struct {
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 2c6ec9d..65b9ff5 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -325,6 +325,34 @@
     g_list_free(keys);
 }
 
+/*
+ * Some registers are not accessible if EL3.NS=0 and EL3 is using AArch32 but
+ * they are accessible when EL3 is using AArch64 regardless of EL3.NS.
+ *
+ * access_el3_aa32ns: Used to check AArch32 register views.
+ * access_el3_aa32ns_aa64any: Used to check both AArch32/64 register views.
+ */
+static CPAccessResult access_el3_aa32ns(CPUARMState *env,
+                                        const ARMCPRegInfo *ri)
+{
+    bool secure = arm_is_secure_below_el3(env);
+
+    assert(!arm_el_is_aa64(env, 3)); /* only valid when EL3 is AArch32 */
+    if (secure) {
+        return CP_ACCESS_TRAP_UNCATEGORIZED; /* secure accesses are refused */
+    }
+    return CP_ACCESS_OK;
+}
+
+static CPAccessResult access_el3_aa32ns_aa64any(CPUARMState *env,
+                                                const ARMCPRegInfo *ri)
+{
+    if (!arm_el_is_aa64(env, 3)) { /* AArch32 EL3: apply the NS-only check */
+        return access_el3_aa32ns(env, ri);
+    }
+    return CP_ACCESS_OK; /* AArch64 EL3: always accessible */
+}
+
 static void dacr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
 {
     ARMCPU *cpu = arm_env_get_cpu(env);
@@ -2185,6 +2213,20 @@
     raw_write(env, ri, value);
 }
 
+static void vttbr_write(CPUARMState *env, const ARMCPRegInfo *ri,
+                        uint64_t value)
+{
+    ARMCPU *cpu = arm_env_get_cpu(env);
+    CPUState *cs = CPU(cpu);
+
+    /* Accesses to VTTBR may change the VMID so we must flush the TLB.  */
+    if (raw_read(env, ri) != value) { /* same value rewritten: nothing to do */
+        tlb_flush_by_mmuidx(cs, ARMMMUIdx_S12NSE1, ARMMMUIdx_S12NSE0,
+                            ARMMMUIdx_S2NS, -1);
+        raw_write(env, ri, value);
+    }
+}
+
 static const ARMCPRegInfo vmsa_pmsa_cp_reginfo[] = {
     { .name = "DFSR", .cp = 15, .crn = 5, .crm = 0, .opc1 = 0, .opc2 = 0,
       .access = PL1_RW, .type = ARM_CP_ALIAS,
@@ -2403,7 +2445,19 @@
     REGINFO_SENTINEL
 };
 
-static uint64_t mpidr_read(CPUARMState *env, const ARMCPRegInfo *ri)
+static uint64_t midr_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    ARMCPU *cpu = arm_env_get_cpu(env);
+    unsigned int cur_el = arm_current_el(env);
+    bool secure = arm_is_secure(env);
+
+    if (arm_feature(&cpu->env, ARM_FEATURE_EL2) && !secure && cur_el == 1) {
+        return env->cp15.vpidr_el2; /* non-secure EL1 reads VPIDR_EL2 */
+    }
+    return raw_read(env, ri); /* everyone else reads the real register */
+}
+
+static uint64_t mpidr_read_val(CPUARMState *env)
 {
     ARMCPU *cpu = ARM_CPU(arm_env_get_cpu(env));
     uint64_t mpidr = cpu->mp_affinity;
@@ -2421,6 +2475,17 @@
     return mpidr;
 }
 
+static uint64_t mpidr_read(CPUARMState *env, const ARMCPRegInfo *ri)
+{
+    unsigned int cur_el = arm_current_el(env);
+    bool secure = arm_is_secure(env);
+
+    if (arm_feature(env, ARM_FEATURE_EL2) && !secure && cur_el == 1) {
+        return env->cp15.vmpidr_el2; /* non-secure EL1 reads VMPIDR_EL2 */
+    }
+    return mpidr_read_val(env); /* otherwise the value mpidr_read_val() builds */
+}
+
 static const ARMCPRegInfo mpidr_cp_reginfo[] = {
     { .name = "MPIDR", .state = ARM_CP_STATE_BOTH,
       .opc0 = 3, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 5,
@@ -3112,6 +3177,17 @@
     { .name = "TCR_EL2", .state = ARM_CP_STATE_BOTH,
       .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 0, .opc2 = 2,
       .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "VTCR_EL2", .state = ARM_CP_STATE_BOTH,
+      .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 2,
+      .access = PL2_RW, .accessfn = access_el3_aa32ns_aa64any,
+      .type = ARM_CP_CONST, .resetvalue = 0 },
+    { .name = "VTTBR", .state = ARM_CP_STATE_AA32,
+      .cp = 15, .opc1 = 6, .crm = 2,
+      .access = PL2_RW, .accessfn = access_el3_aa32ns,
+      .type = ARM_CP_CONST | ARM_CP_64BIT, .resetvalue = 0 },
+    { .name = "VTTBR_EL2", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 0,
+      .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
     { .name = "SCTLR_EL2", .state = ARM_CP_STATE_BOTH,
       .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 0, .opc2 = 0,
       .access = PL2_RW, .type = ARM_CP_CONST, .resetvalue = 0 },
@@ -3246,6 +3322,24 @@
       .access = PL2_RW, .writefn = vmsa_tcr_el1_write,
       .resetfn = vmsa_ttbcr_reset, .raw_writefn = raw_write,
       .fieldoffset = offsetof(CPUARMState, cp15.tcr_el[2]) },
+    { .name = "VTCR", .state = ARM_CP_STATE_AA32,
+      .cp = 15, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 2,
+      .access = PL2_RW, .accessfn = access_el3_aa32ns,
+      .fieldoffset = offsetof(CPUARMState, cp15.vtcr_el2) },
+    { .name = "VTCR_EL2", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 2,
+      .access = PL2_RW, .type = ARM_CP_ALIAS,
+      .fieldoffset = offsetof(CPUARMState, cp15.vtcr_el2) },
+    { .name = "VTTBR", .state = ARM_CP_STATE_AA32,
+      .cp = 15, .opc1 = 6, .crm = 2,
+      .type = ARM_CP_64BIT | ARM_CP_ALIAS,
+      .access = PL2_RW, .accessfn = access_el3_aa32ns,
+      .fieldoffset = offsetof(CPUARMState, cp15.vttbr_el2),
+      .writefn = vttbr_write },
+    { .name = "VTTBR_EL2", .state = ARM_CP_STATE_AA64,
+      .opc0 = 3, .opc1 = 4, .crn = 2, .crm = 1, .opc2 = 0,
+      .access = PL2_RW, .writefn = vttbr_write,
+      .fieldoffset = offsetof(CPUARMState, cp15.vttbr_el2) },
     { .name = "SCTLR_EL2", .state = ARM_CP_STATE_BOTH,
       .opc0 = 3, .opc1 = 4, .crn = 1, .crm = 0, .opc2 = 0,
       .access = PL2_RW, .raw_writefn = raw_write, .writefn = sctlr_write,
@@ -4050,6 +4144,30 @@
         define_arm_cp_regs(cpu, v8_cp_reginfo);
     }
     if (arm_feature(env, ARM_FEATURE_EL2)) {
+        uint64_t vmpidr_def = mpidr_read_val(env);
+        ARMCPRegInfo vpidr_regs[] = {
+            { .name = "VPIDR", .state = ARM_CP_STATE_AA32,
+              .cp = 15, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 0,
+              .access = PL2_RW, .accessfn = access_el3_aa32ns,
+              .resetvalue = cpu->midr,
+              .fieldoffset = offsetof(CPUARMState, cp15.vpidr_el2) },
+            { .name = "VPIDR_EL2", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 0,
+              .access = PL2_RW, .resetvalue = cpu->midr,
+              .fieldoffset = offsetof(CPUARMState, cp15.vpidr_el2) },
+            { .name = "VMPIDR", .state = ARM_CP_STATE_AA32,
+              .cp = 15, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 5,
+              .access = PL2_RW, .accessfn = access_el3_aa32ns,
+              .resetvalue = vmpidr_def,
+              .fieldoffset = offsetof(CPUARMState, cp15.vmpidr_el2) },
+            { .name = "VMPIDR_EL2", .state = ARM_CP_STATE_AA64,
+              .opc0 = 3, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 5,
+              .access = PL2_RW,
+              .resetvalue = vmpidr_def,
+              .fieldoffset = offsetof(CPUARMState, cp15.vmpidr_el2) },
+            REGINFO_SENTINEL
+        };
+        define_arm_cp_regs(cpu, vpidr_regs);
         define_arm_cp_regs(cpu, el2_cp_reginfo);
         /* RVBAR_EL2 is only implemented if EL2 is the highest EL */
         if (!arm_feature(env, ARM_FEATURE_EL3)) {
@@ -4065,6 +4183,23 @@
          * register the no_el2 reginfos.
          */
         if (arm_feature(env, ARM_FEATURE_EL3)) {
+            /* When EL3 exists but not EL2, VPIDR and VMPIDR take the value
+             * of MIDR_EL1 and MPIDR_EL1.
+             */
+            ARMCPRegInfo vpidr_regs[] = {
+                { .name = "VPIDR_EL2", .state = ARM_CP_STATE_BOTH,
+                  .opc0 = 3, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 0,
+                  .access = PL2_RW, .accessfn = access_el3_aa32ns_aa64any,
+                  .type = ARM_CP_CONST, .resetvalue = cpu->midr,
+                  .fieldoffset = offsetof(CPUARMState, cp15.vpidr_el2) },
+                { .name = "VMPIDR_EL2", .state = ARM_CP_STATE_BOTH,
+                  .opc0 = 3, .opc1 = 4, .crn = 0, .crm = 0, .opc2 = 5,
+                  .access = PL2_RW, .accessfn = access_el3_aa32ns_aa64any,
+                  .type = ARM_CP_NO_RAW,
+                  .writefn = arm_cp_write_ignore, .readfn = mpidr_read },
+                REGINFO_SENTINEL
+            };
+            define_arm_cp_regs(cpu, vpidr_regs);
             define_arm_cp_regs(cpu, el3_no_el2_cp_reginfo);
         }
     }
@@ -4142,6 +4277,7 @@
               .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = CP_ANY,
               .access = PL1_R, .resetvalue = cpu->midr,
               .writefn = arm_cp_write_ignore, .raw_writefn = raw_write,
+              .readfn = midr_read,
               .fieldoffset = offsetof(CPUARMState, cp15.c0_cpuid),
               .type = ARM_CP_OVERRIDE },
             /* crn = 0 op1 = 0 crm = 3..7 : currently unassigned; we RAZ. */
@@ -4165,7 +4301,9 @@
         ARMCPRegInfo id_v8_midr_cp_reginfo[] = {
             { .name = "MIDR_EL1", .state = ARM_CP_STATE_BOTH,
               .opc0 = 3, .opc1 = 0, .crn = 0, .crm = 0, .opc2 = 0,
-              .access = PL1_R, .type = ARM_CP_CONST, .resetvalue = cpu->midr },
+              .access = PL1_R, .type = ARM_CP_NO_RAW, .resetvalue = cpu->midr,
+              .fieldoffset = offsetof(CPUARMState, cp15.c0_cpuid),
+              .readfn = midr_read },
             /* crn = 0 op1 = 0 crm = 0 op2 = 4,7 : AArch32 aliases of MIDR */
             { .name = "MIDR", .type = ARM_CP_ALIAS | ARM_CP_CONST,
               .cp = 15, .crn = 0, .crm = 0, .opc1 = 0, .opc2 = 4,
@@ -5741,8 +5879,7 @@
 static inline TCR *regime_tcr(CPUARMState *env, ARMMMUIdx mmu_idx)
 {
     if (mmu_idx == ARMMMUIdx_S2NS) {
-        /* TODO: return VTCR_EL2 */
-        g_assert_not_reached();
+        return &env->cp15.vtcr_el2; /* S2NS has its own control reg */
     }
     return &env->cp15.tcr_el[regime_el(env, mmu_idx)];
 }
@@ -5752,8 +5889,7 @@
                                    int ttbrn)
 {
     if (mmu_idx == ARMMMUIdx_S2NS) {
-        /* TODO: return VTTBR_EL2 */
-        g_assert_not_reached();
+        return env->cp15.vttbr_el2;
     }
     if (ttbrn == 0) {
         return env->cp15.ttbr0_el[regime_el(env, mmu_idx)];
@@ -6275,7 +6411,7 @@
     /* Read an LPAE long-descriptor translation table. */
     MMUFaultType fault_type = translation_fault;
     uint32_t level = 1;
-    uint32_t epd;
+    uint32_t epd = 0;
     int32_t tsz;
     uint32_t tg;
     uint64_t ttbr;
@@ -6301,7 +6437,9 @@
     if (arm_el_is_aa64(env, el)) {
         va_size = 64;
         if (el > 1) {
-            tbi = extract64(tcr->raw_tcr, 20, 1);
+            if (mmu_idx != ARMMMUIdx_S2NS) {
+                tbi = extract64(tcr->raw_tcr, 20, 1);
+            }
         } else {
             if (extract64(address, 55, 1)) {
                 tbi = extract64(tcr->raw_tcr, 38, 1);
@@ -6367,7 +6505,9 @@
      */
     if (ttbr_select == 0) {
         ttbr = regime_ttbr(env, mmu_idx, 0);
-        epd = extract32(tcr->raw_tcr, 7, 1);
+        if (el < 2) {
+            epd = extract32(tcr->raw_tcr, 7, 1);
+        }
         tsz = t0sz;
 
         tg = extract32(tcr->raw_tcr, 14, 2);
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index faece2c..ec0936c 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -40,16 +40,9 @@
 
 static TCGv_i64 cpu_X[32];
 static TCGv_i64 cpu_pc;
-static TCGv_i32 cpu_NF, cpu_ZF, cpu_CF, cpu_VF;
 
 /* Load/store exclusive handling */
-static TCGv_i64 cpu_exclusive_addr;
-static TCGv_i64 cpu_exclusive_val;
 static TCGv_i64 cpu_exclusive_high;
-#ifdef CONFIG_USER_ONLY
-static TCGv_i64 cpu_exclusive_test;
-static TCGv_i32 cpu_exclusive_info;
-#endif
 
 static const char *regnames[] = {
     "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
@@ -105,23 +98,8 @@
                                           regnames[i]);
     }
 
-    cpu_NF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, NF), "NF");
-    cpu_ZF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, ZF), "ZF");
-    cpu_CF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, CF), "CF");
-    cpu_VF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, VF), "VF");
-
-    cpu_exclusive_addr = tcg_global_mem_new_i64(TCG_AREG0,
-        offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
-    cpu_exclusive_val = tcg_global_mem_new_i64(TCG_AREG0,
-        offsetof(CPUARMState, exclusive_val), "exclusive_val");
     cpu_exclusive_high = tcg_global_mem_new_i64(TCG_AREG0,
         offsetof(CPUARMState, exclusive_high), "exclusive_high");
-#ifdef CONFIG_USER_ONLY
-    cpu_exclusive_test = tcg_global_mem_new_i64(TCG_AREG0,
-        offsetof(CPUARMState, exclusive_test), "exclusive_test");
-    cpu_exclusive_info = tcg_global_mem_new_i32(TCG_AREG0,
-        offsetof(CPUARMState, exclusive_info), "exclusive_info");
-#endif
 }
 
 static inline ARMMMUIdx get_a64_user_mem_index(DisasContext *s)
@@ -189,6 +167,31 @@
     tcg_gen_movi_i64(cpu_pc, val);
 }
 
+typedef struct DisasCompare64 {
+    TCGCond cond;   /* TCG condition; users compare value against zero */
+    TCGv_i64 value; /* temp set up by a64_test_cc(), freed by a64_free_cc() */
+} DisasCompare64;
+
+static void a64_test_cc(DisasCompare64 *c64, int cc)
+{
+    DisasCompare c32;
+
+    arm_test_cc(&c32, cc); /* 32-bit condition + value for cc */
+
+    /* Sign-extend the 32-bit value so that the GE/LT comparisons work
+     * properly.  The NE/EQ comparisons are also fine with this choice.  */
+    c64->cond = c32.cond;
+    c64->value = tcg_temp_new_i64(); /* released by a64_free_cc() */
+    tcg_gen_ext_i32_i64(c64->value, c32.value);
+
+    arm_free_cc(&c32); /* c64->value is a separate temp, so c32 can go */
+}
+
+static void a64_free_cc(DisasCompare64 *c64)
+{
+    tcg_temp_free_i64(c64->value); /* the temp allocated in a64_test_cc() */
+}
+
 static void gen_exception_internal(int excp)
 {
     TCGv_i32 tcg_excp = tcg_const_i32(excp);
@@ -526,13 +529,8 @@
  */
 static inline void gen_set_NZ64(TCGv_i64 result)
 {
-    TCGv_i64 flag = tcg_temp_new_i64();
-
-    tcg_gen_setcondi_i64(TCG_COND_NE, flag, result, 0);
-    tcg_gen_extrl_i64_i32(cpu_ZF, flag);
-    tcg_gen_shri_i64(flag, result, 32);
-    tcg_gen_extrl_i64_i32(cpu_NF, flag);
-    tcg_temp_free_i64(flag);
+    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result); /* ZF = lo, NF = hi half */
+    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF); /* ZF == 0 iff result == 0 */
 }
 
 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
@@ -542,7 +540,7 @@
         gen_set_NZ64(result);
     } else {
         tcg_gen_extrl_i64_i32(cpu_ZF, result);
-        tcg_gen_extrl_i64_i32(cpu_NF, result);
+        tcg_gen_mov_i32(cpu_NF, cpu_ZF);
     }
     tcg_gen_movi_i32(cpu_CF, 0);
     tcg_gen_movi_i32(cpu_VF, 0);
@@ -568,8 +566,7 @@
         tcg_gen_xor_i64(tmp, t0, t1);
         tcg_gen_andc_i64(flag, flag, tmp);
         tcg_temp_free_i64(tmp);
-        tcg_gen_shri_i64(flag, flag, 32);
-        tcg_gen_extrl_i64_i32(cpu_VF, flag);
+        tcg_gen_extrh_i64_i32(cpu_VF, flag);
 
         tcg_gen_mov_i64(dest, result);
         tcg_temp_free_i64(result);
@@ -617,8 +614,7 @@
         tcg_gen_xor_i64(tmp, t0, t1);
         tcg_gen_and_i64(flag, flag, tmp);
         tcg_temp_free_i64(tmp);
-        tcg_gen_shri_i64(flag, flag, 32);
-        tcg_gen_extrl_i64_i32(cpu_VF, flag);
+        tcg_gen_extrh_i64_i32(cpu_VF, flag);
         tcg_gen_mov_i64(dest, result);
         tcg_temp_free_i64(flag);
         tcg_temp_free_i64(result);
@@ -677,8 +673,7 @@
         tcg_gen_xor_i64(vf_64, result, t0);
         tcg_gen_xor_i64(tmp, t0, t1);
         tcg_gen_andc_i64(vf_64, vf_64, tmp);
-        tcg_gen_shri_i64(vf_64, vf_64, 32);
-        tcg_gen_extrl_i64_i32(cpu_VF, vf_64);
+        tcg_gen_extrh_i64_i32(cpu_VF, vf_64);
 
         tcg_gen_mov_i64(dest, result);
 
@@ -3012,9 +3007,51 @@
     }
 
     tcg_rd = cpu_reg(s, rd);
-    tcg_tmp = read_cpu_reg(s, rn, sf);
 
-    /* OPTME: probably worth recognizing common cases of ext{8,16,32}{u,s} */
+    /* Suppress the zero-extend for !sf.  Since RI and SI are constrained
+       to be smaller than bitsize, we'll never reference data outside the
+       low 32-bits anyway.  */
+    tcg_tmp = read_cpu_reg(s, rn, 1);
+
+    /* Recognize the common aliases.  */
+    if (opc == 0) { /* SBFM */
+        if (ri == 0) {
+            if (si == 7) { /* SXTB */
+                tcg_gen_ext8s_i64(tcg_rd, tcg_tmp);
+                goto done;
+            } else if (si == 15) { /* SXTH */
+                tcg_gen_ext16s_i64(tcg_rd, tcg_tmp);
+                goto done;
+            } else if (si == 31) { /* SXTW */
+                tcg_gen_ext32s_i64(tcg_rd, tcg_tmp);
+                goto done;
+            }
+        }
+        if (si == 63 || (si == 31 && ri <= si)) { /* ASR */
+            if (si == 31) {
+                tcg_gen_ext32s_i64(tcg_tmp, tcg_tmp);
+            }
+            tcg_gen_sari_i64(tcg_rd, tcg_tmp, ri);
+            goto done;
+        }
+    } else if (opc == 2) { /* UBFM */
+        if (ri == 0) { /* UXTB, UXTH, plus non-canonical AND */
+            tcg_gen_andi_i64(tcg_rd, tcg_tmp, bitmask64(si + 1));
+            return;
+        }
+        if (si == 63 || (si == 31 && ri <= si)) { /* LSR */
+            if (si == 31) {
+                tcg_gen_ext32u_i64(tcg_tmp, tcg_tmp);
+            }
+            tcg_gen_shri_i64(tcg_rd, tcg_tmp, ri);
+            return;
+        }
+        if (si + 1 == ri && si != bitsize - 1) { /* LSL */
+            int shift = bitsize - 1 - si;
+            tcg_gen_shli_i64(tcg_rd, tcg_tmp, shift);
+            goto done;
+        }
+    }
 
     if (opc != 1) { /* SBFM or UBFM */
         tcg_gen_movi_i64(tcg_rd, 0);
@@ -3039,6 +3076,7 @@
         tcg_gen_sari_i64(tcg_rd, tcg_rd, 64 - (pos + len));
     }
 
+ done:
     if (!sf) { /* zero extend final result */
         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
     }
@@ -3071,17 +3109,7 @@
 
         tcg_rd = cpu_reg(s, rd);
 
-        if (imm) {
-            /* OPTME: we can special case rm==rn as a rotate */
-            tcg_rm = read_cpu_reg(s, rm, sf);
-            tcg_rn = read_cpu_reg(s, rn, sf);
-            tcg_gen_shri_i64(tcg_rm, tcg_rm, imm);
-            tcg_gen_shli_i64(tcg_rn, tcg_rn, bitsize - imm);
-            tcg_gen_or_i64(tcg_rd, tcg_rm, tcg_rn);
-            if (!sf) {
-                tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
-            }
-        } else {
+        if (unlikely(imm == 0)) {
             /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
              * so an extract from bit 0 is a special case.
              */
@@ -3090,8 +3118,27 @@
             } else {
                 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
             }
+        } else if (rm == rn) { /* ROR */
+            tcg_rm = cpu_reg(s, rm);
+            if (sf) {
+                tcg_gen_rotri_i64(tcg_rd, tcg_rm, imm);
+            } else {
+                TCGv_i32 tmp = tcg_temp_new_i32();
+                tcg_gen_extrl_i64_i32(tmp, tcg_rm);
+                tcg_gen_rotri_i32(tmp, tmp, imm);
+                tcg_gen_extu_i32_i64(tcg_rd, tmp);
+                tcg_temp_free_i32(tmp);
+            }
+        } else {
+            tcg_rm = read_cpu_reg(s, rm, sf);
+            tcg_rn = read_cpu_reg(s, rn, sf);
+            tcg_gen_shri_i64(tcg_rm, tcg_rm, imm);
+            tcg_gen_shli_i64(tcg_rn, tcg_rn, bitsize - imm);
+            tcg_gen_or_i64(tcg_rd, tcg_rm, tcg_rn);
+            if (!sf) {
+                tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
+            }
         }
-
     }
 }
 
@@ -3567,8 +3614,9 @@
 static void disas_cc(DisasContext *s, uint32_t insn)
 {
     unsigned int sf, op, y, cond, rn, nzcv, is_imm;
-    TCGLabel *label_continue = NULL;
+    TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
     TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
+    DisasCompare c;
 
     if (!extract32(insn, 29, 1)) {
         unallocated_encoding(s);
@@ -3586,19 +3634,13 @@
     rn = extract32(insn, 5, 5);
     nzcv = extract32(insn, 0, 4);
 
-    if (cond < 0x0e) { /* not always */
-        TCGLabel *label_match = gen_new_label();
-        label_continue = gen_new_label();
-        arm_gen_test_cc(cond, label_match);
-        /* nomatch: */
-        tcg_tmp = tcg_temp_new_i64();
-        tcg_gen_movi_i64(tcg_tmp, nzcv << 28);
-        gen_set_nzcv(tcg_tmp);
-        tcg_temp_free_i64(tcg_tmp);
-        tcg_gen_br(label_continue);
-        gen_set_label(label_match);
-    }
-    /* match, or condition is always */
+    /* Set T0 = !COND.  */
+    tcg_t0 = tcg_temp_new_i32();
+    arm_test_cc(&c, cond);
+    tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
+    arm_free_cc(&c);
+
+    /* Load the arguments for the new comparison.  */
     if (is_imm) {
         tcg_y = new_tmp_a64(s);
         tcg_gen_movi_i64(tcg_y, y);
@@ -3607,6 +3649,7 @@
     }
     tcg_rn = cpu_reg(s, rn);
 
+    /* Set the flags for the new comparison.  */
     tcg_tmp = tcg_temp_new_i64();
     if (op) {
         gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
@@ -3615,9 +3658,55 @@
     }
     tcg_temp_free_i64(tcg_tmp);
 
-    if (cond < 0x0e) { /* continue */
-        gen_set_label(label_continue);
+    /* If COND was false, force the flags to #nzcv.  Compute two masks
+     * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
+     * For tcg hosts that support ANDC, we can make do with just T1.
+     * In either case, allow the tcg optimizer to delete any unused mask.
+     */
+    tcg_t1 = tcg_temp_new_i32();
+    tcg_t2 = tcg_temp_new_i32();
+    tcg_gen_neg_i32(tcg_t1, tcg_t0);
+    tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
+
+    if (nzcv & 8) { /* N */
+        tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
+    } else {
+        if (TCG_TARGET_HAS_andc_i32) {
+            tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
+        } else {
+            tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
+        }
     }
+    if (nzcv & 4) { /* Z */
+        if (TCG_TARGET_HAS_andc_i32) {
+            tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
+        } else {
+            tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
+        }
+    } else {
+        tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
+    }
+    if (nzcv & 2) { /* C */
+        tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
+    } else {
+        if (TCG_TARGET_HAS_andc_i32) {
+            tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
+        } else {
+            tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
+        }
+    }
+    if (nzcv & 1) { /* V */
+        tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
+    } else {
+        if (TCG_TARGET_HAS_andc_i32) {
+            tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
+        } else {
+            tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
+        }
+    }
+    tcg_temp_free_i32(tcg_t0);
+    tcg_temp_free_i32(tcg_t1);
+    tcg_temp_free_i32(tcg_t2);
 }
 
 /* C3.5.6 Conditional select
@@ -3629,7 +3718,8 @@
 static void disas_cond_select(DisasContext *s, uint32_t insn)
 {
     unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
-    TCGv_i64 tcg_rd, tcg_src;
+    TCGv_i64 tcg_rd, zero;
+    DisasCompare64 c;
 
     if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
         /* S == 1 or op2<1> == 1 */
@@ -3644,48 +3734,35 @@
     rn = extract32(insn, 5, 5);
     rd = extract32(insn, 0, 5);
 
-    if (rd == 31) {
-        /* silly no-op write; until we use movcond we must special-case
-         * this to avoid a dead temporary across basic blocks.
-         */
-        return;
-    }
-
     tcg_rd = cpu_reg(s, rd);
 
-    if (cond >= 0x0e) { /* condition "always" */
-        tcg_src = read_cpu_reg(s, rn, sf);
-        tcg_gen_mov_i64(tcg_rd, tcg_src);
+    a64_test_cc(&c, cond);
+    zero = tcg_const_i64(0);
+
+    if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
+        /* CSET & CSETM.  */
+        tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
+        if (else_inv) {
+            tcg_gen_neg_i64(tcg_rd, tcg_rd);
+        }
     } else {
-        /* OPTME: we could use movcond here, at the cost of duplicating
-         * a lot of the arm_gen_test_cc() logic.
-         */
-        TCGLabel *label_match = gen_new_label();
-        TCGLabel *label_continue = gen_new_label();
-
-        arm_gen_test_cc(cond, label_match);
-        /* nomatch: */
-        tcg_src = cpu_reg(s, rm);
-
+        TCGv_i64 t_true = cpu_reg(s, rn);
+        TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
         if (else_inv && else_inc) {
-            tcg_gen_neg_i64(tcg_rd, tcg_src);
+            tcg_gen_neg_i64(t_false, t_false);
         } else if (else_inv) {
-            tcg_gen_not_i64(tcg_rd, tcg_src);
+            tcg_gen_not_i64(t_false, t_false);
         } else if (else_inc) {
-            tcg_gen_addi_i64(tcg_rd, tcg_src, 1);
-        } else {
-            tcg_gen_mov_i64(tcg_rd, tcg_src);
+            tcg_gen_addi_i64(t_false, t_false, 1);
         }
-        if (!sf) {
-            tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
-        }
-        tcg_gen_br(label_continue);
-        /* match: */
-        gen_set_label(label_match);
-        tcg_src = read_cpu_reg(s, rn, sf);
-        tcg_gen_mov_i64(tcg_rd, tcg_src);
-        /* continue: */
-        gen_set_label(label_continue);
+        tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
+    }
+
+    tcg_temp_free_i64(zero);
+    a64_free_cc(&c);
+
+    if (!sf) {
+        tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
     }
 }
 
@@ -4172,20 +4249,6 @@
     }
 }
 
-/* copy src FP register to dst FP register; type specifies single or double */
-static void gen_mov_fp2fp(DisasContext *s, int type, int dst, int src)
-{
-    if (type) {
-        TCGv_i64 v = read_fp_dreg(s, src);
-        write_fp_dreg(s, dst, v);
-        tcg_temp_free_i64(v);
-    } else {
-        TCGv_i32 v = read_fp_sreg(s, src);
-        write_fp_sreg(s, dst, v);
-        tcg_temp_free_i32(v);
-    }
-}
-
 /* C3.6.24 Floating point conditional select
  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
  * +---+---+---+-----------+------+---+------+------+-----+------+------+
@@ -4195,7 +4258,8 @@
 static void disas_fp_csel(DisasContext *s, uint32_t insn)
 {
     unsigned int mos, type, rm, cond, rn, rd;
-    TCGLabel *label_continue = NULL;
+    TCGv_i64 t_true, t_false, t_zero;
+    DisasCompare64 c;
 
     mos = extract32(insn, 29, 3);
     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
@@ -4213,21 +4277,23 @@
         return;
     }
 
-    if (cond < 0x0e) { /* not always */
-        TCGLabel *label_match = gen_new_label();
-        label_continue = gen_new_label();
-        arm_gen_test_cc(cond, label_match);
-        /* nomatch: */
-        gen_mov_fp2fp(s, type, rd, rm);
-        tcg_gen_br(label_continue);
-        gen_set_label(label_match);
-    }
+    /* Zero extend sreg inputs to 64 bits now.  */
+    t_true = tcg_temp_new_i64();
+    t_false = tcg_temp_new_i64();
+    read_vec_element(s, t_true, rn, 0, type ? MO_64 : MO_32);
+    read_vec_element(s, t_false, rm, 0, type ? MO_64 : MO_32);
 
-    gen_mov_fp2fp(s, type, rd, rn);
+    a64_test_cc(&c, cond);
+    t_zero = tcg_const_i64(0);
+    tcg_gen_movcond_i64(c.cond, t_true, c.value, t_zero, t_true, t_false);
+    tcg_temp_free_i64(t_zero);
+    tcg_temp_free_i64(t_false);
+    a64_free_cc(&c);
 
-    if (cond < 0x0e) { /* continue */
-        gen_set_label(label_continue);
-    }
+    /* Note that sregs write back zeros to the high bits,
+       and we've already done the zero-extension.  */
+    write_fp_dreg(s, rd, t_true);
+    tcg_temp_free_i64(t_true);
 }
 
 /* C3.6.25 Floating-point data-processing (1 source) - single precision */
@@ -7701,10 +7767,8 @@
             } else {
                 TCGv_i32 tcg_lo = tcg_temp_new_i32();
                 TCGv_i32 tcg_hi = tcg_temp_new_i32();
-                tcg_gen_extrl_i64_i32(tcg_lo, tcg_op);
+                tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
                 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, cpu_env);
-                tcg_gen_shri_i64(tcg_op, tcg_op, 32);
-                tcg_gen_extrl_i64_i32(tcg_hi, tcg_op);
                 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, cpu_env);
                 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
                 tcg_temp_free_i32(tcg_lo);
@@ -8610,16 +8674,10 @@
     }
 }
 
-static void do_narrow_high_u32(TCGv_i32 res, TCGv_i64 in)
-{
-    tcg_gen_shri_i64(in, in, 32);
-    tcg_gen_extrl_i64_i32(res, in);
-}
-
 static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
 {
     tcg_gen_addi_i64(in, in, 1U << 31);
-    do_narrow_high_u32(res, in);
+    tcg_gen_extrh_i64_i32(res, in); /* res = high 32 bits of in */
 }
 
 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
@@ -8638,7 +8696,7 @@
               gen_helper_neon_narrow_round_high_u8 },
             { gen_helper_neon_narrow_high_u16,
               gen_helper_neon_narrow_round_high_u16 },
-            { do_narrow_high_u32, do_narrow_round_high_u32 },
+            { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
         };
         NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
 
diff --git a/target-arm/translate.c b/target-arm/translate.c
index ae70577..84a21ac 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -64,12 +64,12 @@
 /* We reuse the same 64-bit temporaries for efficiency.  */
 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
 static TCGv_i32 cpu_R[16];
-static TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
-static TCGv_i64 cpu_exclusive_addr;
-static TCGv_i64 cpu_exclusive_val;
+TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
+TCGv_i64 cpu_exclusive_addr;
+TCGv_i64 cpu_exclusive_val;
 #ifdef CONFIG_USER_ONLY
-static TCGv_i64 cpu_exclusive_test;
-static TCGv_i32 cpu_exclusive_info;
+TCGv_i64 cpu_exclusive_test;
+TCGv_i32 cpu_exclusive_info;
 #endif
 
 /* FIXME:  These should be removed.  */
@@ -738,81 +738,113 @@
 #undef PAS_OP
 
 /*
- * generate a conditional branch based on ARM condition code cc.
+ * Generate a comparison (TCG condition and value) for ARM condition code cc.
  * This is common between ARM and Aarch64 targets.
  */
-void arm_gen_test_cc(int cc, TCGLabel *label)
+void arm_test_cc(DisasCompare *cmp, int cc)
 {
-    TCGv_i32 tmp;
-    TCGLabel *inv;
+    TCGv_i32 value;
+    TCGCond cond;
+    bool global = true;
 
     switch (cc) {
     case 0: /* eq: Z */
-        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, label);
-        break;
     case 1: /* ne: !Z */
-        tcg_gen_brcondi_i32(TCG_COND_NE, cpu_ZF, 0, label);
+        cond = TCG_COND_EQ;
+        value = cpu_ZF;
         break;
+
     case 2: /* cs: C */
-        tcg_gen_brcondi_i32(TCG_COND_NE, cpu_CF, 0, label);
-        break;
     case 3: /* cc: !C */
-        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_CF, 0, label);
+        cond = TCG_COND_NE;
+        value = cpu_CF;
         break;
+
     case 4: /* mi: N */
-        tcg_gen_brcondi_i32(TCG_COND_LT, cpu_NF, 0, label);
-        break;
     case 5: /* pl: !N */
-        tcg_gen_brcondi_i32(TCG_COND_GE, cpu_NF, 0, label);
+        cond = TCG_COND_LT;
+        value = cpu_NF;
         break;
+
     case 6: /* vs: V */
-        tcg_gen_brcondi_i32(TCG_COND_LT, cpu_VF, 0, label);
-        break;
     case 7: /* vc: !V */
-        tcg_gen_brcondi_i32(TCG_COND_GE, cpu_VF, 0, label);
+        cond = TCG_COND_LT;
+        value = cpu_VF;
         break;
+
     case 8: /* hi: C && !Z */
-        inv = gen_new_label();
-        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_CF, 0, inv);
-        tcg_gen_brcondi_i32(TCG_COND_NE, cpu_ZF, 0, label);
-        gen_set_label(inv);
+    case 9: /* ls: !C || Z -> !(C && !Z) */
+        cond = TCG_COND_NE;
+        value = tcg_temp_new_i32();
+        global = false;
+        /* CF is 1 for C, so -CF is an all-bits-set mask for C;
+           ZF is non-zero for !Z; so AND the two subexpressions.  */
+        tcg_gen_neg_i32(value, cpu_CF);
+        tcg_gen_and_i32(value, value, cpu_ZF);
         break;
-    case 9: /* ls: !C || Z */
-        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_CF, 0, label);
-        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, label);
-        break;
+
     case 10: /* ge: N == V -> N ^ V == 0 */
-        tmp = tcg_temp_new_i32();
-        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
-        tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
-        tcg_temp_free_i32(tmp);
-        break;
     case 11: /* lt: N != V -> N ^ V != 0 */
-        tmp = tcg_temp_new_i32();
-        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
-        tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
-        tcg_temp_free_i32(tmp);
+        /* Since we're only interested in the sign bit, == 0 is >= 0.  */
+        cond = TCG_COND_GE;
+        value = tcg_temp_new_i32();
+        global = false;
+        tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
         break;
+
     case 12: /* gt: !Z && N == V */
-        inv = gen_new_label();
-        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, inv);
-        tmp = tcg_temp_new_i32();
-        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
-        tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
-        tcg_temp_free_i32(tmp);
-        gen_set_label(inv);
-        break;
     case 13: /* le: Z || N != V */
-        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, label);
-        tmp = tcg_temp_new_i32();
-        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
-        tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
-        tcg_temp_free_i32(tmp);
+        cond = TCG_COND_NE;
+        value = tcg_temp_new_i32();
+        global = false;
+        /* (N == V) is equal to the sign bit of ~(NF ^ VF).  Propagate
+         * the sign bit then AND with ZF to yield the result.  */
+        tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
+        tcg_gen_sari_i32(value, value, 31);
+        tcg_gen_andc_i32(value, cpu_ZF, value);
         break;
+
+    case 14: /* always */
+    case 15: /* always */
+        /* Use the ALWAYS condition, which will fold early.
+         * It doesn't matter what we use for the value.  */
+        cond = TCG_COND_ALWAYS;
+        value = cpu_ZF;
+        goto no_invert;
+
     default:
         fprintf(stderr, "Bad condition code 0x%x\n", cc);
         abort();
     }
+
+    if (cc & 1) {
+        cond = tcg_invert_cond(cond);
+    }
+
+ no_invert:
+    cmp->cond = cond;
+    cmp->value = value;
+    cmp->value_global = global;
+}
+
+void arm_free_cc(DisasCompare *cmp)
+{
+    if (!cmp->value_global) { /* only free temps, never the flag globals */
+        tcg_temp_free_i32(cmp->value);
+    }
+}
+
+void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
+{
+    tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label); /* value <cond> 0 */
+}
+
+void arm_gen_test_cc(int cc, TCGLabel *label)
+{
+    DisasCompare cmp;
+    arm_test_cc(&cmp, cc);    /* build the comparison for cc */
+    arm_jump_cc(&cmp, label); /* branch to label when it holds */
+    arm_free_cc(&cmp);        /* release any temp it allocated */
+}
 
 static const uint8_t table_logic_cc[16] = {
diff --git a/target-arm/translate.h b/target-arm/translate.h
index 4b618a4..b8fe37a 100644
--- a/target-arm/translate.h
+++ b/target-arm/translate.h
@@ -63,7 +63,21 @@
     TCGv_i64 tmp_a64[TMP_A64_MAX];
 } DisasContext;
 
+typedef struct DisasCompare {
+    TCGCond cond;      /* TCG condition; users compare value against zero */
+    TCGv_i32 value;    /* a flag global or a temp computed by arm_test_cc() */
+    bool value_global; /* true if value is a global; arm_free_cc() skips it */
+} DisasCompare;
+
+/* Share the TCG globals common between 32 and 64 bit modes.  */
 extern TCGv_ptr cpu_env;
+extern TCGv_i32 cpu_NF, cpu_ZF, cpu_CF, cpu_VF;
+extern TCGv_i64 cpu_exclusive_addr;
+extern TCGv_i64 cpu_exclusive_val;
+#ifdef CONFIG_USER_ONLY
+extern TCGv_i64 cpu_exclusive_test;
+extern TCGv_i32 cpu_exclusive_info;
+#endif
 
 static inline int arm_dc_feature(DisasContext *dc, int feature)
 {
@@ -136,6 +150,9 @@
 }
 #endif
 
+void arm_test_cc(DisasCompare *cmp, int cc);
+void arm_free_cc(DisasCompare *cmp);
+void arm_jump_cc(DisasCompare *cmp, TCGLabel *label);
 void arm_gen_test_cc(int cc, TCGLabel *label);
 
 #endif /* TARGET_ARM_TRANSLATE_H */