Merge tag 'dirtylimit-dirtyrate-pull-request-20240617' of https://github.com/newfriday/qemu into staging

dirtylimit-dirtyrate-pull-request-20240617: Fix a segmentation fault

# -----BEGIN PGP SIGNATURE-----
#
# iQIzBAABCAAdFiEEaF0CINwmSCgVLlfC3/Ij1rP+y5wFAmZwVvkACgkQ3/Ij1rP+
# y5xAyQ//T0ABDLvAmtkUWRlRxZvQBDiZdFuWiPv3ntA5GdL04hL9Mlxof0rmMBrM
# VCFLYOzDImy/cf9SawieUIUyKmhY0TN66PEupoJBMm/k+bccOQi/7uuCscau4YjA
# I5f1Ms7GI8tSMyigoPSKmkPO5gvTwptkM3AOtuKs0w/8sFt/FuBWCYi81Xye7eQe
# X5idndqaLbylg0PacoSPARL1xeXUaokpbvpbg3HAIVH1zDNiNSBkVZnysURb/OT1
# wjkY5OtD9s5MCdnqPImkoCn2WXsITtL+5YlGUz3+xUQlG+pHIaJIy4rK+y3v6RgX
# jgvLCLudeVSV//DLYnitp9wrJcpqoINijdvuSSTFyjANN3SsGN9A90TTZSaV5oyg
# TMLBpiGqAWGDnXvRCq3vg3tb8gVhBrpISF0AF+6UvuiyIVIfMJPSvSekEXKfxNs8
# JoqzM1yEjgzr+d6X5+jN0kRm61kcmMP09JOKBHFwx3ZlCuYVr3XeR8YVClMJVqFw
# ZC0WaTSs69ldeU2pHn6d451aMgip+l7ZdDcROCJEGmQxZSc7JXNxcJ9RMRINutTp
# ljw86yTs+tLqrtg6FZ+eSBPJCqHFN6hdn9sXlIgJFV+bIj5dO4M6FeNwWvDo6ZaK
# JwjBlX6FOIwUtGpXaRy+YSECtiEagRsIrFIcrwgYJAL52c59LAc=
# =wVGj
# -----END PGP SIGNATURE-----
# gpg: Signature made Mon 17 Jun 2024 08:32:09 AM PDT
# gpg:                using RSA key 685D0220DC264828152E57C2DFF223D6B3FECB9C
# gpg: Good signature from "Yong Huang <yong.huang@smartx.com>" [unknown]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 685D 0220 DC26 4828 152E  57C2 DFF2 23D6 B3FE CB9C

* tag 'dirtylimit-dirtyrate-pull-request-20240617' of https://github.com/newfriday/qemu:
  migration/dirtyrate: Fix segmentation fault

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
diff --git a/MAINTAINERS b/MAINTAINERS
index 9515562..0f63bcd 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1158,6 +1158,9 @@
 ASPEED BMCs
 M: Cédric Le Goater <clg@kaod.org>
 M: Peter Maydell <peter.maydell@linaro.org>
+R: Steven Lee <steven_lee@aspeedtech.com>
+R: Troy Lee <leetroy@gmail.com>
+R: Jamin Lin <jamin_lin@aspeedtech.com>
 R: Andrew Jeffery <andrew@codeconstruct.com.au>
 R: Joel Stanley <joel@jms.id.au>
 L: qemu-arm@nongnu.org
diff --git a/docs/system/arm/aspeed.rst b/docs/system/arm/aspeed.rst
index b2dea54..cd9559e 100644
--- a/docs/system/arm/aspeed.rst
+++ b/docs/system/arm/aspeed.rst
@@ -1,11 +1,12 @@
-Aspeed family boards (``*-bmc``, ``ast2500-evb``, ``ast2600-evb``)
-==================================================================
+Aspeed family boards (``*-bmc``, ``ast2500-evb``, ``ast2600-evb``, ``ast2700-evb``)
+===================================================================================
 
 The QEMU Aspeed machines model BMCs of various OpenPOWER systems and
 Aspeed evaluation boards. They are based on different releases of the
 Aspeed SoC : the AST2400 integrating an ARM926EJ-S CPU (400MHz), the
-AST2500 with an ARM1176JZS CPU (800MHz) and more recently the AST2600
-with dual cores ARM Cortex-A7 CPUs (1.2GHz).
+AST2500 with an ARM1176JZS CPU (800MHz), the AST2600
+with dual-core ARM Cortex-A7 CPUs (1.2GHz) and more recently the AST2700
+with quad-core 64-bit ARM Cortex-A35 CPUs (1.6GHz).
 
 The SoC comes with RAM, Gigabit ethernet, USB, SD/MMC, USB, SPI, I2C,
 etc.
@@ -38,6 +39,10 @@
 - ``qcom-dc-scm-v1-bmc``   Qualcomm DC-SCM V1 BMC
 - ``qcom-firework-bmc``    Qualcomm Firework BMC
 
+AST2700 SoC based machines :
+
+- ``ast2700-evb``          Aspeed AST2700 Evaluation board (Cortex-A35)
+
 Supported devices
 -----------------
 
@@ -66,6 +71,7 @@
  * eMMC Boot Controller (dummy)
  * PECI Controller (minimal)
  * I3C Controller
+ * Internal Bridge Controller (SLI dummy)
 
 
 Missing devices
@@ -95,6 +101,10 @@
 
    https://github.com/openbmc/openbmc/releases
 
+or directly from the ASPEED Forked OpenBMC GitHub release repository :
+
+   https://github.com/AspeedTech-BMC/openbmc/releases
+
 To boot a kernel directly from a Linux build tree:
 
 .. code-block:: bash
@@ -164,6 +174,27 @@
 
   -M ast2500-evb,bmc-console=uart3
 
+
+To boot the AST2700 machine from the flash image, use an MTD drive :
+
+.. code-block:: bash
+
+  IMGDIR=ast2700-default
+  UBOOT_SIZE=$(stat --format=%s -L ${IMGDIR}/u-boot-nodtb.bin)
+
+  $ qemu-system-aarch64 -M ast2700-evb \
+       -device loader,force-raw=on,addr=0x400000000,file=${IMGDIR}/u-boot-nodtb.bin \
+       -device loader,force-raw=on,addr=$((0x400000000 + ${UBOOT_SIZE})),file=${IMGDIR}/u-boot.dtb \
+       -device loader,force-raw=on,addr=0x430000000,file=${IMGDIR}/bl31.bin \
+       -device loader,force-raw=on,addr=0x430080000,file=${IMGDIR}/optee/tee-raw.bin \
+       -device loader,cpu-num=0,addr=0x430000000 \
+       -device loader,cpu-num=1,addr=0x430000000 \
+       -device loader,cpu-num=2,addr=0x430000000 \
+       -device loader,cpu-num=3,addr=0x430000000 \
+       -smp 4 \
+       -drive file=${IMGDIR}/image-bmc,format=raw,if=mtd \
+       -nographic
+
 Aspeed minibmc family boards (``ast1030-evb``)
 ==================================================================
 
diff --git a/hw/arm/aspeed.c b/hw/arm/aspeed.c
index 93ca87f..40dc0e4 100644
--- a/hw/arm/aspeed.c
+++ b/hw/arm/aspeed.c
@@ -178,6 +178,12 @@
 #define AST2600_EVB_HW_STRAP1 0x000000C0
 #define AST2600_EVB_HW_STRAP2 0x00000003
 
+#ifdef TARGET_AARCH64
+/* AST2700 evb hardware value */
+#define AST2700_EVB_HW_STRAP1 0x000000C0
+#define AST2700_EVB_HW_STRAP2 0x00000003
+#endif
+
 /* Tacoma hardware value */
 #define TACOMA_BMC_HW_STRAP1  0x00000000
 #define TACOMA_BMC_HW_STRAP2  0x00000040
@@ -1588,6 +1594,26 @@
     aspeed_machine_class_init_cpus_defaults(mc);
 }
 
+#ifdef TARGET_AARCH64
+static void aspeed_machine_ast2700_evb_class_init(ObjectClass *oc, void *data)
+{
+    MachineClass *mc = MACHINE_CLASS(oc);
+    AspeedMachineClass *amc = ASPEED_MACHINE_CLASS(oc);
+
+    mc->desc = "Aspeed AST2700 EVB (Cortex-A35)";
+    amc->soc_name  = "ast2700-a0";
+    amc->hw_strap1 = AST2700_EVB_HW_STRAP1;
+    amc->hw_strap2 = AST2700_EVB_HW_STRAP2;
+    amc->fmc_model = "w25q01jvq";
+    amc->spi_model = "w25q512jv";
+    amc->num_cs    = 2;
+    amc->macs_mask = ASPEED_MAC0_ON | ASPEED_MAC1_ON | ASPEED_MAC2_ON;
+    amc->uart_default = ASPEED_DEV_UART12;
+    mc->default_ram_size = 1 * GiB;
+    aspeed_machine_class_init_cpus_defaults(mc);
+}
+#endif
+
 static void aspeed_machine_qcom_dc_scm_v1_class_init(ObjectClass *oc,
                                                      void *data)
 {
@@ -1711,6 +1737,12 @@
         .name           = MACHINE_TYPE_NAME("ast1030-evb"),
         .parent         = TYPE_ASPEED_MACHINE,
         .class_init     = aspeed_minibmc_machine_ast1030_evb_class_init,
+#ifdef TARGET_AARCH64
+    }, {
+        .name          = MACHINE_TYPE_NAME("ast2700-evb"),
+        .parent        = TYPE_ASPEED_MACHINE,
+        .class_init    = aspeed_machine_ast2700_evb_class_init,
+#endif
     }, {
         .name          = TYPE_ASPEED_MACHINE,
         .parent        = TYPE_MACHINE,
diff --git a/hw/arm/aspeed_ast27x0.c b/hw/arm/aspeed_ast27x0.c
new file mode 100644
index 0000000..b6876b4
--- /dev/null
+++ b/hw/arm/aspeed_ast27x0.c
@@ -0,0 +1,648 @@
+/*
+ * ASPEED SoC 27x0 family
+ *
+ * Copyright (C) 2024 ASPEED Technology Inc.
+ *
+ * This code is licensed under the GPL version 2 or later.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Implementation extracted from the AST2600 and adapted for AST27x0.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "hw/misc/unimp.h"
+#include "hw/arm/aspeed_soc.h"
+#include "qemu/module.h"
+#include "qemu/error-report.h"
+#include "hw/i2c/aspeed_i2c.h"
+#include "net/net.h"
+#include "sysemu/sysemu.h"
+#include "hw/intc/arm_gicv3.h"
+#include "qapi/qmp/qlist.h"
+#include "qemu/log.h"
+
+static const hwaddr aspeed_soc_ast2700_memmap[] = {
+    [ASPEED_DEV_SPI_BOOT]  =  0x400000000,
+    [ASPEED_DEV_SRAM]      =  0x10000000,
+    [ASPEED_DEV_SDMC]      =  0x12C00000,
+    [ASPEED_DEV_SCU]       =  0x12C02000,
+    [ASPEED_DEV_SCUIO]     =  0x14C02000,
+    [ASPEED_DEV_UART0]     =  0X14C33000,
+    [ASPEED_DEV_UART1]     =  0X14C33100,
+    [ASPEED_DEV_UART2]     =  0X14C33200,
+    [ASPEED_DEV_UART3]     =  0X14C33300,
+    [ASPEED_DEV_UART4]     =  0X12C1A000,
+    [ASPEED_DEV_UART5]     =  0X14C33400,
+    [ASPEED_DEV_UART6]     =  0X14C33500,
+    [ASPEED_DEV_UART7]     =  0X14C33600,
+    [ASPEED_DEV_UART8]     =  0X14C33700,
+    [ASPEED_DEV_UART9]     =  0X14C33800,
+    [ASPEED_DEV_UART10]    =  0X14C33900,
+    [ASPEED_DEV_UART11]    =  0X14C33A00,
+    [ASPEED_DEV_UART12]    =  0X14C33B00,
+    [ASPEED_DEV_WDT]       =  0x14C37000,
+    [ASPEED_DEV_VUART]     =  0X14C30000,
+    [ASPEED_DEV_FMC]       =  0x14000000,
+    [ASPEED_DEV_SPI0]      =  0x14010000,
+    [ASPEED_DEV_SPI1]      =  0x14020000,
+    [ASPEED_DEV_SPI2]      =  0x14030000,
+    [ASPEED_DEV_SDRAM]     =  0x400000000,
+    [ASPEED_DEV_MII1]      =  0x14040000,
+    [ASPEED_DEV_MII2]      =  0x14040008,
+    [ASPEED_DEV_MII3]      =  0x14040010,
+    [ASPEED_DEV_ETH1]      =  0x14050000,
+    [ASPEED_DEV_ETH2]      =  0x14060000,
+    [ASPEED_DEV_ETH3]      =  0x14070000,
+    [ASPEED_DEV_EMMC]      =  0x12090000,
+    [ASPEED_DEV_INTC]      =  0x12100000,
+    [ASPEED_DEV_SLI]       =  0x12C17000,
+    [ASPEED_DEV_SLIIO]     =  0x14C1E000,
+    [ASPEED_GIC_DIST]      =  0x12200000,
+    [ASPEED_GIC_REDIST]    =  0x12280000,
+};
+
+#define AST2700_MAX_IRQ 288
+
+/* Shared Peripheral Interrupt values below are offset by -32 from datasheet */
+static const int aspeed_soc_ast2700_irqmap[] = {
+    [ASPEED_DEV_UART0]     = 132,
+    [ASPEED_DEV_UART1]     = 132,
+    [ASPEED_DEV_UART2]     = 132,
+    [ASPEED_DEV_UART3]     = 132,
+    [ASPEED_DEV_UART4]     = 8,
+    [ASPEED_DEV_UART5]     = 132,
+    [ASPEED_DEV_UART6]     = 132,
+    [ASPEED_DEV_UART7]     = 132,
+    [ASPEED_DEV_UART8]     = 132,
+    [ASPEED_DEV_UART9]     = 132,
+    [ASPEED_DEV_UART10]    = 132,
+    [ASPEED_DEV_UART11]    = 132,
+    [ASPEED_DEV_UART12]    = 132,
+    [ASPEED_DEV_FMC]       = 131,
+    [ASPEED_DEV_SDMC]      = 0,
+    [ASPEED_DEV_SCU]       = 12,
+    [ASPEED_DEV_ADC]       = 130,
+    [ASPEED_DEV_XDMA]      = 5,
+    [ASPEED_DEV_EMMC]      = 15,
+    [ASPEED_DEV_GPIO]      = 11,
+    [ASPEED_DEV_GPIO_1_8V] = 130,
+    [ASPEED_DEV_RTC]       = 13,
+    [ASPEED_DEV_TIMER1]    = 16,
+    [ASPEED_DEV_TIMER2]    = 17,
+    [ASPEED_DEV_TIMER3]    = 18,
+    [ASPEED_DEV_TIMER4]    = 19,
+    [ASPEED_DEV_TIMER5]    = 20,
+    [ASPEED_DEV_TIMER6]    = 21,
+    [ASPEED_DEV_TIMER7]    = 22,
+    [ASPEED_DEV_TIMER8]    = 23,
+    [ASPEED_DEV_WDT]       = 131,
+    [ASPEED_DEV_PWM]       = 131,
+    [ASPEED_DEV_LPC]       = 128,
+    [ASPEED_DEV_IBT]       = 128,
+    [ASPEED_DEV_I2C]       = 130,
+    [ASPEED_DEV_PECI]      = 133,
+    [ASPEED_DEV_ETH1]      = 132,
+    [ASPEED_DEV_ETH2]      = 132,
+    [ASPEED_DEV_ETH3]      = 132,
+    [ASPEED_DEV_HACE]      = 4,
+    [ASPEED_DEV_KCS]       = 128,
+    [ASPEED_DEV_DP]        = 28,
+    [ASPEED_DEV_I3C]       = 131,
+};
+
+/* GICINT 128 */
+static const int aspeed_soc_ast2700_gic128_intcmap[] = {
+    [ASPEED_DEV_LPC]       = 0,
+    [ASPEED_DEV_IBT]       = 2,
+    [ASPEED_DEV_KCS]       = 4,
+};
+
+/* GICINT 130 */
+static const int aspeed_soc_ast2700_gic130_intcmap[] = {
+    [ASPEED_DEV_I2C]        = 0,
+    [ASPEED_DEV_ADC]        = 16,
+    [ASPEED_DEV_GPIO_1_8V]  = 18,
+};
+
+/* GICINT 131 */
+static const int aspeed_soc_ast2700_gic131_intcmap[] = {
+    [ASPEED_DEV_I3C]       = 0,
+    [ASPEED_DEV_WDT]       = 16,
+    [ASPEED_DEV_FMC]       = 25,
+    [ASPEED_DEV_PWM]       = 29,
+};
+
+/* GICINT 132 */
+static const int aspeed_soc_ast2700_gic132_intcmap[] = {
+    [ASPEED_DEV_ETH1]      = 0,
+    [ASPEED_DEV_ETH2]      = 1,
+    [ASPEED_DEV_ETH3]      = 2,
+    [ASPEED_DEV_UART0]     = 7,
+    [ASPEED_DEV_UART1]     = 8,
+    [ASPEED_DEV_UART2]     = 9,
+    [ASPEED_DEV_UART3]     = 10,
+    [ASPEED_DEV_UART5]     = 11,
+    [ASPEED_DEV_UART6]     = 12,
+    [ASPEED_DEV_UART7]     = 13,
+    [ASPEED_DEV_UART8]     = 14,
+    [ASPEED_DEV_UART9]     = 15,
+    [ASPEED_DEV_UART10]    = 16,
+    [ASPEED_DEV_UART11]    = 17,
+    [ASPEED_DEV_UART12]    = 18,
+};
+
+/* GICINT 133 */
+static const int aspeed_soc_ast2700_gic133_intcmap[] = {
+    [ASPEED_DEV_PECI]      = 4,
+};
+
+/* GICINT 128 ~ 136 */
+struct gic_intc_irq_info {
+    int irq;
+    const int *ptr;
+};
+
+static const struct gic_intc_irq_info aspeed_soc_ast2700_gic_intcmap[] = {
+    {128,  aspeed_soc_ast2700_gic128_intcmap},
+    {129,  NULL},
+    {130,  aspeed_soc_ast2700_gic130_intcmap},
+    {131,  aspeed_soc_ast2700_gic131_intcmap},
+    {132,  aspeed_soc_ast2700_gic132_intcmap},
+    {133,  aspeed_soc_ast2700_gic133_intcmap},
+    {134,  NULL},
+    {135,  NULL},
+    {136,  NULL},
+};
+
+static qemu_irq aspeed_soc_ast2700_get_irq(AspeedSoCState *s, int dev)
+{
+    Aspeed27x0SoCState *a = ASPEED27X0_SOC(s);
+    AspeedSoCClass *sc = ASPEED_SOC_GET_CLASS(s);
+    int i;
+
+    for (i = 0; i < ARRAY_SIZE(aspeed_soc_ast2700_gic_intcmap); i++) {
+        if (sc->irqmap[dev] == aspeed_soc_ast2700_gic_intcmap[i].irq) {
+            assert(aspeed_soc_ast2700_gic_intcmap[i].ptr);
+            return qdev_get_gpio_in(DEVICE(&a->intc.orgates[i]),
+                aspeed_soc_ast2700_gic_intcmap[i].ptr[dev]);
+        }
+    }
+
+    return qdev_get_gpio_in(DEVICE(&a->gic), sc->irqmap[dev]);
+}
+
+static uint64_t aspeed_ram_capacity_read(void *opaque, hwaddr addr,
+                                                    unsigned int size)
+{
+    qemu_log_mask(LOG_GUEST_ERROR,
+                  "%s: DRAM read out of ram size, addr:0x%" PRIx64 "\n",
+                   __func__, addr);
+    return 0;
+}
+
+static void aspeed_ram_capacity_write(void *opaque, hwaddr addr, uint64_t data,
+                                                unsigned int size)
+{
+    AspeedSoCState *s = ASPEED_SOC(opaque);
+    ram_addr_t ram_size;
+    MemTxResult result;
+
+    ram_size = object_property_get_uint(OBJECT(&s->sdmc), "ram-size",
+                                        &error_abort);
+
+    /*
+     * Emulate DDR capacity hardware behavior.
+     * If data is written to an address beyond the RAM size, the
+     * hardware writes it to "address % ram_size" instead.
+     */
+    result = address_space_write(&s->dram_as, addr % ram_size,
+                                 MEMTXATTRS_UNSPECIFIED, &data, 4);
+    if (result != MEMTX_OK) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: DRAM write failed, addr:0x%" HWADDR_PRIx
+                      ", data :0x%" PRIx64  "\n",
+                      __func__, addr % ram_size, data);
+    }
+}
+
+static const MemoryRegionOps aspeed_ram_capacity_ops = {
+    .read = aspeed_ram_capacity_read,
+    .write = aspeed_ram_capacity_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .valid = {
+        .min_access_size = 1,
+        .max_access_size = 8,
+    },
+};
+
+/*
+ * SDMC should be realized first to get correct RAM size and max size
+ * values
+ */
+static bool aspeed_soc_ast2700_dram_init(DeviceState *dev, Error **errp)
+{
+    ram_addr_t ram_size, max_ram_size;
+    Aspeed27x0SoCState *a = ASPEED27X0_SOC(dev);
+    AspeedSoCState *s = ASPEED_SOC(dev);
+    AspeedSoCClass *sc = ASPEED_SOC_GET_CLASS(s);
+
+    ram_size = object_property_get_uint(OBJECT(&s->sdmc), "ram-size",
+                                        &error_abort);
+    max_ram_size = object_property_get_uint(OBJECT(&s->sdmc), "max-ram-size",
+                                            &error_abort);
+
+    memory_region_init(&s->dram_container, OBJECT(s), "ram-container",
+                       ram_size);
+    memory_region_add_subregion(&s->dram_container, 0, s->dram_mr);
+    address_space_init(&s->dram_as, s->dram_mr, "dram");
+
+    /*
+     * Add a memory region beyond the RAM region to emulate
+     * DDR capacity hardware behavior.
+     */
+    if (ram_size < max_ram_size) {
+        memory_region_init_io(&a->dram_empty, OBJECT(s),
+                              &aspeed_ram_capacity_ops, s,
+                              "ram-empty", max_ram_size - ram_size);
+
+        memory_region_add_subregion(s->memory,
+                                    sc->memmap[ASPEED_DEV_SDRAM] + ram_size,
+                                    &a->dram_empty);
+    }
+
+    memory_region_add_subregion(s->memory,
+                      sc->memmap[ASPEED_DEV_SDRAM], &s->dram_container);
+    return true;
+}
+
+static void aspeed_soc_ast2700_init(Object *obj)
+{
+    Aspeed27x0SoCState *a = ASPEED27X0_SOC(obj);
+    AspeedSoCState *s = ASPEED_SOC(obj);
+    AspeedSoCClass *sc = ASPEED_SOC_GET_CLASS(s);
+    int i;
+    char socname[8];
+    char typename[64];
+
+    if (sscanf(sc->name, "%7s", socname) != 1) {
+        g_assert_not_reached();
+    }
+
+    for (i = 0; i < sc->num_cpus; i++) {
+        object_initialize_child(obj, "cpu[*]", &a->cpu[i],
+                                aspeed_soc_cpu_type(sc));
+    }
+
+    object_initialize_child(obj, "gic", &a->gic, gicv3_class_name());
+
+    object_initialize_child(obj, "scu", &s->scu, TYPE_ASPEED_2700_SCU);
+    qdev_prop_set_uint32(DEVICE(&s->scu), "silicon-rev",
+                         sc->silicon_rev);
+    object_property_add_alias(obj, "hw-strap1", OBJECT(&s->scu),
+                              "hw-strap1");
+    object_property_add_alias(obj, "hw-strap2", OBJECT(&s->scu),
+                              "hw-strap2");
+    object_property_add_alias(obj, "hw-prot-key", OBJECT(&s->scu),
+                              "hw-prot-key");
+
+    object_initialize_child(obj, "scuio", &s->scuio, TYPE_ASPEED_2700_SCUIO);
+    qdev_prop_set_uint32(DEVICE(&s->scuio), "silicon-rev",
+                         sc->silicon_rev);
+
+    snprintf(typename, sizeof(typename), "aspeed.fmc-%s", socname);
+    object_initialize_child(obj, "fmc", &s->fmc, typename);
+
+    for (i = 0; i < sc->spis_num; i++) {
+        snprintf(typename, sizeof(typename), "aspeed.spi%d-%s", i, socname);
+        object_initialize_child(obj, "spi[*]", &s->spi[i], typename);
+    }
+
+    snprintf(typename, sizeof(typename), "aspeed.sdmc-%s", socname);
+    object_initialize_child(obj, "sdmc", &s->sdmc, typename);
+    object_property_add_alias(obj, "ram-size", OBJECT(&s->sdmc),
+                              "ram-size");
+
+    for (i = 0; i < sc->wdts_num; i++) {
+        snprintf(typename, sizeof(typename), "aspeed.wdt-%s", socname);
+        object_initialize_child(obj, "wdt[*]", &s->wdt[i], typename);
+    }
+
+    for (i = 0; i < sc->macs_num; i++) {
+        object_initialize_child(obj, "ftgmac100[*]", &s->ftgmac100[i],
+                                TYPE_FTGMAC100);
+
+        object_initialize_child(obj, "mii[*]", &s->mii[i], TYPE_ASPEED_MII);
+    }
+
+    for (i = 0; i < sc->uarts_num; i++) {
+        object_initialize_child(obj, "uart[*]", &s->uart[i], TYPE_SERIAL_MM);
+    }
+
+    object_initialize_child(obj, "sli", &s->sli, TYPE_ASPEED_2700_SLI);
+    object_initialize_child(obj, "sliio", &s->sliio, TYPE_ASPEED_2700_SLIIO);
+    object_initialize_child(obj, "intc", &a->intc, TYPE_ASPEED_2700_INTC);
+}
+
+/*
+ * ASPEED ast2700 has 0x0 as cluster ID
+ *
+ * https://developer.arm.com/documentation/100236/0100/register-descriptions/aarch64-system-registers/multiprocessor-affinity-register--el1
+ */
+static uint64_t aspeed_calc_affinity(int cpu)
+{
+    return (0x0 << ARM_AFF1_SHIFT) | cpu;
+}
+
+static bool aspeed_soc_ast2700_gic_realize(DeviceState *dev, Error **errp)
+{
+    Aspeed27x0SoCState *a = ASPEED27X0_SOC(dev);
+    AspeedSoCState *s = ASPEED_SOC(dev);
+    AspeedSoCClass *sc = ASPEED_SOC_GET_CLASS(s);
+    SysBusDevice *gicbusdev;
+    DeviceState *gicdev;
+    QList *redist_region_count;
+    int i;
+
+    gicbusdev = SYS_BUS_DEVICE(&a->gic);
+    gicdev = DEVICE(&a->gic);
+    qdev_prop_set_uint32(gicdev, "revision", 3);
+    qdev_prop_set_uint32(gicdev, "num-cpu", sc->num_cpus);
+    qdev_prop_set_uint32(gicdev, "num-irq", AST2700_MAX_IRQ);
+
+    redist_region_count = qlist_new();
+    qlist_append_int(redist_region_count, sc->num_cpus);
+    qdev_prop_set_array(gicdev, "redist-region-count", redist_region_count);
+
+    if (!sysbus_realize(gicbusdev, errp)) {
+        return false;
+    }
+    sysbus_mmio_map(gicbusdev, 0, sc->memmap[ASPEED_GIC_DIST]);
+    sysbus_mmio_map(gicbusdev, 1, sc->memmap[ASPEED_GIC_REDIST]);
+
+    for (i = 0; i < sc->num_cpus; i++) {
+        DeviceState *cpudev = DEVICE(&a->cpu[i]);
+        int NUM_IRQS = 256, ARCH_GIC_MAINT_IRQ = 9, VIRTUAL_PMU_IRQ = 7;
+        int ppibase = NUM_IRQS + i * GIC_INTERNAL + GIC_NR_SGIS;
+
+        const int timer_irq[] = {
+            [GTIMER_PHYS] = 14,
+            [GTIMER_VIRT] = 11,
+            [GTIMER_HYP]  = 10,
+            [GTIMER_SEC]  = 13,
+        };
+        int j;
+
+        for (j = 0; j < ARRAY_SIZE(timer_irq); j++) {
+            qdev_connect_gpio_out(cpudev, j,
+                    qdev_get_gpio_in(gicdev, ppibase + timer_irq[j]));
+        }
+
+        qemu_irq irq = qdev_get_gpio_in(gicdev,
+                                        ppibase + ARCH_GIC_MAINT_IRQ);
+        qdev_connect_gpio_out_named(cpudev, "gicv3-maintenance-interrupt",
+                                    0, irq);
+        qdev_connect_gpio_out_named(cpudev, "pmu-interrupt", 0,
+                qdev_get_gpio_in(gicdev, ppibase + VIRTUAL_PMU_IRQ));
+
+        sysbus_connect_irq(gicbusdev, i, qdev_get_gpio_in(cpudev, ARM_CPU_IRQ));
+        sysbus_connect_irq(gicbusdev, i + sc->num_cpus,
+                           qdev_get_gpio_in(cpudev, ARM_CPU_FIQ));
+        sysbus_connect_irq(gicbusdev, i + 2 * sc->num_cpus,
+                           qdev_get_gpio_in(cpudev, ARM_CPU_VIRQ));
+        sysbus_connect_irq(gicbusdev, i + 3 * sc->num_cpus,
+                           qdev_get_gpio_in(cpudev, ARM_CPU_VFIQ));
+    }
+
+    return true;
+}
+
+static void aspeed_soc_ast2700_realize(DeviceState *dev, Error **errp)
+{
+    int i;
+    Aspeed27x0SoCState *a = ASPEED27X0_SOC(dev);
+    AspeedSoCState *s = ASPEED_SOC(dev);
+    AspeedSoCClass *sc = ASPEED_SOC_GET_CLASS(s);
+    AspeedINTCClass *ic = ASPEED_INTC_GET_CLASS(&a->intc);
+    g_autofree char *sram_name = NULL;
+
+    /* Default boot region (SPI memory or ROMs) */
+    memory_region_init(&s->spi_boot_container, OBJECT(s),
+                       "aspeed.spi_boot_container", 0x400000000);
+    memory_region_add_subregion(s->memory, sc->memmap[ASPEED_DEV_SPI_BOOT],
+                                &s->spi_boot_container);
+
+    /* CPU */
+    for (i = 0; i < sc->num_cpus; i++) {
+        object_property_set_int(OBJECT(&a->cpu[i]), "mp-affinity",
+                                aspeed_calc_affinity(i), &error_abort);
+
+        object_property_set_int(OBJECT(&a->cpu[i]), "cntfrq", 1125000000,
+                                &error_abort);
+        object_property_set_link(OBJECT(&a->cpu[i]), "memory",
+                                 OBJECT(s->memory), &error_abort);
+
+        if (!qdev_realize(DEVICE(&a->cpu[i]), NULL, errp)) {
+            return;
+        }
+    }
+
+    /* GIC */
+    if (!aspeed_soc_ast2700_gic_realize(dev, errp)) {
+        return;
+    }
+
+    /* INTC */
+    if (!sysbus_realize(SYS_BUS_DEVICE(&a->intc), errp)) {
+        return;
+    }
+
+    aspeed_mmio_map(s, SYS_BUS_DEVICE(&a->intc), 0,
+                    sc->memmap[ASPEED_DEV_INTC]);
+
+    /* GICINT orgates -> INTC -> GIC */
+    for (i = 0; i < ic->num_ints; i++) {
+        qdev_connect_gpio_out(DEVICE(&a->intc.orgates[i]), 0,
+                                qdev_get_gpio_in(DEVICE(&a->intc), i));
+        sysbus_connect_irq(SYS_BUS_DEVICE(&a->intc), i,
+                           qdev_get_gpio_in(DEVICE(&a->gic),
+                                aspeed_soc_ast2700_gic_intcmap[i].irq));
+    }
+
+    /* SRAM */
+    sram_name = g_strdup_printf("aspeed.sram.%d", CPU(&a->cpu[0])->cpu_index);
+    if (!memory_region_init_ram(&s->sram, OBJECT(s), sram_name, sc->sram_size,
+                                 errp)) {
+        return;
+    }
+    memory_region_add_subregion(s->memory,
+                                sc->memmap[ASPEED_DEV_SRAM], &s->sram);
+
+    /* SCU */
+    if (!sysbus_realize(SYS_BUS_DEVICE(&s->scu), errp)) {
+        return;
+    }
+    aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->scu), 0, sc->memmap[ASPEED_DEV_SCU]);
+
+    /* SCU1 */
+    if (!sysbus_realize(SYS_BUS_DEVICE(&s->scuio), errp)) {
+        return;
+    }
+    aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->scuio), 0,
+                    sc->memmap[ASPEED_DEV_SCUIO]);
+
+    /* UART */
+    if (!aspeed_soc_uart_realize(s, errp)) {
+        return;
+    }
+
+    /* FMC, The number of CS is set at the board level */
+    object_property_set_int(OBJECT(&s->fmc), "dram-base",
+                            sc->memmap[ASPEED_DEV_SDRAM],
+                            &error_abort);
+    object_property_set_link(OBJECT(&s->fmc), "dram", OBJECT(s->dram_mr),
+                             &error_abort);
+    if (!sysbus_realize(SYS_BUS_DEVICE(&s->fmc), errp)) {
+        return;
+    }
+    aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->fmc), 0, sc->memmap[ASPEED_DEV_FMC]);
+    aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->fmc), 1,
+                    ASPEED_SMC_GET_CLASS(&s->fmc)->flash_window_base);
+    sysbus_connect_irq(SYS_BUS_DEVICE(&s->fmc), 0,
+                       aspeed_soc_get_irq(s, ASPEED_DEV_FMC));
+
+    /* Set up an alias on the FMC CE0 region (boot default) */
+    MemoryRegion *fmc0_mmio = &s->fmc.flashes[0].mmio;
+    memory_region_init_alias(&s->spi_boot, OBJECT(s), "aspeed.spi_boot",
+                             fmc0_mmio, 0, memory_region_size(fmc0_mmio));
+    memory_region_add_subregion(&s->spi_boot_container, 0x0, &s->spi_boot);
+
+    /* SPI */
+    for (i = 0; i < sc->spis_num; i++) {
+        object_property_set_link(OBJECT(&s->spi[i]), "dram",
+                                 OBJECT(s->dram_mr), &error_abort);
+        if (!sysbus_realize(SYS_BUS_DEVICE(&s->spi[i]), errp)) {
+            return;
+        }
+        aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->spi[i]), 0,
+                        sc->memmap[ASPEED_DEV_SPI0 + i]);
+        aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->spi[i]), 1,
+                        ASPEED_SMC_GET_CLASS(&s->spi[i])->flash_window_base);
+    }
+
+    /*
+     * SDMC - SDRAM Memory Controller
+     * The SDMC controller is unlocked at the SPL stage.
+     * At present, only booting from the u-boot stage is
+     * emulated, so the SDMC controller is left unlocked
+     * by default. This is a temporary solution.
+     */
+    object_property_set_bool(OBJECT(&s->sdmc), "unlocked", true,
+                                 &error_abort);
+    if (!sysbus_realize(SYS_BUS_DEVICE(&s->sdmc), errp)) {
+        return;
+    }
+    aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->sdmc), 0,
+                    sc->memmap[ASPEED_DEV_SDMC]);
+
+    /* RAM */
+    if (!aspeed_soc_ast2700_dram_init(dev, errp)) {
+        return;
+    }
+
+    for (i = 0; i < sc->macs_num; i++) {
+        object_property_set_bool(OBJECT(&s->ftgmac100[i]), "aspeed", true,
+                                 &error_abort);
+        if (!sysbus_realize(SYS_BUS_DEVICE(&s->ftgmac100[i]), errp)) {
+            return;
+        }
+        aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->ftgmac100[i]), 0,
+                        sc->memmap[ASPEED_DEV_ETH1 + i]);
+        sysbus_connect_irq(SYS_BUS_DEVICE(&s->ftgmac100[i]), 0,
+                           aspeed_soc_get_irq(s, ASPEED_DEV_ETH1 + i));
+
+        object_property_set_link(OBJECT(&s->mii[i]), "nic",
+                                 OBJECT(&s->ftgmac100[i]), &error_abort);
+        if (!sysbus_realize(SYS_BUS_DEVICE(&s->mii[i]), errp)) {
+            return;
+        }
+
+        aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->mii[i]), 0,
+                        sc->memmap[ASPEED_DEV_MII1 + i]);
+    }
+
+    /* Watch dog */
+    for (i = 0; i < sc->wdts_num; i++) {
+        AspeedWDTClass *awc = ASPEED_WDT_GET_CLASS(&s->wdt[i]);
+        hwaddr wdt_offset = sc->memmap[ASPEED_DEV_WDT] + i * awc->iosize;
+
+        object_property_set_link(OBJECT(&s->wdt[i]), "scu", OBJECT(&s->scu),
+                                 &error_abort);
+        if (!sysbus_realize(SYS_BUS_DEVICE(&s->wdt[i]), errp)) {
+            return;
+        }
+        aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->wdt[i]), 0, wdt_offset);
+    }
+
+    /* SLI */
+    if (!sysbus_realize(SYS_BUS_DEVICE(&s->sli), errp)) {
+        return;
+    }
+    aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->sli), 0, sc->memmap[ASPEED_DEV_SLI]);
+
+    if (!sysbus_realize(SYS_BUS_DEVICE(&s->sliio), errp)) {
+        return;
+    }
+    aspeed_mmio_map(s, SYS_BUS_DEVICE(&s->sliio), 0,
+                    sc->memmap[ASPEED_DEV_SLIIO]);
+
+    create_unimplemented_device("ast2700.dpmcu", 0x11000000, 0x40000);
+    create_unimplemented_device("ast2700.iomem0", 0x12000000, 0x01000000);
+    create_unimplemented_device("ast2700.iomem1", 0x14000000, 0x01000000);
+    create_unimplemented_device("ast2700.ltpi", 0x30000000, 0x1000000);
+    create_unimplemented_device("ast2700.io", 0x0, 0x4000000);
+}
+
+static void aspeed_soc_ast2700_class_init(ObjectClass *oc, void *data)
+{
+    static const char * const valid_cpu_types[] = {
+        ARM_CPU_TYPE_NAME("cortex-a35"),
+        NULL
+    };
+    DeviceClass *dc = DEVICE_CLASS(oc);
+    AspeedSoCClass *sc = ASPEED_SOC_CLASS(oc);
+
+    /* Reason: The Aspeed SoC can only be instantiated from a board */
+    dc->user_creatable = false;
+    dc->realize      = aspeed_soc_ast2700_realize;
+
+    sc->name         = "ast2700-a0";
+    sc->valid_cpu_types = valid_cpu_types;
+    sc->silicon_rev  = AST2700_A0_SILICON_REV;
+    sc->sram_size    = 0x20000;
+    sc->spis_num     = 3;
+    sc->wdts_num     = 8;
+    sc->macs_num     = 1;
+    sc->uarts_num    = 13;
+    sc->num_cpus     = 4;
+    sc->uarts_base   = ASPEED_DEV_UART0;
+    sc->irqmap       = aspeed_soc_ast2700_irqmap;
+    sc->memmap       = aspeed_soc_ast2700_memmap;
+    sc->get_irq      = aspeed_soc_ast2700_get_irq;
+}
+
+static const TypeInfo aspeed_soc_ast27x0_types[] = {
+    {
+        .name           = TYPE_ASPEED27X0_SOC,
+        .parent         = TYPE_ASPEED_SOC,
+        .instance_size  = sizeof(Aspeed27x0SoCState),
+        .abstract       = true,
+    }, {
+        .name           = "ast2700-a0",
+        .parent         = TYPE_ASPEED27X0_SOC,
+        .instance_init  = aspeed_soc_ast2700_init,
+        .class_init     = aspeed_soc_ast2700_class_init,
+    },
+};
+
+DEFINE_TYPES(aspeed_soc_ast27x0_types)
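
Note on the DRAM wrap-around modelled by aspeed_ram_capacity_write() above:
stores landing in the "ram-empty" region beyond the configured RAM size are
redirected to "address % ram_size". A minimal sketch of that arithmetic, with
hypothetical values that are not part of the patch:

    /* Illustration only: assume 1 GiB of backing DRAM ("ram-size" = 0x40000000) */
    uint64_t ram_size = 0x40000000;
    uint64_t addr     = 0x40000100;        /* guest store past the end of DRAM */
    uint64_t wrapped  = addr % ram_size;   /* 0x100: data lands in real DRAM   */
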
diff --git a/hw/arm/meson.build b/hw/arm/meson.build
index aefde0c..0c07ab5 100644
--- a/hw/arm/meson.build
+++ b/hw/arm/meson.build
@@ -49,6 +49,7 @@
   'aspeed_ast10x0.c',
   'aspeed_eeprom.c',
   'fby35.c'))
+arm_ss.add(when: ['CONFIG_ASPEED_SOC', 'TARGET_AARCH64'], if_true: files('aspeed_ast27x0.c'))
 arm_ss.add(when: 'CONFIG_MPS2', if_true: files('mps2.c'))
 arm_ss.add(when: 'CONFIG_MPS2', if_true: files('mps2-tz.c'))
 arm_ss.add(when: 'CONFIG_MSF2', if_true: files('msf2-soc.c'))
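
Build note (an observation about the standard QEMU build flow, not part of this
patch): because aspeed_ast27x0.c is added under both CONFIG_ASPEED_SOC and
TARGET_AARCH64, matching the #ifdef TARGET_AARCH64 blocks in hw/arm/aspeed.c,
the ast2700-evb machine is compiled into the qemu-system-aarch64 binary only;
32-bit arm system emulator builds are unchanged.
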
diff --git a/hw/core/machine.c b/hw/core/machine.c
index c93d249..655d75c 100644
--- a/hw/core/machine.c
+++ b/hw/core/machine.c
@@ -36,6 +36,7 @@
 
 GlobalProperty hw_compat_9_0[] = {
     {"arm-cpu", "backcompat-cntfrq", "true" },
+    {"scsi-disk-base", "migrate-emulated-scsi-request", "false" },
     {"vfio-pci", "skip-vsc-check", "false" },
 };
 const size_t hw_compat_9_0_len = G_N_ELEMENTS(hw_compat_9_0);
diff --git a/hw/intc/aspeed_intc.c b/hw/intc/aspeed_intc.c
new file mode 100644
index 0000000..7515558
--- /dev/null
+++ b/hw/intc/aspeed_intc.c
@@ -0,0 +1,361 @@
+/*
+ * ASPEED INTC Controller
+ *
+ * Copyright (C) 2024 ASPEED Technology Inc.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "hw/intc/aspeed_intc.h"
+#include "hw/irq.h"
+#include "qemu/log.h"
+#include "trace.h"
+#include "hw/registerfields.h"
+#include "qapi/error.h"
+
+/* INTC Registers */
+REG32(GICINT128_EN,         0x1000)
+REG32(GICINT128_STATUS,     0x1004)
+REG32(GICINT129_EN,         0x1100)
+REG32(GICINT129_STATUS,     0x1104)
+REG32(GICINT130_EN,         0x1200)
+REG32(GICINT130_STATUS,     0x1204)
+REG32(GICINT131_EN,         0x1300)
+REG32(GICINT131_STATUS,     0x1304)
+REG32(GICINT132_EN,         0x1400)
+REG32(GICINT132_STATUS,     0x1404)
+REG32(GICINT133_EN,         0x1500)
+REG32(GICINT133_STATUS,     0x1504)
+REG32(GICINT134_EN,         0x1600)
+REG32(GICINT134_STATUS,     0x1604)
+REG32(GICINT135_EN,         0x1700)
+REG32(GICINT135_STATUS,     0x1704)
+REG32(GICINT136_EN,         0x1800)
+REG32(GICINT136_STATUS,     0x1804)
+
+#define GICINT_STATUS_BASE     R_GICINT128_STATUS
+
+static void aspeed_intc_update(AspeedINTCState *s, int irq, int level)
+{
+    AspeedINTCClass *aic = ASPEED_INTC_GET_CLASS(s);
+
+    if (irq >= aic->num_ints) {
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: Invalid interrupt number: %d\n",
+                      __func__, irq);
+        return;
+    }
+
+    trace_aspeed_intc_update_irq(irq, level);
+    qemu_set_irq(s->output_pins[irq], level);
+}
+
+/*
+ * The GICINT128 to GICINT136 registers are located from 0x1000 to 0x1804.
+ * "address & 0x0f00" gives the irq and the irq output pin index.
+ * The value of irq ranges from 0 to num_ints - 1.
+ * irq 0 corresponds to GICINT128, irq 1 to GICINT129, and so on.
+ */
+static void aspeed_intc_set_irq(void *opaque, int irq, int level)
+{
+    AspeedINTCState *s = (AspeedINTCState *)opaque;
+    AspeedINTCClass *aic = ASPEED_INTC_GET_CLASS(s);
+    uint32_t status_addr = GICINT_STATUS_BASE + ((0x100 * irq) >> 2);
+    uint32_t select = 0;
+    uint32_t enable;
+    int i;
+
+    if (irq >= aic->num_ints) {
+        qemu_log_mask(LOG_GUEST_ERROR, "%s: Invalid interrupt number: %d\n",
+                      __func__, irq);
+        return;
+    }
+
+    trace_aspeed_intc_set_irq(irq, level);
+    enable = s->enable[irq];
+
+    if (!level) {
+        return;
+    }
+
+    for (i = 0; i < aic->num_lines; i++) {
+        if (s->orgates[irq].levels[i]) {
+            if (enable & BIT(i)) {
+                select |= BIT(i);
+            }
+        }
+    }
+
+    if (!select) {
+        return;
+    }
+
+    trace_aspeed_intc_select(select);
+
+    if (s->mask[irq] || s->regs[status_addr]) {
+        /*
+         * a. A non-zero mask means we are in ISR mode and the source
+         * interrupt routines are still executing.
+         * b. A non-zero status register value means the previous source
+         * interrupts have not been handled yet.
+         *
+         * In both cases, save the source interrupts in the pending variable.
+         */
+        s->pending[irq] |= select;
+        trace_aspeed_intc_pending_irq(irq, s->pending[irq]);
+    } else {
+        /*
+         * Notify firmware which source interrupts are arriving
+         * by setting the status register.
+         */
+        s->regs[status_addr] = select;
+        trace_aspeed_intc_trigger_irq(irq, s->regs[status_addr]);
+        aspeed_intc_update(s, irq, 1);
+    }
+}
+
+static uint64_t aspeed_intc_read(void *opaque, hwaddr offset, unsigned int size)
+{
+    AspeedINTCState *s = ASPEED_INTC(opaque);
+    uint32_t addr = offset >> 2;
+    uint32_t value = 0;
+
+    if (addr >= ASPEED_INTC_NR_REGS) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: Out-of-bounds read at offset 0x%" HWADDR_PRIx "\n",
+                      __func__, offset);
+        return 0;
+    }
+
+    value = s->regs[addr];
+    trace_aspeed_intc_read(offset, size, value);
+
+    return value;
+}
+
+static void aspeed_intc_write(void *opaque, hwaddr offset, uint64_t data,
+                                        unsigned size)
+{
+    AspeedINTCState *s = ASPEED_INTC(opaque);
+    AspeedINTCClass *aic = ASPEED_INTC_GET_CLASS(s);
+    uint32_t addr = offset >> 2;
+    uint32_t old_enable;
+    uint32_t change;
+    uint32_t irq;
+
+    if (addr >= ASPEED_INTC_NR_REGS) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: Out-of-bounds write at offset 0x%" HWADDR_PRIx "\n",
+                      __func__, offset);
+        return;
+    }
+
+    trace_aspeed_intc_write(offset, size, data);
+
+    switch (addr) {
+    case R_GICINT128_EN:
+    case R_GICINT129_EN:
+    case R_GICINT130_EN:
+    case R_GICINT131_EN:
+    case R_GICINT132_EN:
+    case R_GICINT133_EN:
+    case R_GICINT134_EN:
+    case R_GICINT135_EN:
+    case R_GICINT136_EN:
+        irq = (offset & 0x0f00) >> 8;
+
+        if (irq >= aic->num_ints) {
+            qemu_log_mask(LOG_GUEST_ERROR, "%s: Invalid interrupt number: %d\n",
+                          __func__, irq);
+            return;
+        }
+
+        /*
+         * These registers are used to enable the source interrupts and to
+         * mask and unmask them while the source ISR is executing.
+         */
+
+        /* disable all source interrupts */
+        if (!data && !s->enable[irq]) {
+            s->regs[addr] = data;
+            return;
+        }
+
+        old_enable = s->enable[irq];
+        s->enable[irq] |= data;
+
+        /* enable new source interrupts */
+        if (old_enable != s->enable[irq]) {
+            trace_aspeed_intc_enable(s->enable[irq]);
+            s->regs[addr] = data;
+            return;
+        }
+
+        /* mask and unmask source interrupts */
+        change = s->regs[addr] ^ data;
+        if (change & data) {
+            s->mask[irq] &= ~change;
+            trace_aspeed_intc_unmask(change, s->mask[irq]);
+        } else {
+            s->mask[irq] |= change;
+            trace_aspeed_intc_mask(change, s->mask[irq]);
+        }
+        s->regs[addr] = data;
+        break;
+    case R_GICINT128_STATUS:
+    case R_GICINT129_STATUS:
+    case R_GICINT130_STATUS:
+    case R_GICINT131_STATUS:
+    case R_GICINT132_STATUS:
+    case R_GICINT133_STATUS:
+    case R_GICINT134_STATUS:
+    case R_GICINT135_STATUS:
+    case R_GICINT136_STATUS:
+        irq = (offset & 0x0f00) >> 8;
+
+        if (irq >= aic->num_ints) {
+            qemu_log_mask(LOG_GUEST_ERROR, "%s: Invalid interrupt number: %d\n",
+                          __func__, irq);
+            return;
+        }
+
+        /* clear status */
+        s->regs[addr] &= ~data;
+
+        /*
+         * These status registers are used to notify that the source ISRs
+         * have been executed. When one source ISR completes, it clears one
+         * bit. If a write clears all bits at once, it is initializing the
+         * register status rather than reporting completed source ISRs.
+         */
+        if (data == 0xffffffff) {
+            return;
+        }
+
+        /* All source ISR executions are done */
+        if (!s->regs[addr]) {
+            trace_aspeed_intc_all_isr_done(irq);
+            if (s->pending[irq]) {
+                /*
+                 * Handle the pending source interrupts: notify firmware
+                 * which source interrupts are pending by setting the
+                 * status register.
+                 */
+                s->regs[addr] = s->pending[irq];
+                s->pending[irq] = 0;
+                trace_aspeed_intc_trigger_irq(irq, s->regs[addr]);
+                aspeed_intc_update(s, irq, 1);
+            } else {
+                /* clear irq */
+                trace_aspeed_intc_clear_irq(irq, 0);
+                aspeed_intc_update(s, irq, 0);
+            }
+        }
+        break;
+    default:
+        s->regs[addr] = data;
+        break;
+    }
+
+    return;
+}
+
+static const MemoryRegionOps aspeed_intc_ops = {
+    .read = aspeed_intc_read,
+    .write = aspeed_intc_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4,
+    }
+};
+
+static void aspeed_intc_instance_init(Object *obj)
+{
+    AspeedINTCState *s = ASPEED_INTC(obj);
+    AspeedINTCClass *aic = ASPEED_INTC_GET_CLASS(s);
+    int i;
+
+    assert(aic->num_ints <= ASPEED_INTC_NR_INTS);
+    for (i = 0; i < aic->num_ints; i++) {
+        object_initialize_child(obj, "intc-orgates[*]", &s->orgates[i],
+                                TYPE_OR_IRQ);
+        object_property_set_int(OBJECT(&s->orgates[i]), "num-lines",
+                                aic->num_lines, &error_abort);
+    }
+}
+
+static void aspeed_intc_reset(DeviceState *dev)
+{
+    AspeedINTCState *s = ASPEED_INTC(dev);
+
+    memset(s->regs, 0, sizeof(s->regs));
+    memset(s->enable, 0, sizeof(s->enable));
+    memset(s->mask, 0, sizeof(s->mask));
+    memset(s->pending, 0, sizeof(s->pending));
+}
+
+static void aspeed_intc_realize(DeviceState *dev, Error **errp)
+{
+    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
+    AspeedINTCState *s = ASPEED_INTC(dev);
+    AspeedINTCClass *aic = ASPEED_INTC_GET_CLASS(s);
+    int i;
+
+    memory_region_init_io(&s->iomem, OBJECT(s), &aspeed_intc_ops, s,
+                          TYPE_ASPEED_INTC ".regs", ASPEED_INTC_NR_REGS << 2);
+
+    sysbus_init_mmio(sbd, &s->iomem);
+    qdev_init_gpio_in(dev, aspeed_intc_set_irq, aic->num_ints);
+
+    for (i = 0; i < aic->num_ints; i++) {
+        if (!qdev_realize(DEVICE(&s->orgates[i]), NULL, errp)) {
+            return;
+        }
+        sysbus_init_irq(sbd, &s->output_pins[i]);
+    }
+}
+
+static void aspeed_intc_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->desc = "ASPEED INTC Controller";
+    dc->realize = aspeed_intc_realize;
+    dc->reset = aspeed_intc_reset;
+    dc->vmsd = NULL;
+}
+
+static const TypeInfo aspeed_intc_info = {
+    .name = TYPE_ASPEED_INTC,
+    .parent = TYPE_SYS_BUS_DEVICE,
+    .instance_init = aspeed_intc_instance_init,
+    .instance_size = sizeof(AspeedINTCState),
+    .class_init = aspeed_intc_class_init,
+    .class_size = sizeof(AspeedINTCClass),
+    .abstract = true,
+};
+
+static void aspeed_2700_intc_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    AspeedINTCClass *aic = ASPEED_INTC_CLASS(klass);
+
+    dc->desc = "ASPEED 2700 INTC Controller";
+    aic->num_lines = 32;
+    aic->num_ints = 9;
+}
+
+static const TypeInfo aspeed_2700_intc_info = {
+    .name = TYPE_ASPEED_2700_INTC,
+    .parent = TYPE_ASPEED_INTC,
+    .class_init = aspeed_2700_intc_class_init,
+};
+
+static void aspeed_intc_register_types(void)
+{
+    type_register_static(&aspeed_intc_info);
+    type_register_static(&aspeed_2700_intc_info);
+}
+
+type_init(aspeed_intc_register_types);
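
A short worked example of the register decoding used in aspeed_intc_set_irq()
and aspeed_intc_write() above, with offsets taken from the register layout in
this file (illustration only, not additional code in the patch):

    /* Bits [11:8] of the register offset select the GICINT line index */
    unsigned en_irq = (0x1000 & 0x0f00) >> 8;   /* GICINT128_EN     -> index 0 */
    unsigned st_irq = (0x1404 & 0x0f00) >> 8;   /* GICINT132_STATUS -> index 4 */

    /*
     * While mask[irq] or the status register is non-zero, newly arriving
     * source bits are accumulated in pending[irq] and are only pushed into
     * the status register (raising the output pin) once firmware has cleared
     * the previous status bits.
     */
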
diff --git a/hw/intc/meson.build b/hw/intc/meson.build
index 0d1b7d0..afd1aa5 100644
--- a/hw/intc/meson.build
+++ b/hw/intc/meson.build
@@ -14,6 +14,7 @@
 ))
 system_ss.add(when: 'CONFIG_ALLWINNER_A10_PIC', if_true: files('allwinner-a10-pic.c'))
 system_ss.add(when: 'CONFIG_ASPEED_SOC', if_true: files('aspeed_vic.c'))
+system_ss.add(when: 'CONFIG_ASPEED_SOC', if_true: files('aspeed_intc.c'))
 system_ss.add(when: 'CONFIG_ETRAXFS', if_true: files('etraxfs_pic.c'))
 system_ss.add(when: 'CONFIG_EXYNOS4', if_true: files('exynos4210_gic.c', 'exynos4210_combiner.c'))
 system_ss.add(when: 'CONFIG_GOLDFISH_PIC', if_true: files('goldfish_pic.c'))
diff --git a/hw/intc/trace-events b/hw/intc/trace-events
index b815cea..3dcf147 100644
--- a/hw/intc/trace-events
+++ b/hw/intc/trace-events
@@ -79,6 +79,19 @@
 aspeed_vic_update_irq(int flags) "Raising IRQ: %d"
 aspeed_vic_read(uint64_t offset, unsigned size, uint32_t value) "From 0x%" PRIx64 " of size %u: 0x%" PRIx32
 aspeed_vic_write(uint64_t offset, unsigned size, uint32_t data) "To 0x%" PRIx64 " of size %u: 0x%" PRIx32
+# aspeed_intc.c
+aspeed_intc_read(uint64_t offset, unsigned size, uint32_t value) "From 0x%" PRIx64 " of size %u: 0x%" PRIx32
+aspeed_intc_write(uint64_t offset, unsigned size, uint32_t data) "To 0x%" PRIx64 " of size %u: 0x%" PRIx32
+aspeed_intc_set_irq(int irq, int level) "Set IRQ %d: %d"
+aspeed_intc_clear_irq(int irq, int level) "Clear IRQ %d: %d"
+aspeed_intc_update_irq(int irq, int level) "Update IRQ: %d: %d"
+aspeed_intc_pending_irq(int irq, uint32_t value) "Pending IRQ: %d: 0x%x"
+aspeed_intc_trigger_irq(int irq, uint32_t value) "Trigger IRQ: %d: 0x%x"
+aspeed_intc_all_isr_done(int irq) "All source ISR executions are done: %d"
+aspeed_intc_enable(uint32_t value) "Enable: 0x%x"
+aspeed_intc_select(uint32_t value) "Select: 0x%x"
+aspeed_intc_mask(uint32_t change, uint32_t value) "Mask: 0x%x: 0x%x"
+aspeed_intc_unmask(uint32_t change, uint32_t value) "UnMask: 0x%x: 0x%x"
 
 # arm_gic.c
 gic_enable_irq(int irq) "irq %d enabled"
diff --git a/hw/misc/aspeed_scu.c b/hw/misc/aspeed_scu.c
index 1ac04b6..451e837 100644
--- a/hw/misc/aspeed_scu.c
+++ b/hw/misc/aspeed_scu.c
@@ -134,6 +134,48 @@
 
 #define AST2600_CLK TO_REG(0x40)
 
+#define AST2700_SILICON_REV       TO_REG(0x00)
+#define AST2700_HW_STRAP1         TO_REG(0x10)
+#define AST2700_HW_STRAP1_CLR     TO_REG(0x14)
+#define AST2700_HW_STRAP1_LOCK    TO_REG(0x20)
+#define AST2700_HW_STRAP1_SEC1    TO_REG(0x24)
+#define AST2700_HW_STRAP1_SEC2    TO_REG(0x28)
+#define AST2700_HW_STRAP1_SEC3    TO_REG(0x2C)
+
+#define AST2700_SCU_CLK_SEL_1       TO_REG(0x280)
+#define AST2700_SCU_HPLL_PARAM      TO_REG(0x300)
+#define AST2700_SCU_HPLL_EXT_PARAM  TO_REG(0x304)
+#define AST2700_SCU_DPLL_PARAM      TO_REG(0x308)
+#define AST2700_SCU_DPLL_EXT_PARAM  TO_REG(0x30c)
+#define AST2700_SCU_MPLL_PARAM      TO_REG(0x310)
+#define AST2700_SCU_MPLL_EXT_PARAM  TO_REG(0x314)
+#define AST2700_SCU_D1CLK_PARAM     TO_REG(0x320)
+#define AST2700_SCU_D2CLK_PARAM     TO_REG(0x330)
+#define AST2700_SCU_CRT1CLK_PARAM   TO_REG(0x340)
+#define AST2700_SCU_CRT2CLK_PARAM   TO_REG(0x350)
+#define AST2700_SCU_MPHYCLK_PARAM   TO_REG(0x360)
+#define AST2700_SCU_FREQ_CNTR       TO_REG(0x3b0)
+#define AST2700_SCU_CPU_SCRATCH_0   TO_REG(0x780)
+#define AST2700_SCU_CPU_SCRATCH_1   TO_REG(0x784)
+
+#define AST2700_SCUIO_CLK_STOP_CTL_1    TO_REG(0x240)
+#define AST2700_SCUIO_CLK_STOP_CLR_1    TO_REG(0x244)
+#define AST2700_SCUIO_CLK_STOP_CTL_2    TO_REG(0x260)
+#define AST2700_SCUIO_CLK_STOP_CLR_2    TO_REG(0x264)
+#define AST2700_SCUIO_CLK_SEL_1         TO_REG(0x280)
+#define AST2700_SCUIO_CLK_SEL_2         TO_REG(0x284)
+#define AST2700_SCUIO_HPLL_PARAM        TO_REG(0x300)
+#define AST2700_SCUIO_HPLL_EXT_PARAM    TO_REG(0x304)
+#define AST2700_SCUIO_APLL_PARAM        TO_REG(0x310)
+#define AST2700_SCUIO_APLL_EXT_PARAM    TO_REG(0x314)
+#define AST2700_SCUIO_DPLL_PARAM        TO_REG(0x320)
+#define AST2700_SCUIO_DPLL_EXT_PARAM    TO_REG(0x324)
+#define AST2700_SCUIO_DPLL_PARAM_READ   TO_REG(0x328)
+#define AST2700_SCUIO_DPLL_EXT_PARAM_READ TO_REG(0x32c)
+#define AST2700_SCUIO_UARTCLK_GEN       TO_REG(0x330)
+#define AST2700_SCUIO_HUARTCLK_GEN      TO_REG(0x334)
+#define AST2700_SCUIO_CLK_DUTY_MEAS_RST TO_REG(0x388)
+
 #define SCU_IO_REGION_SIZE 0x1000
 
 static const uint32_t ast2400_a0_resets[ASPEED_SCU_NR_REGS] = {
@@ -244,6 +286,25 @@
         / asc->apb_divider;
 }
 
+static uint32_t aspeed_2700_scu_get_apb_freq(AspeedSCUState *s)
+{
+    AspeedSCUClass *asc = ASPEED_SCU_GET_CLASS(s);
+    uint32_t hpll = asc->calc_hpll(s, s->regs[AST2700_SCU_HPLL_PARAM]);
+
+    return hpll / (SCU_CLK_GET_PCLK_DIV(s->regs[AST2700_SCU_CLK_SEL_1]) + 1)
+           / asc->apb_divider;
+}
+
+static uint32_t aspeed_2700_scuio_get_apb_freq(AspeedSCUState *s)
+{
+    AspeedSCUClass *asc = ASPEED_SCU_GET_CLASS(s);
+    uint32_t hpll = asc->calc_hpll(s, s->regs[AST2700_SCUIO_HPLL_PARAM]);
+
+    return hpll /
+        (SCUIO_AST2700_CLK_GET_PCLK_DIV(s->regs[AST2700_SCUIO_CLK_SEL_1]) + 1)
+        / asc->apb_divider;
+}
+
 static uint64_t aspeed_scu_read(void *opaque, hwaddr offset, unsigned size)
 {
     AspeedSCUState *s = ASPEED_SCU(opaque);
@@ -258,7 +319,8 @@
 
     switch (reg) {
     case RNG_DATA:
-        /* On hardware, RNG_DATA works regardless of
+        /*
+         * On hardware, RNG_DATA works regardless of
          * the state of the enable bit in RNG_CTRL
          */
         s->regs[RNG_DATA] = aspeed_scu_get_random();
@@ -494,6 +556,9 @@
     AST2600_A3_SILICON_REV,
     AST1030_A0_SILICON_REV,
     AST1030_A1_SILICON_REV,
+    AST2700_A0_SILICON_REV,
+    AST2720_A0_SILICON_REV,
+    AST2750_A0_SILICON_REV,
 };
 
 bool is_supported_silicon_rev(uint32_t silicon_rev)
@@ -783,6 +848,243 @@
     .class_init = aspeed_2600_scu_class_init,
 };
 
+static uint64_t aspeed_ast2700_scu_read(void *opaque, hwaddr offset,
+                                        unsigned size)
+{
+    AspeedSCUState *s = ASPEED_SCU(opaque);
+    int reg = TO_REG(offset);
+
+    if (reg >= ASPEED_AST2700_SCU_NR_REGS) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: Out-of-bounds read at offset 0x%" HWADDR_PRIx "\n",
+                __func__, offset);
+        return 0;
+    }
+
+    switch (reg) {
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: Unhandled read at offset 0x%" HWADDR_PRIx "\n",
+                      __func__, offset);
+    }
+
+    trace_aspeed_ast2700_scu_read(offset, size, s->regs[reg]);
+    return s->regs[reg];
+}
+
+static void aspeed_ast2700_scu_write(void *opaque, hwaddr offset,
+                                     uint64_t data64, unsigned size)
+{
+    AspeedSCUState *s = ASPEED_SCU(opaque);
+    int reg = TO_REG(offset);
+    /* Truncate here so bitwise operations below behave as expected */
+    uint32_t data = data64;
+
+    if (reg >= ASPEED_AST2700_SCU_NR_REGS) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: Out-of-bounds write at offset 0x%" HWADDR_PRIx "\n",
+                __func__, offset);
+        return;
+    }
+
+    trace_aspeed_ast2700_scu_write(offset, size, data);
+
+    switch (reg) {
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: Unhandled write at offset 0x%" HWADDR_PRIx "\n",
+                      __func__, offset);
+        break;
+    }
+
+    s->regs[reg] = data;
+}
+
+static const MemoryRegionOps aspeed_ast2700_scu_ops = {
+    .read = aspeed_ast2700_scu_read,
+    .write = aspeed_ast2700_scu_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .valid.min_access_size = 1,
+    .valid.max_access_size = 8,
+    .valid.unaligned = false,
+};
+
+static const uint32_t ast2700_a0_resets[ASPEED_AST2700_SCU_NR_REGS] = {
+    [AST2700_SILICON_REV]           = AST2700_A0_SILICON_REV,
+    [AST2700_HW_STRAP1]             = 0x00000800,
+    [AST2700_HW_STRAP1_CLR]         = 0xFFF0FFF0,
+    [AST2700_HW_STRAP1_LOCK]        = 0x00000FFF,
+    [AST2700_HW_STRAP1_SEC1]        = 0x000000FF,
+    [AST2700_HW_STRAP1_SEC2]        = 0x00000000,
+    [AST2700_HW_STRAP1_SEC3]        = 0x1000408F,
+    [AST2700_SCU_HPLL_PARAM]        = 0x0000009f,
+    [AST2700_SCU_HPLL_EXT_PARAM]    = 0x8000004f,
+    [AST2700_SCU_DPLL_PARAM]        = 0x0080009f,
+    [AST2700_SCU_DPLL_EXT_PARAM]    = 0x8000004f,
+    [AST2700_SCU_MPLL_PARAM]        = 0x00000040,
+    [AST2700_SCU_MPLL_EXT_PARAM]    = 0x80000000,
+    [AST2700_SCU_D1CLK_PARAM]       = 0x00050002,
+    [AST2700_SCU_D2CLK_PARAM]       = 0x00050002,
+    [AST2700_SCU_CRT1CLK_PARAM]     = 0x00050002,
+    [AST2700_SCU_CRT2CLK_PARAM]     = 0x00050002,
+    [AST2700_SCU_MPHYCLK_PARAM]     = 0x0000004c,
+    [AST2700_SCU_FREQ_CNTR]         = 0x000375eb,
+    [AST2700_SCU_CPU_SCRATCH_0]     = 0x00000000,
+    [AST2700_SCU_CPU_SCRATCH_1]     = 0x00000004,
+};
+
+static void aspeed_ast2700_scu_reset(DeviceState *dev)
+{
+    AspeedSCUState *s = ASPEED_SCU(dev);
+    AspeedSCUClass *asc = ASPEED_SCU_GET_CLASS(dev);
+
+    memcpy(s->regs, asc->resets, asc->nr_regs * 4);
+}
+
+static void aspeed_2700_scu_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    AspeedSCUClass *asc = ASPEED_SCU_CLASS(klass);
+
+    dc->desc = "ASPEED 2700 System Control Unit";
+    dc->reset = aspeed_ast2700_scu_reset;
+    asc->resets = ast2700_a0_resets;
+    asc->calc_hpll = aspeed_2600_scu_calc_hpll;
+    asc->get_apb = aspeed_2700_scu_get_apb_freq;
+    asc->apb_divider = 4;
+    asc->nr_regs = ASPEED_AST2700_SCU_NR_REGS;
+    asc->clkin_25Mhz = true;
+    asc->ops = &aspeed_ast2700_scu_ops;
+}
+
+static uint64_t aspeed_ast2700_scuio_read(void *opaque, hwaddr offset,
+                                        unsigned size)
+{
+    AspeedSCUState *s = ASPEED_SCU(opaque);
+    int reg = TO_REG(offset);
+    if (reg >= ASPEED_AST2700_SCU_NR_REGS) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: Out-of-bounds read at offset 0x%" HWADDR_PRIx "\n",
+                __func__, offset);
+        return 0;
+    }
+
+    switch (reg) {
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: Unhandled read at offset 0x%" HWADDR_PRIx "\n",
+                      __func__, offset);
+    }
+
+    trace_aspeed_ast2700_scuio_read(offset, size, s->regs[reg]);
+    return s->regs[reg];
+}
+
+static void aspeed_ast2700_scuio_write(void *opaque, hwaddr offset,
+                                     uint64_t data64, unsigned size)
+{
+    AspeedSCUState *s = ASPEED_SCU(opaque);
+    int reg = TO_REG(offset);
+    /* Truncate here so bitwise operations below behave as expected */
+    uint32_t data = data64;
+    bool updated = false;
+
+    if (reg >= ASPEED_AST2700_SCU_NR_REGS) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: Out-of-bounds write at offset 0x%" HWADDR_PRIx "\n",
+                __func__, offset);
+        return;
+    }
+
+    trace_aspeed_ast2700_scuio_write(offset, size, data);
+
+    switch (reg) {
+    case AST2700_SCUIO_CLK_STOP_CTL_1:
+    case AST2700_SCUIO_CLK_STOP_CTL_2:
+        s->regs[reg] |= data;
+        updated = true;
+        break;
+    case AST2700_SCUIO_CLK_STOP_CLR_1:
+    case AST2700_SCUIO_CLK_STOP_CLR_2:
+        s->regs[reg - 1] ^= data;
+        updated = true;
+        break;
+    default:
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: Unhandled write at offset 0x%" HWADDR_PRIx "\n",
+                      __func__, offset);
+        break;
+    }
+
+    if (!updated) {
+        s->regs[reg] = data;
+    }
+}
+
+static const MemoryRegionOps aspeed_ast2700_scuio_ops = {
+    .read = aspeed_ast2700_scuio_read,
+    .write = aspeed_ast2700_scuio_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .valid.min_access_size = 1,
+    .valid.max_access_size = 8,
+    .valid.unaligned = false,
+};
+
+static const uint32_t ast2700_a0_resets_io[ASPEED_AST2700_SCU_NR_REGS] = {
+    [AST2700_SILICON_REV]               = 0x06000003,
+    [AST2700_HW_STRAP1]                 = 0x00000504,
+    [AST2700_HW_STRAP1_CLR]             = 0xFFF0FFF0,
+    [AST2700_HW_STRAP1_LOCK]            = 0x00000FFF,
+    [AST2700_HW_STRAP1_SEC1]            = 0x000000FF,
+    [AST2700_HW_STRAP1_SEC2]            = 0x00000000,
+    [AST2700_HW_STRAP1_SEC3]            = 0x1000408F,
+    [AST2700_SCUIO_CLK_STOP_CTL_1]      = 0xffff8400,
+    [AST2700_SCUIO_CLK_STOP_CTL_2]      = 0x00005f30,
+    [AST2700_SCUIO_CLK_SEL_1]           = 0x86900000,
+    [AST2700_SCUIO_CLK_SEL_2]           = 0x00400000,
+    [AST2700_SCUIO_HPLL_PARAM]          = 0x10000027,
+    [AST2700_SCUIO_HPLL_EXT_PARAM]      = 0x80000014,
+    [AST2700_SCUIO_APLL_PARAM]          = 0x1000001f,
+    [AST2700_SCUIO_APLL_EXT_PARAM]      = 0x8000000f,
+    [AST2700_SCUIO_DPLL_PARAM]          = 0x106e42ce,
+    [AST2700_SCUIO_DPLL_EXT_PARAM]      = 0x80000167,
+    [AST2700_SCUIO_DPLL_PARAM_READ]     = 0x106e42ce,
+    [AST2700_SCUIO_DPLL_EXT_PARAM_READ] = 0x80000167,
+    [AST2700_SCUIO_UARTCLK_GEN]         = 0x00014506,
+    [AST2700_SCUIO_HUARTCLK_GEN]        = 0x000145c0,
+    [AST2700_SCUIO_CLK_DUTY_MEAS_RST]   = 0x0c9100d2,
+};
+
+static void aspeed_2700_scuio_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    AspeedSCUClass *asc = ASPEED_SCU_CLASS(klass);
+
+    dc->desc = "ASPEED 2700 System Control Unit I/O";
+    dc->reset = aspeed_ast2700_scu_reset;
+    asc->resets = ast2700_a0_resets_io;
+    asc->calc_hpll = aspeed_2600_scu_calc_hpll;
+    asc->get_apb = aspeed_2700_scuio_get_apb_freq;
+    asc->apb_divider = 2;
+    asc->nr_regs = ASPEED_AST2700_SCU_NR_REGS;
+    asc->clkin_25Mhz = true;
+    asc->ops = &aspeed_ast2700_scuio_ops;
+}
+
+static const TypeInfo aspeed_2700_scu_info = {
+    .name = TYPE_ASPEED_2700_SCU,
+    .parent = TYPE_ASPEED_SCU,
+    .instance_size = sizeof(AspeedSCUState),
+    .class_init = aspeed_2700_scu_class_init,
+};
+
+static const TypeInfo aspeed_2700_scuio_info = {
+    .name = TYPE_ASPEED_2700_SCUIO,
+    .parent = TYPE_ASPEED_SCU,
+    .instance_size = sizeof(AspeedSCUState),
+    .class_init = aspeed_2700_scuio_class_init,
+};
+
 static const uint32_t ast1030_a1_resets[ASPEED_AST2600_SCU_NR_REGS] = {
     [AST2600_SYS_RST_CTRL]      = 0xFFC3FED8,
     [AST2600_SYS_RST_CTRL2]     = 0x09FFFFFC,
@@ -841,6 +1143,8 @@
     type_register_static(&aspeed_2500_scu_info);
     type_register_static(&aspeed_2600_scu_info);
     type_register_static(&aspeed_1030_scu_info);
+    type_register_static(&aspeed_2700_scu_info);
+    type_register_static(&aspeed_2700_scuio_info);
 }
 
 type_init(aspeed_scu_register_types);
diff --git a/hw/misc/aspeed_sdmc.c b/hw/misc/aspeed_sdmc.c
index 64cd1a8..93e2e29 100644
--- a/hw/misc/aspeed_sdmc.c
+++ b/hw/misc/aspeed_sdmc.c
@@ -27,6 +27,7 @@
 #define   PROT_SOFTLOCKED    0x00
 
 #define   PROT_KEY_UNLOCK     0xFC600309
+#define   PROT_2700_KEY_UNLOCK  0x1688A8A8
 #define   PROT_KEY_HARDLOCK   0xDEADDEAD /* AST2600 */
 
 /* Configuration Register */
@@ -54,6 +55,46 @@
 #define R_DRAM_TIME       (0x8c / 4)
 #define R_ECC_ERR_INJECT  (0xb4 / 4)
 
+/* AST2700 Register */
+#define R_2700_PROT                 (0x00 / 4)
+#define R_INT_STATUS                (0x04 / 4)
+#define R_INT_CLEAR                 (0x08 / 4)
+#define R_INT_MASK                  (0x0c / 4)
+#define R_MAIN_CONF                 (0x10 / 4)
+#define R_MAIN_CONTROL              (0x14 / 4)
+#define R_MAIN_STATUS               (0x18 / 4)
+#define R_ERR_STATUS                (0x1c / 4)
+#define R_ECC_FAIL_STATUS           (0x78 / 4)
+#define R_ECC_FAIL_ADDR             (0x7c / 4)
+#define R_ECC_TESTING_CONTROL       (0x80 / 4)
+#define R_PROT_REGION_LOCK_STATUS   (0x94 / 4)
+#define R_TEST_FAIL_ADDR            (0xd4 / 4)
+#define R_TEST_FAIL_D0              (0xd8 / 4)
+#define R_TEST_FAIL_D1              (0xdc / 4)
+#define R_TEST_FAIL_D2              (0xe0 / 4)
+#define R_TEST_FAIL_D3              (0xe4 / 4)
+#define R_DBG_STATUS                (0xf4 / 4)
+#define R_PHY_INTERFACE_STATUS      (0xf8 / 4)
+#define R_GRAPHIC_MEM_BASE_ADDR     (0x10c / 4)
+#define R_PORT0_INTERFACE_MONITOR0  (0x240 / 4)
+#define R_PORT0_INTERFACE_MONITOR1  (0x244 / 4)
+#define R_PORT0_INTERFACE_MONITOR2  (0x248 / 4)
+#define R_PORT1_INTERFACE_MONITOR0  (0x2c0 / 4)
+#define R_PORT1_INTERFACE_MONITOR1  (0x2c4 / 4)
+#define R_PORT1_INTERFACE_MONITOR2  (0x2c8 / 4)
+#define R_PORT2_INTERFACE_MONITOR0  (0x340 / 4)
+#define R_PORT2_INTERFACE_MONITOR1  (0x344 / 4)
+#define R_PORT2_INTERFACE_MONITOR2  (0x348 / 4)
+#define R_PORT3_INTERFACE_MONITOR0  (0x3c0 / 4)
+#define R_PORT3_INTERFACE_MONITOR1  (0x3c4 / 4)
+#define R_PORT3_INTERFACE_MONITOR2  (0x3c8 / 4)
+#define R_PORT4_INTERFACE_MONITOR0  (0x440 / 4)
+#define R_PORT4_INTERFACE_MONITOR1  (0x444 / 4)
+#define R_PORT4_INTERFACE_MONITOR2  (0x448 / 4)
+#define R_PORT5_INTERFACE_MONITOR0  (0x4c0 / 4)
+#define R_PORT5_INTERFACE_MONITOR1  (0x4c4 / 4)
+#define R_PORT5_INTERFACE_MONITOR2  (0x4c8 / 4)
+
 /*
  * Configuration register Ox4 (for Aspeed AST2400 SOC)
  *
@@ -76,10 +117,6 @@
 #define     ASPEED_SDMC_VGA_32MB            0x2
 #define     ASPEED_SDMC_VGA_64MB            0x3
 #define ASPEED_SDMC_DRAM_SIZE(x)        (x & 0x3)
-#define     ASPEED_SDMC_DRAM_64MB           0x0
-#define     ASPEED_SDMC_DRAM_128MB          0x1
-#define     ASPEED_SDMC_DRAM_256MB          0x2
-#define     ASPEED_SDMC_DRAM_512MB          0x3
 
 #define ASPEED_SDMC_READONLY_MASK                       \
     (ASPEED_SDMC_RESERVED | ASPEED_SDMC_VGA_COMPAT |    \
@@ -100,22 +137,24 @@
 #define ASPEED_SDMC_CACHE_ENABLE        (1 << 10) /* differs from AST2400 */
 #define ASPEED_SDMC_DRAM_TYPE           (1 << 4)  /* differs from AST2400 */
 
-/* DRAM size definitions differs */
-#define     ASPEED_SDMC_AST2500_128MB       0x0
-#define     ASPEED_SDMC_AST2500_256MB       0x1
-#define     ASPEED_SDMC_AST2500_512MB       0x2
-#define     ASPEED_SDMC_AST2500_1024MB      0x3
-
-#define     ASPEED_SDMC_AST2600_256MB       0x0
-#define     ASPEED_SDMC_AST2600_512MB       0x1
-#define     ASPEED_SDMC_AST2600_1024MB      0x2
-#define     ASPEED_SDMC_AST2600_2048MB      0x3
-
 #define ASPEED_SDMC_AST2500_READONLY_MASK                               \
     (ASPEED_SDMC_HW_VERSION(0xf) | ASPEED_SDMC_CACHE_INITIAL_DONE |     \
      ASPEED_SDMC_AST2500_RESERVED | ASPEED_SDMC_VGA_COMPAT |            \
      ASPEED_SDMC_VGA_APERTURE(ASPEED_SDMC_VGA_64MB))
 
+/*
+ * Main Configuration register 0x10 (for Aspeed AST2700 SOC and higher)
+ *
+ */
+#define ASPEED_SDMC_AST2700_RESERVED        0xFFFF2082 /* 31:16, 13, 7, 1 */
+#define ASPEED_SDMC_AST2700_DATA_SCRAMBLE           (1 << 8)
+#define ASPEED_SDMC_AST2700_ECC_ENABLE              (1 << 6)
+#define ASPEED_SDMC_AST2700_PAGE_MATCHING_ENABLE    (1 << 5)
+#define ASPEED_SDMC_AST2700_DRAM_SIZE(x)            ((x & 0x7) << 2)
+
+#define ASPEED_SDMC_AST2700_READONLY_MASK   \
+     (ASPEED_SDMC_AST2700_RESERVED)
+
 static uint64_t aspeed_sdmc_read(void *opaque, hwaddr addr, unsigned size)
 {
     AspeedSDMCState *s = ASPEED_SDMC(opaque);
@@ -231,7 +270,7 @@
     AspeedSDMCState *s = ASPEED_SDMC(dev);
     AspeedSDMCClass *asc = ASPEED_SDMC_GET_CLASS(s);
 
-    assert(asc->max_ram_size < 4 * GiB); /* 32-bit address bus */
+    assert(asc->max_ram_size < 4 * GiB || asc->is_bus64bit);
     s->max_ram_size = asc->max_ram_size;
 
     memory_region_init_io(&s->iomem, OBJECT(s), &aspeed_sdmc_ops, s,
@@ -241,8 +280,8 @@
 
 static const VMStateDescription vmstate_aspeed_sdmc = {
     .name = "aspeed.sdmc",
-    .version_id = 1,
-    .minimum_version_id = 1,
+    .version_id = 2,
+    .minimum_version_id = 2,
     .fields = (const VMStateField[]) {
         VMSTATE_UINT32_ARRAY(regs, AspeedSDMCState, ASPEED_SDMC_NR_REGS),
         VMSTATE_END_OF_LIST()
@@ -251,6 +290,7 @@
 
 static Property aspeed_sdmc_properties[] = {
     DEFINE_PROP_UINT64("max-ram-size", AspeedSDMCState, max_ram_size, 0),
+    DEFINE_PROP_BOOL("unlocked", AspeedSDMCState, unlocked, false),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -311,7 +351,8 @@
                                    uint32_t data)
 {
     if (reg == R_PROT) {
-        s->regs[reg] = (data == PROT_KEY_UNLOCK) ? PROT_UNLOCKED : PROT_SOFTLOCKED;
+        s->regs[reg] =
+            (data == PROT_KEY_UNLOCK) ? PROT_UNLOCKED : PROT_SOFTLOCKED;
         return;
     }
 
@@ -369,7 +410,8 @@
                                    uint32_t data)
 {
     if (reg == R_PROT) {
-        s->regs[reg] = (data == PROT_KEY_UNLOCK) ? PROT_UNLOCKED : PROT_SOFTLOCKED;
+        s->regs[reg] =
+            (data == PROT_KEY_UNLOCK) ? PROT_UNLOCKED : PROT_SOFTLOCKED;
         return;
     }
 
@@ -449,8 +491,9 @@
     }
 
     if (s->regs[R_PROT] == PROT_HARDLOCKED) {
-        qemu_log_mask(LOG_GUEST_ERROR, "%s: SDMC is locked until system reset!\n",
-                __func__);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: SDMC is locked until system reset!\n",
+                      __func__);
         return;
     }
 
@@ -512,12 +555,145 @@
     .class_init = aspeed_2600_sdmc_class_init,
 };
 
+static void aspeed_2700_sdmc_reset(DeviceState *dev)
+{
+    AspeedSDMCState *s = ASPEED_SDMC(dev);
+    AspeedSDMCClass *asc = ASPEED_SDMC_GET_CLASS(s);
+
+    memset(s->regs, 0, sizeof(s->regs));
+
+    /* Set ram size bits and default values */
+    s->regs[R_MAIN_CONF] = asc->compute_conf(s, 0);
+
+    if (s->unlocked) {
+        s->regs[R_2700_PROT] = PROT_UNLOCKED;
+    }
+}
+
+static uint32_t aspeed_2700_sdmc_compute_conf(AspeedSDMCState *s, uint32_t data)
+{
+    uint32_t fixed_conf = ASPEED_SDMC_AST2700_PAGE_MATCHING_ENABLE |
+        ASPEED_SDMC_AST2700_DRAM_SIZE(aspeed_sdmc_get_ram_bits(s));
+
+    /* Make sure readonly bits are kept */
+    data &= ~ASPEED_SDMC_AST2700_READONLY_MASK;
+
+    return data | fixed_conf;
+}
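+
+/*
+ * For instance, assuming aspeed_sdmc_get_ram_bits() returns the index of
+ * ram_size in valid_ram_sizes (as it does for the other SoCs), a 1 GiB
+ * setting (index 2 in aspeed_2700_ram_sizes below) makes fixed_conf
+ * ASPEED_SDMC_AST2700_PAGE_MATCHING_ENABLE | (2 << 2), i.e. 0x28.
+ */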
+
+static void aspeed_2700_sdmc_write(AspeedSDMCState *s, uint32_t reg,
+                                   uint32_t data)
+{
+    /* Unprotected registers */
+    switch (reg) {
+    case R_INT_STATUS:
+    case R_INT_CLEAR:
+    case R_INT_MASK:
+    case R_MAIN_STATUS:
+    case R_ERR_STATUS:
+    case R_ECC_FAIL_STATUS:
+    case R_ECC_FAIL_ADDR:
+    case R_PROT_REGION_LOCK_STATUS:
+    case R_TEST_FAIL_ADDR:
+    case R_TEST_FAIL_D0:
+    case R_TEST_FAIL_D1:
+    case R_TEST_FAIL_D2:
+    case R_TEST_FAIL_D3:
+    case R_DBG_STATUS:
+    case R_PHY_INTERFACE_STATUS:
+    case R_GRAPHIC_MEM_BASE_ADDR:
+    case R_PORT0_INTERFACE_MONITOR0:
+    case R_PORT0_INTERFACE_MONITOR1:
+    case R_PORT0_INTERFACE_MONITOR2:
+    case R_PORT1_INTERFACE_MONITOR0:
+    case R_PORT1_INTERFACE_MONITOR1:
+    case R_PORT1_INTERFACE_MONITOR2:
+    case R_PORT2_INTERFACE_MONITOR0:
+    case R_PORT2_INTERFACE_MONITOR1:
+    case R_PORT2_INTERFACE_MONITOR2:
+    case R_PORT3_INTERFACE_MONITOR0:
+    case R_PORT3_INTERFACE_MONITOR1:
+    case R_PORT3_INTERFACE_MONITOR2:
+    case R_PORT4_INTERFACE_MONITOR0:
+    case R_PORT4_INTERFACE_MONITOR1:
+    case R_PORT4_INTERFACE_MONITOR2:
+    case R_PORT5_INTERFACE_MONITOR0:
+    case R_PORT5_INTERFACE_MONITOR1:
+    case R_PORT5_INTERFACE_MONITOR2:
+        s->regs[reg] = data;
+        return;
+    }
+
+    if (s->regs[R_2700_PROT] == PROT_HARDLOCKED) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: SDMC is locked until system reset!\n",
+                      __func__);
+        return;
+    }
+
+    if (reg != R_2700_PROT && s->regs[R_2700_PROT] == PROT_SOFTLOCKED) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: SDMC is locked! (write to MCR%02x blocked)\n",
+                      __func__, reg * 4);
+        return;
+    }
+
+    switch (reg) {
+    case R_2700_PROT:
+        if (data == PROT_2700_KEY_UNLOCK)  {
+            data = PROT_UNLOCKED;
+        } else if (data == PROT_KEY_HARDLOCK) {
+            data = PROT_HARDLOCKED;
+        } else {
+            data = PROT_SOFTLOCKED;
+        }
+        break;
+    case R_MAIN_CONF:
+        data = aspeed_2700_sdmc_compute_conf(s, data);
+        break;
+    case R_MAIN_STATUS:
+        /* Will never return 'busy'. */
+        data &= ~PHY_BUSY_STATE;
+        break;
+    default:
+        break;
+    }
+
+    s->regs[reg] = data;
+}
+
+static const uint64_t
+    aspeed_2700_ram_sizes[] = { 256 * MiB, 512 * MiB, 1024 * MiB,
+                                2048 * MiB, 4096 * MiB, 8192 * MiB, 0};
+
+static void aspeed_2700_sdmc_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    AspeedSDMCClass *asc = ASPEED_SDMC_CLASS(klass);
+
+    dc->desc = "ASPEED 2700 SDRAM Memory Controller";
+    dc->reset = aspeed_2700_sdmc_reset;
+
+    asc->is_bus64bit = true;
+    asc->max_ram_size = 8 * GiB;
+    asc->compute_conf = aspeed_2700_sdmc_compute_conf;
+    asc->write = aspeed_2700_sdmc_write;
+    asc->valid_ram_sizes = aspeed_2700_ram_sizes;
+}
+
+static const TypeInfo aspeed_2700_sdmc_info = {
+    .name = TYPE_ASPEED_2700_SDMC,
+    .parent = TYPE_ASPEED_SDMC,
+    .class_init = aspeed_2700_sdmc_class_init,
+};
+
 static void aspeed_sdmc_register_types(void)
 {
     type_register_static(&aspeed_sdmc_info);
     type_register_static(&aspeed_2400_sdmc_info);
     type_register_static(&aspeed_2500_sdmc_info);
     type_register_static(&aspeed_2600_sdmc_info);
+    type_register_static(&aspeed_2700_sdmc_info);
 }
 
 type_init(aspeed_sdmc_register_types);
diff --git a/hw/misc/aspeed_sli.c b/hw/misc/aspeed_sli.c
new file mode 100644
index 0000000..fe720ea
--- /dev/null
+++ b/hw/misc/aspeed_sli.c
@@ -0,0 +1,177 @@
+/*
+ * ASPEED SLI Controller
+ *
+ * Copyright (C) 2024 ASPEED Technology Inc.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/log.h"
+#include "qemu/error-report.h"
+#include "hw/qdev-properties.h"
+#include "hw/misc/aspeed_sli.h"
+#include "qapi/error.h"
+#include "migration/vmstate.h"
+#include "trace.h"
+
+#define SLI_REGION_SIZE 0x500
+#define TO_REG(addr) ((addr) >> 2)
+
+static uint64_t aspeed_sli_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    AspeedSLIState *s = ASPEED_SLI(opaque);
+    int reg = TO_REG(addr);
+
+    if (reg >= ARRAY_SIZE(s->regs)) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: Out-of-bounds read at offset 0x%" HWADDR_PRIx "\n",
+                      __func__, addr);
+        return 0;
+    }
+
+    trace_aspeed_sli_read(addr, size, s->regs[reg]);
+    return s->regs[reg];
+}
+
+static void aspeed_sli_write(void *opaque, hwaddr addr, uint64_t data,
+                              unsigned int size)
+{
+    AspeedSLIState *s = ASPEED_SLI(opaque);
+    int reg = TO_REG(addr);
+
+    if (reg >= ARRAY_SIZE(s->regs)) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: Out-of-bounds write at offset 0x%" HWADDR_PRIx "\n",
+                      __func__, addr);
+        return;
+    }
+
+    trace_aspeed_sli_write(addr, size, data);
+    s->regs[reg] = data;
+}
+
+static uint64_t aspeed_sliio_read(void *opaque, hwaddr addr, unsigned int size)
+{
+    AspeedSLIState *s = ASPEED_SLI(opaque);
+    int reg = TO_REG(addr);
+
+    if (reg >= ARRAY_SIZE(s->regs)) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: Out-of-bounds read at offset 0x%" HWADDR_PRIx "\n",
+                      __func__, addr);
+        return 0;
+    }
+
+    trace_aspeed_sliio_read(addr, size, s->regs[reg]);
+    return s->regs[reg];
+}
+
+static void aspeed_sliio_write(void *opaque, hwaddr addr, uint64_t data,
+                              unsigned int size)
+{
+    AspeedSLIState *s = ASPEED_SLI(opaque);
+    int reg = TO_REG(addr);
+
+    if (reg >= ARRAY_SIZE(s->regs)) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: Out-of-bounds write at offset 0x%" HWADDR_PRIx "\n",
+                      __func__, addr);
+        return;
+    }
+
+    trace_aspeed_sliio_write(addr, size, data);
+    s->regs[reg] = data;
+}
+
+static const MemoryRegionOps aspeed_sli_ops = {
+    .read = aspeed_sli_read,
+    .write = aspeed_sli_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .valid = {
+        .min_access_size = 1,
+        .max_access_size = 4,
+    },
+};
+
+static const MemoryRegionOps aspeed_sliio_ops = {
+    .read = aspeed_sliio_read,
+    .write = aspeed_sliio_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .valid = {
+        .min_access_size = 1,
+        .max_access_size = 4,
+    },
+};
+
+static void aspeed_sli_realize(DeviceState *dev, Error **errp)
+{
+    AspeedSLIState *s = ASPEED_SLI(dev);
+    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
+
+    memory_region_init_io(&s->iomem, OBJECT(s), &aspeed_sli_ops, s,
+                          TYPE_ASPEED_SLI, SLI_REGION_SIZE);
+    sysbus_init_mmio(sbd, &s->iomem);
+}
+
+static void aspeed_sliio_realize(DeviceState *dev, Error **errp)
+{
+    AspeedSLIState *s = ASPEED_SLI(dev);
+    SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
+
+    memory_region_init_io(&s->iomem, OBJECT(s), &aspeed_sliio_ops, s,
+                          TYPE_ASPEED_SLI, SLI_REGION_SIZE);
+    sysbus_init_mmio(sbd, &s->iomem);
+}
+
+static void aspeed_sli_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->desc = "Aspeed SLI Controller";
+    dc->realize = aspeed_sli_realize;
+}
+
+static const TypeInfo aspeed_sli_info = {
+    .name          = TYPE_ASPEED_SLI,
+    .parent        = TYPE_SYS_BUS_DEVICE,
+    .instance_size = sizeof(AspeedSLIState),
+    .class_init    = aspeed_sli_class_init,
+    .abstract      = true,
+};
+
+static void aspeed_2700_sli_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->desc = "AST2700 SLI Controller";
+}
+
+static void aspeed_2700_sliio_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    dc->desc = "AST2700 I/O SLI Controller";
+    dc->realize = aspeed_sliio_realize;
+}
+
+static const TypeInfo aspeed_2700_sli_info = {
+    .name           = TYPE_ASPEED_2700_SLI,
+    .parent         = TYPE_ASPEED_SLI,
+    .class_init     = aspeed_2700_sli_class_init,
+};
+
+static const TypeInfo aspeed_2700_sliio_info = {
+    .name           = TYPE_ASPEED_2700_SLIIO,
+    .parent         = TYPE_ASPEED_SLI,
+    .class_init     = aspeed_2700_sliio_class_init,
+};
+
+static void aspeed_sli_register_types(void)
+{
+    type_register_static(&aspeed_sli_info);
+    type_register_static(&aspeed_2700_sli_info);
+    type_register_static(&aspeed_2700_sliio_info);
+}
+
+type_init(aspeed_sli_register_types);
diff --git a/hw/misc/meson.build b/hw/misc/meson.build
index 86596a3..2ca8717 100644
--- a/hw/misc/meson.build
+++ b/hw/misc/meson.build
@@ -136,7 +136,8 @@
   'aspeed_sbc.c',
   'aspeed_sdmc.c',
   'aspeed_xdma.c',
-  'aspeed_peci.c'))
+  'aspeed_peci.c',
+  'aspeed_sli.c'))
 
 system_ss.add(when: 'CONFIG_MSF2', if_true: files('msf2-sysreg.c'))
 system_ss.add(when: 'CONFIG_NRF51_SOC', if_true: files('nrf51_rng.c'))
diff --git a/hw/misc/trace-events b/hw/misc/trace-events
index 5d241cb..1be0717 100644
--- a/hw/misc/trace-events
+++ b/hw/misc/trace-events
@@ -93,6 +93,10 @@
 # aspeed_scu.c
 aspeed_scu_write(uint64_t offset, unsigned size, uint32_t data) "To 0x%" PRIx64 " of size %u: 0x%" PRIx32
 aspeed_scu_read(uint64_t offset, unsigned size, uint32_t data) "To 0x%" PRIx64 " of size %u: 0x%" PRIx32
+aspeed_ast2700_scu_write(uint64_t offset, unsigned size, uint32_t data) "To 0x%" PRIx64 " of size %u: 0x%" PRIx32
+aspeed_ast2700_scu_read(uint64_t offset, unsigned size, uint32_t data) "To 0x%" PRIx64 " of size %u: 0x%" PRIx32
+aspeed_ast2700_scuio_write(uint64_t offset, unsigned size, uint32_t data) "To 0x%" PRIx64 " of size %u: 0x%" PRIx32
+aspeed_ast2700_scuio_read(uint64_t offset, unsigned size, uint32_t data) "To 0x%" PRIx64 " of size %u: 0x%" PRIx32
 
 # mps2-scc.c
 mps2_scc_read(uint64_t offset, uint64_t data, unsigned size) "MPS2 SCC read: offset 0x%" PRIx64 " data 0x%" PRIx64 " size %u"
@@ -351,3 +355,10 @@
 # iosb.c
 iosb_read(int reg, uint64_t value, unsigned int size) "reg=0x%x value=0x%"PRIx64" size=%u"
 iosb_write(int reg, uint64_t value, unsigned int size) "reg=0x%x value=0x%"PRIx64" size=%u"
+
+# aspeed_sli.c
+aspeed_sli_write(uint64_t offset, unsigned int size, uint32_t data) "To 0x%" PRIx64 " of size %u: 0x%" PRIx32
+aspeed_sli_read(uint64_t offset, unsigned int size, uint32_t data) "To 0x%" PRIx64 " of size %u: 0x%" PRIx32
+aspeed_sliio_write(uint64_t offset, unsigned int size, uint32_t data) "To 0x%" PRIx64 " of size %u: 0x%" PRIx32
+aspeed_sliio_read(uint64_t offset, unsigned int size, uint32_t data) "To 0x%" PRIx64 " of size %u: 0x%" PRIx32
+
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 0812d39..a67092d 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -114,6 +114,7 @@
      * 0xffff        - reserved
      */
     uint16_t rotation_rate;
+    bool migrate_emulated_scsi_request;
 };
 
 static void scsi_free_request(SCSIRequest *req)
@@ -162,6 +163,15 @@
     }
 }
 
+static void scsi_disk_emulate_save_request(QEMUFile *f, SCSIRequest *req)
+{
+    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
+
+    if (s->migrate_emulated_scsi_request) {
+        scsi_disk_save_request(f, req);
+    }
+}
+
 static void scsi_disk_load_request(QEMUFile *f, SCSIRequest *req)
 {
     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
@@ -185,6 +195,15 @@
     qemu_iovec_init_external(&r->qiov, &r->iov, 1);
 }
 
+static void scsi_disk_emulate_load_request(QEMUFile *f, SCSIRequest *req)
+{
+    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
+
+    if (s->migrate_emulated_scsi_request) {
+        scsi_disk_load_request(f, req);
+    }
+}
+
 /*
  * scsi_handle_rw_error has two return values.  False means that the error
  * must be ignored, true means that the error has been processed and the
@@ -2606,6 +2625,8 @@
     .read_data    = scsi_disk_emulate_read_data,
     .write_data   = scsi_disk_emulate_write_data,
     .get_buf      = scsi_get_buf,
+    .load_request = scsi_disk_emulate_load_request,
+    .save_request = scsi_disk_emulate_save_request,
 };
 
 static const SCSIReqOps scsi_disk_dma_reqops = {
@@ -3114,7 +3135,8 @@
     DEFINE_PROP_STRING("serial", SCSIDiskState, serial),                \
     DEFINE_PROP_STRING("vendor", SCSIDiskState, vendor),                \
     DEFINE_PROP_STRING("product", SCSIDiskState, product),              \
-    DEFINE_PROP_STRING("device_id", SCSIDiskState, device_id)
+    DEFINE_PROP_STRING("device_id", SCSIDiskState, device_id),          \
+    DEFINE_PROP_BOOL("migrate-emulated-scsi-request", SCSIDiskState, migrate_emulated_scsi_request, true)
 
 
 static Property scsi_hd_properties[] = {
diff --git a/hw/ssi/aspeed_smc.c b/hw/ssi/aspeed_smc.c
index 6e1a84c..49205ab 100644
--- a/hw/ssi/aspeed_smc.c
+++ b/hw/ssi/aspeed_smc.c
@@ -132,6 +132,9 @@
 #define   FMC_WDT2_CTRL_BOOT_SOURCE      BIT(4) /* O: primary 1: alternate */
 #define   FMC_WDT2_CTRL_EN               BIT(0)
 
+/* DMA DRAM Side Address High Part (AST2700) */
+#define R_DMA_DRAM_ADDR_HIGH   (0x7c / 4)
+
 /* DMA Control/Status Register */
 #define R_DMA_CTRL        (0x80 / 4)
 #define   DMA_CTRL_REQUEST      (1 << 31)
@@ -178,13 +181,18 @@
  * DMA flash addresses should be 4 bytes aligned and the valid address
  * range is 0x20000000 - 0x2FFFFFFF.
  *
- * DMA length is from 4 bytes to 32MB
+ * DMA length is from 4 bytes to 32MB (AST2500)
  *   0: 4 bytes
- *   0x7FFFFF: 32M bytes
+ *   0x1FFFFFC: 32M bytes
+ *
+ * DMA length is from 1 byte to 32MB (AST2600, AST10x0 and AST2700)
+ *   0: 1 byte
+ *   0x1FFFFFF: 32M bytes
  */
 #define DMA_DRAM_ADDR(asc, val)   ((val) & (asc)->dma_dram_mask)
+#define DMA_DRAM_ADDR_HIGH(val)   ((val) & 0xf)
 #define DMA_FLASH_ADDR(asc, val)  ((val) & (asc)->dma_flash_mask)
-#define DMA_LENGTH(val)         ((val) & 0x01FFFFFC)
+#define DMA_LENGTH(val)         ((val) & 0x01FFFFFF)
 
 /* Flash opcodes. */
 #define SPI_OP_READ       0x03    /* Read data bytes (low frequency) */
@@ -203,6 +211,7 @@
 #define ASPEED_SMC_FEATURE_DMA       0x1
 #define ASPEED_SMC_FEATURE_DMA_GRANT 0x2
 #define ASPEED_SMC_FEATURE_WDT_CONTROL 0x4
+#define ASPEED_SMC_FEATURE_DMA_DRAM_ADDR_HIGH 0x08
 
 static inline bool aspeed_smc_has_dma(const AspeedSMCClass *asc)
 {
@@ -214,6 +223,11 @@
     return !!(asc->features & ASPEED_SMC_FEATURE_WDT_CONTROL);
 }
 
+static inline bool aspeed_smc_has_dma64(const AspeedSMCClass *asc)
+{
+    return !!(asc->features & ASPEED_SMC_FEATURE_DMA_DRAM_ADDR_HIGH);
+}
+
 #define aspeed_smc_error(fmt, ...)                                      \
     qemu_log_mask(LOG_GUEST_ERROR, "%s: " fmt "\n", __func__, ## __VA_ARGS__)
 
@@ -743,6 +757,8 @@
         (aspeed_smc_has_dma(asc) && addr == R_DMA_CTRL) ||
         (aspeed_smc_has_dma(asc) && addr == R_DMA_FLASH_ADDR) ||
         (aspeed_smc_has_dma(asc) && addr == R_DMA_DRAM_ADDR) ||
+        (aspeed_smc_has_dma(asc) && aspeed_smc_has_dma64(asc) &&
+         addr == R_DMA_DRAM_ADDR_HIGH) ||
         (aspeed_smc_has_dma(asc) && addr == R_DMA_LEN) ||
         (aspeed_smc_has_dma(asc) && addr == R_DMA_CHECKSUM) ||
         (addr >= R_SEG_ADDR0 &&
@@ -843,6 +859,19 @@
     }
 }
 
+static uint64_t aspeed_smc_dma_dram_addr(AspeedSMCState *s)
+{
+    return s->regs[R_DMA_DRAM_ADDR] |
+        ((uint64_t) s->regs[R_DMA_DRAM_ADDR_HIGH] << 32);
+}
+
+static uint32_t aspeed_smc_dma_len(AspeedSMCState *s)
+{
+    AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s);
+
+    return QEMU_ALIGN_UP(s->regs[R_DMA_LEN] + asc->dma_start_length, 4);
+}
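+
+/*
+ * The two helpers above recover the guest-programmed DMA parameters: the
+ * DRAM address is R_DMA_DRAM_ADDR extended with the 4-bit
+ * R_DMA_DRAM_ADDR_HIGH part (allowing DRAM above 4 GiB on the AST2700),
+ * and the transfer length is R_DMA_LEN plus dma_start_length (4 on the
+ * AST2400/AST2500, 1 on the AST2600/AST10x0/AST2700), rounded up to the
+ * 4-byte accesses used by the DMA loops. E.g. R_DMA_LEN = 0 with
+ * dma_start_length = 1 still results in one 4-byte access.
+ */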
+
 /*
  * Accumulate the result of the reads to provide a checksum that will
  * be used to validate the read timing settings.
@@ -850,6 +879,7 @@
 static void aspeed_smc_dma_checksum(AspeedSMCState *s)
 {
     MemTxResult result;
+    uint32_t dma_len;
     uint32_t data;
 
     if (s->regs[R_DMA_CTRL] & DMA_CTRL_WRITE) {
@@ -861,7 +891,9 @@
         aspeed_smc_dma_calibration(s);
     }
 
-    while (s->regs[R_DMA_LEN]) {
+    dma_len = aspeed_smc_dma_len(s);
+
+    while (dma_len) {
         data = address_space_ldl_le(&s->flash_as, s->regs[R_DMA_FLASH_ADDR],
                                     MEMTXATTRS_UNSPECIFIED, &result);
         if (result != MEMTX_OK) {
@@ -877,7 +909,8 @@
          */
         s->regs[R_DMA_CHECKSUM] += data;
         s->regs[R_DMA_FLASH_ADDR] += 4;
-        s->regs[R_DMA_LEN] -= 4;
+        dma_len -= 4;
+        s->regs[R_DMA_LEN] = dma_len;
     }
 
     if (s->inject_failure && aspeed_smc_inject_read_failure(s)) {
@@ -888,21 +921,34 @@
 
 static void aspeed_smc_dma_rw(AspeedSMCState *s)
 {
+    AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s);
+    uint64_t dma_dram_offset;
+    uint64_t dma_dram_addr;
     MemTxResult result;
+    uint32_t dma_len;
     uint32_t data;
 
+    dma_len = aspeed_smc_dma_len(s);
+    dma_dram_addr = aspeed_smc_dma_dram_addr(s);
+
+    if (aspeed_smc_has_dma64(asc)) {
+        dma_dram_offset = dma_dram_addr - s->dram_base;
+    } else {
+        dma_dram_offset = dma_dram_addr;
+    }
+
     trace_aspeed_smc_dma_rw(s->regs[R_DMA_CTRL] & DMA_CTRL_WRITE ?
                             "write" : "read",
                             s->regs[R_DMA_FLASH_ADDR],
-                            s->regs[R_DMA_DRAM_ADDR],
-                            s->regs[R_DMA_LEN]);
-    while (s->regs[R_DMA_LEN]) {
+                            dma_dram_offset,
+                            dma_len);
+    while (dma_len) {
         if (s->regs[R_DMA_CTRL] & DMA_CTRL_WRITE) {
-            data = address_space_ldl_le(&s->dram_as, s->regs[R_DMA_DRAM_ADDR],
+            data = address_space_ldl_le(&s->dram_as, dma_dram_offset,
                                         MEMTXATTRS_UNSPECIFIED, &result);
             if (result != MEMTX_OK) {
-                aspeed_smc_error("DRAM read failed @%08x",
-                                 s->regs[R_DMA_DRAM_ADDR]);
+                aspeed_smc_error("DRAM read failed @%" PRIx64,
+                                 dma_dram_offset);
                 return;
             }
 
@@ -922,11 +968,11 @@
                 return;
             }
 
-            address_space_stl_le(&s->dram_as, s->regs[R_DMA_DRAM_ADDR],
+            address_space_stl_le(&s->dram_as, dma_dram_offset,
                                  data, MEMTXATTRS_UNSPECIFIED, &result);
             if (result != MEMTX_OK) {
-                aspeed_smc_error("DRAM write failed @%08x",
-                                 s->regs[R_DMA_DRAM_ADDR]);
+                aspeed_smc_error("DRAM write failed @%" PRIx64,
+                                 dma_dram_offset);
                 return;
             }
         }
@@ -935,9 +981,14 @@
          * When the DMA is on-going, the DMA registers are updated
          * with the current working addresses and length.
          */
+        dma_dram_offset += 4;
+        dma_dram_addr += 4;
+
+        s->regs[R_DMA_DRAM_ADDR_HIGH] = dma_dram_addr >> 32;
+        s->regs[R_DMA_DRAM_ADDR] = dma_dram_addr & 0xffffffff;
         s->regs[R_DMA_FLASH_ADDR] += 4;
-        s->regs[R_DMA_DRAM_ADDR] += 4;
-        s->regs[R_DMA_LEN] -= 4;
+        dma_len -= 4;
+        s->regs[R_DMA_LEN] = dma_len;
         s->regs[R_DMA_CHECKSUM] += data;
     }
 }
@@ -1088,6 +1139,9 @@
     } else if (aspeed_smc_has_dma(asc) && addr == R_DMA_LEN &&
                aspeed_smc_dma_granted(s)) {
         s->regs[addr] = DMA_LENGTH(value);
+    } else if (aspeed_smc_has_dma(asc) && aspeed_smc_has_dma64(asc) &&
+               addr == R_DMA_DRAM_ADDR_HIGH) {
+        s->regs[addr] = DMA_DRAM_ADDR_HIGH(value);
     } else {
         qemu_log_mask(LOG_UNIMP, "%s: not implemented: 0x%" HWADDR_PRIx "\n",
                       __func__, addr);
@@ -1220,6 +1274,7 @@
 
 static Property aspeed_smc_properties[] = {
     DEFINE_PROP_BOOL("inject-failure", AspeedSMCState, inject_failure, false),
+    DEFINE_PROP_UINT64("dram-base", AspeedSMCState, dram_base, 0),
     DEFINE_PROP_LINK("dram", AspeedSMCState, dram_mr,
                      TYPE_MEMORY_REGION, MemoryRegion *),
     DEFINE_PROP_END_OF_LIST(),
@@ -1261,7 +1316,7 @@
      * Use the default segment value to size the memory region. This
      * can be changed by FW at runtime.
      */
-    memory_region_init_io(&s->mmio, OBJECT(s), &aspeed_smc_flash_ops,
+    memory_region_init_io(&s->mmio, OBJECT(s), s->asc->reg_ops,
                           s, name, s->asc->segments[s->cs].size);
     sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->mmio);
 }
@@ -1336,6 +1391,7 @@
     asc->segment_to_reg    = aspeed_smc_segment_to_reg;
     asc->reg_to_segment    = aspeed_smc_reg_to_segment;
     asc->dma_ctrl          = aspeed_smc_dma_ctrl;
+    asc->reg_ops           = &aspeed_smc_flash_ops;
 }
 
 static const TypeInfo aspeed_2400_smc_info = {
@@ -1381,10 +1437,12 @@
     asc->features          = ASPEED_SMC_FEATURE_DMA;
     asc->dma_flash_mask    = 0x0FFFFFFC;
     asc->dma_dram_mask     = 0x1FFFFFFC;
+    asc->dma_start_length  = 4;
     asc->nregs             = ASPEED_SMC_R_MAX;
     asc->segment_to_reg    = aspeed_smc_segment_to_reg;
     asc->reg_to_segment    = aspeed_smc_reg_to_segment;
     asc->dma_ctrl          = aspeed_smc_dma_ctrl;
+    asc->reg_ops           = &aspeed_smc_flash_ops;
 }
 
 static const TypeInfo aspeed_2400_fmc_info = {
@@ -1424,6 +1482,7 @@
     asc->reg_to_segment    = aspeed_smc_reg_to_segment;
     asc->dma_ctrl          = aspeed_smc_dma_ctrl;
     asc->addr_width        = aspeed_2400_spi1_addr_width;
+    asc->reg_ops           = &aspeed_smc_flash_ops;
 }
 
 static const TypeInfo aspeed_2400_spi1_info = {
@@ -1448,7 +1507,7 @@
     DeviceClass *dc = DEVICE_CLASS(klass);
     AspeedSMCClass *asc = ASPEED_SMC_CLASS(klass);
 
-    dc->desc               = "Aspeed 2600 FMC Controller";
+    dc->desc               = "Aspeed 2500 FMC Controller";
     asc->r_conf            = R_CONF;
     asc->r_ce_ctrl         = R_CE_CTRL;
     asc->r_ctrl0           = R_CTRL0;
@@ -1464,10 +1523,12 @@
     asc->features          = ASPEED_SMC_FEATURE_DMA;
     asc->dma_flash_mask    = 0x0FFFFFFC;
     asc->dma_dram_mask     = 0x3FFFFFFC;
+    asc->dma_start_length  = 4;
     asc->nregs             = ASPEED_SMC_R_MAX;
     asc->segment_to_reg    = aspeed_smc_segment_to_reg;
     asc->reg_to_segment    = aspeed_smc_reg_to_segment;
     asc->dma_ctrl          = aspeed_smc_dma_ctrl;
+    asc->reg_ops           = &aspeed_smc_flash_ops;
 }
 
 static const TypeInfo aspeed_2500_fmc_info = {
@@ -1486,7 +1547,7 @@
     DeviceClass *dc = DEVICE_CLASS(klass);
     AspeedSMCClass *asc = ASPEED_SMC_CLASS(klass);
 
-    dc->desc               = "Aspeed 2600 SPI1 Controller";
+    dc->desc               = "Aspeed 2500 SPI1 Controller";
     asc->r_conf            = R_CONF;
     asc->r_ce_ctrl         = R_CE_CTRL;
     asc->r_ctrl0           = R_CTRL0;
@@ -1503,6 +1564,7 @@
     asc->segment_to_reg    = aspeed_smc_segment_to_reg;
     asc->reg_to_segment    = aspeed_smc_reg_to_segment;
     asc->dma_ctrl          = aspeed_smc_dma_ctrl;
+    asc->reg_ops           = &aspeed_smc_flash_ops;
 }
 
 static const TypeInfo aspeed_2500_spi1_info = {
@@ -1521,7 +1583,7 @@
     DeviceClass *dc = DEVICE_CLASS(klass);
     AspeedSMCClass *asc = ASPEED_SMC_CLASS(klass);
 
-    dc->desc               = "Aspeed 2600 SPI2 Controller";
+    dc->desc               = "Aspeed 2500 SPI2 Controller";
     asc->r_conf            = R_CONF;
     asc->r_ce_ctrl         = R_CE_CTRL;
     asc->r_ctrl0           = R_CTRL0;
@@ -1538,6 +1600,7 @@
     asc->segment_to_reg    = aspeed_smc_segment_to_reg;
     asc->reg_to_segment    = aspeed_smc_reg_to_segment;
     asc->dma_ctrl          = aspeed_smc_dma_ctrl;
+    asc->reg_ops           = &aspeed_smc_flash_ops;
 }
 
 static const TypeInfo aspeed_2500_spi2_info = {
@@ -1620,10 +1683,12 @@
                              ASPEED_SMC_FEATURE_WDT_CONTROL;
     asc->dma_flash_mask    = 0x0FFFFFFC;
     asc->dma_dram_mask     = 0x3FFFFFFC;
+    asc->dma_start_length  = 1;
     asc->nregs             = ASPEED_SMC_R_MAX;
     asc->segment_to_reg    = aspeed_2600_smc_segment_to_reg;
     asc->reg_to_segment    = aspeed_2600_smc_reg_to_segment;
     asc->dma_ctrl          = aspeed_2600_smc_dma_ctrl;
+    asc->reg_ops           = &aspeed_smc_flash_ops;
 }
 
 static const TypeInfo aspeed_2600_fmc_info = {
@@ -1658,10 +1723,12 @@
                              ASPEED_SMC_FEATURE_DMA_GRANT;
     asc->dma_flash_mask    = 0x0FFFFFFC;
     asc->dma_dram_mask     = 0x3FFFFFFC;
+    asc->dma_start_length  = 1;
     asc->nregs             = ASPEED_SMC_R_MAX;
     asc->segment_to_reg    = aspeed_2600_smc_segment_to_reg;
     asc->reg_to_segment    = aspeed_2600_smc_reg_to_segment;
     asc->dma_ctrl          = aspeed_2600_smc_dma_ctrl;
+    asc->reg_ops           = &aspeed_smc_flash_ops;
 }
 
 static const TypeInfo aspeed_2600_spi1_info = {
@@ -1697,10 +1764,12 @@
                              ASPEED_SMC_FEATURE_DMA_GRANT;
     asc->dma_flash_mask    = 0x0FFFFFFC;
     asc->dma_dram_mask     = 0x3FFFFFFC;
+    asc->dma_start_length  = 1;
     asc->nregs             = ASPEED_SMC_R_MAX;
     asc->segment_to_reg    = aspeed_2600_smc_segment_to_reg;
     asc->reg_to_segment    = aspeed_2600_smc_reg_to_segment;
     asc->dma_ctrl          = aspeed_2600_smc_dma_ctrl;
+    asc->reg_ops           = &aspeed_smc_flash_ops;
 }
 
 static const TypeInfo aspeed_2600_spi2_info = {
@@ -1778,10 +1847,12 @@
     asc->features          = ASPEED_SMC_FEATURE_DMA;
     asc->dma_flash_mask    = 0x0FFFFFFC;
     asc->dma_dram_mask     = 0x000BFFFC;
+    asc->dma_start_length  = 1;
     asc->nregs             = ASPEED_SMC_R_MAX;
     asc->segment_to_reg    = aspeed_1030_smc_segment_to_reg;
     asc->reg_to_segment    = aspeed_1030_smc_reg_to_segment;
     asc->dma_ctrl          = aspeed_2600_smc_dma_ctrl;
+    asc->reg_ops           = &aspeed_smc_flash_ops;
 }
 
 static const TypeInfo aspeed_1030_fmc_info = {
@@ -1815,10 +1886,12 @@
     asc->features          = ASPEED_SMC_FEATURE_DMA;
     asc->dma_flash_mask    = 0x0FFFFFFC;
     asc->dma_dram_mask     = 0x000BFFFC;
+    asc->dma_start_length  = 1;
     asc->nregs             = ASPEED_SMC_R_MAX;
     asc->segment_to_reg    = aspeed_2600_smc_segment_to_reg;
     asc->reg_to_segment    = aspeed_2600_smc_reg_to_segment;
     asc->dma_ctrl          = aspeed_2600_smc_dma_ctrl;
+    asc->reg_ops           = &aspeed_smc_flash_ops;
 }
 
 static const TypeInfo aspeed_1030_spi1_info = {
@@ -1851,10 +1924,12 @@
     asc->features          = ASPEED_SMC_FEATURE_DMA;
     asc->dma_flash_mask    = 0x0FFFFFFC;
     asc->dma_dram_mask     = 0x000BFFFC;
+    asc->dma_start_length  = 1;
     asc->nregs             = ASPEED_SMC_R_MAX;
     asc->segment_to_reg    = aspeed_2600_smc_segment_to_reg;
     asc->reg_to_segment    = aspeed_2600_smc_reg_to_segment;
     asc->dma_ctrl          = aspeed_2600_smc_dma_ctrl;
+    asc->reg_ops           = &aspeed_smc_flash_ops;
 }
 
 static const TypeInfo aspeed_1030_spi2_info = {
@@ -1863,6 +1938,234 @@
     .class_init = aspeed_1030_spi2_class_init,
 };
 
+/*
+ * The FMC Segment Registers of the AST2700 have a 64KB unit.
+ * Only bits [31:16] are used for decoding.
+ */
+#define AST2700_SEG_ADDR_MASK 0xffff0000
+
+static uint32_t aspeed_2700_smc_segment_to_reg(const AspeedSMCState *s,
+                                               const AspeedSegments *seg)
+{
+    uint32_t reg = 0;
+
+    /* Disabled segments have a nil register */
+    if (!seg->size) {
+        return 0;
+    }
+
+    reg |= (seg->addr & AST2700_SEG_ADDR_MASK) >> 16; /* start offset */
+    reg |= (seg->addr + seg->size - 1) & AST2700_SEG_ADDR_MASK; /* end offset */
+    return reg;
+}
+
+static void aspeed_2700_smc_reg_to_segment(const AspeedSMCState *s,
+                                           uint32_t reg, AspeedSegments *seg)
+{
+    uint32_t start_offset = (reg << 16) & AST2700_SEG_ADDR_MASK;
+    uint32_t end_offset = reg & AST2700_SEG_ADDR_MASK;
+    AspeedSMCClass *asc = ASPEED_SMC_GET_CLASS(s);
+
+    if (reg) {
+        seg->addr = asc->flash_window_base + start_offset;
+        seg->size = end_offset + (64 * KiB) - start_offset;
+    } else {
+        seg->addr = asc->flash_window_base;
+        seg->size = 0;
+    }
+}
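+
+/*
+ * Example of the encoding handled by the two helpers above: a 128 MiB
+ * window at the start of the decode range is encoded as 0x07ff0000
+ * (start block 0x0000 in bits [15:0], last 64 KiB block 0x07ff in bits
+ * [31:16]) and decodes back to a 128 MiB segment at flash_window_base.
+ */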
+
+static const uint32_t aspeed_2700_fmc_resets[ASPEED_SMC_R_MAX] = {
+    [R_CONF] = (CONF_FLASH_TYPE_SPI << CONF_FLASH_TYPE0 |
+            CONF_FLASH_TYPE_SPI << CONF_FLASH_TYPE1),
+    [R_CE_CTRL] = 0x0000aa00,
+    [R_CTRL0] = 0x406b0641,
+    [R_CTRL1] = 0x00000400,
+    [R_CTRL2] = 0x00000400,
+    [R_CTRL3] = 0x00000400,
+    [R_SEG_ADDR0] = 0x08000000,
+    [R_SEG_ADDR1] = 0x10000800,
+    [R_SEG_ADDR2] = 0x00000000,
+    [R_SEG_ADDR3] = 0x00000000,
+    [R_DUMMY_DATA] = 0x00010000,
+    [R_DMA_DRAM_ADDR_HIGH] = 0x00000000,
+    [R_TIMINGS] = 0x007b0000,
+};
+
+static const MemoryRegionOps aspeed_2700_smc_flash_ops = {
+    .read = aspeed_smc_flash_read,
+    .write = aspeed_smc_flash_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .valid = {
+        .min_access_size = 1,
+        .max_access_size = 8,
+    },
+};
+
+static const AspeedSegments aspeed_2700_fmc_segments[] = {
+    { 0x0, 128 * MiB }, /* start address is readonly */
+    { 128 * MiB, 128 * MiB }, /* default is disabled but needed for -kernel */
+    { 256 * MiB, 128 * MiB }, /* default is disabled but needed for -kernel */
+    { 0x0, 0 }, /* disabled */
+};
+
+static void aspeed_2700_fmc_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    AspeedSMCClass *asc = ASPEED_SMC_CLASS(klass);
+
+    dc->desc               = "Aspeed 2700 FMC Controller";
+    asc->r_conf            = R_CONF;
+    asc->r_ce_ctrl         = R_CE_CTRL;
+    asc->r_ctrl0           = R_CTRL0;
+    asc->r_timings         = R_TIMINGS;
+    asc->nregs_timings     = 3;
+    asc->conf_enable_w0    = CONF_ENABLE_W0;
+    asc->cs_num_max        = 3;
+    asc->segments          = aspeed_2700_fmc_segments;
+    asc->segment_addr_mask = 0xffffffff;
+    asc->resets            = aspeed_2700_fmc_resets;
+    asc->flash_window_base = 0x100000000;
+    asc->flash_window_size = 1 * GiB;
+    asc->features          = ASPEED_SMC_FEATURE_DMA |
+                             ASPEED_SMC_FEATURE_DMA_DRAM_ADDR_HIGH;
+    asc->dma_flash_mask    = 0x2FFFFFFC;
+    asc->dma_dram_mask     = 0xFFFFFFFC;
+    asc->dma_start_length  = 1;
+    asc->nregs             = ASPEED_SMC_R_MAX;
+    asc->segment_to_reg    = aspeed_2700_smc_segment_to_reg;
+    asc->reg_to_segment    = aspeed_2700_smc_reg_to_segment;
+    asc->dma_ctrl          = aspeed_2600_smc_dma_ctrl;
+    asc->reg_ops           = &aspeed_2700_smc_flash_ops;
+}
+
+static const TypeInfo aspeed_2700_fmc_info = {
+    .name =  "aspeed.fmc-ast2700",
+    .parent = TYPE_ASPEED_SMC,
+    .class_init = aspeed_2700_fmc_class_init,
+};
+
+static const AspeedSegments aspeed_2700_spi0_segments[] = {
+    { 0x0, 128 * MiB }, /* start address is readonly */
+    { 128 * MiB, 128 * MiB }, /* start address is readonly */
+    { 0x0, 0 }, /* disabled */
+};
+
+static void aspeed_2700_spi0_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    AspeedSMCClass *asc = ASPEED_SMC_CLASS(klass);
+
+    dc->desc               = "Aspeed 2700 SPI0 Controller";
+    asc->r_conf            = R_CONF;
+    asc->r_ce_ctrl         = R_CE_CTRL;
+    asc->r_ctrl0           = R_CTRL0;
+    asc->r_timings         = R_TIMINGS;
+    asc->nregs_timings     = 2;
+    asc->conf_enable_w0    = CONF_ENABLE_W0;
+    asc->cs_num_max        = 2;
+    asc->segments          = aspeed_2700_spi0_segments;
+    asc->segment_addr_mask = 0xffffffff;
+    asc->flash_window_base = 0x180000000;
+    asc->flash_window_size = 1 * GiB;
+    asc->features          = ASPEED_SMC_FEATURE_DMA |
+                             ASPEED_SMC_FEATURE_DMA_DRAM_ADDR_HIGH;
+    asc->dma_flash_mask    = 0x2FFFFFFC;
+    asc->dma_dram_mask     = 0xFFFFFFFC;
+    asc->dma_start_length  = 1;
+    asc->nregs             = ASPEED_SMC_R_MAX;
+    asc->segment_to_reg    = aspeed_2700_smc_segment_to_reg;
+    asc->reg_to_segment    = aspeed_2700_smc_reg_to_segment;
+    asc->dma_ctrl          = aspeed_2600_smc_dma_ctrl;
+    asc->reg_ops           = &aspeed_2700_smc_flash_ops;
+}
+
+static const TypeInfo aspeed_2700_spi0_info = {
+    .name =  "aspeed.spi0-ast2700",
+    .parent = TYPE_ASPEED_SMC,
+    .class_init = aspeed_2700_spi0_class_init,
+};
+
+static const AspeedSegments aspeed_2700_spi1_segments[] = {
+    { 0x0, 128 * MiB }, /* start address is readonly */
+    { 0x0, 0 }, /* disabled */
+};
+
+static void aspeed_2700_spi1_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    AspeedSMCClass *asc = ASPEED_SMC_CLASS(klass);
+
+    dc->desc               = "Aspeed 2700 SPI1 Controller";
+    asc->r_conf            = R_CONF;
+    asc->r_ce_ctrl         = R_CE_CTRL;
+    asc->r_ctrl0           = R_CTRL0;
+    asc->r_timings         = R_TIMINGS;
+    asc->nregs_timings     = 2;
+    asc->conf_enable_w0    = CONF_ENABLE_W0;
+    asc->cs_num_max        = 2;
+    asc->segments          = aspeed_2700_spi1_segments;
+    asc->segment_addr_mask = 0xffffffff;
+    asc->flash_window_base = 0x200000000;
+    asc->flash_window_size = 1 * GiB;
+    asc->features          = ASPEED_SMC_FEATURE_DMA |
+                             ASPEED_SMC_FEATURE_DMA_DRAM_ADDR_HIGH;
+    asc->dma_flash_mask    = 0x2FFFFFFC;
+    asc->dma_dram_mask     = 0xFFFFFFFC;
+    asc->dma_start_length  = 1;
+    asc->nregs             = ASPEED_SMC_R_MAX;
+    asc->segment_to_reg    = aspeed_2700_smc_segment_to_reg;
+    asc->reg_to_segment    = aspeed_2700_smc_reg_to_segment;
+    asc->dma_ctrl          = aspeed_2600_smc_dma_ctrl;
+    asc->reg_ops           = &aspeed_2700_smc_flash_ops;
+}
+
+static const TypeInfo aspeed_2700_spi1_info = {
+    .name =  "aspeed.spi1-ast2700",
+    .parent = TYPE_ASPEED_SMC,
+    .class_init = aspeed_2700_spi1_class_init,
+};
+
+static const AspeedSegments aspeed_2700_spi2_segments[] = {
+    { 0x0, 128 * MiB }, /* start address is readonly */
+    { 0x0, 0 }, /* disabled */
+};
+
+static void aspeed_2700_spi2_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    AspeedSMCClass *asc = ASPEED_SMC_CLASS(klass);
+
+    dc->desc               = "Aspeed 2700 SPI2 Controller";
+    asc->r_conf            = R_CONF;
+    asc->r_ce_ctrl         = R_CE_CTRL;
+    asc->r_ctrl0           = R_CTRL0;
+    asc->r_timings         = R_TIMINGS;
+    asc->nregs_timings     = 2;
+    asc->conf_enable_w0    = CONF_ENABLE_W0;
+    asc->cs_num_max        = 2;
+    asc->segments          = aspeed_2700_spi2_segments;
+    asc->segment_addr_mask = 0xffffffff;
+    asc->flash_window_base = 0x280000000;
+    asc->flash_window_size = 1 * GiB;
+    asc->features          = ASPEED_SMC_FEATURE_DMA |
+                             ASPEED_SMC_FEATURE_DMA_DRAM_ADDR_HIGH;
+    asc->dma_flash_mask    = 0x0FFFFFFC;
+    asc->dma_dram_mask     = 0xFFFFFFFC;
+    asc->dma_start_length  = 1;
+    asc->nregs             = ASPEED_SMC_R_MAX;
+    asc->segment_to_reg    = aspeed_2700_smc_segment_to_reg;
+    asc->reg_to_segment    = aspeed_2700_smc_reg_to_segment;
+    asc->dma_ctrl          = aspeed_2600_smc_dma_ctrl;
+    asc->reg_ops           = &aspeed_2700_smc_flash_ops;
+}
+
+static const TypeInfo aspeed_2700_spi2_info = {
+    .name =  "aspeed.spi2-ast2700",
+    .parent = TYPE_ASPEED_SMC,
+    .class_init = aspeed_2700_spi2_class_init,
+};
+
 static void aspeed_smc_register_types(void)
 {
     type_register_static(&aspeed_smc_flash_info);
@@ -1879,6 +2182,10 @@
     type_register_static(&aspeed_1030_fmc_info);
     type_register_static(&aspeed_1030_spi1_info);
     type_register_static(&aspeed_1030_spi2_info);
+    type_register_static(&aspeed_2700_fmc_info);
+    type_register_static(&aspeed_2700_spi0_info);
+    type_register_static(&aspeed_2700_spi1_info);
+    type_register_static(&aspeed_2700_spi2_info);
 }
 
 type_init(aspeed_smc_register_types)
diff --git a/hw/ssi/trace-events b/hw/ssi/trace-events
index 2d5bd2b..7b5ad6a 100644
--- a/hw/ssi/trace-events
+++ b/hw/ssi/trace-events
@@ -6,7 +6,7 @@
 aspeed_smc_flash_write(int cs, uint64_t addr,  uint32_t size, uint64_t data, int mode) "CS%d @0x%" PRIx64 " size %u: 0x%" PRIx64" mode:%d"
 aspeed_smc_read(uint64_t addr,  uint32_t size, uint64_t data) "@0x%" PRIx64 " size %u: 0x%" PRIx64
 aspeed_smc_dma_checksum(uint32_t addr, uint32_t data) "0x%08x: 0x%08x"
-aspeed_smc_dma_rw(const char *dir, uint32_t flash_addr, uint32_t dram_addr, uint32_t size) "%s flash:@0x%08x dram:@0x%08x size:0x%08x"
+aspeed_smc_dma_rw(const char *dir, uint32_t flash_addr, uint64_t dram_addr, uint32_t size) "%s flash:@0x%08x dram:@0x%" PRIx64 " size:0x%08x"
 aspeed_smc_write(uint64_t addr,  uint32_t size, uint64_t data) "@0x%" PRIx64 " size %u: 0x%" PRIx64
 aspeed_smc_flash_select(int cs, const char *prefix) "CS%d %sselect"
 
diff --git a/hw/watchdog/wdt_aspeed.c b/hw/watchdog/wdt_aspeed.c
index d70b656..75685c5 100644
--- a/hw/watchdog/wdt_aspeed.c
+++ b/hw/watchdog/wdt_aspeed.c
@@ -422,12 +422,36 @@
     .class_init = aspeed_1030_wdt_class_init,
 };
 
+static void aspeed_2700_wdt_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    AspeedWDTClass *awc = ASPEED_WDT_CLASS(klass);
+
+    dc->desc = "ASPEED 2700 Watchdog Controller";
+    awc->iosize = 0x80;
+    awc->ext_pulse_width_mask = 0xfffff; /* TODO */
+    awc->reset_ctrl_reg = AST2600_SCU_RESET_CONTROL1;
+    awc->reset_pulse = aspeed_2500_wdt_reset_pulse;
+    awc->wdt_reload = aspeed_wdt_reload_1mhz;
+    awc->sanitize_ctrl = aspeed_2600_sanitize_ctrl;
+    awc->default_status = 0x014FB180;
+    awc->default_reload_value = 0x014FB180;
+}
+
+static const TypeInfo aspeed_2700_wdt_info = {
+    .name = TYPE_ASPEED_2700_WDT,
+    .parent = TYPE_ASPEED_WDT,
+    .instance_size = sizeof(AspeedWDTState),
+    .class_init = aspeed_2700_wdt_class_init,
+};
+
 static void wdt_aspeed_register_types(void)
 {
     type_register_static(&aspeed_wdt_info);
     type_register_static(&aspeed_2400_wdt_info);
     type_register_static(&aspeed_2500_wdt_info);
     type_register_static(&aspeed_2600_wdt_info);
+    type_register_static(&aspeed_2700_wdt_info);
     type_register_static(&aspeed_1030_wdt_info);
 }
 
diff --git a/include/hw/arm/aspeed_soc.h b/include/hw/arm/aspeed_soc.h
index c60fac9..849ba37 100644
--- a/include/hw/arm/aspeed_soc.h
+++ b/include/hw/arm/aspeed_soc.h
@@ -15,6 +15,7 @@
 #include "hw/cpu/a15mpcore.h"
 #include "hw/arm/armv7m.h"
 #include "hw/intc/aspeed_vic.h"
+#include "hw/intc/aspeed_intc.h"
 #include "hw/misc/aspeed_scu.h"
 #include "hw/adc/aspeed_adc.h"
 #include "hw/misc/aspeed_sdmc.h"
@@ -26,6 +27,7 @@
 #include "hw/ssi/aspeed_smc.h"
 #include "hw/misc/aspeed_hace.h"
 #include "hw/misc/aspeed_sbc.h"
+#include "hw/misc/aspeed_sli.h"
 #include "hw/watchdog/wdt_aspeed.h"
 #include "hw/net/ftgmac100.h"
 #include "target/arm/cpu.h"
@@ -38,11 +40,12 @@
 #include "hw/misc/aspeed_peci.h"
 #include "hw/fsi/aspeed_apb2opb.h"
 #include "hw/char/serial.h"
+#include "hw/intc/arm_gicv3.h"
 
 #define ASPEED_SPIS_NUM  2
 #define ASPEED_EHCIS_NUM 2
-#define ASPEED_WDTS_NUM  4
-#define ASPEED_CPUS_NUM  2
+#define ASPEED_WDTS_NUM  8
+#define ASPEED_CPUS_NUM  4
 #define ASPEED_MACS_NUM  4
 #define ASPEED_UARTS_NUM 13
 #define ASPEED_JTAG_NUM  2
@@ -56,11 +59,13 @@
     MemoryRegion sram;
     MemoryRegion spi_boot_container;
     MemoryRegion spi_boot;
+    AddressSpace dram_as;
     AspeedRtcState rtc;
     AspeedTimerCtrlState timerctrl;
     AspeedI2CState i2c;
     AspeedI3CState i3c;
     AspeedSCUState scu;
+    AspeedSCUState scuio;
     AspeedHACEState hace;
     AspeedXDMAState xdma;
     AspeedADCState adc;
@@ -68,6 +73,8 @@
     AspeedSMCState spi[ASPEED_SPIS_NUM];
     EHCISysBusState ehci[ASPEED_EHCIS_NUM];
     AspeedSBCState sbc;
+    AspeedSLIState sli;
+    AspeedSLIState sliio;
     MemoryRegion secsram;
     UnimplementedDeviceState sbc_unimplemented;
     AspeedSDMCState sdmc;
@@ -117,6 +124,18 @@
 #define TYPE_ASPEED2600_SOC "aspeed2600-soc"
 OBJECT_DECLARE_SIMPLE_TYPE(Aspeed2600SoCState, ASPEED2600_SOC)
 
+struct Aspeed27x0SoCState {
+    AspeedSoCState parent;
+
+    ARMCPU cpu[ASPEED_CPUS_NUM];
+    AspeedINTCState intc;
+    GICv3State gic;
+    MemoryRegion dram_empty;
+};
+
+#define TYPE_ASPEED27X0_SOC "aspeed27x0-soc"
+OBJECT_DECLARE_SIMPLE_TYPE(Aspeed27x0SoCState, ASPEED27X0_SOC)
+
 struct Aspeed10x0SoCState {
     AspeedSoCState parent;
 
@@ -168,11 +187,13 @@
     ASPEED_DEV_UART13,
     ASPEED_DEV_VUART,
     ASPEED_DEV_FMC,
+    ASPEED_DEV_SPI0,
     ASPEED_DEV_SPI1,
     ASPEED_DEV_SPI2,
     ASPEED_DEV_EHCI1,
     ASPEED_DEV_EHCI2,
     ASPEED_DEV_VIC,
+    ASPEED_DEV_INTC,
     ASPEED_DEV_SDMC,
     ASPEED_DEV_SCU,
     ASPEED_DEV_ADC,
@@ -222,6 +243,11 @@
     ASPEED_DEV_JTAG1,
     ASPEED_DEV_FSI1,
     ASPEED_DEV_FSI2,
+    ASPEED_DEV_SCUIO,
+    ASPEED_DEV_SLI,
+    ASPEED_DEV_SLIIO,
+    ASPEED_GIC_DIST,
+    ASPEED_GIC_REDIST,
 };
 
 qemu_irq aspeed_soc_get_irq(AspeedSoCState *s, int dev);
diff --git a/include/hw/intc/aspeed_intc.h b/include/hw/intc/aspeed_intc.h
new file mode 100644
index 0000000..18cb434
--- /dev/null
+++ b/include/hw/intc/aspeed_intc.h
@@ -0,0 +1,44 @@
+/*
+ * ASPEED INTC Controller
+ *
+ * Copyright (C) 2024 ASPEED Technology Inc.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#ifndef ASPEED_INTC_H
+#define ASPEED_INTC_H
+
+#include "hw/sysbus.h"
+#include "qom/object.h"
+#include "hw/or-irq.h"
+
+#define TYPE_ASPEED_INTC "aspeed.intc"
+#define TYPE_ASPEED_2700_INTC TYPE_ASPEED_INTC "-ast2700"
+OBJECT_DECLARE_TYPE(AspeedINTCState, AspeedINTCClass, ASPEED_INTC)
+
+#define ASPEED_INTC_NR_REGS (0x2000 >> 2)
+#define ASPEED_INTC_NR_INTS 9
+
+struct AspeedINTCState {
+    /*< private >*/
+    SysBusDevice parent_obj;
+
+    /*< public >*/
+    MemoryRegion iomem;
+    uint32_t regs[ASPEED_INTC_NR_REGS];
+    OrIRQState orgates[ASPEED_INTC_NR_INTS];
+    qemu_irq output_pins[ASPEED_INTC_NR_INTS];
+
+    uint32_t enable[ASPEED_INTC_NR_INTS];
+    uint32_t mask[ASPEED_INTC_NR_INTS];
+    uint32_t pending[ASPEED_INTC_NR_INTS];
+};
+
+struct AspeedINTCClass {
+    SysBusDeviceClass parent_class;
+
+    uint32_t num_lines;
+    uint32_t num_ints;
+};
+
+#endif /* ASPEED_INTC_H */
diff --git a/include/hw/misc/aspeed_scu.h b/include/hw/misc/aspeed_scu.h
index 7cb6018..58db28d 100644
--- a/include/hw/misc/aspeed_scu.h
+++ b/include/hw/misc/aspeed_scu.h
@@ -19,10 +19,13 @@
 #define TYPE_ASPEED_2400_SCU TYPE_ASPEED_SCU "-ast2400"
 #define TYPE_ASPEED_2500_SCU TYPE_ASPEED_SCU "-ast2500"
 #define TYPE_ASPEED_2600_SCU TYPE_ASPEED_SCU "-ast2600"
+#define TYPE_ASPEED_2700_SCU TYPE_ASPEED_SCU "-ast2700"
+#define TYPE_ASPEED_2700_SCUIO TYPE_ASPEED_SCU "io" "-ast2700"
 #define TYPE_ASPEED_1030_SCU TYPE_ASPEED_SCU "-ast1030"
 
 #define ASPEED_SCU_NR_REGS (0x1A8 >> 2)
 #define ASPEED_AST2600_SCU_NR_REGS (0xE20 >> 2)
+#define ASPEED_AST2700_SCU_NR_REGS (0xE20 >> 2)
 
 struct AspeedSCUState {
     /*< private >*/
@@ -31,7 +34,7 @@
     /*< public >*/
     MemoryRegion iomem;
 
-    uint32_t regs[ASPEED_AST2600_SCU_NR_REGS];
+    uint32_t regs[ASPEED_AST2700_SCU_NR_REGS];
     uint32_t silicon_rev;
     uint32_t hw_strap1;
     uint32_t hw_strap2;
@@ -48,6 +51,9 @@
 #define AST2600_A3_SILICON_REV   0x05030303U
 #define AST1030_A0_SILICON_REV   0x80000000U
 #define AST1030_A1_SILICON_REV   0x80010000U
+#define AST2700_A0_SILICON_REV   0x06000103U
+#define AST2720_A0_SILICON_REV   0x06000203U
+#define AST2750_A0_SILICON_REV   0x06000003U
 
 #define ASPEED_IS_AST2500(si_rev)     ((((si_rev) >> 24) & 0xff) == 0x04)
 
@@ -87,7 +93,8 @@
  *       1. 2012/12/29 Ryan Chen Create
  */
 
-/* SCU08   Clock Selection Register
+/*
+ * SCU08   Clock Selection Register
  *
  *  31     Enable Video Engine clock dynamic slow down
  *  30:28  Video Engine clock slow down setting
@@ -109,7 +116,8 @@
  */
 #define SCU_CLK_GET_PCLK_DIV(x)                    (((x) >> 23) & 0x7)
 
-/* SCU24   H-PLL Parameter Register (for Aspeed AST2400 SOC)
+/*
+ * SCU24   H-PLL Parameter Register (for Aspeed AST2400 SOC)
  *
  *  18     H-PLL parameter selection
  *           0: Select H-PLL by strapping resistors
@@ -127,7 +135,8 @@
 #define SCU_AST2400_H_PLL_BYPASS_EN                (0x1 << 17)
 #define SCU_AST2400_H_PLL_OFF                      (0x1 << 16)
 
-/* SCU24   H-PLL Parameter Register (for Aspeed AST2500 SOC)
+/*
+ * SCU24   H-PLL Parameter Register (for Aspeed AST2500 SOC)
  *
  *  21     Enable H-PLL reset
  *  20     Enable H-PLL bypass mode
@@ -144,7 +153,8 @@
 #define SCU_H_PLL_BYPASS_EN                        (0x1 << 20)
 #define SCU_H_PLL_OFF                              (0x1 << 19)
 
-/* SCU70  Hardware Strapping Register definition (for Aspeed AST2400 SOC)
+/*
+ * SCU70  Hardware Strapping Register definition (for Aspeed AST2400 SOC)
  *
  * 31:29  Software defined strapping registers
  * 28:27  DRAM size setting (for VGA driver use)
@@ -361,4 +371,31 @@
  */
 #define SCU_AST1030_CLK_GET_PCLK_DIV(x)                    (((x) >> 8) & 0xf)
 
+/*
+ * SCU280   Clock Selection 1 Register (for Aspeed AST2700 SCUIO)
+ *
+ *  31:29  MHCLK_DIV
+ *  28     Reserved
+ *  27:25  RGMIICLK_DIV
+ *  24     Reserved
+ *  23:21  RMIICLK_DIV
+ *  20:18  PCLK_DIV
+ *  17:14  SDCLK_DIV
+ *  13     SDCLK_SEL
+ *  12     UART13CLK_SEL
+ *  11     UART12CLK_SEL
+ *  10     UART11CLK_SEL
+ *  9      UART10CLK_SEL
+ *  8      UART9CLK_SEL
+ *  7      UART8CLK_SEL
+ *  6      UART7CLK_SEL
+ *  5      UART6CLK_SEL
+ *  4      UARTDBCLK_SEL
+ *  3      UART4CLK_SEL
+ *  2      UART3CLK_SEL
+ *  1      UART2CLK_SEL
+ *  0      UART1CLK_SEL
+ */
+#define SCUIO_AST2700_CLK_GET_PCLK_DIV(x)                    (((x) >> 18) & 0x7)
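+
+/*
+ * For example, decoding a register value of 0x86900000 (the
+ * AST2700_SCUIO_CLK_SEL_1 reset value used by the SCUIO model) with
+ * SCUIO_AST2700_CLK_GET_PCLK_DIV() yields a PCLK_DIV field of 4.
+ */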
+
 #endif /* ASPEED_SCU_H */
diff --git a/include/hw/misc/aspeed_sdmc.h b/include/hw/misc/aspeed_sdmc.h
index ec2d59a..61c9795 100644
--- a/include/hw/misc/aspeed_sdmc.h
+++ b/include/hw/misc/aspeed_sdmc.h
@@ -17,6 +17,7 @@
 #define TYPE_ASPEED_2400_SDMC TYPE_ASPEED_SDMC "-ast2400"
 #define TYPE_ASPEED_2500_SDMC TYPE_ASPEED_SDMC "-ast2500"
 #define TYPE_ASPEED_2600_SDMC TYPE_ASPEED_SDMC "-ast2600"
+#define TYPE_ASPEED_2700_SDMC TYPE_ASPEED_SDMC "-ast2700"
 
 /*
  * SDMC has 174 documented registers. In addition the u-boot device tree
@@ -29,7 +30,7 @@
  * time, and the other is in the DDR-PHY IP which is used during DDR-PHY
  * training.
  */
-#define ASPEED_SDMC_NR_REGS (0x500 >> 2)
+#define ASPEED_SDMC_NR_REGS (0x1000 >> 2)
 
 struct AspeedSDMCState {
     /*< private >*/
@@ -41,6 +42,7 @@
     uint32_t regs[ASPEED_SDMC_NR_REGS];
     uint64_t ram_size;
     uint64_t max_ram_size;
+    bool unlocked;
 };
 
 
@@ -51,6 +53,7 @@
     const uint64_t *valid_ram_sizes;
     uint32_t (*compute_conf)(AspeedSDMCState *s, uint32_t data);
     void (*write)(AspeedSDMCState *s, uint32_t reg, uint32_t data);
+    bool is_bus64bit;
 };
 
 #endif /* ASPEED_SDMC_H */
diff --git a/include/hw/misc/aspeed_sli.h b/include/hw/misc/aspeed_sli.h
new file mode 100644
index 0000000..23f346a
--- /dev/null
+++ b/include/hw/misc/aspeed_sli.h
@@ -0,0 +1,27 @@
+/*
+ * ASPEED SLI Controller
+ *
+ * Copyright (C) 2024 ASPEED Technology Inc.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#ifndef ASPEED_SLI_H
+#define ASPEED_SLI_H
+
+#include "hw/sysbus.h"
+
+#define TYPE_ASPEED_SLI "aspeed.sli"
+#define TYPE_ASPEED_2700_SLI TYPE_ASPEED_SLI "-ast2700"
+#define TYPE_ASPEED_2700_SLIIO TYPE_ASPEED_SLI "io" "-ast2700"
+OBJECT_DECLARE_SIMPLE_TYPE(AspeedSLIState, ASPEED_SLI)
+
+#define ASPEED_SLI_NR_REGS  (0x500 >> 2)
+
+struct AspeedSLIState {
+    SysBusDevice parent;
+    MemoryRegion iomem;
+
+    uint32_t regs[ASPEED_SLI_NR_REGS];
+};
+
+#endif /* ASPEED_SLI_H */
diff --git a/include/hw/ssi/aspeed_smc.h b/include/hw/ssi/aspeed_smc.h
index 8e1dda5..234dca3 100644
--- a/include/hw/ssi/aspeed_smc.h
+++ b/include/hw/ssi/aspeed_smc.h
@@ -76,6 +76,7 @@
     AddressSpace flash_as;
     MemoryRegion *dram_mr;
     AddressSpace dram_as;
+    uint64_t     dram_base;
 
     AspeedSMCFlash flashes[ASPEED_SMC_CS_MAX];
 
@@ -106,6 +107,7 @@
     uint32_t features;
     hwaddr dma_flash_mask;
     hwaddr dma_dram_mask;
+    uint32_t dma_start_length;
     uint32_t nregs;
     uint32_t (*segment_to_reg)(const AspeedSMCState *s,
                                const AspeedSegments *seg);
@@ -113,6 +115,7 @@
                            AspeedSegments *seg);
     void (*dma_ctrl)(AspeedSMCState *s, uint32_t value);
     int (*addr_width)(const AspeedSMCState *s);
+    const MemoryRegionOps *reg_ops;
 };
 
 #endif /* ASPEED_SMC_H */
diff --git a/include/hw/watchdog/wdt_aspeed.h b/include/hw/watchdog/wdt_aspeed.h
index e90ef86..830b0a7 100644
--- a/include/hw/watchdog/wdt_aspeed.h
+++ b/include/hw/watchdog/wdt_aspeed.h
@@ -19,9 +19,10 @@
 #define TYPE_ASPEED_2400_WDT TYPE_ASPEED_WDT "-ast2400"
 #define TYPE_ASPEED_2500_WDT TYPE_ASPEED_WDT "-ast2500"
 #define TYPE_ASPEED_2600_WDT TYPE_ASPEED_WDT "-ast2600"
+#define TYPE_ASPEED_2700_WDT TYPE_ASPEED_WDT "-ast2700"
 #define TYPE_ASPEED_1030_WDT TYPE_ASPEED_WDT "-ast1030"
 
-#define ASPEED_WDT_REGS_MAX        (0x30 / 4)
+#define ASPEED_WDT_REGS_MAX        (0x80 / 4)
 
 struct AspeedWDTState {
     /*< private >*/
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 7466217..365852c 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -6455,10 +6455,8 @@
             if (*eax & 31) {
                 int host_vcpus_per_cache = 1 + ((*eax & 0x3FFC000) >> 14);
 
-                if (cores_per_pkg > 1) {
-                    *eax &= ~0xFC000000;
-                    *eax |= max_core_ids_in_package(&topo_info) << 26;
-                }
+                *eax &= ~0xFC000000;
+                *eax |= max_core_ids_in_package(&topo_info) << 26;
                 if (host_vcpus_per_cache > threads_per_pkg) {
                     *eax &= ~0x3FFC000;
 
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index 8fe28b6..7e2a9b5 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1260,6 +1260,8 @@
 /* Use a clearer name for this.  */
 #define CPU_INTERRUPT_INIT      CPU_INTERRUPT_RESET
 
+#define CC_OP_HAS_EFLAGS(op) ((op) >= CC_OP_EFLAGS && (op) <= CC_OP_ADCOX)
+
 /* Instead of computing the condition codes after each x86 instruction,
  * QEMU just stores one operand (called CC_SRC), the result
  * (called CC_DST) and the type of operation (called CC_OP). When the
@@ -1270,6 +1272,9 @@
 typedef enum {
     CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */
     CC_OP_EFLAGS,  /* all cc are explicitly computed, CC_SRC = flags */
+    CC_OP_ADCX, /* CC_DST = C, CC_SRC = rest.  */
+    CC_OP_ADOX, /* CC_SRC2 = O, CC_SRC = rest.  */
+    CC_OP_ADCOX, /* CC_DST = C, CC_SRC2 = O, CC_SRC = rest.  */
 
     CC_OP_MULB, /* modify all flags, C, O = (CC_SRC != 0) */
     CC_OP_MULW,
@@ -1326,10 +1331,6 @@
     CC_OP_BMILGL,
     CC_OP_BMILGQ,
 
-    CC_OP_ADCX, /* CC_DST = C, CC_SRC = rest.  */
-    CC_OP_ADOX, /* CC_DST = O, CC_SRC = rest.  */
-    CC_OP_ADCOX, /* CC_DST = C, CC_SRC2 = O, CC_SRC = rest.  */
-
     CC_OP_CLR, /* Z set, all other flags clear.  */
     CC_OP_POPCNT, /* Z via CC_SRC, all other flags clear.  */
 
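Moving CC_OP_ADCX/ADOX/ADCOX up next to CC_OP_EFLAGS is what lets the new CC_OP_HAS_EFLAGS() macro be a plain range test. A self-contained sketch of that idea with placeholder enumerators (the real enum has many more members):

/* Once the three ADCX/ADOX variants sit immediately after CC_OP_EFLAGS,
 * "does this cc_op already carry live EFLAGS state?" is two comparisons. */
typedef enum {
    SK_CC_OP_DYNAMIC,
    SK_CC_OP_EFLAGS,
    SK_CC_OP_ADCX,
    SK_CC_OP_ADOX,
    SK_CC_OP_ADCOX,
    SK_CC_OP_MULB,      /* remaining ops follow, outside the EFLAGS range */
} SketchCCOp;

#define SK_CC_OP_HAS_EFLAGS(op) \
    ((op) >= SK_CC_OP_EFLAGS && (op) <= SK_CC_OP_ADCOX)
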
diff --git a/target/i386/helper.h b/target/i386/helper.h
index 2f46cff..eeb8df5 100644
--- a/target/i386/helper.h
+++ b/target/i386/helper.h
@@ -95,7 +95,7 @@
 DEF_HELPER_FLAGS_2(mwait, TCG_CALL_NO_WG, noreturn, env, int)
 DEF_HELPER_1(rdmsr, void, env)
 DEF_HELPER_1(wrmsr, void, env)
-DEF_HELPER_FLAGS_2(read_crN, TCG_CALL_NO_RWG, tl, env, int)
+DEF_HELPER_FLAGS_1(read_cr8, TCG_CALL_NO_RWG, tl, env)
 DEF_HELPER_FLAGS_3(write_crN, TCG_CALL_NO_RWG, void, env, int, tl)
 #endif /* !CONFIG_USER_ONLY */
 
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 004c667..30b83f1 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -587,6 +587,7 @@
     sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs);
     if (!sev_common) {
         error_setg(errp, "SEV is not configured");
+        return NULL;
     }
 
     sev_device = object_property_get_str(OBJECT(sev_common), "sev-device",
@@ -1529,11 +1530,12 @@
 sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp)
 {
     SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs);
-    SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(sev_common);
+    SevCommonStateClass *klass;
 
     if (!sev_common) {
         return 0;
     }
+    klass = SEV_COMMON_GET_CLASS(sev_common);
 
     /* if SEV is in update state then encrypt the data else do nothing */
     if (sev_check_state(sev_common, SEV_STATE_LAUNCH_UPDATE)) {
@@ -1710,7 +1712,9 @@
 {
     X86CPU *x86;
     CPUX86State *env;
-    SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs);
+    ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs;
+    SevCommonState *sev_common = SEV_COMMON(
+        object_dynamic_cast(OBJECT(cgs), TYPE_SEV_COMMON));
 
     /* Only update if we have valid reset information */
     if (!sev_common || !sev_common->reset_data_valid) {
@@ -2165,6 +2169,7 @@
     struct kvm_sev_snp_launch_finish *finish = &sev_snp_guest->kvm_finish_conf;
     gsize len;
 
+    finish->id_block_en = 0;
     g_free(sev_snp_guest->id_block);
     g_free((guchar *)finish->id_block_uaddr);
 
@@ -2184,7 +2189,7 @@
         return;
     }
 
-    finish->id_block_en = (len) ? 1 : 0;
+    finish->id_block_en = 1;
 }
 
 static char *
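
The sev.c hunks above share one shape: confirm that the confidential-guest object exists (and really is a SEV object) before dereferencing it or resolving its class. A generic sketch of that ordering; the sketch_* types stand in for SevCommonState and are not QEMU API:

#include <stddef.h>

struct sketch_class { int dummy; };
struct sketch_state { const struct sketch_class *klass; };

/* Resolve the class only after the NULL check: taking the class of a
 * missing object is exactly the dereference the patch removes. */
static const struct sketch_class *sketch_get_class(struct sketch_state *st)
{
    if (!st) {
        return NULL;   /* mirrors the added early "return NULL;" after error_setg() */
    }
    return st->klass;
}
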
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index c2d8da8..0d846c3 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -151,6 +151,8 @@
     X86_OP_GROUP3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__)
 #define X86_OP_GROUPw(op, op0, s0, ...)                           \
     X86_OP_GROUP3(op, op0, s0, None, None, None, None, ## __VA_ARGS__)
+#define X86_OP_GROUPwr(op, op0, s0, op1, s1, ...)                 \
+    X86_OP_GROUP3(op, op0, s0, op1, s1, None, None, ## __VA_ARGS__)
 #define X86_OP_GROUP0(op, ...)                                    \
     X86_OP_GROUP3(op, None, None, None, None, None, None, ## __VA_ARGS__)
 
@@ -180,20 +182,20 @@
 #define X86_OP_ENTRYrr(op, op0, s0, op1, s1, ...)                 \
     X86_OP_ENTRY3(op, None, None, op0, s0, op1, s1, ## __VA_ARGS__)
 #define X86_OP_ENTRYwr(op, op0, s0, op1, s1, ...)                 \
-    X86_OP_ENTRY3(op, op0, s0, None, None, op1, s1, ## __VA_ARGS__)
+    X86_OP_ENTRY3(op, op0, s0, op1, s1, None, None, ## __VA_ARGS__)
 #define X86_OP_ENTRY2(op, op0, s0, op1, s1, ...)                  \
     X86_OP_ENTRY3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__)
 #define X86_OP_ENTRYw(op, op0, s0, ...)                           \
     X86_OP_ENTRY3(op, op0, s0, None, None, None, None, ## __VA_ARGS__)
 #define X86_OP_ENTRYr(op, op0, s0, ...)                           \
-    X86_OP_ENTRY3(op, None, None, None, None, op0, s0, ## __VA_ARGS__)
+    X86_OP_ENTRY3(op, None, None, op0, s0, None, None, ## __VA_ARGS__)
 #define X86_OP_ENTRY1(op, op0, s0, ...)                           \
     X86_OP_ENTRY3(op, op0, s0, 2op, s0, None, None, ## __VA_ARGS__)
 #define X86_OP_ENTRY0(op, ...)                                    \
     X86_OP_ENTRY3(op, None, None, None, None, None, None, ## __VA_ARGS__)
 
 #define cpuid(feat) .cpuid = X86_FEAT_##feat,
-#define noseg .special = X86_SPECIAL_NoSeg,
+#define nolea .special = X86_SPECIAL_NoLoadEA,
 #define xchg .special = X86_SPECIAL_Locked,
 #define lock .special = X86_SPECIAL_HasLock,
 #define mmx .special = X86_SPECIAL_MMX,
@@ -221,7 +223,9 @@
 #define vex13 .vex_class = 13,
 
 #define chk(a) .check = X86_CHECK_##a,
-#define svm(a) .intercept = SVM_EXIT_##a,
+#define chk2(a, b) .check = X86_CHECK_##a | X86_CHECK_##b,
+#define chk3(a, b, c) .check = X86_CHECK_##a | X86_CHECK_##b | X86_CHECK_##c,
+#define svm(a) .intercept = SVM_EXIT_##a, .has_intercept = true,
 
 #define avx2_256 .vex_special = X86_VEX_AVX2_256,
 
@@ -267,20 +271,41 @@
 
 static void decode_group15(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
 {
-    /* only includes ldmxcsr and stmxcsr, because they have AVX variants.  */
     static const X86OpEntry group15_reg[8] = {
+        [0] = X86_OP_ENTRYw(RDxxBASE,   R,y, cpuid(FSGSBASE) chk(o64) p_f3),
+        [1] = X86_OP_ENTRYw(RDxxBASE,   R,y, cpuid(FSGSBASE) chk(o64) p_f3),
+        [2] = X86_OP_ENTRYr(WRxxBASE,   R,y, cpuid(FSGSBASE) chk(o64) p_f3 zextT0),
+        [3] = X86_OP_ENTRYr(WRxxBASE,   R,y, cpuid(FSGSBASE) chk(o64) p_f3 zextT0),
+        [5] = X86_OP_ENTRY0(LFENCE,          cpuid(SSE2) p_00),
+        [6] = X86_OP_ENTRY0(MFENCE,          cpuid(SSE2) p_00),
+        [7] = X86_OP_ENTRY0(SFENCE,          cpuid(SSE2) p_00),
     };
 
     static const X86OpEntry group15_mem[8] = {
-        [2] = X86_OP_ENTRYr(LDMXCSR,    E,d, vex5 chk(VEX128)),
-        [3] = X86_OP_ENTRYw(STMXCSR,    E,d, vex5 chk(VEX128)),
+        [0] = X86_OP_ENTRYw(FXSAVE,     M,y, cpuid(FXSR) p_00),
+        [1] = X86_OP_ENTRYr(FXRSTOR,    M,y, cpuid(FXSR) p_00),
+        [2] = X86_OP_ENTRYr(LDMXCSR,    E,d, vex5 chk(VEX128) p_00),
+        [3] = X86_OP_ENTRYw(STMXCSR,    E,d, vex5 chk(VEX128) p_00),
+        [4] = X86_OP_ENTRYw(XSAVE,      M,y, cpuid(XSAVE) p_00),
+        [5] = X86_OP_ENTRYr(XRSTOR,     M,y, cpuid(XSAVE) p_00),
+        [6] = X86_OP_ENTRYw(XSAVEOPT,   M,b, cpuid(XSAVEOPT) p_00),
+        [7] = X86_OP_ENTRYw(NOP,        M,b, cpuid(CLFLUSH) p_00),
+    };
+
+    static const X86OpEntry group15_mem_66[8] = {
+        [6] = X86_OP_ENTRYw(NOP,        M,b, cpuid(CLWB)),
+        [7] = X86_OP_ENTRYw(NOP,        M,b, cpuid(CLFLUSHOPT)),
     };
 
     uint8_t modrm = get_modrm(s, env);
+    int op = (modrm >> 3) & 7;
+
     if ((modrm >> 6) == 3) {
-        *entry = group15_reg[(modrm >> 3) & 7];
+        *entry = group15_reg[op];
+    } else if (s->prefix & PREFIX_DATA) {
+        *entry = group15_mem_66[op];
     } else {
-        *entry = group15_mem[(modrm >> 3) & 7];
+        *entry = group15_mem[op];
     }
 }
 
@@ -425,6 +450,50 @@
     *entry = *decode_by_prefix(s, opcodes_0F7F);
 }
 
+static void decode_0FB8(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{
+    static const X86OpEntry popcnt =
+        X86_OP_ENTRYwr(POPCNT,    G,v, E,v,  cpuid(POPCNT) zextT0);
+
+    if (s->prefix & PREFIX_REPZ) {
+        *entry = popcnt;
+    } else {
+        memset(entry, 0, sizeof(*entry));
+    }
+}
+
+static void decode_0FBC(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{
+    /* For BSF, pass 2op as the third operand so that we can use zextT0 */
+    static const X86OpEntry opcodes_0FBC[4] = {
+        X86_OP_ENTRY3(BSF,    G,v, E,v, 2op,v, zextT0),
+        X86_OP_ENTRY3(BSF,    G,v, E,v, 2op,v, zextT0), /* 0x66 */
+        X86_OP_ENTRYwr(TZCNT, G,v, E,v,        zextT0), /* 0xf3 */
+        X86_OP_ENTRY3(BSF,    G,v, E,v, 2op,v, zextT0), /* 0xf2 */
+    };
+    if (!(s->cpuid_ext3_features & CPUID_EXT3_ABM)) {
+        *entry = opcodes_0FBC[0];
+    } else {
+        *entry = *decode_by_prefix(s, opcodes_0FBC);
+    }
+}
+
+static void decode_0FBD(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{
+    /* For BSR, pass 2op as the third operand so that we can use zextT0 */
+    static const X86OpEntry opcodes_0FBD[4] = {
+        X86_OP_ENTRY3(BSR,    G,v, E,v, 2op,v, zextT0),
+        X86_OP_ENTRY3(BSR,    G,v, E,v, 2op,v, zextT0), /* 0x66 */
+        X86_OP_ENTRYwr(LZCNT, G,v, E,v,        zextT0), /* 0xf3 */
+        X86_OP_ENTRY3(BSR,    G,v, E,v, 2op,v, zextT0), /* 0xf2 */
+    };
+    if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
+        *entry = opcodes_0FBD[0];
+    } else {
+        *entry = *decode_by_prefix(s, opcodes_0FBD);
+    }
+}
+
 static void decode_0FD6(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
 {
     static const X86OpEntry movq[4] = {
@@ -612,15 +681,15 @@
 /* five rows for no prefix, 66, F3, F2, 66+F2  */
 static const X86OpEntry opcodes_0F38_F0toFF[16][5] = {
     [0] = {
-        X86_OP_ENTRY3(MOVBE, G,y, M,y, None,None, cpuid(MOVBE)),
-        X86_OP_ENTRY3(MOVBE, G,w, M,w, None,None, cpuid(MOVBE)),
+        X86_OP_ENTRYwr(MOVBE, G,y, M,y, cpuid(MOVBE)),
+        X86_OP_ENTRYwr(MOVBE, G,w, M,w, cpuid(MOVBE)),
         {},
         X86_OP_ENTRY2(CRC32, G,d, E,b, cpuid(SSE42)),
         X86_OP_ENTRY2(CRC32, G,d, E,b, cpuid(SSE42)),
     },
     [1] = {
-        X86_OP_ENTRY3(MOVBE, M,y, G,y, None,None, cpuid(MOVBE)),
-        X86_OP_ENTRY3(MOVBE, M,w, G,w, None,None, cpuid(MOVBE)),
+        X86_OP_ENTRYwr(MOVBE, M,y, G,y, cpuid(MOVBE)),
+        X86_OP_ENTRYwr(MOVBE, M,w, G,w, cpuid(MOVBE)),
         {},
         X86_OP_ENTRY2(CRC32, G,d, E,y, cpuid(SSE42)),
         X86_OP_ENTRY2(CRC32, G,d, E,w, cpuid(SSE42)),
@@ -633,7 +702,7 @@
         {},
     },
     [3] = {
-        X86_OP_GROUP3(group17, B,y, E,y, None,None, vex13 cpuid(BMI1)),
+        X86_OP_GROUP3(group17, B,y, None,None, E,y, vex13 cpuid(BMI1)),
         {},
         {},
         {},
@@ -985,14 +1054,28 @@
     *entry = *decode_by_prefix(s, opcodes_0FE6);
 }
 
-static const X86OpEntry opcodes_0F[256] = {
-    [0x0E] = X86_OP_ENTRY0(EMMS,                              cpuid(3DNOW)), /* femms */
+/*
+ * These ignore the mod bits (assume (modrm&0xc0)==0xc0), so group the
+ * pre-decode tweak here for all MOVs from/to CR and DR.
+ *
+ * AMD documentation (24594.pdf) and testing of Intel 386 and 486
+ * processors all show that the mod bits are assumed to be 1's,
+ * regardless of actual values.
+ */
+static void decode_MOV_CR_DR(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b)
+{
-    /*
-     * 3DNow!'s opcode byte comes *after* modrm and displacements, making it
-     * more like an Ib operand.  Dispatch to the right helper in a single gen_*
-     * function.
-     */
-    [0x0F] = X86_OP_ENTRY3(3dnow,       P,q, Q,q, I,b,        cpuid(3DNOW)),
+    get_modrm(s, env);
+    s->modrm |= 0xC0;
+
+    entry->gen = gen_MOV;
+}
+
+static const X86OpEntry opcodes_0F[256] = {
+    [0x02] = X86_OP_ENTRYwr(LAR,        G,v, E,w,             chk(prot)),
+    [0x03] = X86_OP_ENTRYwr(LSL,        G,v, E,w,             chk(prot)),
+    [0x05] = X86_OP_ENTRY0(SYSCALL,                           chk(o64_intel)),
+    [0x06] = X86_OP_ENTRY0(CLTS,                              chk(cpl0) svm(WRITE_CR0)),
+    [0x07] = X86_OP_ENTRY0(SYSRET,                            chk3(o64_intel, prot, cpl0)),
 
     [0x10] = X86_OP_GROUP0(0F10),
     [0x11] = X86_OP_GROUP0(0F11),
@@ -1004,6 +1089,22 @@
     /* Incorrectly listed as Mq,Vq in the manual */
     [0x17] = X86_OP_ENTRY3(VMOVHPx_st,  M,q, None,None, V,dq, vex5 p_00_66),
 
+    /*
+     * Incorrectly listed as using "d" operand type in the manual.  In reality
+     * there's no 16-bit version (like y) and it does not use REX.W (like d64).
+     */
+    [0x20] = X86_OP_GROUPwr(MOV_CR_DR,   R,y_d64, C,y_d64, chk(cpl0) svm(READ_CR0)),
+    [0x21] = X86_OP_GROUPwr(MOV_CR_DR,   R,y_d64, D,y_d64, chk(cpl0) svm(READ_DR0)),
+    [0x22] = X86_OP_GROUPwr(MOV_CR_DR,   C,y_d64, R,y_d64, zextT0 chk(cpl0) svm(WRITE_CR0)),
+    [0x23] = X86_OP_GROUPwr(MOV_CR_DR,   D,y_d64, R,y_d64, zextT0 chk(cpl0) svm(WRITE_DR0)),
+
+    [0x30] = X86_OP_ENTRY0(WRMSR,                             chk(cpl0)),
+    [0x31] = X86_OP_ENTRY0(RDTSC),
+    [0x32] = X86_OP_ENTRY0(RDMSR,                             chk(cpl0)),
+    [0x33] = X86_OP_ENTRY0(RDPMC),
+    [0x34] = X86_OP_ENTRY0(SYSENTER,                          chk2(i64_amd, prot_or_vm86)),
+    [0x35] = X86_OP_ENTRY0(SYSEXIT,                           chk3(i64_amd, prot, cpl0)),
+
     [0x40] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
     [0x41] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
     [0x42] = X86_OP_ENTRY2(CMOVcc,     G,v, E,v, cpuid(CMOV)),
@@ -1060,9 +1161,64 @@
 
     [0xa0] = X86_OP_ENTRYr(PUSH, FS, w),
     [0xa1] = X86_OP_ENTRYw(POP, FS, w),
+    [0xa2] = X86_OP_ENTRY0(CPUID),
+    [0xa4] = X86_OP_ENTRY4(SHLD,  E,v, 2op,v, G,v),
+    [0xa5] = X86_OP_ENTRY3(SHLD,  E,v, 2op,v, G,v),
 
+    [0xb0] = X86_OP_ENTRY2(CMPXCHG,E,b, G,b, lock),
+    [0xb1] = X86_OP_ENTRY2(CMPXCHG,E,v, G,v, lock),
+    [0xb2] = X86_OP_ENTRY3(LSS,    G,v, EM,p, None, None),
+    [0xb4] = X86_OP_ENTRY3(LFS,    G,v, EM,p, None, None),
+    [0xb5] = X86_OP_ENTRY3(LGS,    G,v, EM,p, None, None),
+    [0xb6] = X86_OP_ENTRY3(MOV,    G,v, E,b, None, None, zextT0), /* MOVZX */
+    [0xb7] = X86_OP_ENTRY3(MOV,    G,v, E,w, None, None, zextT0), /* MOVZX */
+
+    [0xc0] = X86_OP_ENTRY2(XADD,       E,b, G,b,            lock),
+    [0xc1] = X86_OP_ENTRY2(XADD,       E,v, G,v,            lock),
+    [0xc2] = X86_OP_ENTRY4(VCMP,       V,x, H,x, W,x,       vex2_rep3 p_00_66_f3_f2),
+    [0xc3] = X86_OP_ENTRY3(MOV,        EM,y,G,y, None,None, cpuid(SSE2)), /* MOVNTI */
+    [0xc4] = X86_OP_ENTRY4(PINSRW,     V,dq,H,dq,E,w,       vex5 mmx p_00_66),
+    [0xc5] = X86_OP_ENTRY3(PEXTRW,     G,d, U,dq,I,b,       vex5 mmx p_00_66),
+    [0xc6] = X86_OP_ENTRY4(VSHUF,      V,x, H,x, W,x,       vex4 p_00_66),
+
+    [0xd0] = X86_OP_ENTRY3(VADDSUB,   V,x, H,x, W,x,        vex2 cpuid(SSE3) p_66_f2),
+    [0xd1] = X86_OP_ENTRY3(PSRLW_r,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
+    [0xd2] = X86_OP_ENTRY3(PSRLD_r,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
+    [0xd3] = X86_OP_ENTRY3(PSRLQ_r,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
+    [0xd4] = X86_OP_ENTRY3(PADDQ,     V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
+    [0xd5] = X86_OP_ENTRY3(PMULLW,    V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
+    [0xd6] = X86_OP_GROUP0(0FD6),
+    [0xd7] = X86_OP_ENTRY3(PMOVMSKB,  G,d, None,None, U,x,  vex7 mmx avx2_256 p_00_66),
+
+    [0xe0] = X86_OP_ENTRY3(PAVGB,     V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
+    [0xe1] = X86_OP_ENTRY3(PSRAW_r,   V,x, H,x, W,x,        vex7 mmx avx2_256 p_00_66),
+    [0xe2] = X86_OP_ENTRY3(PSRAD_r,   V,x, H,x, W,x,        vex7 mmx avx2_256 p_00_66),
+    [0xe3] = X86_OP_ENTRY3(PAVGW,     V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
+    [0xe4] = X86_OP_ENTRY3(PMULHUW,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
+    [0xe5] = X86_OP_ENTRY3(PMULHW,    V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
+    [0xe6] = X86_OP_GROUP0(0FE6),
+    [0xe7] = X86_OP_ENTRY3(MOVDQ,     W,x, None,None, V,x,  vex1 mmx p_00_66), /* MOVNTQ/MOVNTDQ */
+
+    [0xf0] = X86_OP_ENTRY3(MOVDQ,    V,x, None,None, WM,x,  vex4_unal cpuid(SSE3) p_f2), /* LDDQU */
+    [0xf1] = X86_OP_ENTRY3(PSLLW_r,  V,x, H,x, W,x,         vex7 mmx avx2_256 p_00_66),
+    [0xf2] = X86_OP_ENTRY3(PSLLD_r,  V,x, H,x, W,x,         vex7 mmx avx2_256 p_00_66),
+    [0xf3] = X86_OP_ENTRY3(PSLLQ_r,  V,x, H,x, W,x,         vex7 mmx avx2_256 p_00_66),
+    [0xf4] = X86_OP_ENTRY3(PMULUDQ,  V,x, H,x, W,x,         vex4 mmx avx2_256 p_00_66),
+    [0xf5] = X86_OP_ENTRY3(PMADDWD,  V,x, H,x, W,x,         vex4 mmx avx2_256 p_00_66),
+    [0xf6] = X86_OP_ENTRY3(PSADBW,   V,x, H,x, W,x,         vex4 mmx avx2_256 p_00_66),
+    [0xf7] = X86_OP_ENTRY3(MASKMOV,  None,None, V,dq, U,dq, vex4_unal avx2_256 mmx p_00_66),
+
+    [0x08] = X86_OP_ENTRY0(NOP,           svm(INVD)),
+    [0x09] = X86_OP_ENTRY0(NOP,           svm(WBINVD)),
     [0x0b] = X86_OP_ENTRY0(UD),           /* UD2 */
     [0x0d] = X86_OP_ENTRY1(NOP,  M,v),    /* 3DNow! prefetch */
+    [0x0e] = X86_OP_ENTRY0(EMMS,                              cpuid(3DNOW)), /* femms */
+    /*
+     * 3DNow!'s opcode byte comes *after* modrm and displacements, making it
+     * more like an Ib operand.  Dispatch to the right helper in a single gen_*
+     * function.
+     */
+    [0x0f] = X86_OP_ENTRY3(3dnow,       P,q, Q,q, I,b,        cpuid(3DNOW)),
 
     [0x18] = X86_OP_ENTRY1(NOP,  nop,v),  /* prefetch/reserved NOP */
     [0x19] = X86_OP_ENTRY1(NOP,  nop,v),  /* reserved NOP */
@@ -1137,6 +1293,9 @@
 
     [0xa8] = X86_OP_ENTRYr(PUSH,   GS, w),
     [0xa9] = X86_OP_ENTRYw(POP,    GS, w),
+    [0xaa] = X86_OP_ENTRY0(RSM,             chk(smm) svm(RSM)),
+    [0xac] = X86_OP_ENTRY4(SHRD,   E,v, 2op,v, G,v),
+    [0xad] = X86_OP_ENTRY3(SHRD,   E,v, 2op,v, G,v),
     [0xae] = X86_OP_GROUP0(group15),
     /*
      * It's slightly more efficient to put Ev operand in T0 and allow gen_IMUL3
@@ -1144,23 +1303,14 @@
      */
     [0xaf] = X86_OP_ENTRY3(IMUL3,  G,v, E,v, 2op,v, sextT0),
 
-    [0xb2] = X86_OP_ENTRY3(LSS,    G,v, EM,p, None, None),
-    [0xb4] = X86_OP_ENTRY3(LFS,    G,v, EM,p, None, None),
-    [0xb5] = X86_OP_ENTRY3(LGS,    G,v, EM,p, None, None),
-    [0xb6] = X86_OP_ENTRY3(MOV,    G,v, E,b, None, None, zextT0), /* MOVZX */
-    [0xb7] = X86_OP_ENTRY3(MOV,    G,v, E,w, None, None, zextT0), /* MOVZX */
-
+    [0xb8] = X86_OP_GROUP0(0FB8),
     /* decoded as modrm, which is visible as a difference between page fault and #UD */
     [0xb9] = X86_OP_ENTRYr(UD,     nop,v),                        /* UD1 */
+    [0xbc] = X86_OP_GROUP0(0FBC),
+    [0xbd] = X86_OP_GROUP0(0FBD),
     [0xbe] = X86_OP_ENTRY3(MOV,    G,v, E,b, None, None, sextT0), /* MOVSX */
     [0xbf] = X86_OP_ENTRY3(MOV,    G,v, E,w, None, None, sextT0), /* MOVSX */
 
-    [0xc2] = X86_OP_ENTRY4(VCMP,       V,x, H,x, W,x,       vex2_rep3 p_00_66_f3_f2),
-    [0xc3] = X86_OP_ENTRY3(MOV,        EM,y,G,y, None,None, cpuid(SSE2)), /* MOVNTI */
-    [0xc4] = X86_OP_ENTRY4(PINSRW,     V,dq,H,dq,E,w,       vex5 mmx p_00_66),
-    [0xc5] = X86_OP_ENTRY3(PEXTRW,     G,d, U,dq,I,b,       vex5 mmx p_00_66),
-    [0xc6] = X86_OP_ENTRY4(VSHUF,      V,x, H,x, W,x,       vex4 p_00_66),
-
     [0xc8] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
     [0xc9] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
     [0xca] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
@@ -1170,33 +1320,6 @@
     [0xce] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
     [0xcf] = X86_OP_ENTRY1(BSWAP,     LoBits,y),
 
-    [0xd0] = X86_OP_ENTRY3(VADDSUB,   V,x, H,x, W,x,        vex2 cpuid(SSE3) p_66_f2),
-    [0xd1] = X86_OP_ENTRY3(PSRLW_r,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
-    [0xd2] = X86_OP_ENTRY3(PSRLD_r,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
-    [0xd3] = X86_OP_ENTRY3(PSRLQ_r,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
-    [0xd4] = X86_OP_ENTRY3(PADDQ,     V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
-    [0xd5] = X86_OP_ENTRY3(PMULLW,    V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
-    [0xd6] = X86_OP_GROUP0(0FD6),
-    [0xd7] = X86_OP_ENTRY3(PMOVMSKB,  G,d, None,None, U,x,  vex7 mmx avx2_256 p_00_66),
-
-    [0xe0] = X86_OP_ENTRY3(PAVGB,     V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
-    [0xe1] = X86_OP_ENTRY3(PSRAW_r,   V,x, H,x, W,x,        vex7 mmx avx2_256 p_00_66),
-    [0xe2] = X86_OP_ENTRY3(PSRAD_r,   V,x, H,x, W,x,        vex7 mmx avx2_256 p_00_66),
-    [0xe3] = X86_OP_ENTRY3(PAVGW,     V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
-    [0xe4] = X86_OP_ENTRY3(PMULHUW,   V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
-    [0xe5] = X86_OP_ENTRY3(PMULHW,    V,x, H,x, W,x,        vex4 mmx avx2_256 p_00_66),
-    [0xe6] = X86_OP_GROUP0(0FE6),
-    [0xe7] = X86_OP_ENTRY3(MOVDQ,     W,x, None,None, V,x,  vex1 mmx p_00_66), /* MOVNTQ/MOVNTDQ */
-
-    [0xf0] = X86_OP_ENTRY3(MOVDQ,    V,x, None,None, WM,x,  vex4_unal cpuid(SSE3) p_f2), /* LDDQU */
-    [0xf1] = X86_OP_ENTRY3(PSLLW_r,  V,x, H,x, W,x,         vex7 mmx avx2_256 p_00_66),
-    [0xf2] = X86_OP_ENTRY3(PSLLD_r,  V,x, H,x, W,x,         vex7 mmx avx2_256 p_00_66),
-    [0xf3] = X86_OP_ENTRY3(PSLLQ_r,  V,x, H,x, W,x,         vex7 mmx avx2_256 p_00_66),
-    [0xf4] = X86_OP_ENTRY3(PMULUDQ,  V,x, H,x, W,x,         vex4 mmx avx2_256 p_00_66),
-    [0xf5] = X86_OP_ENTRY3(PMADDWD,  V,x, H,x, W,x,         vex4 mmx avx2_256 p_00_66),
-    [0xf6] = X86_OP_ENTRY3(PSADBW,   V,x, H,x, W,x,         vex4 mmx avx2_256 p_00_66),
-    [0xf7] = X86_OP_ENTRY3(MASKMOV,  None,None, V,dq, U,dq, vex4_unal avx2_256 mmx p_00_66),
-
     /* Incorrectly missing from 2-17 */
     [0xd8] = X86_OP_ENTRY3(PSUBUSB,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
     [0xd9] = X86_OP_ENTRY3(PSUBUSW,  V,x, H,x, W,x,  vex4 mmx avx2_256 p_00_66),
@@ -1335,9 +1458,9 @@
         /* 0xff */
         [0x08] = X86_OP_ENTRY1(INC,     E,v,                           lock),
         [0x09] = X86_OP_ENTRY1(DEC,     E,v,                           lock),
-        [0x0a] = X86_OP_ENTRY3(CALL_m,  None, None, E,f64, None, None, zextT0),
+        [0x0a] = X86_OP_ENTRYr(CALL_m,  E,f64,                         zextT0),
         [0x0b] = X86_OP_ENTRYr(CALLF_m, M,p),
-        [0x0c] = X86_OP_ENTRY3(JMP_m,   None, None, E,f64, None, None, zextT0),
+        [0x0c] = X86_OP_ENTRYr(JMP_m,   E,f64,                         zextT0),
         [0x0d] = X86_OP_ENTRYr(JMPF_m,  M,p),
         [0x0e] = X86_OP_ENTRYr(PUSH,    E,f64),
     };
@@ -1586,18 +1709,18 @@
     [0x7E] = X86_OP_ENTRYr(Jcc, J,b),
     [0x7F] = X86_OP_ENTRYr(Jcc, J,b),
 
-    [0x88] = X86_OP_ENTRY3(MOV, E,b, G,b, None, None),
-    [0x89] = X86_OP_ENTRY3(MOV, E,v, G,v, None, None),
-    [0x8A] = X86_OP_ENTRY3(MOV, G,b, E,b, None, None),
-    [0x8B] = X86_OP_ENTRY3(MOV, G,v, E,v, None, None),
-    /* Missing in Table A-2: memory destination is always 16-bit.  */
-    [0x8C] = X86_OP_ENTRY3(MOV, E,v, S,w, None, None, op0_Mw),
-    [0x8D] = X86_OP_ENTRY3(LEA, G,v, M,v, None, None, noseg),
-    [0x8E] = X86_OP_ENTRY3(MOV, S,w, E,w, None, None),
+    [0x88] = X86_OP_ENTRYwr(MOV, E,b, G,b),
+    [0x89] = X86_OP_ENTRYwr(MOV, E,v, G,v),
+    [0x8A] = X86_OP_ENTRYwr(MOV, G,b, E,b),
+    [0x8B] = X86_OP_ENTRYwr(MOV, G,v, E,v),
+     /* Missing in Table A-2: memory destination is always 16-bit.  */
+    [0x8C] = X86_OP_ENTRYwr(MOV, E,v, S,w, op0_Mw),
+    [0x8D] = X86_OP_ENTRYwr(LEA, G,v, M,v, nolea),
+    [0x8E] = X86_OP_ENTRYwr(MOV, S,w, E,w),
     [0x8F] = X86_OP_GROUPw(group1A, E,v),
 
     [0x98] = X86_OP_ENTRY1(CBW,    0,v), /* rAX */
-    [0x99] = X86_OP_ENTRY3(CWD,    2,v, 0,v, None, None), /* rDX, rAX */
+    [0x99] = X86_OP_ENTRYwr(CWD,   2,v, 0,v), /* rDX, rAX */
     [0x9A] = X86_OP_ENTRYrr(CALLF, I_unsigned,p, I_unsigned,w, chk(i64)),
     [0x9B] = X86_OP_ENTRY0(WAIT),
     [0x9C] = X86_OP_ENTRY0(PUSHF,  chk(vm86_iopl) svm(PUSHF)),
@@ -1607,22 +1730,22 @@
 
     [0xA8] = X86_OP_ENTRYrr(AND, 0,b, I,b),   /* AL, Ib */
     [0xA9] = X86_OP_ENTRYrr(AND, 0,v, I,z),   /* rAX, Iz */
-    [0xAA] = X86_OP_ENTRY3(STOS, Y,b, 0,b, None, None),
-    [0xAB] = X86_OP_ENTRY3(STOS, Y,v, 0,v, None, None),
+    [0xAA] = X86_OP_ENTRYwr(STOS, Y,b, 0,b),
+    [0xAB] = X86_OP_ENTRYwr(STOS, Y,v, 0,v),
     /* Manual writeback because REP LODS (!) has to write EAX/RAX after every LODS.  */
     [0xAC] = X86_OP_ENTRYr(LODS, X,b),
     [0xAD] = X86_OP_ENTRYr(LODS, X,v),
     [0xAE] = X86_OP_ENTRYrr(SCAS, 0,b, Y,b),
     [0xAF] = X86_OP_ENTRYrr(SCAS, 0,v, Y,v),
 
-    [0xB8] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
-    [0xB9] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
-    [0xBA] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
-    [0xBB] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
-    [0xBC] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
-    [0xBD] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
-    [0xBE] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
-    [0xBF] = X86_OP_ENTRY3(MOV, LoBits,v, I,v, None, None),
+    [0xB8] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v),
+    [0xB9] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v),
+    [0xBA] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v),
+    [0xBB] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v),
+    [0xBC] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v),
+    [0xBD] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v),
+    [0xBE] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v),
+    [0xBF] = X86_OP_ENTRYwr(MOV, LoBits,v, I,v),
 
     [0xC8] = X86_OP_ENTRYrr(ENTER, I,w, I,b),
     [0xC9] = X86_OP_ENTRY1(LEAVE, A,d64),
@@ -1725,6 +1848,10 @@
         *ot = s->dflag == MO_16 ? MO_32 : s->dflag;
         return true;
 
+    case X86_SIZE_y_d64:  /* Full (not 16-bit) register access */
+        *ot = CODE64(s) ? MO_64 : MO_32;
+        return true;
+
     case X86_SIZE_z:  /* 16-bit for 16-bit operand size, else 32-bit */
         *ot = s->dflag == MO_16 ? MO_16 : MO_32;
         return true;
@@ -1802,11 +1929,34 @@
 
     case X86_TYPE_C:  /* REG in the modrm byte selects a control register */
         op->unit = X86_OP_CR;
-        goto get_reg;
+        op->n = ((get_modrm(s, env) >> 3) & 7) | REX_R(s);
+        if (op->n == 0 && (s->prefix & PREFIX_LOCK) &&
+            (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) {
+            op->n = 8;
+            s->prefix &= ~PREFIX_LOCK;
+        }
+        if (op->n != 0 && op->n != 2 && op->n != 3 && op->n != 4 && op->n != 8) {
+            return false;
+        }
+        if (decode->e.intercept) {
+            decode->e.intercept += op->n;
+        }
+        break;
 
     case X86_TYPE_D:  /* REG in the modrm byte selects a debug register */
         op->unit = X86_OP_DR;
-        goto get_reg;
+        op->n = ((get_modrm(s, env) >> 3) & 7) | REX_R(s);
+        if (op->n >= 8) {
+            /*
+             * illegal opcode.  The DR4 and DR5 case is checked in the generated
+             * code instead, to save on hflags bits.
+             */
+            return false;
+        }
+        if (decode->e.intercept) {
+            decode->e.intercept += op->n;
+        }
+        break;
 
     case X86_TYPE_G:  /* REG in the modrm byte selects a GPR */
         op->unit = X86_OP_INT;
@@ -2047,6 +2197,10 @@
         return true;
     case X86_FEAT_CMOV:
         return (s->cpuid_features & CPUID_CMOV);
+    case X86_FEAT_CLFLUSH:
+        return (s->cpuid_features & CPUID_CLFLUSH);
+    case X86_FEAT_FXSR:
+        return (s->cpuid_features & CPUID_FXSR);
     case X86_FEAT_F16C:
         return (s->cpuid_ext_features & CPUID_EXT_F16C);
     case X86_FEAT_FMA:
@@ -2055,6 +2209,8 @@
         return (s->cpuid_ext_features & CPUID_EXT_MOVBE);
     case X86_FEAT_PCLMULQDQ:
         return (s->cpuid_ext_features & CPUID_EXT_PCLMULQDQ);
+    case X86_FEAT_POPCNT:
+        return (s->cpuid_ext_features & CPUID_EXT_POPCNT);
     case X86_FEAT_SSE:
         return (s->cpuid_features & CPUID_SSE);
     case X86_FEAT_SSE2:
@@ -2080,6 +2236,8 @@
 
     case X86_FEAT_AVX:
         return (s->cpuid_ext_features & CPUID_EXT_AVX);
+    case X86_FEAT_XSAVE:
+        return (s->cpuid_ext_features & CPUID_EXT_XSAVE);
 
     case X86_FEAT_3DNOW:
         return (s->cpuid_ext2_features & CPUID_EXT2_3DNOW);
@@ -2094,11 +2252,20 @@
         return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2);
     case X86_FEAT_AVX2:
         return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_AVX2);
+    case X86_FEAT_CLFLUSHOPT:
+        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT);
+    case X86_FEAT_CLWB:
+        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB);
+    case X86_FEAT_FSGSBASE:
+        return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_FSGSBASE);
     case X86_FEAT_SHA_NI:
         return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SHA_NI);
 
     case X86_FEAT_CMPCCXADD:
         return (s->cpuid_7_1_eax_features & CPUID_7_1_EAX_CMPCCXADD);
+
+    case X86_FEAT_XSAVEOPT:
+        return (s->cpuid_xsave_features & CPUID_XSAVE_XSAVEOPT);
     }
     g_assert_not_reached();
 }
@@ -2428,18 +2595,12 @@
         if (b == 0x0f) {
             b = x86_ldub_code(env, s);
             switch (b) {
-            case 0x00 ... 0x03: /* mostly privileged instructions */
-            case 0x05 ... 0x09:
+            case 0x00 ... 0x01: /* mostly privileged instructions */
             case 0x1a ... 0x1b: /* MPX */
-            case 0x20 ... 0x23: /* mov from/to CR and DR */
-            case 0x30 ... 0x35: /* more privileged instructions */
-            case 0xa2 ... 0xa5: /* CPUID, BT, SHLD */
-            case 0xaa ... 0xae: /* RSM, SHRD, grp15 */
-            case 0xb0 ... 0xb1: /* cmpxchg */
+            case 0xa3:          /* bt */
+            case 0xab:          /* bts */
             case 0xb3:          /* btr */
-            case 0xb8:          /* integer ops */
-            case 0xba ... 0xbd: /* integer ops */
-            case 0xc0 ... 0xc1: /* xadd */
+            case 0xba ... 0xbb: /* grp8, btc */
             case 0xc7:          /* grp9 */
                 disas_insn_old(s, cpu, b + 0x100);
                 return;
@@ -2466,18 +2627,28 @@
 
     /* Checks that result in #UD come first.  */
     if (decode.e.check) {
-        if (decode.e.check & X86_CHECK_i64) {
-            if (CODE64(s)) {
+        if (CODE64(s)) {
+            if (decode.e.check & X86_CHECK_i64) {
+                goto illegal_op;
+            }
+            if ((decode.e.check & X86_CHECK_i64_amd) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1) {
+                goto illegal_op;
+            }
+        } else {
+            if (decode.e.check & X86_CHECK_o64) {
+                goto illegal_op;
+            }
+            if ((decode.e.check & X86_CHECK_o64_intel) && env->cpuid_vendor1 == CPUID_VENDOR_INTEL_1) {
                 goto illegal_op;
             }
         }
-        if (decode.e.check & X86_CHECK_o64) {
-            if (!CODE64(s)) {
+        if (decode.e.check & X86_CHECK_prot_or_vm86) {
+            if (!PE(s)) {
                 goto illegal_op;
             }
         }
-        if (decode.e.check & X86_CHECK_prot) {
-            if (!PE(s) || VM86(s)) {
+        if (decode.e.check & X86_CHECK_no_vm86) {
+            if (VM86(s)) {
                 goto illegal_op;
             }
         }
@@ -2524,11 +2695,6 @@
         assert(decode.op[1].unit == X86_OP_INT);
         break;
 
-    case X86_SPECIAL_NoSeg:
-        decode.mem.def_seg = -1;
-        s->override = -1;
-        break;
-
     case X86_SPECIAL_Op0_Mw:
         assert(decode.op[0].unit == X86_OP_INT);
         if (decode.op[0].has_ea) {
@@ -2556,19 +2722,21 @@
      * exceptions if there is no memory operand).  Exceptions are
      * vm86 checks (INTn, IRET, PUSHF/POPF), RSM and XSETBV (!).
      *
-     * RSM and XSETBV will be handled in the gen_* functions
-     * instead of using chk().
+     * XSETBV will check for CPL0 in the gen_* function instead of using chk().
      */
     if (decode.e.check & X86_CHECK_cpl0) {
         if (CPL(s) != 0) {
             goto gp_fault;
         }
     }
-    if (decode.e.intercept && unlikely(GUEST(s))) {
+    if (decode.e.has_intercept && unlikely(GUEST(s))) {
         gen_helper_svm_check_intercept(tcg_env,
                                        tcg_constant_i32(decode.e.intercept));
     }
     if (decode.e.check) {
+        if ((decode.e.check & X86_CHECK_smm) && !(s->flags & HF_SMM_MASK)) {
+            goto illegal_op;
+        }
         if ((decode.e.check & X86_CHECK_vm86_iopl) && VM86(s)) {
             if (IOPL(s) < 3) {
                 goto gp_fault;
@@ -2585,12 +2753,13 @@
         gen_helper_enter_mmx(tcg_env);
     }
 
-    if (decode.op[0].has_ea || decode.op[1].has_ea || decode.op[2].has_ea) {
+    if (decode.e.special != X86_SPECIAL_NoLoadEA &&
+        (decode.op[0].has_ea || decode.op[1].has_ea || decode.op[2].has_ea)) {
         gen_load_ea(s, &decode.mem, decode.e.vex_class == 12);
     }
     if (s->prefix & PREFIX_LOCK) {
         gen_load(s, &decode, 2, s->T1);
-        decode.e.gen(s, env, &decode);
+        decode.e.gen(s, &decode);
     } else {
         if (decode.op[0].unit == X86_OP_MMX) {
             compute_mmx_offset(&decode.op[0]);
@@ -2599,12 +2768,12 @@
         }
         gen_load(s, &decode, 1, s->T0);
         gen_load(s, &decode, 2, s->T1);
-        decode.e.gen(s, env, &decode);
+        decode.e.gen(s, &decode);
         gen_writeback(s, &decode, 0, s->T0);
     }
 
     /*
-     * Write back flags after last memory access.  Some newer ALU instructions, as
+     * Write back flags after last memory access.  Some older ALU instructions, as
      * well as SSE instructions, write flags in the gen_* function, but that can
      * cause incorrect tracking of CC_OP for instructions that write to both memory
      * and flags.
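
The reworked #UD checks above make the 64-bit-mode tests vendor-aware: i64_amd entries (SYSENTER/SYSEXIT) stay legal in 64-bit mode on Intel, while o64_intel entries (SYSCALL/SYSRET) fault outside 64-bit mode only on Intel. A standalone sketch of that decision table; names and flag values are illustrative only:

#include <stdbool.h>

enum {
    SK_CHK_i64       = 1 << 0,  /* illegal in 64-bit mode */
    SK_CHK_o64       = 1 << 1,  /* illegal outside 64-bit mode */
    SK_CHK_i64_amd   = 1 << 2,  /* illegal in 64-bit mode on AMD only */
    SK_CHK_o64_intel = 1 << 3,  /* illegal outside 64-bit mode on Intel only */
};

static bool sketch_mode_check_faults(unsigned check, bool code64, bool vendor_intel)
{
    if (code64) {
        if (check & SK_CHK_i64) {
            return true;
        }
        if ((check & SK_CHK_i64_amd) && !vendor_intel) {
            return true;
        }
    } else {
        if (check & SK_CHK_o64) {
            return true;
        }
        if ((check & SK_CHK_o64_intel) && vendor_intel) {
            return true;
        }
    }
    return false;
}
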
diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h
index 1f90cf9..f9bf9a6 100644
--- a/target/i386/tcg/decode-new.h
+++ b/target/i386/tcg/decode-new.h
@@ -90,6 +90,7 @@
     X86_SIZE_w,  /* 16-bit */
     X86_SIZE_x,  /* 128/256-bit, based on operand size */
     X86_SIZE_y,  /* 32/64-bit, based on operand size */
+    X86_SIZE_y_d64,  /* 32/64-bit, based on 64-bit mode */
     X86_SIZE_z,  /* 16-bit for 16-bit operand size, else 32-bit */
     X86_SIZE_z_f64,  /* 32-bit for 32-bit operand size or 64-bit mode, else 16-bit */
 
@@ -108,12 +109,18 @@
     X86_FEAT_AVX2,
     X86_FEAT_BMI1,
     X86_FEAT_BMI2,
+    X86_FEAT_CLFLUSH,
+    X86_FEAT_CLFLUSHOPT,
+    X86_FEAT_CLWB,
     X86_FEAT_CMOV,
     X86_FEAT_CMPCCXADD,
     X86_FEAT_F16C,
     X86_FEAT_FMA,
+    X86_FEAT_FSGSBASE,
+    X86_FEAT_FXSR,
     X86_FEAT_MOVBE,
     X86_FEAT_PCLMULQDQ,
+    X86_FEAT_POPCNT,
     X86_FEAT_SHA_NI,
     X86_FEAT_SSE,
     X86_FEAT_SSE2,
@@ -122,6 +129,8 @@
     X86_FEAT_SSE41,
     X86_FEAT_SSE42,
     X86_FEAT_SSE4A,
+    X86_FEAT_XSAVE,
+    X86_FEAT_XSAVEOPT,
 } X86CPUIDFeature;
 
 /* Execution flags */
@@ -142,8 +151,8 @@
     X86_CHECK_i64 = 1,
     X86_CHECK_o64 = 2,
 
-    /* Fault outside protected mode */
-    X86_CHECK_prot = 4,
+    /* Fault in vm86 mode */
+    X86_CHECK_no_vm86 = 4,
 
     /* Privileged instruction checks */
     X86_CHECK_cpl0 = 8,
@@ -159,6 +168,17 @@
 
     /* Fault if VEX.W=0 */
     X86_CHECK_W1 = 256,
+
+    /* Fault outside protected mode, possibly including vm86 mode */
+    X86_CHECK_prot_or_vm86 = 512,
+    X86_CHECK_prot = X86_CHECK_prot_or_vm86 | X86_CHECK_no_vm86,
+
+    /* Fault outside SMM */
+    X86_CHECK_smm = 1024,
+
+    /* Vendor-specific checks for Intel/AMD differences */
+    X86_CHECK_i64_amd = 2048,
+    X86_CHECK_o64_intel = 4096,
 } X86InsnCheck;
 
 typedef enum X86InsnSpecial {
@@ -170,8 +190,9 @@
     /* Always locked if it has a memory operand (XCHG) */
     X86_SPECIAL_Locked,
 
-    /* Do not apply segment base to effective address */
-    X86_SPECIAL_NoSeg,
+    /* Do not load effective address in s->A0 */
+    X86_SPECIAL_NoLoadEA,
+
     /*
      * Rd/Mb or Rd/Mw in the manual: register operand 0 is treated as 32 bits
      * (and writeback zero-extends it to 64 bits if applicable).  PREFIX_DATA
@@ -245,7 +266,7 @@
 typedef void (*X86DecodeFunc)(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b);
 
 /* Code generation function.  */
-typedef void (*X86GenFunc)(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode);
+typedef void (*X86GenFunc)(DisasContext *s, X86DecodedInsn *decode);
 
 struct X86OpEntry {
     /* Based on the is_decode flags.  */
@@ -271,6 +292,7 @@
     unsigned     valid_prefix:16;
     unsigned     check:16;
     unsigned     intercept:8;
+    bool         has_intercept:1;
     bool         is_decode:1;
 };
 
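X86_CHECK_prot is now the union of prot_or_vm86 and no_vm86, and the new chk2()/chk3() initializers simply OR check bits together. A small standalone sketch of how such an entry composes; the sk_* names are placeholders, not the real decoder types:

struct sk_entry { unsigned check; };

enum {
    SK_CHECK_no_vm86      = 4,
    SK_CHECK_cpl0         = 8,
    SK_CHECK_prot_or_vm86 = 512,
    /* prot = "must be protected mode or vm86" plus "must not be vm86" */
    SK_CHECK_prot         = SK_CHECK_prot_or_vm86 | SK_CHECK_no_vm86,
};

#define sk_chk2(a, b) .check = SK_CHECK_##a | SK_CHECK_##b,

/* A SYSEXIT-like entry: faults outside protected mode and outside CPL0. */
static const struct sk_entry sk_sysexit_like = { sk_chk2(prot, cpl0) };
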
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 4be3d9a..11faa70 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -60,8 +60,8 @@
                                   TCGv_ptr reg_c, TCGv_ptr reg_d, TCGv_i32 even,
                                   TCGv_i32 odd);
 
-static void gen_JMP_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode);
-static void gen_JMP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode);
+static void gen_JMP_m(DisasContext *s, X86DecodedInsn *decode);
+static void gen_JMP(DisasContext *s, X86DecodedInsn *decode);
 
 static inline TCGv_i32 tcg_constant8u_i32(uint8_t val)
 {
@@ -242,12 +242,20 @@
         tcg_gen_ld32u_tl(v, tcg_env,
                          offsetof(CPUX86State,segs[op->n].selector));
         break;
+#ifndef CONFIG_USER_ONLY
     case X86_OP_CR:
-        tcg_gen_ld_tl(v, tcg_env, offsetof(CPUX86State, cr[op->n]));
+        if (op->n == 8) {
+            translator_io_start(&s->base);
+            gen_helper_read_cr8(v, tcg_env);
+        } else {
+            tcg_gen_ld_tl(v, tcg_env, offsetof(CPUX86State, cr[op->n]));
+        }
         break;
     case X86_OP_DR:
-        tcg_gen_ld_tl(v, tcg_env, offsetof(CPUX86State, dr[op->n]));
+        /* CR4.DE tested in the helper.  */
+        gen_helper_get_dr(v, tcg_env, tcg_constant_i32(op->n));
         break;
+#endif
     case X86_OP_INT:
         if (op->has_ea) {
             if (v == s->T0 && decode->e.special == X86_SPECIAL_SExtT0) {
@@ -343,8 +351,20 @@
                                  16, 16, 0);
         }
         break;
+#ifndef CONFIG_USER_ONLY
     case X86_OP_CR:
+        if (op->n == 8) {
+            translator_io_start(&s->base);
+        }
+        gen_helper_write_crN(tcg_env, tcg_constant_i32(op->n), v);
+        s->base.is_jmp = DISAS_EOB_NEXT;
+        break;
     case X86_OP_DR:
+        /* CR4.DE tested in the helper.  */
+        gen_helper_set_dr(tcg_env, tcg_constant_i32(op->n), v);
+        s->base.is_jmp = DISAS_EOB_NEXT;
+        break;
+#endif
     default:
         g_assert_not_reached();
     }
@@ -446,7 +466,7 @@
     [0xbf] = gen_helper_pavgusb,
 };
 
-static void gen_3dnow(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_3dnow(DisasContext *s, X86DecodedInsn *decode)
 {
     uint8_t b = decode->immediate;
     SSEFunc_0_epp fn = b < ARRAY_SIZE(fns_3dnow) ? fns_3dnow[b] : NULL;
@@ -479,7 +499,7 @@
  * f3 = v*ss Vss, Hss, Wps
  * f2 = v*sd Vsd, Hsd, Wps
  */
-static inline void gen_unary_fp_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
+static inline void gen_unary_fp_sse(DisasContext *s, X86DecodedInsn *decode,
                               SSEFunc_0_epp pd_xmm, SSEFunc_0_epp ps_xmm,
                               SSEFunc_0_epp pd_ymm, SSEFunc_0_epp ps_ymm,
                               SSEFunc_0_eppp sd, SSEFunc_0_eppp ss)
@@ -504,9 +524,9 @@
     }
 }
 #define UNARY_FP_SSE(uname, lname)                                                 \
-static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uname(DisasContext *s, X86DecodedInsn *decode)                   \
 {                                                                                  \
-    gen_unary_fp_sse(s, env, decode,                                               \
+    gen_unary_fp_sse(s, decode,                                                    \
                      gen_helper_##lname##pd_xmm,                                   \
                      gen_helper_##lname##ps_xmm,                                   \
                      gen_helper_##lname##pd_ymm,                                   \
@@ -522,7 +542,7 @@
  * f3 = v*ss Vss, Hss, Wps
  * f2 = v*sd Vsd, Hsd, Wps
  */
-static inline void gen_fp_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
+static inline void gen_fp_sse(DisasContext *s, X86DecodedInsn *decode,
                               SSEFunc_0_eppp pd_xmm, SSEFunc_0_eppp ps_xmm,
                               SSEFunc_0_eppp pd_ymm, SSEFunc_0_eppp ps_ymm,
                               SSEFunc_0_eppp sd, SSEFunc_0_eppp ss)
@@ -543,9 +563,9 @@
 }
 
 #define FP_SSE(uname, lname)                                                       \
-static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uname(DisasContext *s, X86DecodedInsn *decode)                   \
 {                                                                                  \
-    gen_fp_sse(s, env, decode,                                                     \
+    gen_fp_sse(s, decode,                                                          \
                gen_helper_##lname##pd_xmm,                                         \
                gen_helper_##lname##ps_xmm,                                         \
                gen_helper_##lname##pd_ymm,                                         \
@@ -561,7 +581,7 @@
 FP_SSE(VMAX, max)
 
 #define FMA_SSE_PACKED(uname, ptr0, ptr1, ptr2, even, odd)                         \
-static void gen_##uname##Px(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uname##Px(DisasContext *s, X86DecodedInsn *decode)               \
 {                                                                                  \
     SSEFunc_0_eppppii xmm = s->vex_w ? gen_helper_fma4pd_xmm : gen_helper_fma4ps_xmm; \
     SSEFunc_0_eppppii ymm = s->vex_w ? gen_helper_fma4pd_ymm : gen_helper_fma4ps_ymm; \
@@ -574,7 +594,7 @@
 
 #define FMA_SSE(uname, ptr0, ptr1, ptr2, flags)                                    \
 FMA_SSE_PACKED(uname, ptr0, ptr1, ptr2, flags, flags)                              \
-static void gen_##uname##Sx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uname##Sx(DisasContext *s, X86DecodedInsn *decode)               \
 {                                                                                  \
     SSEFunc_0_eppppi fn = s->vex_w ? gen_helper_fma4sd : gen_helper_fma4ss;        \
                                                                                    \
@@ -607,10 +627,10 @@
 FMA_SSE_PACKED(VFMSUBADD132, OP_PTR0, OP_PTR2, OP_PTR1, 0, float_muladd_negate_c)
 
 #define FP_UNPACK_SSE(uname, lname)                                                \
-static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uname(DisasContext *s, X86DecodedInsn *decode)                   \
 {                                                                                  \
     /* PS maps to the DQ integer instruction, PD maps to QDQ.  */                  \
-    gen_fp_sse(s, env, decode,                                                     \
+    gen_fp_sse(s, decode,                                                          \
                gen_helper_##lname##qdq_xmm,                                        \
                gen_helper_##lname##dq_xmm,                                         \
                gen_helper_##lname##qdq_ymm,                                        \
@@ -624,7 +644,7 @@
  * 00 = v*ps Vps, Wpd
  * f3 = v*ss Vss, Wps
  */
-static inline void gen_unary_fp32_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
+static inline void gen_unary_fp32_sse(DisasContext *s, X86DecodedInsn *decode,
                                       SSEFunc_0_epp ps_xmm,
                                       SSEFunc_0_epp ps_ymm,
                                       SSEFunc_0_eppp ss)
@@ -649,9 +669,9 @@
     gen_illegal_opcode(s);
 }
 #define UNARY_FP32_SSE(uname, lname)                                               \
-static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uname(DisasContext *s, X86DecodedInsn *decode)                   \
 {                                                                                  \
-    gen_unary_fp32_sse(s, env, decode,                                             \
+    gen_unary_fp32_sse(s, decode,                                                  \
                        gen_helper_##lname##ps_xmm,                                 \
                        gen_helper_##lname##ps_ymm,                                 \
                        gen_helper_##lname##ss);                                    \
@@ -663,7 +683,7 @@
  * 66 = v*pd Vpd, Hpd, Wpd
  * f2 = v*ps Vps, Hps, Wps
  */
-static inline void gen_horizontal_fp_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
+static inline void gen_horizontal_fp_sse(DisasContext *s, X86DecodedInsn *decode,
                                          SSEFunc_0_eppp pd_xmm, SSEFunc_0_eppp ps_xmm,
                                          SSEFunc_0_eppp pd_ymm, SSEFunc_0_eppp ps_ymm)
 {
@@ -674,9 +694,9 @@
     fn(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2);
 }
 #define HORIZONTAL_FP_SSE(uname, lname)                                            \
-static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uname(DisasContext *s, X86DecodedInsn *decode)                   \
 {                                                                                  \
-    gen_horizontal_fp_sse(s, env, decode,                                          \
+    gen_horizontal_fp_sse(s, decode,                                               \
                           gen_helper_##lname##pd_xmm, gen_helper_##lname##ps_xmm,  \
                           gen_helper_##lname##pd_ymm, gen_helper_##lname##ps_ymm); \
 }
@@ -684,7 +704,7 @@
 HORIZONTAL_FP_SSE(VHSUB, hsub)
 HORIZONTAL_FP_SSE(VADDSUB, addsub)
 
-static inline void gen_ternary_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
+static inline void gen_ternary_sse(DisasContext *s, X86DecodedInsn *decode,
                                    int op3, SSEFunc_0_epppp xmm, SSEFunc_0_epppp ymm)
 {
     SSEFunc_0_epppp fn = s->vex_l ? ymm : xmm;
@@ -695,21 +715,21 @@
     fn(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2, ptr3);
 }
 #define TERNARY_SSE(uname, uvname, lname)                                          \
-static void gen_##uvname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uvname(DisasContext *s, X86DecodedInsn *decode)                  \
 {                                                                                  \
-    gen_ternary_sse(s, env, decode, (uint8_t)decode->immediate >> 4,               \
+    gen_ternary_sse(s, decode, (uint8_t)decode->immediate >> 4,                    \
                     gen_helper_##lname##_xmm, gen_helper_##lname##_ymm);           \
 }                                                                                  \
-static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uname(DisasContext *s, X86DecodedInsn *decode)                   \
 {                                                                                  \
-    gen_ternary_sse(s, env, decode, 0,                                             \
+    gen_ternary_sse(s, decode, 0,                                                  \
                   gen_helper_##lname##_xmm, gen_helper_##lname##_ymm);             \
 }
 TERNARY_SSE(BLENDVPS, VBLENDVPS, blendvps)
 TERNARY_SSE(BLENDVPD, VBLENDVPD, blendvpd)
 TERNARY_SSE(PBLENDVB, VPBLENDVB, pblendvb)
 
-static inline void gen_binary_imm_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
+static inline void gen_binary_imm_sse(DisasContext *s, X86DecodedInsn *decode,
                                       SSEFunc_0_epppi xmm, SSEFunc_0_epppi ymm)
 {
     TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
@@ -721,9 +741,9 @@
 }
 
 #define BINARY_IMM_SSE(uname, lname)                                               \
-static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uname(DisasContext *s, X86DecodedInsn *decode)                   \
 {                                                                                  \
-    gen_binary_imm_sse(s, env, decode,                                             \
+    gen_binary_imm_sse(s, decode,                                                  \
                        gen_helper_##lname##_xmm,                                   \
                        gen_helper_##lname##_ymm);                                  \
 }
@@ -739,7 +759,7 @@
 
 
 #define UNARY_INT_GVEC(uname, func, ...)                                           \
-static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uname(DisasContext *s, X86DecodedInsn *decode)                   \
 {                                                                                  \
     int vec_len = vector_len(s, decode);                                          \
                                                                                    \
@@ -757,7 +777,7 @@
 
 
 #define BINARY_INT_GVEC(uname, func, ...)                                          \
-static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uname(DisasContext *s, X86DecodedInsn *decode)                   \
 {                                                                                  \
     int vec_len = vector_len(s, decode);                                          \
                                                                                    \
@@ -816,7 +836,7 @@
  * These are really the same encoding, because 1) V is the same as P when VEX.V
  * is not present 2) P and Q are the same as H and W apart from MM/XMM
  */
-static inline void gen_binary_int_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
+static inline void gen_binary_int_sse(DisasContext *s, X86DecodedInsn *decode,
                                       SSEFunc_0_eppp mmx, SSEFunc_0_eppp xmm, SSEFunc_0_eppp ymm)
 {
     assert(!!mmx == !!(decode->e.special == X86_SPECIAL_MMX));
@@ -837,9 +857,9 @@
 
 
 #define BINARY_INT_MMX(uname, lname)                                               \
-static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uname(DisasContext *s, X86DecodedInsn *decode)                   \
 {                                                                                  \
-    gen_binary_int_sse(s, env, decode,                                             \
+    gen_binary_int_sse(s, decode,                                                  \
                           gen_helper_##lname##_mmx,                                \
                           gen_helper_##lname##_xmm,                                \
                           gen_helper_##lname##_ymm);                               \
@@ -886,9 +906,9 @@
 
 /* Instructions with no MMX equivalent.  */
 #define BINARY_INT_SSE(uname, lname)                                               \
-static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uname(DisasContext *s, X86DecodedInsn *decode)                   \
 {                                                                                  \
-    gen_binary_int_sse(s, env, decode,                                             \
+    gen_binary_int_sse(s, decode,                                                  \
                           NULL,                                                    \
                           gen_helper_##lname##_xmm,                                \
                           gen_helper_##lname##_ymm);                               \
@@ -911,7 +931,7 @@
 BINARY_INT_SSE(VAESENCLAST, aesenclast)
 
 #define UNARY_CMP_SSE(uname, lname)                                                \
-static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uname(DisasContext *s, X86DecodedInsn *decode)                   \
 {                                                                                  \
     if (!s->vex_l) {                                                               \
         gen_helper_##lname##_xmm(tcg_env, OP_PTR1, OP_PTR2);                       \
@@ -924,7 +944,7 @@
 UNARY_CMP_SSE(VTESTPS,    vtestps)
 UNARY_CMP_SSE(VTESTPD,    vtestpd)
 
-static inline void gen_unary_int_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
+static inline void gen_unary_int_sse(DisasContext *s, X86DecodedInsn *decode,
                                      SSEFunc_0_epp xmm, SSEFunc_0_epp ymm)
 {
     if (!s->vex_l) {
@@ -935,9 +955,9 @@
 }
 
 #define UNARY_INT_SSE(uname, lname)                                                \
-static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uname(DisasContext *s, X86DecodedInsn *decode)                   \
 {                                                                                  \
-    gen_unary_int_sse(s, env, decode,                                              \
+    gen_unary_int_sse(s, decode,                                                   \
                       gen_helper_##lname##_xmm,                                    \
                       gen_helper_##lname##_ymm);                                   \
 }
@@ -969,7 +989,7 @@
 UNARY_INT_SSE(VCVTPH2PS, cvtph2ps)
 
 
-static inline void gen_unary_imm_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
+static inline void gen_unary_imm_sse(DisasContext *s, X86DecodedInsn *decode,
                                      SSEFunc_0_ppi xmm, SSEFunc_0_ppi ymm)
 {
     TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
@@ -981,9 +1001,9 @@
 }
 
 #define UNARY_IMM_SSE(uname, lname)                                                \
-static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uname(DisasContext *s, X86DecodedInsn *decode)                   \
 {                                                                                  \
-    gen_unary_imm_sse(s, env, decode,                                              \
+    gen_unary_imm_sse(s, decode,                                                   \
                       gen_helper_##lname##_xmm,                                    \
                       gen_helper_##lname##_ymm);                                   \
 }
@@ -996,7 +1016,7 @@
 UNARY_IMM_SSE(VPERMILPS_i, vpermilps_imm)
 UNARY_IMM_SSE(VPERMILPD_i, vpermilpd_imm)
 
-static inline void gen_unary_imm_fp_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
+static inline void gen_unary_imm_fp_sse(DisasContext *s, X86DecodedInsn *decode,
                                         SSEFunc_0_eppi xmm, SSEFunc_0_eppi ymm)
 {
     TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
@@ -1008,9 +1028,9 @@
 }
 
 #define UNARY_IMM_FP_SSE(uname, lname)                                             \
-static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uname(DisasContext *s, X86DecodedInsn *decode)                   \
 {                                                                                  \
-    gen_unary_imm_fp_sse(s, env, decode,                                           \
+    gen_unary_imm_fp_sse(s, decode,                                                \
                       gen_helper_##lname##_xmm,                                    \
                       gen_helper_##lname##_ymm);                                   \
 }
@@ -1018,7 +1038,7 @@
 UNARY_IMM_FP_SSE(VROUNDPS,    roundps)
 UNARY_IMM_FP_SSE(VROUNDPD,    roundpd)
 
-static inline void gen_vexw_avx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
+static inline void gen_vexw_avx(DisasContext *s, X86DecodedInsn *decode,
                                 SSEFunc_0_eppp d_xmm, SSEFunc_0_eppp q_xmm,
                                 SSEFunc_0_eppp d_ymm, SSEFunc_0_eppp q_ymm)
 {
@@ -1030,9 +1050,9 @@
 
 /* VEX.W affects whether to operate on 32- or 64-bit elements.  */
 #define VEXW_AVX(uname, lname)                                                     \
-static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uname(DisasContext *s, X86DecodedInsn *decode)                   \
 {                                                                                  \
-    gen_vexw_avx(s, env, decode,                                                   \
+    gen_vexw_avx(s, decode,                                                        \
                  gen_helper_##lname##d_xmm, gen_helper_##lname##q_xmm,             \
                  gen_helper_##lname##d_ymm, gen_helper_##lname##q_ymm);            \
 }
@@ -1042,7 +1062,7 @@
 VEXW_AVX(VPMASKMOV, vpmaskmov)
 
 /* Same as above, but with extra arguments to the helper.  */
-static inline void gen_vsib_avx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
+static inline void gen_vsib_avx(DisasContext *s, X86DecodedInsn *decode,
                                 SSEFunc_0_epppti d_xmm, SSEFunc_0_epppti q_xmm,
                                 SSEFunc_0_epppti d_ymm, SSEFunc_0_epppti q_ymm)
 {
@@ -1066,29 +1086,29 @@
     }
 }
 #define VSIB_AVX(uname, lname)                                                     \
-static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \
+static void gen_##uname(DisasContext *s, X86DecodedInsn *decode)                   \
 {                                                                                  \
-    gen_vsib_avx(s, env, decode,                                                   \
+    gen_vsib_avx(s, decode,                                                        \
                  gen_helper_##lname##d_xmm, gen_helper_##lname##q_xmm,             \
                  gen_helper_##lname##d_ymm, gen_helper_##lname##q_ymm);            \
 }
 VSIB_AVX(VPGATHERD, vpgatherd)
 VSIB_AVX(VPGATHERQ, vpgatherq)
 
-static void gen_AAA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_AAA(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_update_cc_op(s);
     gen_helper_aaa(tcg_env);
     assume_cc_op(s, CC_OP_EFLAGS);
 }
 
-static void gen_AAD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_AAD(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_helper_aad(s->T0, s->T0, s->T1);
     prepare_update1_cc(decode, s, CC_OP_LOGICB);
 }
 
-static void gen_AAM(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_AAM(DisasContext *s, X86DecodedInsn *decode)
 {
     if (decode->immediate == 0) {
         gen_exception(s, EXCP00_DIVZ);
@@ -1098,14 +1118,14 @@
     }
 }
 
-static void gen_AAS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_AAS(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_update_cc_op(s);
     gen_helper_aas(tcg_env);
     assume_cc_op(s, CC_OP_EFLAGS);
 }
 
-static void gen_ADC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_ADC(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[1].ot;
     TCGv c_in = tcg_temp_new();
@@ -1122,24 +1142,41 @@
     prepare_update3_cc(decode, s, CC_OP_ADCB + ot, c_in);
 }
 
-/* ADCX/ADOX do not have memory operands and can use set_cc_op.  */
-static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op)
+static void gen_ADCOX(DisasContext *s, X86DecodedInsn *decode, int cc_op)
 {
-    int opposite_cc_op;
+    MemOp ot = decode->op[0].ot;
     TCGv carry_in = NULL;
-    TCGv carry_out = (cc_op == CC_OP_ADCX ? cpu_cc_dst : cpu_cc_src2);
+    TCGv *carry_out = (cc_op == CC_OP_ADCX ? &decode->cc_dst : &decode->cc_src2);
     TCGv zero;
 
-    if (cc_op == s->cc_op || s->cc_op == CC_OP_ADCOX) {
-        /* Re-use the carry-out from a previous round.  */
-        carry_in = carry_out;
-    } else {
-        /* We don't have a carry-in, get it out of EFLAGS.  */
-        if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
-            gen_compute_eflags(s);
+    decode->cc_op = cc_op;
+    *carry_out = tcg_temp_new();
+    if (CC_OP_HAS_EFLAGS(s->cc_op)) {
+        decode->cc_src = cpu_cc_src;
+
+        /* Re-use the carry-out from a previous round?  */
+        if (s->cc_op == cc_op || s->cc_op == CC_OP_ADCOX) {
+            carry_in = (cc_op == CC_OP_ADCX ? cpu_cc_dst : cpu_cc_src2);
         }
-        carry_in = s->tmp0;
-        tcg_gen_extract_tl(carry_in, cpu_cc_src,
+
+        /* Preserve the opposite carry from previous rounds?  */
+        if (s->cc_op != cc_op && s->cc_op != CC_OP_EFLAGS) {
+            decode->cc_op = CC_OP_ADCOX;
+            if (carry_out == &decode->cc_dst) {
+                decode->cc_src2 = cpu_cc_src2;
+            } else {
+                decode->cc_dst = cpu_cc_dst;
+            }
+        }
+    } else {
+        decode->cc_src = tcg_temp_new();
+        gen_mov_eflags(s, decode->cc_src);
+    }
+
+    if (!carry_in) {
+        /* Get carry_in out of EFLAGS.  */
+        carry_in = tcg_temp_new();
+        tcg_gen_extract_tl(carry_in, decode->cc_src,
             ctz32(cc_op == CC_OP_ADCX ? CC_C : CC_O), 1);
     }
 
@@ -1151,31 +1188,23 @@
         tcg_gen_ext32u_tl(s->T1, s->T1);
         tcg_gen_add_i64(s->T0, s->T0, s->T1);
         tcg_gen_add_i64(s->T0, s->T0, carry_in);
-        tcg_gen_shri_i64(carry_out, s->T0, 32);
+        tcg_gen_shri_i64(*carry_out, s->T0, 32);
         break;
 #endif
     default:
         zero = tcg_constant_tl(0);
-        tcg_gen_add2_tl(s->T0, carry_out, s->T0, zero, carry_in, zero);
-        tcg_gen_add2_tl(s->T0, carry_out, s->T0, carry_out, s->T1, zero);
+        tcg_gen_add2_tl(s->T0, *carry_out, s->T0, zero, carry_in, zero);
+        tcg_gen_add2_tl(s->T0, *carry_out, s->T0, *carry_out, s->T1, zero);
         break;
     }
-
-    opposite_cc_op = cc_op == CC_OP_ADCX ? CC_OP_ADOX : CC_OP_ADCX;
-    if (s->cc_op == CC_OP_ADCOX || s->cc_op == opposite_cc_op) {
-        /* Merge with the carry-out from the opposite instruction.  */
-        set_cc_op(s, CC_OP_ADCOX);
-    } else {
-        set_cc_op(s, cc_op);
-    }
 }
 
-static void gen_ADCX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_ADCX(DisasContext *s, X86DecodedInsn *decode)
 {
-    gen_ADCOX(s, env, decode->op[0].ot, CC_OP_ADCX);
+    gen_ADCOX(s, decode, CC_OP_ADCX);
 }
 
-static void gen_ADD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_ADD(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[1].ot;
 
@@ -1188,12 +1217,12 @@
     prepare_update2_cc(decode, s, CC_OP_ADDB + ot);
 }
 
-static void gen_ADOX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_ADOX(DisasContext *s, X86DecodedInsn *decode)
 {
-    gen_ADCOX(s, env, decode->op[0].ot, CC_OP_ADOX);
+    gen_ADCOX(s, decode, CC_OP_ADOX);
 }
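
For readers less familiar with ADCX/ADOX: both instructions perform the same add-with-carry, but ADCX reads and writes only CF while ADOX reads and writes only OF, which is what lets gen_ADCOX above keep two independent carry chains live (CC_OP_ADCOX) when the instructions are interleaved. A minimal C sketch of one ADCX step, assuming a caller-maintained carry flag (the helper name and calling convention are invented for illustration, this is not the TCG code above):

#include <stdint.h>

/* One ADCX step: dst = dst + src + CF, updating only CF; OF is untouched. */
static uint64_t adcx_step(uint64_t dst, uint64_t src, unsigned *cf)
{
    uint64_t sum = dst + src;
    unsigned carry1 = sum < dst;      /* carry out of dst + src */
    uint64_t res = sum + *cf;
    unsigned carry2 = res < sum;      /* carry out of adding CF in */

    *cf = carry1 | carry2;            /* at most one of the two can be set */
    return res;
}

ADOX behaves identically with OF in place of CF, so an unrolled multi-precision multiply can run one chain through each flag without the flag spills a single-CF chain would need.
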
 
-static void gen_AND(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_AND(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[1].ot;
 
@@ -1206,7 +1235,7 @@
     prepare_update1_cc(decode, s, CC_OP_LOGICB + ot);
 }
 
-static void gen_ANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_ANDN(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
 
@@ -1214,7 +1243,7 @@
     prepare_update1_cc(decode, s, CC_OP_LOGICB + ot);
 }
 
-static void gen_ARPL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_ARPL(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGv zf = tcg_temp_new();
     TCGv flags = tcg_temp_new();
@@ -1235,7 +1264,7 @@
     decode->cc_op = CC_OP_EFLAGS;
 }
 
-static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_BEXTR(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
     TCGv bound = tcg_constant_tl(ot == MO_64 ? 63 : 31);
@@ -1263,43 +1292,37 @@
     prepare_update1_cc(decode, s, CC_OP_LOGICB + ot);
 }
 
-/* BLSI do not have memory operands and can use set_cc_op.  */
-static void gen_BLSI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_BLSI(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
 
-    tcg_gen_mov_tl(cpu_cc_src, s->T0);
-    tcg_gen_neg_tl(s->T1, s->T0);
+    /* input in T1, which is ready for prepare_update2_cc  */
+    tcg_gen_neg_tl(s->T0, s->T1);
     tcg_gen_and_tl(s->T0, s->T0, s->T1);
-    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
-    set_cc_op(s, CC_OP_BMILGB + ot);
+    prepare_update2_cc(decode, s, CC_OP_BMILGB + ot);
 }
 
-/* BLSMSK do not have memory operands and can use set_cc_op.  */
-static void gen_BLSMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_BLSMSK(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
 
-    tcg_gen_mov_tl(cpu_cc_src, s->T0);
-    tcg_gen_subi_tl(s->T1, s->T0, 1);
+    /* input in T1, which is ready for prepare_update2_cc  */
+    tcg_gen_subi_tl(s->T0, s->T1, 1);
     tcg_gen_xor_tl(s->T0, s->T0, s->T1);
-    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
-    set_cc_op(s, CC_OP_BMILGB + ot);
+    prepare_update2_cc(decode, s, CC_OP_BMILGB + ot);
 }
 
-/* BLSR do not have memory operands and can use set_cc_op.  */
-static void gen_BLSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_BLSR(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
 
-    tcg_gen_mov_tl(cpu_cc_src, s->T0);
-    tcg_gen_subi_tl(s->T1, s->T0, 1);
+    /* input in T1, which is ready for prepare_update2_cc  */
+    tcg_gen_subi_tl(s->T0, s->T1, 1);
     tcg_gen_and_tl(s->T0, s->T0, s->T1);
-    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
-    set_cc_op(s, CC_OP_BMILGB + ot);
+    prepare_update2_cc(decode, s, CC_OP_BMILGB + ot);
 }
 
-static void gen_BOUND(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_BOUND(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGv_i32 op = tcg_temp_new_i32();
     tcg_gen_trunc_tl_i32(op, s->T0);
@@ -1310,7 +1333,48 @@
     }
 }
 
-static void gen_BSWAP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+/* Non-standard convention - on entry T0 is zero-extended input, T1 is the output.  */
+static void gen_BSF(DisasContext *s, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[0].ot;
+
+    /* Only the Z bit is defined and it is related to the input.  */
+    decode->cc_dst = tcg_temp_new();
+    decode->cc_op = CC_OP_LOGICB + ot;
+    tcg_gen_mov_tl(decode->cc_dst, s->T0);
+
+    /*
+     * The manual says that the output is undefined when the
+     * input is zero, but real hardware leaves it unchanged, and
+     * real programs appear to depend on that.  Accomplish this
+     * by passing the output as the value to return upon zero.
+     */
+    tcg_gen_ctz_tl(s->T0, s->T0, s->T1);
+}
+
+/* Non-standard convention - on entry T0 is zero-extended input, T1 is the output.  */
+static void gen_BSR(DisasContext *s, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[0].ot;
+
+    /* Only the Z bit is defined and it is related to the input.  */
+    decode->cc_dst = tcg_temp_new();
+    decode->cc_op = CC_OP_LOGICB + ot;
+    tcg_gen_mov_tl(decode->cc_dst, s->T0);
+
+    /*
+     * The manual says that the output is undefined when the
+     * input is zero, but real hardware leaves it unchanged, and
+     * real programs appear to depend on that.  Accomplish this
+     * by passing the output as the value to return upon zero.
+     * Plus, return the bit index of the first 1 bit.
+     */
+    tcg_gen_xori_tl(s->T1, s->T1, TARGET_LONG_BITS - 1);
+    tcg_gen_clz_tl(s->T0, s->T0, s->T1);
+    tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
+}
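
The zero-input behaviour described in the comments amounts to "ctz/clz with the old destination as the fallback value". A stand-alone C sketch of that contract for the 32-bit case (function names invented; __builtin_ctz/__builtin_clz are the usual GCC/Clang builtins):

#include <stdint.h>

/* BSF: index of the lowest set bit; on zero input the destination keeps
 * its previous value, matching observed hardware behaviour. */
static uint32_t bsf32(uint32_t src, uint32_t old_dst)
{
    return src ? (uint32_t)__builtin_ctz(src) : old_dst;
}

/* BSR: index of the highest set bit, with the same zero-input rule. */
static uint32_t bsr32(uint32_t src, uint32_t old_dst)
{
    return src ? 31u - (uint32_t)__builtin_clz(src) : old_dst;
}

In gen_BSR the XOR with TARGET_LONG_BITS - 1 plays the role of the "31 - clz" conversion above while still letting the unchanged-destination fallback pass through, since x ^ m ^ m == x.
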
+
+static void gen_BSWAP(DisasContext *s, X86DecodedInsn *decode)
 {
 #ifdef TARGET_X86_64
     if (s->dflag == MO_64) {
@@ -1321,7 +1385,7 @@
     tcg_gen_bswap32_tl(s->T0, s->T0, TCG_BSWAP_OZ);
 }
 
-static void gen_BZHI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_BZHI(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
     TCGv bound = tcg_constant_tl(ot == MO_64 ? 63 : 31);
@@ -1341,26 +1405,26 @@
     prepare_update2_cc(decode, s, CC_OP_BMILGB + ot);
 }
 
-static void gen_CALL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CALL(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_push_v(s, eip_next_tl(s));
-    gen_JMP(s, env, decode);
+    gen_JMP(s, decode);
 }
 
-static void gen_CALL_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CALL_m(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_push_v(s, eip_next_tl(s));
-    gen_JMP_m(s, env, decode);
+    gen_JMP_m(s, decode);
 }
 
-static void gen_CALLF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CALLF(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_far_call(s);
 }
 
-static void gen_CALLF_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CALLF_m(DisasContext *s, X86DecodedInsn *decode)
 {
-    MemOp ot = decode->op[2].ot;
+    MemOp ot = decode->op[1].ot;
 
     gen_op_ld_v(s, ot, s->T0, s->A0);
     gen_add_A0_im(s, 1 << ot);
@@ -1368,41 +1432,48 @@
     gen_far_call(s);
 }
 
-static void gen_CBW(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CBW(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp src_ot = decode->op[0].ot - 1;
 
     tcg_gen_ext_tl(s->T0, s->T0, src_ot | MO_SIGN);
 }
 
-static void gen_CLC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CLC(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_compute_eflags(s);
     tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_C);
 }
 
-static void gen_CLD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CLD(DisasContext *s, X86DecodedInsn *decode)
 {
     tcg_gen_st_i32(tcg_constant_i32(1), tcg_env, offsetof(CPUX86State, df));
 }
 
-static void gen_CLI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CLI(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_reset_eflags(s, IF_MASK);
 }
 
-static void gen_CMC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CLTS(DisasContext *s, X86DecodedInsn *decode)
+{
+    gen_helper_clts(tcg_env);
+    /* abort block because static cpu state changed */
+    s->base.is_jmp = DISAS_EOB_NEXT;
+}
+
+static void gen_CMC(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_compute_eflags(s);
     tcg_gen_xori_tl(cpu_cc_src, cpu_cc_src, CC_C);
 }
 
-static void gen_CMOVcc(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CMOVcc(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_cmovcc1(s, decode->b & 0xf, s->T0, s->T1);
 }
 
-static void gen_CMPccXADD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CMPccXADD(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGLabel *label_top = gen_new_label();
     TCGLabel *label_bottom = gen_new_label();
@@ -1505,7 +1576,7 @@
     decode->cc_op = CC_OP_SUBB + ot;
 }
 
-static void gen_CMPS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CMPS(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[2].ot;
     if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
@@ -1515,7 +1586,65 @@
     }
 }
 
-static void gen_CRC32(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CMPXCHG(DisasContext *s, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[2].ot;
+    TCGv cmpv = tcg_temp_new();
+    TCGv oldv = tcg_temp_new();
+    TCGv newv = tcg_temp_new();
+    TCGv dest;
+
+    tcg_gen_ext_tl(cmpv, cpu_regs[R_EAX], ot);
+    tcg_gen_ext_tl(newv, s->T1, ot);
+    if (s->prefix & PREFIX_LOCK) {
+        tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
+                                  s->mem_index, ot | MO_LE);
+    } else {
+        tcg_gen_ext_tl(oldv, s->T0, ot);
+        if (decode->op[0].has_ea) {
+            /*
+             * Perform an unconditional store cycle, as a physical CPU would;
+             * it must happen before changing the accumulator to ensure
+             * idempotency if the store faults and the instruction
+             * is restarted
+             */
+            tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
+            gen_op_st_v(s, ot, newv, s->A0);
+        } else {
+            /*
+             * Unlike the memory case, where "the destination operand receives
+             * a write cycle without regard to the result of the comparison",
+             * rm must not be touched altogether if the write fails, including
+             * not zero-extending it on 64-bit processors.  So, precompute
+             * the result of a successful writeback and perform the movcond
+             * directly on cpu_regs.  In case rm is part of RAX, note that this
+             * movcond and the one below are mutually exclusive; only one runs.
+             */
+            dest = gen_op_deposit_reg_v(s, ot, decode->op[0].n, newv, newv);
+            tcg_gen_movcond_tl(TCG_COND_EQ, dest, oldv, cmpv, newv, dest);
+        }
+        decode->op[0].unit = X86_OP_SKIP;
+    }
+
+    /* Write RAX only if the cmpxchg fails.  */
+    dest = gen_op_deposit_reg_v(s, ot, R_EAX, s->T0, oldv);
+    tcg_gen_movcond_tl(TCG_COND_NE, dest, oldv, cmpv, s->T0, dest);
+
+    tcg_gen_mov_tl(s->cc_srcT, cmpv);
+    tcg_gen_sub_tl(cmpv, cmpv, oldv);
+    decode->cc_dst = cmpv;
+    decode->cc_src = oldv;
+    decode->cc_op = CC_OP_SUBB + ot;
+}
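
As a plain-C reference for the rules the code above is careful about - a memory destination always sees a store, a register destination is not touched on failure, and the accumulator is written only on failure - here is a rough, non-atomic model of 32-bit CMPXCHG (names are illustrative):

#include <stdbool.h>
#include <stdint.h>

/* Non-atomic model of CMPXCHG r/m32, r32: *eax is the accumulator,
 * *dst the destination, src the source register.  Returns ZF. */
static bool cmpxchg32(uint32_t *eax, uint32_t *dst, uint32_t src)
{
    uint32_t old = *dst;

    if (old == *eax) {
        *dst = src;     /* success: destination <- source, ZF = 1 */
        return true;
    }
    *eax = old;         /* failure: accumulator <- destination, ZF = 0 */
    return false;
}

For a memory destination the real instruction still performs a write cycle of the old value on failure, which is the detail the unconditional-store path above preserves; the model omits it for brevity.
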
+
+static void gen_CPUID(DisasContext *s, X86DecodedInsn *decode)
+{
+    gen_update_cc_op(s);
+    gen_update_eip_cur(s);
+    gen_helper_cpuid(tcg_env);
+}
+
+static void gen_CRC32(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[2].ot;
 
@@ -1523,7 +1652,7 @@
     gen_helper_crc32(s->T0, s->tmp2_i32, s->T1, tcg_constant_i32(8 << ot));
 }
 
-static void gen_CVTPI2Px(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CVTPI2Px(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_helper_enter_mmx(tcg_env);
     if (s->prefix & PREFIX_DATA) {
@@ -1533,7 +1662,7 @@
     }
 }
 
-static void gen_CVTPx2PI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CVTPx2PI(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_helper_enter_mmx(tcg_env);
     if (s->prefix & PREFIX_DATA) {
@@ -1543,7 +1672,7 @@
     }
 }
 
-static void gen_CVTTPx2PI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CVTTPx2PI(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_helper_enter_mmx(tcg_env);
     if (s->prefix & PREFIX_DATA) {
@@ -1553,28 +1682,28 @@
     }
 }
 
-static void gen_CWD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_CWD(DisasContext *s, X86DecodedInsn *decode)
 {
     int shift = 8 << decode->op[0].ot;
 
     tcg_gen_sextract_tl(s->T0, s->T0, shift - 1, 1);
 }
 
-static void gen_DAA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_DAA(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_update_cc_op(s);
     gen_helper_daa(tcg_env);
     assume_cc_op(s, CC_OP_EFLAGS);
 }
 
-static void gen_DAS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_DAS(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_update_cc_op(s);
     gen_helper_das(tcg_env);
     assume_cc_op(s, CC_OP_EFLAGS);
 }
 
-static void gen_DEC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_DEC(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[1].ot;
 
@@ -1588,40 +1717,40 @@
     prepare_update_cc_incdec(decode, s, CC_OP_DECB + ot);
 }
 
-static void gen_DIV(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_DIV(DisasContext *s, X86DecodedInsn *decode)
 {
-    MemOp ot = decode->op[2].ot;
+    MemOp ot = decode->op[1].ot;
 
     switch(ot) {
     case MO_8:
-        gen_helper_divb_AL(tcg_env, s->T1);
+        gen_helper_divb_AL(tcg_env, s->T0);
         break;
     case MO_16:
-        gen_helper_divw_AX(tcg_env, s->T1);
+        gen_helper_divw_AX(tcg_env, s->T0);
         break;
     default:
     case MO_32:
-        gen_helper_divl_EAX(tcg_env, s->T1);
+        gen_helper_divl_EAX(tcg_env, s->T0);
         break;
 #ifdef TARGET_X86_64
     case MO_64:
-        gen_helper_divq_EAX(tcg_env, s->T1);
+        gen_helper_divq_EAX(tcg_env, s->T0);
         break;
 #endif
     }
 }
 
-static void gen_EMMS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_EMMS(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_helper_emms(tcg_env);
 }
 
-static void gen_ENTER(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_ENTER(DisasContext *s, X86DecodedInsn *decode)
 {
    gen_enter(s, decode->op[1].imm, decode->op[2].imm);
 }
 
-static void gen_EXTRQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_EXTRQ_i(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGv_i32 length = tcg_constant_i32(decode->immediate & 63);
     TCGv_i32 index = tcg_constant_i32((decode->immediate >> 8) & 63);
@@ -1629,12 +1758,30 @@
     gen_helper_extrq_i(tcg_env, OP_PTR0, index, length);
 }
 
-static void gen_EXTRQ_r(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_EXTRQ_r(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_helper_extrq_r(tcg_env, OP_PTR0, OP_PTR2);
 }
 
-static void gen_HLT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_FXRSTOR(DisasContext *s, X86DecodedInsn *decode)
+{
+    if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
+        gen_NM_exception(s);
+    } else {
+        gen_helper_fxrstor(tcg_env, s->A0);
+    }
+}
+
+static void gen_FXSAVE(DisasContext *s, X86DecodedInsn *decode)
+{
+    if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
+        gen_NM_exception(s);
+    } else {
+        gen_helper_fxsave(tcg_env, s->A0);
+    }
+}
+
+static void gen_HLT(DisasContext *s, X86DecodedInsn *decode)
 {
 #ifdef CONFIG_SYSTEM_ONLY
     gen_update_cc_op(s);
@@ -1644,30 +1791,30 @@
 #endif
 }
 
-static void gen_IDIV(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_IDIV(DisasContext *s, X86DecodedInsn *decode)
 {
-    MemOp ot = decode->op[2].ot;
+    MemOp ot = decode->op[1].ot;
 
     switch(ot) {
     case MO_8:
-        gen_helper_idivb_AL(tcg_env, s->T1);
+        gen_helper_idivb_AL(tcg_env, s->T0);
         break;
     case MO_16:
-        gen_helper_idivw_AX(tcg_env, s->T1);
+        gen_helper_idivw_AX(tcg_env, s->T0);
         break;
     default:
     case MO_32:
-        gen_helper_idivl_EAX(tcg_env, s->T1);
+        gen_helper_idivl_EAX(tcg_env, s->T0);
         break;
 #ifdef TARGET_X86_64
     case MO_64:
-        gen_helper_idivq_EAX(tcg_env, s->T1);
+        gen_helper_idivq_EAX(tcg_env, s->T0);
         break;
 #endif
     }
 }
 
-static void gen_IMUL3(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_IMUL3(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
     TCGv cc_src_rhs;
@@ -1730,7 +1877,7 @@
     prepare_update2_cc(decode, s, CC_OP_MULB + ot);
 }
 
-static void gen_IMUL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_IMUL(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[1].ot;
     TCGv cc_src_rhs;
@@ -1788,12 +1935,12 @@
     prepare_update2_cc(decode, s, CC_OP_MULB + ot);
 }
 
-static void gen_IN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_IN(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
     TCGv_i32 port = tcg_temp_new_i32();
 
-    tcg_gen_trunc_tl_i32(port, s->T1);
+    tcg_gen_trunc_tl_i32(port, s->T0);
     tcg_gen_ext16u_i32(port, port);
     if (!gen_check_io(s, ot, port, SVM_IOIO_TYPE_MASK)) {
         return;
@@ -1804,7 +1951,7 @@
     gen_bpt_io(s, port, ot);
 }
 
-static void gen_INC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_INC(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[1].ot;
 
@@ -1818,7 +1965,7 @@
     prepare_update_cc_incdec(decode, s, CC_OP_INCB + ot);
 }
 
-static void gen_INS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_INS(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[1].ot;
     TCGv_i32 port = tcg_temp_new_i32();
@@ -1838,7 +1985,7 @@
     }
 }
 
-static void gen_INSERTQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_INSERTQ_i(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGv_i32 length = tcg_constant_i32(decode->immediate & 63);
     TCGv_i32 index = tcg_constant_i32((decode->immediate >> 8) & 63);
@@ -1846,17 +1993,17 @@
     gen_helper_insertq_i(tcg_env, OP_PTR0, OP_PTR1, index, length);
 }
 
-static void gen_INSERTQ_r(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_INSERTQ_r(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_helper_insertq_r(tcg_env, OP_PTR0, OP_PTR2);
 }
 
-static void gen_INT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_INT(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_interrupt(s, decode->immediate);
 }
 
-static void gen_INT1(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_INT1(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_update_cc_op(s);
     gen_update_eip_next(s);
@@ -1864,19 +2011,19 @@
     s->base.is_jmp = DISAS_NORETURN;
 }
 
-static void gen_INT3(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_INT3(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_interrupt(s, EXCP03_INT3);
 }
 
-static void gen_INTO(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_INTO(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_update_cc_op(s);
     gen_update_eip_cur(s);
     gen_helper_into(tcg_env, cur_insn_len_i32(s));
 }
 
-static void gen_IRET(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_IRET(DisasContext *s, X86DecodedInsn *decode)
 {
     if (!PE(s) || VM86(s)) {
         gen_helper_iret_real(tcg_env, tcg_constant_i32(s->dflag - 1));
@@ -1888,13 +2035,13 @@
     s->base.is_jmp = DISAS_EOB_ONLY;
 }
 
-static void gen_Jcc(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_Jcc(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_bnd_jmp(s);
     gen_jcc(s, decode->b & 0xf, decode->immediate);
 }
 
-static void gen_JCXZ(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_JCXZ(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGLabel *taken = gen_new_label();
 
@@ -1903,27 +2050,27 @@
     gen_conditional_jump_labels(s, decode->immediate, NULL, taken);
 }
 
-static void gen_JMP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_JMP(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_update_cc_op(s);
     gen_jmp_rel(s, s->dflag, decode->immediate, 0);
 }
 
-static void gen_JMP_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_JMP_m(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_op_jmp_v(s, s->T0);
     gen_bnd_jmp(s);
     s->base.is_jmp = DISAS_JUMP;
 }
 
-static void gen_JMPF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_JMPF(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_far_jmp(s);
 }
 
-static void gen_JMPF_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_JMPF_m(DisasContext *s, X86DecodedInsn *decode)
 {
-    MemOp ot = decode->op[2].ot;
+    MemOp ot = decode->op[1].ot;
 
     gen_op_ld_v(s, ot, s->T0, s->A0);
     gen_add_A0_im(s, 1 << ot);
@@ -1931,7 +2078,7 @@
     gen_far_jmp(s);
 }
 
-static void gen_LAHF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LAHF(DisasContext *s, X86DecodedInsn *decode)
 {
     if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM)) {
         return gen_illegal_opcode(s);
@@ -1942,13 +2089,30 @@
     tcg_gen_deposit_tl(cpu_regs[R_EAX], cpu_regs[R_EAX], s->T0, 8, 8);
 }
 
-static void gen_LDMXCSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LAR(DisasContext *s, X86DecodedInsn *decode)
 {
-    tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T1);
+    MemOp ot = decode->op[0].ot;
+    TCGv result = tcg_temp_new();
+    TCGv dest;
+
+    gen_compute_eflags(s);
+    gen_update_cc_op(s);
+    gen_helper_lar(result, tcg_env, s->T0);
+
+    /* Perform writeback here to skip it if ZF=0.  */
+    decode->op[0].unit = X86_OP_SKIP;
+    dest = gen_op_deposit_reg_v(s, ot, decode->op[0].n, result, result);
+    tcg_gen_movcond_tl(TCG_COND_TSTNE, dest, cpu_cc_src, tcg_constant_tl(CC_Z),
+                       result, dest);
+}
+
+static void gen_LDMXCSR(DisasContext *s, X86DecodedInsn *decode)
+{
+    tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
     gen_helper_ldmxcsr(tcg_env, s->tmp2_i32);
 }
 
-static void gen_lxx_seg(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, int seg)
+static void gen_lxx_seg(DisasContext *s, X86DecodedInsn *decode, int seg)
 {
     MemOp ot = decode->op[0].ot;
 
@@ -1960,39 +2124,45 @@
     gen_movl_seg(s, seg, s->T1);
 }
 
-static void gen_LDS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LDS(DisasContext *s, X86DecodedInsn *decode)
 {
-    gen_lxx_seg(s, env, decode, R_DS);
+    gen_lxx_seg(s, decode, R_DS);
 }
 
-static void gen_LEA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LEA(DisasContext *s, X86DecodedInsn *decode)
 {
-    tcg_gen_mov_tl(s->T0, s->A0);
+    TCGv ea = gen_lea_modrm_1(s, decode->mem, false);
+    gen_lea_v_seg_dest(s, s->aflag, s->T0, ea, -1, -1);
 }
 
-static void gen_LEAVE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LEAVE(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_leave(s);
 }
 
-static void gen_LES(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LES(DisasContext *s, X86DecodedInsn *decode)
 {
-    gen_lxx_seg(s, env, decode, R_ES);
+    gen_lxx_seg(s, decode, R_ES);
 }
 
-static void gen_LFS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LFENCE(DisasContext *s, X86DecodedInsn *decode)
 {
-    gen_lxx_seg(s, env, decode, R_FS);
+    tcg_gen_mb(TCG_MO_LD_LD | TCG_BAR_SC);
 }
 
-static void gen_LGS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LFS(DisasContext *s, X86DecodedInsn *decode)
 {
-    gen_lxx_seg(s, env, decode, R_GS);
+    gen_lxx_seg(s, decode, R_FS);
 }
 
-static void gen_LODS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LGS(DisasContext *s, X86DecodedInsn *decode)
 {
-    MemOp ot = decode->op[2].ot;
+    gen_lxx_seg(s, decode, R_GS);
+}
+
+static void gen_LODS(DisasContext *s, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[1].ot;
     if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
         gen_repz(s, ot, gen_lods);
     } else {
@@ -2000,7 +2170,7 @@
     }
 }
 
-static void gen_LOOP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LOOP(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGLabel *taken = gen_new_label();
 
@@ -2010,7 +2180,7 @@
     gen_conditional_jump_labels(s, decode->immediate, NULL, taken);
 }
 
-static void gen_LOOPE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LOOPE(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGLabel *taken = gen_new_label();
     TCGLabel *not_taken = gen_new_label();
@@ -2022,7 +2192,7 @@
     gen_conditional_jump_labels(s, decode->immediate, not_taken, taken);
 }
 
-static void gen_LOOPNE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LOOPNE(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGLabel *taken = gen_new_label();
     TCGLabel *not_taken = gen_new_label();
@@ -2034,18 +2204,58 @@
     gen_conditional_jump_labels(s, decode->immediate, not_taken, taken);
 }
 
-static void gen_LSS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LSL(DisasContext *s, X86DecodedInsn *decode)
 {
-    gen_lxx_seg(s, env, decode, R_SS);
+    MemOp ot = decode->op[0].ot;
+    TCGv result = tcg_temp_new();
+    TCGv dest;
+
+    gen_compute_eflags(s);
+    gen_update_cc_op(s);
+    gen_helper_lsl(result, tcg_env, s->T0);
+
+    /* Perform writeback here to skip it if ZF=0.  */
+    decode->op[0].unit = X86_OP_SKIP;
+    dest = gen_op_deposit_reg_v(s, ot, decode->op[0].n, result, result);
+    tcg_gen_movcond_tl(TCG_COND_TSTNE, dest, cpu_cc_src, tcg_constant_tl(CC_Z),
+                       result, dest);
 }
 
-static void gen_MOV(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_LSS(DisasContext *s, X86DecodedInsn *decode)
+{
+    gen_lxx_seg(s, decode, R_SS);
+}
+
+static void gen_LZCNT(DisasContext *s, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[0].ot;
+
+    /* C bit (cc_src) is defined related to the input.  */
+    decode->cc_src = tcg_temp_new();
+    decode->cc_dst = s->T0;
+    decode->cc_op = CC_OP_BMILGB + ot;
+    tcg_gen_mov_tl(decode->cc_src, s->T0);
+
+    /*
+     * Reduce the target_ulong result by the number of zeros that
+     * we expect to find at the top.
+     */
+    tcg_gen_clzi_tl(s->T0, s->T0, TARGET_LONG_BITS);
+    tcg_gen_subi_tl(s->T0, s->T0, TARGET_LONG_BITS - (8 << ot));
+}
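
The subtraction above converts a leading-zero count taken over the whole target_ulong into one relative to the operand width: a zero-extended 16-bit operand is guaranteed TARGET_LONG_BITS - 16 leading zeros that must be removed again, and the clzi fallback of TARGET_LONG_BITS makes the zero-input case come out as the operand width. A self-contained C illustration of the same idea for the 16-bit case, using a 64-bit builtin:

#include <stdint.h>

/* 16-bit LZCNT derived from a 64-bit leading-zero count: count over the
 * zero-extended value, then subtract the 48 zeros that are always there. */
static unsigned lzcnt16(uint16_t x)
{
    uint64_t wide = x;
    unsigned clz64 = wide ? (unsigned)__builtin_clzll(wide) : 64;

    return clz64 - (64 - 16);   /* 0x8000 -> 0, 0x0001 -> 15, 0 -> 16 */
}
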
+
+static void gen_MFENCE(DisasContext *s, X86DecodedInsn *decode)
+{
+    tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
+}
+
+static void gen_MOV(DisasContext *s, X86DecodedInsn *decode)
 {
     /* nothing to do! */
 }
 #define gen_NOP gen_MOV
 
-static void gen_MASKMOV(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_MASKMOV(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_lea_v_seg(s, cpu_regs[R_EDI], R_DS, s->override);
 
@@ -2056,7 +2266,7 @@
     }
 }
 
-static void gen_MOVBE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_MOVBE(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
 
@@ -2068,7 +2278,7 @@
     }
 }
 
-static void gen_MOVD_from(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_MOVD_from(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[2].ot;
 
@@ -2086,7 +2296,7 @@
     }
 }
 
-static void gen_MOVD_to(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_MOVD_to(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[2].ot;
     int vec_len = vector_len(s, decode);
@@ -2108,12 +2318,12 @@
     }
 }
 
-static void gen_MOVDQ(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_MOVDQ(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_store_sse(s, decode, decode->op[2].offset);
 }
 
-static void gen_MOVMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_MOVMSK(DisasContext *s, X86DecodedInsn *decode)
 {
     typeof(gen_helper_movmskps_ymm) *ps, *pd, *fn;
     ps = s->vex_l ? gen_helper_movmskps_ymm : gen_helper_movmskps_xmm;
@@ -2123,7 +2333,7 @@
     tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
 }
 
-static void gen_MOVQ(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_MOVQ(DisasContext *s, X86DecodedInsn *decode)
 {
     int vec_len = vector_len(s, decode);
     int lo_ofs = vector_elem_offset(&decode->op[0], MO_64, 0);
@@ -2145,14 +2355,14 @@
     }
 }
 
-static void gen_MOVq_dq(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_MOVq_dq(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_helper_enter_mmx(tcg_env);
     /* Otherwise the same as any other movq.  */
-    return gen_MOVQ(s, env, decode);
+    return gen_MOVQ(s, decode);
 }
 
-static void gen_MOVS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_MOVS(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[2].ot;
     if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
@@ -2162,7 +2372,7 @@
     }
 }
 
-static void gen_MUL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_MUL(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[1].ot;
 
@@ -2213,7 +2423,7 @@
     decode->cc_op = CC_OP_MULB + ot;
 }
 
-static void gen_MULX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_MULX(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
 
@@ -2239,7 +2449,7 @@
     }
 }
 
-static void gen_NEG(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_NEG(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
     TCGv oldv = tcg_temp_new();
@@ -2266,7 +2476,7 @@
     decode->cc_op = CC_OP_SUBB + ot;
 }
 
-static void gen_NOT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_NOT(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
 
@@ -2279,7 +2489,7 @@
     }
 }
 
-static void gen_OR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_OR(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[1].ot;
 
@@ -2292,7 +2502,7 @@
     prepare_update1_cc(decode, s, CC_OP_LOGICB + ot);
 }
 
-static void gen_OUT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_OUT(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[1].ot;
     TCGv_i32 port = tcg_temp_new_i32();
@@ -2309,7 +2519,7 @@
     gen_bpt_io(s, port, ot);
 }
 
-static void gen_OUTS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_OUTS(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[1].ot;
     TCGv_i32 port = tcg_temp_new_i32();
@@ -2328,7 +2538,7 @@
     }
 }
 
-static void gen_PALIGNR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PALIGNR(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
     if (!(s->prefix & PREFIX_DATA)) {
@@ -2340,7 +2550,7 @@
     }
 }
 
-static void gen_PANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PANDN(DisasContext *s, X86DecodedInsn *decode)
 {
     int vec_len = vector_len(s, decode);
 
@@ -2350,7 +2560,7 @@
                       decode->op[1].offset, vec_len, vec_len);
 }
 
-static void gen_PAUSE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PAUSE(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_update_cc_op(s);
     gen_update_eip_next(s);
@@ -2358,14 +2568,14 @@
     s->base.is_jmp = DISAS_NORETURN;
 }
 
-static void gen_PCMPESTRI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PCMPESTRI(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
     gen_helper_pcmpestri_xmm(tcg_env, OP_PTR1, OP_PTR2, imm);
     assume_cc_op(s, CC_OP_EFLAGS);
 }
 
-static void gen_PCMPESTRM(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PCMPESTRM(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
     gen_helper_pcmpestrm_xmm(tcg_env, OP_PTR1, OP_PTR2, imm);
@@ -2376,14 +2586,14 @@
     }
 }
 
-static void gen_PCMPISTRI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PCMPISTRI(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
     gen_helper_pcmpistri_xmm(tcg_env, OP_PTR1, OP_PTR2, imm);
     assume_cc_op(s, CC_OP_EFLAGS);
 }
 
-static void gen_PCMPISTRM(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PCMPISTRM(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
     gen_helper_pcmpistrm_xmm(tcg_env, OP_PTR1, OP_PTR2, imm);
@@ -2394,17 +2604,17 @@
     }
 }
 
-static void gen_PDEP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PDEP(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_helper_pdep(s->T0, s->T0, s->T1);
 }
 
-static void gen_PEXT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PEXT(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_helper_pext(s->T0, s->T0, s->T1);
 }
 
-static inline void gen_pextr(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, MemOp ot)
+static inline void gen_pextr(DisasContext *s, X86DecodedInsn *decode, MemOp ot)
 {
     int vec_len = vector_len(s, decode);
     int mask = (vec_len >> ot) - 1;
@@ -2430,23 +2640,23 @@
     }
 }
 
-static void gen_PEXTRB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PEXTRB(DisasContext *s, X86DecodedInsn *decode)
 {
-    gen_pextr(s, env, decode, MO_8);
+    gen_pextr(s, decode, MO_8);
 }
 
-static void gen_PEXTRW(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PEXTRW(DisasContext *s, X86DecodedInsn *decode)
 {
-    gen_pextr(s, env, decode, MO_16);
+    gen_pextr(s, decode, MO_16);
 }
 
-static void gen_PEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PEXTR(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
-    gen_pextr(s, env, decode, ot);
+    gen_pextr(s, decode, ot);
 }
 
-static inline void gen_pinsr(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, MemOp ot)
+static inline void gen_pinsr(DisasContext *s, X86DecodedInsn *decode, MemOp ot)
 {
     int vec_len = vector_len(s, decode);
     int mask = (vec_len >> ot) - 1;
@@ -2477,19 +2687,19 @@
     }
 }
 
-static void gen_PINSRB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PINSRB(DisasContext *s, X86DecodedInsn *decode)
 {
-    gen_pinsr(s, env, decode, MO_8);
+    gen_pinsr(s, decode, MO_8);
 }
 
-static void gen_PINSRW(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PINSRW(DisasContext *s, X86DecodedInsn *decode)
 {
-    gen_pinsr(s, env, decode, MO_16);
+    gen_pinsr(s, decode, MO_16);
 }
 
-static void gen_PINSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PINSR(DisasContext *s, X86DecodedInsn *decode)
 {
-    gen_pinsr(s, env, decode, decode->op[2].ot);
+    gen_pinsr(s, decode, decode->op[2].ot);
 }
 
 static void gen_pmovmskb_i64(TCGv_i64 d, TCGv_i64 s)
@@ -2529,7 +2739,7 @@
     tcg_gen_or_vec(vece, d, d, t);
 }
 
-static void gen_PMOVMSKB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PMOVMSKB(DisasContext *s, X86DecodedInsn *decode)
 {
     static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
     static const GVecGen2 g = {
@@ -2573,7 +2783,7 @@
     }
 }
 
-static void gen_POP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_POP(DisasContext *s, X86DecodedInsn *decode)
 {
     X86DecodedOp *op = &decode->op[0];
     MemOp ot = gen_pop_T0(s);
@@ -2587,12 +2797,21 @@
     gen_pop_update(s, ot);
 }
 
-static void gen_POPA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_POPA(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_popa(s);
 }
 
-static void gen_POPF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_POPCNT(DisasContext *s, X86DecodedInsn *decode)
+{
+    decode->cc_src = tcg_temp_new();
+    decode->cc_op = CC_OP_POPCNT;
+
+    tcg_gen_mov_tl(decode->cc_src, s->T0);
+    tcg_gen_ctpop_tl(s->T0, s->T0);
+}
+
+static void gen_POPF(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot;
     int mask = TF_MASK | AC_MASK | ID_MASK | NT_MASK;
@@ -2614,13 +2833,13 @@
     s->base.is_jmp = DISAS_EOB_NEXT;
 }
 
-static void gen_PSHUFW(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PSHUFW(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
     gen_helper_pshufw_mmx(OP_PTR0, OP_PTR1, imm);
 }
 
-static void gen_PSRLW_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PSRLW_i(DisasContext *s, X86DecodedInsn *decode)
 {
     int vec_len = vector_len(s, decode);
 
@@ -2633,7 +2852,7 @@
     }
 }
 
-static void gen_PSLLW_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PSLLW_i(DisasContext *s, X86DecodedInsn *decode)
 {
     int vec_len = vector_len(s, decode);
 
@@ -2646,7 +2865,7 @@
     }
 }
 
-static void gen_PSRAW_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PSRAW_i(DisasContext *s, X86DecodedInsn *decode)
 {
     int vec_len = vector_len(s, decode);
 
@@ -2658,7 +2877,7 @@
                       decode->immediate, vec_len, vec_len);
 }
 
-static void gen_PSRLD_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PSRLD_i(DisasContext *s, X86DecodedInsn *decode)
 {
     int vec_len = vector_len(s, decode);
 
@@ -2671,7 +2890,7 @@
     }
 }
 
-static void gen_PSLLD_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PSLLD_i(DisasContext *s, X86DecodedInsn *decode)
 {
     int vec_len = vector_len(s, decode);
 
@@ -2684,7 +2903,7 @@
     }
 }
 
-static void gen_PSRAD_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PSRAD_i(DisasContext *s, X86DecodedInsn *decode)
 {
     int vec_len = vector_len(s, decode);
 
@@ -2696,7 +2915,7 @@
                       decode->immediate, vec_len, vec_len);
 }
 
-static void gen_PSRLQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PSRLQ_i(DisasContext *s, X86DecodedInsn *decode)
 {
     int vec_len = vector_len(s, decode);
 
@@ -2709,7 +2928,7 @@
     }
 }
 
-static void gen_PSLLQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PSLLQ_i(DisasContext *s, X86DecodedInsn *decode)
 {
     int vec_len = vector_len(s, decode);
 
@@ -2736,7 +2955,7 @@
     return ptr;
 }
 
-static void gen_PSRLDQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PSRLDQ_i(DisasContext *s, X86DecodedInsn *decode)
 {
     int vec_len = vector_len(s, decode);
     TCGv_ptr imm_vec = make_imm8u_xmm_vec(decode->immediate, vec_len);
@@ -2748,7 +2967,7 @@
     }
 }
 
-static void gen_PSLLDQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PSLLDQ_i(DisasContext *s, X86DecodedInsn *decode)
 {
     int vec_len = vector_len(s, decode);
     TCGv_ptr imm_vec = make_imm8u_xmm_vec(decode->immediate, vec_len);
@@ -2760,17 +2979,17 @@
     }
 }
 
-static void gen_PUSH(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PUSH(DisasContext *s, X86DecodedInsn *decode)
 {
-    gen_push_v(s, s->T1);
+    gen_push_v(s, s->T0);
 }
 
-static void gen_PUSHA(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PUSHA(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_pusha(s);
 }
 
-static void gen_PUSHF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_PUSHF(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_update_cc_op(s);
     gen_helper_read_eflags(s->T0, tcg_env);
@@ -2778,16 +2997,16 @@
 }
 
 static MemOp gen_shift_count(DisasContext *s, X86DecodedInsn *decode,
-                             bool *can_be_zero, TCGv *count)
+                             bool *can_be_zero, TCGv *count, int unit)
 {
     MemOp ot = decode->op[0].ot;
     int mask = (ot <= MO_32 ? 0x1f : 0x3f);
 
     *can_be_zero = false;
-    switch (decode->op[2].unit) {
+    switch (unit) {
     case X86_OP_INT:
         *count = tcg_temp_new();
-        tcg_gen_andi_tl(*count, s->T1, mask);
+        tcg_gen_andi_tl(*count, cpu_regs[R_ECX], mask);
         *can_be_zero = true;
         break;
 
@@ -2967,12 +3186,12 @@
  * length - count, because (length-1) - (count-1) can be computed with
  * a XOR, and that is commutative unlike subtraction.
  */
-static void gen_RCL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_RCL(DisasContext *s, X86DecodedInsn *decode)
 {
     bool have_1bit_cin, can_be_zero;
     TCGv count;
     TCGLabel *zero_label = NULL;
-    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count);
+    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count, decode->op[2].unit);
     TCGv low, high, low_count;
 
     if (!count) {
@@ -3019,12 +3238,12 @@
     }
 }
 
-static void gen_RCR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_RCR(DisasContext *s, X86DecodedInsn *decode)
 {
     bool have_1bit_cin, can_be_zero;
     TCGv count;
     TCGLabel *zero_label = NULL;
-    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count);
+    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count, decode->op[2].unit);
     TCGv low, high, high_count;
 
     if (!count) {
@@ -3072,9 +3291,53 @@
     }
 }
 
-static void gen_RET(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+#ifdef CONFIG_USER_ONLY
+static void gen_unreachable(DisasContext *s, X86DecodedInsn *decode)
 {
-    int16_t adjust = decode->e.op2 == X86_TYPE_I ? decode->immediate : 0;
+    g_assert_not_reached();
+}
+#endif
+
+#ifndef CONFIG_USER_ONLY
+static void gen_RDMSR(DisasContext *s, X86DecodedInsn *decode)
+{
+    gen_update_cc_op(s);
+    gen_update_eip_cur(s);
+    gen_helper_rdmsr(tcg_env);
+}
+#else
+#define gen_RDMSR gen_unreachable
+#endif
+
+static void gen_RDPMC(DisasContext *s, X86DecodedInsn *decode)
+{
+    gen_update_cc_op(s);
+    gen_update_eip_cur(s);
+    translator_io_start(&s->base);
+    gen_helper_rdpmc(tcg_env);
+    s->base.is_jmp = DISAS_NORETURN;
+}
+
+static void gen_RDTSC(DisasContext *s, X86DecodedInsn *decode)
+{
+    gen_update_cc_op(s);
+    gen_update_eip_cur(s);
+    translator_io_start(&s->base);
+    gen_helper_rdtsc(tcg_env);
+}
+
+static void gen_RDxxBASE(DisasContext *s, X86DecodedInsn *decode)
+{
+    TCGv base = cpu_seg_base[s->modrm & 8 ? R_GS : R_FS];
+
+    /* Preserve hflags bits by testing CR4 at runtime.  */
+    gen_helper_cr4_testbit(tcg_env, tcg_constant_i32(CR4_FSGSBASE_MASK));
+    tcg_gen_mov_tl(s->T0, base);
+}
+
+static void gen_RET(DisasContext *s, X86DecodedInsn *decode)
+{
+    int16_t adjust = decode->e.op1 == X86_TYPE_I ? decode->immediate : 0;
 
     MemOp ot = gen_pop_T0(s);
     gen_stack_update(s, adjust + (1 << ot));
@@ -3083,9 +3346,9 @@
     s->base.is_jmp = DISAS_JUMP;
 }
 
-static void gen_RETF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_RETF(DisasContext *s, X86DecodedInsn *decode)
 {
-    int16_t adjust = decode->e.op2 == X86_TYPE_I ? decode->immediate : 0;
+    int16_t adjust = decode->e.op1 == X86_TYPE_I ? decode->immediate : 0;
 
     if (!PE(s) || VM86(s)) {
         gen_lea_ss_ofs(s, s->A0, cpu_regs[R_ESP], 0);
@@ -3154,11 +3417,11 @@
     }
 }
 
-static void gen_ROL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_ROL(DisasContext *s, X86DecodedInsn *decode)
 {
     bool can_be_zero;
     TCGv count;
-    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count);
+    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count, decode->op[2].unit);
     TCGv_i32 temp32, count32;
     TCGv old = tcg_temp_new();
 
@@ -3182,11 +3445,11 @@
     gen_rot_overflow(decode, s->T0, old, can_be_zero, count);
 }
 
-static void gen_ROR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_ROR(DisasContext *s, X86DecodedInsn *decode)
 {
     bool can_be_zero;
     TCGv count;
-    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count);
+    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count, decode->op[2].unit);
     TCGv_i32 temp32, count32;
     TCGv old = tcg_temp_new();
 
@@ -3211,7 +3474,7 @@
     gen_rot_overflow(decode, s->T0, old, can_be_zero, count);
 }
 
-static void gen_RORX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_RORX(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
     int mask = ot == MO_64 ? 63 : 31;
@@ -3235,7 +3498,18 @@
     }
 }
 
-static void gen_SAHF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+#ifndef CONFIG_USER_ONLY
+static void gen_RSM(DisasContext *s, X86DecodedInsn *decode)
+{
+    gen_helper_rsm(tcg_env);
+    assume_cc_op(s, CC_OP_EFLAGS);
+    s->base.is_jmp = DISAS_EOB_ONLY;
+}
+#else
+#define gen_RSM gen_UD
+#endif
+
+static void gen_SAHF(DisasContext *s, X86DecodedInsn *decode)
 {
     if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM)) {
         return gen_illegal_opcode(s);
@@ -3247,7 +3521,7 @@
     tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, s->T0);
 }
 
-static void gen_SALC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SALC(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_compute_eflags_c(s, s->T0);
     tcg_gen_neg_tl(s->T0, s->T0);
@@ -3283,11 +3557,11 @@
                         old_cc_op, tcg_constant_i32(cc_op));
 }
 
-static void gen_SAR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SAR(DisasContext *s, X86DecodedInsn *decode)
 {
     bool can_be_zero;
     TCGv count;
-    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count);
+    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count, decode->op[2].unit);
 
     if (!count) {
         return;
@@ -3305,7 +3579,7 @@
     }
 }
 
-static void gen_SARX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SARX(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
     int mask;
@@ -3315,7 +3589,7 @@
     tcg_gen_sar_tl(s->T0, s->T0, s->T1);
 }
 
-static void gen_SBB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SBB(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
     TCGv c_in = tcg_temp_new();
@@ -3337,7 +3611,7 @@
     prepare_update3_cc(decode, s, CC_OP_SBBB + ot, c_in);
 }
 
-static void gen_SCAS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SCAS(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[2].ot;
     if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
@@ -3347,27 +3621,32 @@
     }
 }
 
-static void gen_SETcc(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SETcc(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_setcc1(s, decode->b & 0xf, s->T0);
 }
 
-static void gen_SHA1NEXTE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SFENCE(DisasContext *s, X86DecodedInsn *decode)
+{
+    tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC);
+}
+
+static void gen_SHA1NEXTE(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_helper_sha1nexte(OP_PTR0, OP_PTR1, OP_PTR2);
 }
 
-static void gen_SHA1MSG1(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SHA1MSG1(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_helper_sha1msg1(OP_PTR0, OP_PTR1, OP_PTR2);
 }
 
-static void gen_SHA1MSG2(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SHA1MSG2(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_helper_sha1msg2(OP_PTR0, OP_PTR1, OP_PTR2);
 }
 
-static void gen_SHA1RNDS4(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SHA1RNDS4(DisasContext *s, X86DecodedInsn *decode)
 {
     switch(decode->immediate & 3) {
     case 0:
@@ -3385,17 +3664,17 @@
     }
 }
 
-static void gen_SHA256MSG1(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SHA256MSG1(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_helper_sha256msg1(OP_PTR0, OP_PTR1, OP_PTR2);
 }
 
-static void gen_SHA256MSG2(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SHA256MSG2(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_helper_sha256msg2(OP_PTR0, OP_PTR1, OP_PTR2);
 }
 
-static void gen_SHA256RNDS2(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SHA256RNDS2(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGv_i32 wk0 = tcg_temp_new_i32();
     TCGv_i32 wk1 = tcg_temp_new_i32();
@@ -3406,11 +3685,11 @@
     gen_helper_sha256rnds2(OP_PTR0, OP_PTR1, OP_PTR2, wk0, wk1);
 }
 
-static void gen_SHL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SHL(DisasContext *s, X86DecodedInsn *decode)
 {
     bool can_be_zero;
     TCGv count;
-    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count);
+    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count, decode->op[2].unit);
 
     if (!count) {
         return;
@@ -3428,7 +3707,28 @@
     }
 }
 
-static void gen_SHLX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SHLD(DisasContext *s, X86DecodedInsn *decode)
+{
+    bool can_be_zero;
+    TCGv count;
+    int unit = decode->e.op3 == X86_TYPE_I ? X86_OP_IMM : X86_OP_INT;
+    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count, unit);
+
+    if (!count) {
+        return;
+    }
+
+    decode->cc_dst = s->T0;
+    decode->cc_src = s->tmp0;
+    gen_shiftd_rm_T1(s, ot, false, count);
+    if (can_be_zero) {
+        gen_shift_dynamic_flags(s, decode, count, CC_OP_SHLB + ot);
+    } else {
+        decode->cc_op = CC_OP_SHLB + ot;
+    }
+}
+
+static void gen_SHLX(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
     int mask;
@@ -3438,11 +3738,11 @@
     tcg_gen_shl_tl(s->T0, s->T0, s->T1);
 }
 
-static void gen_SHR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SHR(DisasContext *s, X86DecodedInsn *decode)
 {
     bool can_be_zero;
     TCGv count;
-    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count);
+    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count, decode->op[2].unit);
 
     if (!count) {
         return;
@@ -3460,7 +3760,28 @@
     }
 }
 
-static void gen_SHRX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SHRD(DisasContext *s, X86DecodedInsn *decode)
+{
+    bool can_be_zero;
+    TCGv count;
+    int unit = decode->e.op3 == X86_TYPE_I ? X86_OP_IMM : X86_OP_INT;
+    MemOp ot = gen_shift_count(s, decode, &can_be_zero, &count, unit);
+
+    if (!count) {
+        return;
+    }
+
+    decode->cc_dst = s->T0;
+    decode->cc_src = s->tmp0;
+    gen_shiftd_rm_T1(s, ot, true, count);
+    if (can_be_zero) {
+        gen_shift_dynamic_flags(s, decode, count, CC_OP_SARB + ot);
+    } else {
+        decode->cc_op = CC_OP_SARB + ot;
+    }
+}
+
+static void gen_SHRX(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[0].ot;
     int mask;
@@ -3470,37 +3791,37 @@
     tcg_gen_shr_tl(s->T0, s->T0, s->T1);
 }
 
-static void gen_STC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_STC(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_compute_eflags(s);
     tcg_gen_ori_tl(cpu_cc_src, cpu_cc_src, CC_C);
 }
 
-static void gen_STD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_STD(DisasContext *s, X86DecodedInsn *decode)
 {
     tcg_gen_st_i32(tcg_constant_i32(-1), tcg_env, offsetof(CPUX86State, df));
 }
 
-static void gen_STI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_STI(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_set_eflags(s, IF_MASK);
     s->base.is_jmp = DISAS_EOB_INHIBIT_IRQ;
 }
 
-static void gen_VAESKEYGEN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VAESKEYGEN(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
     assert(!s->vex_l);
     gen_helper_aeskeygenassist_xmm(tcg_env, OP_PTR0, OP_PTR1, imm);
 }
 
-static void gen_STMXCSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_STMXCSR(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_helper_update_mxcsr(tcg_env);
     tcg_gen_ld32u_tl(s->T0, tcg_env, offsetof(CPUX86State, mxcsr));
 }
 
-static void gen_STOS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_STOS(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[1].ot;
     if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
@@ -3510,7 +3831,7 @@
     }
 }
 
-static void gen_SUB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SUB(DisasContext *s, X86DecodedInsn *decode)
 {
     MemOp ot = decode->op[1].ot;
 
@@ -3526,12 +3847,71 @@
     prepare_update2_cc(decode, s, CC_OP_SUBB + ot);
 }
 
-static void gen_UD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_SYSCALL(DisasContext *s, X86DecodedInsn *decode)
+{
+    gen_update_cc_op(s);
+    gen_update_eip_cur(s);
+    gen_helper_syscall(tcg_env, cur_insn_len_i32(s));
+    if (LMA(s)) {
+        assume_cc_op(s, CC_OP_EFLAGS);
+    }
+
+    /*
+     * TF handling for the syscall insn is different. The TF bit is checked
+     * after the syscall insn completes. This allows #DB to not be
+     * generated after one has entered CPL0 if TF is set in FMASK.
+     */
+    s->base.is_jmp = DISAS_EOB_RECHECK_TF;
+}
+
+static void gen_SYSENTER(DisasContext *s, X86DecodedInsn *decode)
+{
+    gen_helper_sysenter(tcg_env);
+    s->base.is_jmp = DISAS_EOB_ONLY;
+}
+
+static void gen_SYSEXIT(DisasContext *s, X86DecodedInsn *decode)
+{
+    gen_helper_sysexit(tcg_env, tcg_constant_i32(s->dflag - 1));
+    s->base.is_jmp = DISAS_EOB_ONLY;
+}
+
+static void gen_SYSRET(DisasContext *s, X86DecodedInsn *decode)
+{
+    gen_helper_sysret(tcg_env, tcg_constant_i32(s->dflag - 1));
+    if (LMA(s)) {
+        assume_cc_op(s, CC_OP_EFLAGS);
+    }
+
+    /*
+     * TF handling for the sysret insn is different. The TF bit is checked
+     * after the sysret insn completes. This allows #DB to be
+     * generated "as if" the syscall insn in userspace has just
+     * completed.
+     */
+    s->base.is_jmp = DISAS_EOB_RECHECK_TF;
+}
+
+static void gen_TZCNT(DisasContext *s, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[0].ot;
+
+    /* The C bit (cc_src) is defined relative to the input.  */
+    decode->cc_src = tcg_temp_new();
+    decode->cc_dst = s->T0;
+    decode->cc_op = CC_OP_BMILGB + ot;
+    tcg_gen_mov_tl(decode->cc_src, s->T0);
+
+    /* A zero input returns the operand size.  */
+    tcg_gen_ctzi_tl(s->T0, s->T0, 8 << ot);
+}
+
+static void gen_UD(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_illegal_opcode(s);
 }
 
-static void gen_VAESIMC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VAESIMC(DisasContext *s, X86DecodedInsn *decode)
 {
     assert(!s->vex_l);
     gen_helper_aesimc_xmm(tcg_env, OP_PTR0, OP_PTR2);
@@ -3586,7 +3966,7 @@
 };
 #undef SSE_CMP
 
-static void gen_VCMP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VCMP(DisasContext *s, X86DecodedInsn *decode)
 {
     int index = decode->immediate & (s->prefix & PREFIX_VEX ? 31 : 7);
     int b =
@@ -3597,7 +3977,7 @@
     gen_helper_cmp_funcs[index][b](tcg_env, OP_PTR0, OP_PTR1, OP_PTR2);
 }
 
-static void gen_VCOMI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VCOMI(DisasContext *s, X86DecodedInsn *decode)
 {
     SSEFunc_0_epp fn;
     fn = s->prefix & PREFIX_DATA ? gen_helper_comisd : gen_helper_comiss;
@@ -3605,7 +3985,7 @@
     assume_cc_op(s, CC_OP_EFLAGS);
 }
 
-static void gen_VCVTPD2PS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VCVTPD2PS(DisasContext *s, X86DecodedInsn *decode)
 {
     if (s->vex_l) {
         gen_helper_cvtpd2ps_ymm(tcg_env, OP_PTR0, OP_PTR2);
@@ -3614,7 +3994,7 @@
     }
 }
 
-static void gen_VCVTPS2PD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VCVTPS2PD(DisasContext *s, X86DecodedInsn *decode)
 {
     if (s->vex_l) {
         gen_helper_cvtps2pd_ymm(tcg_env, OP_PTR0, OP_PTR2);
@@ -3623,9 +4003,9 @@
     }
 }
 
-static void gen_VCVTPS2PH(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VCVTPS2PH(DisasContext *s, X86DecodedInsn *decode)
 {
-    gen_unary_imm_fp_sse(s, env, decode,
+    gen_unary_imm_fp_sse(s, decode,
                       gen_helper_cvtps2ph_xmm,
                       gen_helper_cvtps2ph_ymm);
     /*
@@ -3637,17 +4017,17 @@
     }
 }
 
-static void gen_VCVTSD2SS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VCVTSD2SS(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_helper_cvtsd2ss(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2);
 }
 
-static void gen_VCVTSS2SD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VCVTSS2SD(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_helper_cvtss2sd(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2);
 }
 
-static void gen_VCVTSI2Sx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VCVTSI2Sx(DisasContext *s, X86DecodedInsn *decode)
 {
     int vec_len = vector_len(s, decode);
     TCGv_i32 in;
@@ -3677,7 +4057,7 @@
     }
 }
 
-static inline void gen_VCVTtSx2SI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
+static inline void gen_VCVTtSx2SI(DisasContext *s, X86DecodedInsn *decode,
                                   SSEFunc_i_ep ss2si, SSEFunc_l_ep ss2sq,
                                   SSEFunc_i_ep sd2si, SSEFunc_l_ep sd2sq)
 {
@@ -3715,21 +4095,21 @@
 #define gen_helper_cvttsd2sq NULL
 #endif
 
-static void gen_VCVTSx2SI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VCVTSx2SI(DisasContext *s, X86DecodedInsn *decode)
 {
-    gen_VCVTtSx2SI(s, env, decode,
+    gen_VCVTtSx2SI(s, decode,
                    gen_helper_cvtss2si, gen_helper_cvtss2sq,
                    gen_helper_cvtsd2si, gen_helper_cvtsd2sq);
 }
 
-static void gen_VCVTTSx2SI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VCVTTSx2SI(DisasContext *s, X86DecodedInsn *decode)
 {
-    gen_VCVTtSx2SI(s, env, decode,
+    gen_VCVTtSx2SI(s, decode,
                    gen_helper_cvttss2si, gen_helper_cvttss2sq,
                    gen_helper_cvttsd2si, gen_helper_cvttsd2sq);
 }
 
-static void gen_VEXTRACTx128(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VEXTRACTx128(DisasContext *s, X86DecodedInsn *decode)
 {
     int mask = decode->immediate & 1;
     int src_ofs = vector_elem_offset(&decode->op[1], MO_128, mask);
@@ -3741,12 +4121,12 @@
     }
 }
 
-static void gen_VEXTRACTPS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VEXTRACTPS(DisasContext *s, X86DecodedInsn *decode)
 {
-    gen_pextr(s, env, decode, MO_32);
+    gen_pextr(s, decode, MO_32);
 }
 
-static void gen_vinsertps(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_vinsertps(DisasContext *s, X86DecodedInsn *decode)
 {
     int val = decode->immediate;
     int dest_word = (val >> 4) & 3;
@@ -3779,21 +4159,21 @@
     }
 }
 
-static void gen_VINSERTPS_r(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VINSERTPS_r(DisasContext *s, X86DecodedInsn *decode)
 {
     int val = decode->immediate;
     tcg_gen_ld_i32(s->tmp2_i32, tcg_env,
                    vector_elem_offset(&decode->op[2], MO_32, (val >> 6) & 3));
-    gen_vinsertps(s, env, decode);
+    gen_vinsertps(s, decode);
 }
 
-static void gen_VINSERTPS_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VINSERTPS_m(DisasContext *s, X86DecodedInsn *decode)
 {
     tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
-    gen_vinsertps(s, env, decode);
+    gen_vinsertps(s, decode);
 }
 
-static void gen_VINSERTx128(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VINSERTx128(DisasContext *s, X86DecodedInsn *decode)
 {
     int mask = decode->immediate & 1;
     tcg_gen_gvec_mov(MO_64,
@@ -3804,7 +4184,7 @@
                      decode->op[1].offset + offsetof(YMMReg, YMM_X(!mask)), 16, 16);
 }
 
-static inline void gen_maskmov(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode,
+static inline void gen_maskmov(DisasContext *s, X86DecodedInsn *decode,
                                SSEFunc_0_eppt xmm, SSEFunc_0_eppt ymm)
 {
     if (!s->vex_l) {
@@ -3814,17 +4194,17 @@
     }
 }
 
-static void gen_VMASKMOVPD_st(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VMASKMOVPD_st(DisasContext *s, X86DecodedInsn *decode)
 {
-    gen_maskmov(s, env, decode, gen_helper_vpmaskmovq_st_xmm, gen_helper_vpmaskmovq_st_ymm);
+    gen_maskmov(s, decode, gen_helper_vpmaskmovq_st_xmm, gen_helper_vpmaskmovq_st_ymm);
 }
 
-static void gen_VMASKMOVPS_st(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VMASKMOVPS_st(DisasContext *s, X86DecodedInsn *decode)
 {
-    gen_maskmov(s, env, decode, gen_helper_vpmaskmovd_st_xmm, gen_helper_vpmaskmovd_st_ymm);
+    gen_maskmov(s, decode, gen_helper_vpmaskmovd_st_xmm, gen_helper_vpmaskmovd_st_ymm);
 }
 
-static void gen_VMOVHPx_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VMOVHPx_ld(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_ldq_env_A0(s, decode->op[0].offset + offsetof(XMMReg, XMM_Q(1)));
     if (decode->op[0].offset != decode->op[1].offset) {
@@ -3833,12 +4213,12 @@
     }
 }
 
-static void gen_VMOVHPx_st(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VMOVHPx_st(DisasContext *s, X86DecodedInsn *decode)
 {
     gen_stq_env_A0(s, decode->op[2].offset + offsetof(XMMReg, XMM_Q(1)));
 }
 
-static void gen_VMOVHPx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VMOVHPx(DisasContext *s, X86DecodedInsn *decode)
 {
     if (decode->op[0].offset != decode->op[2].offset) {
         tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[2].offset + offsetof(XMMReg, XMM_Q(1)));
@@ -3850,7 +4230,7 @@
     }
 }
 
-static void gen_VMOVHLPS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VMOVHLPS(DisasContext *s, X86DecodedInsn *decode)
 {
     tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[2].offset + offsetof(XMMReg, XMM_Q(1)));
     tcg_gen_st_i64(s->tmp1_i64, tcg_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(0)));
@@ -3860,7 +4240,7 @@
     }
 }
 
-static void gen_VMOVLHPS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VMOVLHPS(DisasContext *s, X86DecodedInsn *decode)
 {
     tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[2].offset);
     tcg_gen_st_i64(s->tmp1_i64, tcg_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(1)));
@@ -3875,7 +4255,7 @@
  * Use a gvec move to move everything above the bottom 64 bits.
  */
 
-static void gen_VMOVLPx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VMOVLPx(DisasContext *s, X86DecodedInsn *decode)
 {
     int vec_len = vector_len(s, decode);
 
@@ -3884,7 +4264,7 @@
     tcg_gen_st_i64(s->tmp1_i64, tcg_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(0)));
 }
 
-static void gen_VMOVLPx_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VMOVLPx_ld(DisasContext *s, X86DecodedInsn *decode)
 {
     int vec_len = vector_len(s, decode);
 
@@ -3893,13 +4273,13 @@
     tcg_gen_st_i64(s->tmp1_i64, OP_PTR0, offsetof(ZMMReg, ZMM_Q(0)));
 }
 
-static void gen_VMOVLPx_st(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VMOVLPx_st(DisasContext *s, X86DecodedInsn *decode)
 {
     tcg_gen_ld_i64(s->tmp1_i64, OP_PTR2, offsetof(ZMMReg, ZMM_Q(0)));
     tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEUQ);
 }
 
-static void gen_VMOVSD_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VMOVSD_ld(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGv_i64 zero = tcg_constant_i64(0);
 
@@ -3908,7 +4288,7 @@
     tcg_gen_st_i64(s->tmp1_i64, OP_PTR0, offsetof(ZMMReg, ZMM_Q(0)));
 }
 
-static void gen_VMOVSS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VMOVSS(DisasContext *s, X86DecodedInsn *decode)
 {
     int vec_len = vector_len(s, decode);
 
@@ -3917,7 +4297,7 @@
     tcg_gen_st_i32(s->tmp2_i32, OP_PTR0, offsetof(ZMMReg, ZMM_L(0)));
 }
 
-static void gen_VMOVSS_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VMOVSS_ld(DisasContext *s, X86DecodedInsn *decode)
 {
     int vec_len = vector_len(s, decode);
 
@@ -3926,55 +4306,55 @@
     tcg_gen_st_i32(s->tmp2_i32, OP_PTR0, offsetof(ZMMReg, ZMM_L(0)));
 }
 
-static void gen_VMOVSS_st(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VMOVSS_st(DisasContext *s, X86DecodedInsn *decode)
 {
     tcg_gen_ld_i32(s->tmp2_i32, OP_PTR2, offsetof(ZMMReg, ZMM_L(0)));
     tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
 }
 
-static void gen_VPMASKMOV_st(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VPMASKMOV_st(DisasContext *s, X86DecodedInsn *decode)
 {
     if (s->vex_w) {
-        gen_VMASKMOVPD_st(s, env, decode);
+        gen_VMASKMOVPD_st(s, decode);
     } else {
-        gen_VMASKMOVPS_st(s, env, decode);
+        gen_VMASKMOVPS_st(s, decode);
     }
 }
 
-static void gen_VPERMD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VPERMD(DisasContext *s, X86DecodedInsn *decode)
 {
     assert(s->vex_l);
     gen_helper_vpermd_ymm(OP_PTR0, OP_PTR1, OP_PTR2);
 }
 
-static void gen_VPERM2x128(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VPERM2x128(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
     assert(s->vex_l);
     gen_helper_vpermdq_ymm(OP_PTR0, OP_PTR1, OP_PTR2, imm);
 }
 
-static void gen_VPHMINPOSUW(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VPHMINPOSUW(DisasContext *s, X86DecodedInsn *decode)
 {
     assert(!s->vex_l);
     gen_helper_phminposuw_xmm(tcg_env, OP_PTR0, OP_PTR2);
 }
 
-static void gen_VROUNDSD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VROUNDSD(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
     assert(!s->vex_l);
     gen_helper_roundsd_xmm(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2, imm);
 }
 
-static void gen_VROUNDSS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VROUNDSS(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGv_i32 imm = tcg_constant8u_i32(decode->immediate);
     assert(!s->vex_l);
     gen_helper_roundss_xmm(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2, imm);
 }
 
-static void gen_VSHUF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VSHUF(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGv_i32 imm = tcg_constant_i32(decode->immediate);
     SSEFunc_0_pppi ps, pd, fn;
@@ -3984,7 +4364,7 @@
     fn(OP_PTR0, OP_PTR1, OP_PTR2, imm);
 }
 
-static void gen_VUCOMI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VUCOMI(DisasContext *s, X86DecodedInsn *decode)
 {
     SSEFunc_0_epp fn;
     fn = s->prefix & PREFIX_DATA ? gen_helper_ucomisd : gen_helper_ucomiss;
@@ -3992,7 +4372,7 @@
     assume_cc_op(s, CC_OP_EFLAGS);
 }
 
-static void gen_VZEROALL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VZEROALL(DisasContext *s, X86DecodedInsn *decode)
 {
     TCGv_ptr ptr = tcg_temp_new_ptr();
 
@@ -4001,7 +4381,7 @@
                       tcg_constant_ptr(CPU_NB_REGS * sizeof(ZMMReg)));
 }
 
-static void gen_VZEROUPPER(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_VZEROUPPER(DisasContext *s, X86DecodedInsn *decode)
 {
     int i;
 
@@ -4011,7 +4391,7 @@
     }
 }
 
-static void gen_WAIT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_WAIT(DisasContext *s, X86DecodedInsn *decode)
 {
     if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) == (HF_MP_MASK | HF_TS_MASK)) {
         gen_NM_exception(s);
@@ -4022,7 +4402,52 @@
     }
 }
 
-static void gen_XCHG(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+#ifndef CONFIG_USER_ONLY
+static void gen_WRMSR(DisasContext *s, X86DecodedInsn *decode)
+{
+    gen_update_cc_op(s);
+    gen_update_eip_cur(s);
+    gen_helper_wrmsr(tcg_env);
+    s->base.is_jmp = DISAS_EOB_NEXT;
+}
+#else
+#define gen_WRMSR gen_unreachable
+#endif
+
+static void gen_WRxxBASE(DisasContext *s, X86DecodedInsn *decode)
+{
+    TCGv base = cpu_seg_base[s->modrm & 8 ? R_GS : R_FS];
+
+    /* Preserve hflags bits by testing CR4 at runtime.  */
+    gen_helper_cr4_testbit(tcg_env, tcg_constant_i32(CR4_FSGSBASE_MASK));
+    tcg_gen_mov_tl(base, s->T0);
+}
+
+static void gen_XADD(DisasContext *s, X86DecodedInsn *decode)
+{
+    MemOp ot = decode->op[1].ot;
+
+    decode->cc_dst = tcg_temp_new();
+    decode->cc_src = s->T1;
+    decode->cc_op = CC_OP_ADDB + ot;
+
+    if (s->prefix & PREFIX_LOCK) {
+        tcg_gen_atomic_fetch_add_tl(s->T0, s->A0, s->T1, s->mem_index, ot | MO_LE);
+        tcg_gen_add_tl(decode->cc_dst, s->T0, s->T1);
+    } else {
+        tcg_gen_add_tl(decode->cc_dst, s->T0, s->T1);
+        /*
+         * NOTE: writing memory first is important for MMU exceptions,
+         * but "new result" wins for XADD AX, AX.
+         */
+        gen_writeback(s, decode, 0, decode->cc_dst);
+    }
+    if (decode->op[0].has_ea || decode->op[2].n != decode->op[0].n) {
+        gen_writeback(s, decode, 2, s->T0);
+    }
+}
+
+static void gen_XCHG(DisasContext *s, X86DecodedInsn *decode)
 {
     if (s->prefix & PREFIX_LOCK) {
         tcg_gen_atomic_xchg_tl(s->T0, s->A0, s->T1,
@@ -4036,7 +4461,7 @@
     }
 }
 
-static void gen_XLAT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_XLAT(DisasContext *s, X86DecodedInsn *decode)
 {
     /* AL is already zero-extended into s->T0.  */
     tcg_gen_add_tl(s->A0, cpu_regs[R_EBX], s->T0);
@@ -4044,7 +4469,7 @@
     gen_op_ld_v(s, MO_8, s->T0, s->A0);
 }
 
-static void gen_XOR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+static void gen_XOR(DisasContext *s, X86DecodedInsn *decode)
 {
     /* special case XOR reg, reg */
     if (decode->op[1].unit == X86_OP_INT &&
@@ -4064,3 +4489,34 @@
         prepare_update1_cc(decode, s, CC_OP_LOGICB + ot);
     }
 }
+
+static void gen_XRSTOR(DisasContext *s, X86DecodedInsn *decode)
+{
+    TCGv_i64 features = tcg_temp_new_i64();
+
+    tcg_gen_concat_tl_i64(features, cpu_regs[R_EAX], cpu_regs[R_EDX]);
+    gen_helper_xrstor(tcg_env, s->A0, features);
+    if (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_MPX) {
+        /*
+         * XRSTOR is how MPX is enabled, which changes how
+         * we translate.  Thus we need to end the TB.
+         */
+        s->base.is_jmp = DISAS_EOB_NEXT;
+    }
+}
+
+static void gen_XSAVE(DisasContext *s, X86DecodedInsn *decode)
+{
+    TCGv_i64 features = tcg_temp_new_i64();
+
+    tcg_gen_concat_tl_i64(features, cpu_regs[R_EAX], cpu_regs[R_EDX]);
+    gen_helper_xsave(tcg_env, s->A0, features);
+}
+
+static void gen_XSAVEOPT(DisasContext *s, X86DecodedInsn *decode)
+{
+    TCGv_i64 features = tcg_temp_new_i64();
+
+    tcg_gen_concat_tl_i64(features, cpu_regs[R_EAX], cpu_regs[R_EDX]);
+    gen_helper_xsave(tcg_env, s->A0, features);
+}
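
The new gen_SHLD/gen_SHRD emitters above reuse gen_shiftd_rm_T1 and only fall back to dynamic flag computation when the masked shift count can be zero. As a rough reference for the double-shift semantics they implement, here is a minimal standalone C sketch (shld32/shrd32 are made-up helper names, not QEMU code): the 32-bit forms mask the count to 0-31, and a masked count of zero leaves both the destination and the flags untouched, which is exactly why the can_be_zero path exists.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical reference helpers, not part of QEMU. */
static uint32_t shld32(uint32_t dst, uint32_t src, unsigned count)
{
    count &= 31;
    if (count == 0) {
        return dst;                      /* count 0: no change, flags untouched */
    }
    return (dst << count) | (src >> (32 - count));
}

static uint32_t shrd32(uint32_t dst, uint32_t src, unsigned count)
{
    count &= 31;
    if (count == 0) {
        return dst;
    }
    return (dst >> count) | (src << (32 - count));
}

int main(void)
{
    printf("%08x\n", shld32(0x12345678, 0x9abcdef0, 8));  /* 3456789a */
    printf("%08x\n", shrd32(0x12345678, 0x9abcdef0, 8));  /* f0123456 */
    return 0;
}
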
diff --git a/target/i386/tcg/seg_helper.c b/target/i386/tcg/seg_helper.c
index 715db1f..aee3d19 100644
--- a/target/i386/tcg/seg_helper.c
+++ b/target/i386/tcg/seg_helper.c
@@ -2265,11 +2265,11 @@
 target_ulong helper_lsl(CPUX86State *env, target_ulong selector1)
 {
     unsigned int limit;
-    uint32_t e1, e2, eflags, selector;
+    uint32_t e1, e2, selector;
     int rpl, dpl, cpl, type;
 
     selector = selector1 & 0xffff;
-    eflags = cpu_cc_compute_all(env);
+    assert(CC_OP == CC_OP_EFLAGS);
     if ((selector & 0xfffc) == 0) {
         goto fail;
     }
@@ -2301,22 +2301,22 @@
         }
         if (dpl < cpl || dpl < rpl) {
         fail:
-            CC_SRC = eflags & ~CC_Z;
+            CC_SRC &= ~CC_Z;
             return 0;
         }
     }
     limit = get_seg_limit(e1, e2);
-    CC_SRC = eflags | CC_Z;
+    CC_SRC |= CC_Z;
     return limit;
 }
 
 target_ulong helper_lar(CPUX86State *env, target_ulong selector1)
 {
-    uint32_t e1, e2, eflags, selector;
+    uint32_t e1, e2, selector;
     int rpl, dpl, cpl, type;
 
     selector = selector1 & 0xffff;
-    eflags = cpu_cc_compute_all(env);
+    assert(CC_OP == CC_OP_EFLAGS);
     if ((selector & 0xfffc) == 0) {
         goto fail;
     }
@@ -2351,11 +2351,11 @@
         }
         if (dpl < cpl || dpl < rpl) {
         fail:
-            CC_SRC = eflags & ~CC_Z;
+            CC_SRC &= ~CC_Z;
             return 0;
         }
     }
-    CC_SRC = eflags | CC_Z;
+    CC_SRC |= CC_Z;
     return e2 & 0x00f0ff00;
 }
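
With the translator now guaranteed to enter these helpers in CC_OP_EFLAGS state (hence the new asserts), LAR/LSL only have to touch ZF: set it when the selector passes the access checks, clear it otherwise, and leave every other flag bit alone. A minimal standalone sketch of that flag contract follows (illustrative only; FLAG_ZF and lar_like are made-up names, not QEMU code).

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define FLAG_ZF 0x40u                    /* ZF bit position in EFLAGS */

/* Hypothetical model of how LAR/LSL report success, not QEMU code. */
static uint32_t lar_like(uint32_t eflags, bool descriptor_ok)
{
    if (descriptor_ok) {
        return eflags | FLAG_ZF;         /* success: ZF=1, rest preserved */
    }
    return eflags & ~FLAG_ZF;            /* failure: ZF=0, rest preserved */
}

int main(void)
{
    uint32_t eflags = 0x893;             /* arbitrary CF/AF/SF/OF pattern */
    printf("ok:   %#x\n", lar_like(eflags, true));   /* 0x8d3 */
    printf("fail: %#x\n", lar_like(eflags, false));  /* 0x893 */
    return 0;
}
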
 
diff --git a/target/i386/tcg/sysemu/misc_helper.c b/target/i386/tcg/sysemu/misc_helper.c
index 7fa0c5a..094aa56 100644
--- a/target/i386/tcg/sysemu/misc_helper.c
+++ b/target/i386/tcg/sysemu/misc_helper.c
@@ -63,23 +63,13 @@
                              cpu_get_mem_attrs(env), NULL);
 }
 
-target_ulong helper_read_crN(CPUX86State *env, int reg)
+target_ulong helper_read_cr8(CPUX86State *env)
 {
-    target_ulong val;
-
-    switch (reg) {
-    default:
-        val = env->cr[reg];
-        break;
-    case 8:
-        if (!(env->hflags2 & HF2_VINTR_MASK)) {
-            val = cpu_get_apic_tpr(env_archcpu(env)->apic_state);
-        } else {
-            val = env->int_ctl & V_TPR_MASK;
-        }
-        break;
+    if (!(env->hflags2 & HF2_VINTR_MASK)) {
+        return cpu_get_apic_tpr(env_archcpu(env)->apic_state);
+    } else {
+        return env->int_ctl & V_TPR_MASK;
     }
-    return val;
 }
 
 void helper_write_crN(CPUX86State *env, int reg, target_ulong t0)
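
helper_read_cr8 now covers only the one control register that needs special handling. Architecturally, a MOV from CR8 returns the task-priority class, i.e. APIC TPR bits 7:4, in bits 3:0 of the destination (or the virtual TPR when SVM virtual interrupts are active, as the helper shows). A standalone sketch of that mapping, with made-up helper names and no QEMU internals:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical helpers modelling the architectural CR8 <-> TPR mapping. */
static uint8_t cr8_from_tpr(uint8_t apic_tpr)
{
    return apic_tpr >> 4;                /* only the priority class is visible */
}

static uint8_t tpr_from_cr8(uint8_t cr8)
{
    return (uint8_t)((cr8 & 0xf) << 4);  /* writing CR8 clears the sub-class */
}

int main(void)
{
    printf("TPR 0x5f -> CR8 %u\n", cr8_from_tpr(0x5f));   /* 5 */
    printf("CR8 9    -> TPR 0x%02x\n", tpr_from_cr8(9));  /* 0x90 */
    return 0;
}
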
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index fcba9c1..ad18198 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -246,10 +246,6 @@
 STUB_HELPER(outb, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
 STUB_HELPER(outw, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
 STUB_HELPER(outl, TCGv_env env, TCGv_i32 port, TCGv_i32 val)
-STUB_HELPER(rdmsr, TCGv_env env)
-STUB_HELPER(read_crN, TCGv ret, TCGv_env env, TCGv_i32 reg)
-STUB_HELPER(get_dr, TCGv ret, TCGv_env env, TCGv_i32 reg)
-STUB_HELPER(set_dr, TCGv_env env, TCGv_i32 reg, TCGv val)
 STUB_HELPER(stgi, TCGv_env env)
 STUB_HELPER(svm_check_intercept, TCGv_env env, TCGv_i32 type)
 STUB_HELPER(vmload, TCGv_env env, TCGv_i32 aflag)
@@ -257,7 +253,6 @@
 STUB_HELPER(vmrun, TCGv_env env, TCGv_i32 aflag, TCGv_i32 pc_ofs)
 STUB_HELPER(vmsave, TCGv_env env, TCGv_i32 aflag)
 STUB_HELPER(write_crN, TCGv_env env, TCGv_i32 reg, TCGv val)
-STUB_HELPER(wrmsr, TCGv_env env)
 #endif
 
 static void gen_jmp_rel(DisasContext *s, MemOp ot, int diff, int tb_num);
@@ -439,13 +434,6 @@
     return CODE64(s) ? MO_64 : SS32(s) ? MO_32 : MO_16;
 }
 
-/* Select size 8 if lsb of B is clear, else OT.  Used for decoding
-   byte vs word opcodes.  */
-static inline MemOp mo_b_d(int b, MemOp ot)
-{
-    return b & 1 ? ot : MO_8;
-}
-
 /* Compute the result of writing t0 to the OT-sized register REG.
  *
  * If DEST is NULL, store the result into the register and return the
@@ -540,15 +528,6 @@
     tcg_gen_qemu_st_tl(t0, a0, s->mem_index, idx | MO_LE);
 }
 
-static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d)
-{
-    if (d == OR_TMP0) {
-        gen_op_st_v(s, idx, s->T0, s->A0);
-    } else {
-        gen_op_mov_reg_v(s, idx, d, s->T0);
-    }
-}
-
 static void gen_update_eip_next(DisasContext *s)
 {
     assert(s->pc_save != -1);
@@ -729,11 +708,6 @@
     return dst;
 }
 
-static void gen_extu(MemOp ot, TCGv reg)
-{
-    gen_ext_tl(reg, reg, ot, false);
-}
-
 static void gen_exts(MemOp ot, TCGv reg)
 {
     gen_ext_tl(reg, reg, ot, true);
@@ -837,17 +811,6 @@
     gen_op_add_reg(s, s->aflag, R_EDI, dshift);
 }
 
-static void gen_op_update1_cc(DisasContext *s)
-{
-    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
-}
-
-static void gen_op_update2_cc(DisasContext *s)
-{
-    tcg_gen_mov_tl(cpu_cc_src, s->T1);
-    tcg_gen_mov_tl(cpu_cc_dst, s->T0);
-}
-
 /* compute all eflags to reg */
 static void gen_mov_eflags(DisasContext *s, TCGv reg)
 {
@@ -1448,64 +1411,11 @@
     return false;
 }
 
-static void gen_shift_flags(DisasContext *s, MemOp ot, TCGv result,
-                            TCGv shm1, TCGv count, bool is_right)
-{
-    TCGv_i32 z32, s32, oldop;
-    TCGv z_tl;
-
-    /* Store the results into the CC variables.  If we know that the
-       variable must be dead, store unconditionally.  Otherwise we'll
-       need to not disrupt the current contents.  */
-    z_tl = tcg_constant_tl(0);
-    if (cc_op_live[s->cc_op] & USES_CC_DST) {
-        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl,
-                           result, cpu_cc_dst);
-    } else {
-        tcg_gen_mov_tl(cpu_cc_dst, result);
-    }
-    if (cc_op_live[s->cc_op] & USES_CC_SRC) {
-        tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_src, count, z_tl,
-                           shm1, cpu_cc_src);
-    } else {
-        tcg_gen_mov_tl(cpu_cc_src, shm1);
-    }
-
-    /* Get the two potential CC_OP values into temporaries.  */
-    tcg_gen_movi_i32(s->tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot);
-    if (s->cc_op == CC_OP_DYNAMIC) {
-        oldop = cpu_cc_op;
-    } else {
-        tcg_gen_movi_i32(s->tmp3_i32, s->cc_op);
-        oldop = s->tmp3_i32;
-    }
-
-    /* Conditionally store the CC_OP value.  */
-    z32 = tcg_constant_i32(0);
-    s32 = tcg_temp_new_i32();
-    tcg_gen_trunc_tl_i32(s32, count);
-    tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, s->tmp2_i32, oldop);
-
-    /* The CC_OP value is no longer predictable.  */
-    set_cc_op(s, CC_OP_DYNAMIC);
-}
-
 /* XXX: add faster immediate case */
-static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, int op1,
-                             bool is_right, TCGv count_in)
+static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot,
+                             bool is_right, TCGv count)
 {
     target_ulong mask = (ot == MO_64 ? 63 : 31);
-    TCGv count;
-
-    /* load */
-    if (op1 == OR_TMP0) {
-        gen_op_ld_v(s, ot, s->T0, s->A0);
-    } else {
-        gen_op_mov_v_reg(s, ot, s->T0, op1);
-    }
-
-    count = tcg_temp_new();
-    tcg_gen_andi_tl(count, count_in, mask);
 
     switch (ot) {
     case MO_16:
@@ -1567,11 +1477,6 @@
         tcg_gen_or_tl(s->T0, s->T0, s->T1);
         break;
     }
-
-    /* store */
-    gen_op_st_rm_T0_A0(s, ot, op1);
-
-    gen_shift_flags(s, ot, s->T0, s->tmp0, count, is_right);
 }
 
 #define X86_MAX_INSN_LENGTH 15
@@ -3081,108 +2986,11 @@
     CPUX86State *env = cpu_env(cpu);
     int prefixes = s->prefix;
     MemOp dflag = s->dflag;
-    int shift;
     MemOp ot;
-    int modrm, reg, rm, mod, op, opreg, val;
+    int modrm, reg, rm, mod, op, val;
 
     /* now check op code */
     switch (b) {
-        /**************************/
-        /* arith & logic */
-    case 0x1c0:
-    case 0x1c1: /* xadd Ev, Gv */
-        ot = mo_b_d(b, dflag);
-        modrm = x86_ldub_code(env, s);
-        reg = ((modrm >> 3) & 7) | REX_R(s);
-        mod = (modrm >> 6) & 3;
-        gen_op_mov_v_reg(s, ot, s->T0, reg);
-        if (mod == 3) {
-            rm = (modrm & 7) | REX_B(s);
-            gen_op_mov_v_reg(s, ot, s->T1, rm);
-            tcg_gen_add_tl(s->T0, s->T0, s->T1);
-            gen_op_mov_reg_v(s, ot, reg, s->T1);
-            gen_op_mov_reg_v(s, ot, rm, s->T0);
-        } else {
-            gen_lea_modrm(env, s, modrm);
-            if (s->prefix & PREFIX_LOCK) {
-                tcg_gen_atomic_fetch_add_tl(s->T1, s->A0, s->T0,
-                                            s->mem_index, ot | MO_LE);
-                tcg_gen_add_tl(s->T0, s->T0, s->T1);
-            } else {
-                gen_op_ld_v(s, ot, s->T1, s->A0);
-                tcg_gen_add_tl(s->T0, s->T0, s->T1);
-                gen_op_st_v(s, ot, s->T0, s->A0);
-            }
-            gen_op_mov_reg_v(s, ot, reg, s->T1);
-        }
-        gen_op_update2_cc(s);
-        set_cc_op(s, CC_OP_ADDB + ot);
-        break;
-    case 0x1b0:
-    case 0x1b1: /* cmpxchg Ev, Gv */
-        {
-            TCGv oldv, newv, cmpv, dest;
-
-            ot = mo_b_d(b, dflag);
-            modrm = x86_ldub_code(env, s);
-            reg = ((modrm >> 3) & 7) | REX_R(s);
-            mod = (modrm >> 6) & 3;
-            oldv = tcg_temp_new();
-            newv = tcg_temp_new();
-            cmpv = tcg_temp_new();
-            gen_op_mov_v_reg(s, ot, newv, reg);
-            tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]);
-            gen_extu(ot, cmpv);
-            if (s->prefix & PREFIX_LOCK) {
-                if (mod == 3) {
-                    goto illegal_op;
-                }
-                gen_lea_modrm(env, s, modrm);
-                tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv,
-                                          s->mem_index, ot | MO_LE);
-            } else {
-                if (mod == 3) {
-                    rm = (modrm & 7) | REX_B(s);
-                    gen_op_mov_v_reg(s, ot, oldv, rm);
-                    gen_extu(ot, oldv);
-
-                    /*
-                     * Unlike the memory case, where "the destination operand receives
-                     * a write cycle without regard to the result of the comparison",
-                     * rm must not be touched altogether if the write fails, including
-                     * not zero-extending it on 64-bit processors.  So, precompute
-                     * the result of a successful writeback and perform the movcond
-                     * directly on cpu_regs.  Also need to write accumulator first, in
-                     * case rm is part of RAX too.
-                     */
-                    dest = gen_op_deposit_reg_v(s, ot, rm, newv, newv);
-                    tcg_gen_movcond_tl(TCG_COND_EQ, dest, oldv, cmpv, newv, dest);
-                } else {
-                    gen_lea_modrm(env, s, modrm);
-                    gen_op_ld_v(s, ot, oldv, s->A0);
-
-                    /*
-                     * Perform an unconditional store cycle like physical cpu;
-                     * must be before changing accumulator to ensure
-                     * idempotency if the store faults and the instruction
-                     * is restarted
-                     */
-                    tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv);
-                    gen_op_st_v(s, ot, newv, s->A0);
-                }
-            }
-	    /*
-	     * Write EAX only if the cmpxchg fails; reuse newv as the destination,
-	     * since it's dead here.
-	     */
-            dest = gen_op_deposit_reg_v(s, ot, R_EAX, newv, oldv);
-            tcg_gen_movcond_tl(TCG_COND_EQ, dest, oldv, cmpv, dest, newv);
-            tcg_gen_mov_tl(cpu_cc_src, oldv);
-            tcg_gen_mov_tl(s->cc_srcT, cmpv);
-            tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv);
-            set_cc_op(s, CC_OP_SUBB + ot);
-        }
-        break;
     case 0x1c7: /* cmpxchg8b */
         modrm = x86_ldub_code(env, s);
         mod = (modrm >> 6) & 3;
@@ -3245,45 +3053,6 @@
         }
         break;
 
-        /**************************/
-        /* shifts */
-    case 0x1a4: /* shld imm */
-        op = 0;
-        shift = 1;
-        goto do_shiftd;
-    case 0x1a5: /* shld cl */
-        op = 0;
-        shift = 0;
-        goto do_shiftd;
-    case 0x1ac: /* shrd imm */
-        op = 1;
-        shift = 1;
-        goto do_shiftd;
-    case 0x1ad: /* shrd cl */
-        op = 1;
-        shift = 0;
-    do_shiftd:
-        ot = dflag;
-        modrm = x86_ldub_code(env, s);
-        mod = (modrm >> 6) & 3;
-        rm = (modrm & 7) | REX_B(s);
-        reg = ((modrm >> 3) & 7) | REX_R(s);
-        if (mod != 3) {
-            gen_lea_modrm(env, s, modrm);
-            opreg = OR_TMP0;
-        } else {
-            opreg = rm;
-        }
-        gen_op_mov_v_reg(s, ot, s->T1, reg);
-
-        if (shift) {
-            TCGv imm = tcg_constant_tl(x86_ldub_code(env, s));
-            gen_shiftd_rm_T1(s, ot, opreg, op, imm);
-        } else {
-            gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]);
-        }
-        break;
-
         /************************/
         /* bit operations */
     case 0x1ba: /* bt/bts/btr/btc Gv, im */
@@ -3423,147 +3192,6 @@
             break;
         }
         break;
-    case 0x1bc: /* bsf / tzcnt */
-    case 0x1bd: /* bsr / lzcnt */
-        ot = dflag;
-        modrm = x86_ldub_code(env, s);
-        reg = ((modrm >> 3) & 7) | REX_R(s);
-        gen_ld_modrm(env, s, modrm, ot);
-        gen_extu(ot, s->T0);
-
-        /* Note that lzcnt and tzcnt are in different extensions.  */
-        if ((prefixes & PREFIX_REPZ)
-            && (b & 1
-                ? s->cpuid_ext3_features & CPUID_EXT3_ABM
-                : s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1)) {
-            int size = 8 << ot;
-            /* For lzcnt/tzcnt, C bit is defined related to the input. */
-            tcg_gen_mov_tl(cpu_cc_src, s->T0);
-            if (b & 1) {
-                /* For lzcnt, reduce the target_ulong result by the
-                   number of zeros that we expect to find at the top.  */
-                tcg_gen_clzi_tl(s->T0, s->T0, TARGET_LONG_BITS);
-                tcg_gen_subi_tl(s->T0, s->T0, TARGET_LONG_BITS - size);
-            } else {
-                /* For tzcnt, a zero input must return the operand size.  */
-                tcg_gen_ctzi_tl(s->T0, s->T0, size);
-            }
-            /* For lzcnt/tzcnt, Z bit is defined related to the result.  */
-            gen_op_update1_cc(s);
-            set_cc_op(s, CC_OP_BMILGB + ot);
-        } else {
-            /* For bsr/bsf, only the Z bit is defined and it is related
-               to the input and not the result.  */
-            tcg_gen_mov_tl(cpu_cc_dst, s->T0);
-            set_cc_op(s, CC_OP_LOGICB + ot);
-
-            /* ??? The manual says that the output is undefined when the
-               input is zero, but real hardware leaves it unchanged, and
-               real programs appear to depend on that.  Accomplish this
-               by passing the output as the value to return upon zero.  */
-            if (b & 1) {
-                /* For bsr, return the bit index of the first 1 bit,
-                   not the count of leading zeros.  */
-                tcg_gen_xori_tl(s->T1, cpu_regs[reg], TARGET_LONG_BITS - 1);
-                tcg_gen_clz_tl(s->T0, s->T0, s->T1);
-                tcg_gen_xori_tl(s->T0, s->T0, TARGET_LONG_BITS - 1);
-            } else {
-                tcg_gen_ctz_tl(s->T0, s->T0, cpu_regs[reg]);
-            }
-        }
-        gen_op_mov_reg_v(s, ot, reg, s->T0);
-        break;
-    case 0x130: /* wrmsr */
-    case 0x132: /* rdmsr */
-        if (check_cpl0(s)) {
-            gen_update_cc_op(s);
-            gen_update_eip_cur(s);
-            if (b & 2) {
-                gen_helper_rdmsr(tcg_env);
-            } else {
-                gen_helper_wrmsr(tcg_env);
-                s->base.is_jmp = DISAS_EOB_NEXT;
-            }
-        }
-        break;
-    case 0x131: /* rdtsc */
-        gen_update_cc_op(s);
-        gen_update_eip_cur(s);
-        translator_io_start(&s->base);
-        gen_helper_rdtsc(tcg_env);
-        break;
-    case 0x133: /* rdpmc */
-        gen_update_cc_op(s);
-        gen_update_eip_cur(s);
-        gen_helper_rdpmc(tcg_env);
-        s->base.is_jmp = DISAS_NORETURN;
-        break;
-    case 0x134: /* sysenter */
-        /* For AMD SYSENTER is not valid in long mode */
-        if (LMA(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1) {
-            goto illegal_op;
-        }
-        if (!PE(s)) {
-            gen_exception_gpf(s);
-        } else {
-            gen_helper_sysenter(tcg_env);
-            s->base.is_jmp = DISAS_EOB_ONLY;
-        }
-        break;
-    case 0x135: /* sysexit */
-        /* For AMD SYSEXIT is not valid in long mode */
-        if (LMA(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1) {
-            goto illegal_op;
-        }
-        if (!PE(s) || CPL(s) != 0) {
-            gen_exception_gpf(s);
-        } else {
-            gen_helper_sysexit(tcg_env, tcg_constant_i32(dflag - 1));
-            s->base.is_jmp = DISAS_EOB_ONLY;
-        }
-        break;
-    case 0x105: /* syscall */
-        /* For Intel SYSCALL is only valid in long mode */
-        if (!LMA(s) && env->cpuid_vendor1 == CPUID_VENDOR_INTEL_1) {
-            goto illegal_op;
-        }
-        gen_update_cc_op(s);
-        gen_update_eip_cur(s);
-        gen_helper_syscall(tcg_env, cur_insn_len_i32(s));
-        /* condition codes are modified only in long mode */
-        if (LMA(s)) {
-            assume_cc_op(s, CC_OP_EFLAGS);
-        }
-        /* TF handling for the syscall insn is different. The TF bit is  checked
-           after the syscall insn completes. This allows #DB to not be
-           generated after one has entered CPL0 if TF is set in FMASK.  */
-        s->base.is_jmp = DISAS_EOB_RECHECK_TF;
-        break;
-    case 0x107: /* sysret */
-        /* For Intel SYSRET is only valid in long mode */
-        if (!LMA(s) && env->cpuid_vendor1 == CPUID_VENDOR_INTEL_1) {
-            goto illegal_op;
-        }
-        if (!PE(s) || CPL(s) != 0) {
-            gen_exception_gpf(s);
-        } else {
-            gen_helper_sysret(tcg_env, tcg_constant_i32(dflag - 1));
-            /* condition codes are modified only in long mode */
-            if (LMA(s)) {
-                assume_cc_op(s, CC_OP_EFLAGS);
-            }
-            /* TF handling for the sysret insn is different. The TF bit is
-               checked after the sysret insn completes. This allows #DB to be
-               generated "as if" the syscall insn in userspace has just
-               completed.  */
-            s->base.is_jmp = DISAS_EOB_RECHECK_TF;
-        }
-        break;
-    case 0x1a2: /* cpuid */
-        gen_update_cc_op(s);
-        gen_update_eip_cur(s);
-        gen_helper_cpuid(tcg_env);
-        break;
     case 0x100:
         modrm = x86_ldub_code(env, s);
         mod = (modrm >> 6) & 3;
@@ -3967,39 +3595,6 @@
         }
         break;
 
-    case 0x108: /* invd */
-    case 0x109: /* wbinvd; wbnoinvd with REPZ prefix */
-        if (check_cpl0(s)) {
-            gen_svm_check_intercept(s, (b & 1) ? SVM_EXIT_WBINVD : SVM_EXIT_INVD);
-            /* nothing to do */
-        }
-        break;
-    case 0x102: /* lar */
-    case 0x103: /* lsl */
-        {
-            TCGLabel *label1;
-            TCGv t0;
-            if (!PE(s) || VM86(s))
-                goto illegal_op;
-            ot = dflag != MO_16 ? MO_32 : MO_16;
-            modrm = x86_ldub_code(env, s);
-            reg = ((modrm >> 3) & 7) | REX_R(s);
-            gen_ld_modrm(env, s, modrm, MO_16);
-            t0 = tcg_temp_new();
-            gen_update_cc_op(s);
-            if (b == 0x102) {
-                gen_helper_lar(t0, tcg_env, s->T0);
-            } else {
-                gen_helper_lsl(t0, tcg_env, s->T0);
-            }
-            tcg_gen_andi_tl(s->tmp0, cpu_cc_src, CC_Z);
-            label1 = gen_new_label();
-            tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1);
-            gen_op_mov_reg_v(s, ot, reg, t0);
-            gen_set_label(label1);
-            set_cc_op(s, CC_OP_EFLAGS);
-        }
-        break;
     case 0x11a:
         modrm = x86_ldub_code(env, s);
         if (s->flags & HF_MPX_EN_MASK) {
@@ -4191,311 +3786,6 @@
         }
         gen_nop_modrm(env, s, modrm);
         break;
-
-    case 0x120: /* mov reg, crN */
-    case 0x122: /* mov crN, reg */
-        if (!check_cpl0(s)) {
-            break;
-        }
-        modrm = x86_ldub_code(env, s);
-        /*
-         * Ignore the mod bits (assume (modrm&0xc0)==0xc0).
-         * AMD documentation (24594.pdf) and testing of Intel 386 and 486
-         * processors all show that the mod bits are assumed to be 1's,
-         * regardless of actual values.
-         */
-        rm = (modrm & 7) | REX_B(s);
-        reg = ((modrm >> 3) & 7) | REX_R(s);
-        switch (reg) {
-        case 0:
-            if ((prefixes & PREFIX_LOCK) &&
-                (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) {
-                reg = 8;
-            }
-            break;
-        case 2:
-        case 3:
-        case 4:
-        case 8:
-            break;
-        default:
-            goto unknown_op;
-        }
-        ot  = (CODE64(s) ? MO_64 : MO_32);
-
-        translator_io_start(&s->base);
-        if (b & 2) {
-            gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0 + reg);
-            gen_op_mov_v_reg(s, ot, s->T0, rm);
-            gen_helper_write_crN(tcg_env, tcg_constant_i32(reg), s->T0);
-            s->base.is_jmp = DISAS_EOB_NEXT;
-        } else {
-            gen_svm_check_intercept(s, SVM_EXIT_READ_CR0 + reg);
-            gen_helper_read_crN(s->T0, tcg_env, tcg_constant_i32(reg));
-            gen_op_mov_reg_v(s, ot, rm, s->T0);
-        }
-        break;
-
-    case 0x121: /* mov reg, drN */
-    case 0x123: /* mov drN, reg */
-        if (check_cpl0(s)) {
-            modrm = x86_ldub_code(env, s);
-            /* Ignore the mod bits (assume (modrm&0xc0)==0xc0).
-             * AMD documentation (24594.pdf) and testing of
-             * intel 386 and 486 processors all show that the mod bits
-             * are assumed to be 1's, regardless of actual values.
-             */
-            rm = (modrm & 7) | REX_B(s);
-            reg = ((modrm >> 3) & 7) | REX_R(s);
-            if (CODE64(s))
-                ot = MO_64;
-            else
-                ot = MO_32;
-            if (reg >= 8) {
-                goto illegal_op;
-            }
-            if (b & 2) {
-                gen_svm_check_intercept(s, SVM_EXIT_WRITE_DR0 + reg);
-                gen_op_mov_v_reg(s, ot, s->T0, rm);
-                tcg_gen_movi_i32(s->tmp2_i32, reg);
-                gen_helper_set_dr(tcg_env, s->tmp2_i32, s->T0);
-                s->base.is_jmp = DISAS_EOB_NEXT;
-            } else {
-                gen_svm_check_intercept(s, SVM_EXIT_READ_DR0 + reg);
-                tcg_gen_movi_i32(s->tmp2_i32, reg);
-                gen_helper_get_dr(s->T0, tcg_env, s->tmp2_i32);
-                gen_op_mov_reg_v(s, ot, rm, s->T0);
-            }
-        }
-        break;
-    case 0x106: /* clts */
-        if (check_cpl0(s)) {
-            gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0);
-            gen_helper_clts(tcg_env);
-            /* abort block because static cpu state changed */
-            s->base.is_jmp = DISAS_EOB_NEXT;
-        }
-        break;
-    /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */
-    case 0x1ae:
-        modrm = x86_ldub_code(env, s);
-        switch (modrm) {
-        CASE_MODRM_MEM_OP(0): /* fxsave */
-            if (!(s->cpuid_features & CPUID_FXSR)
-                || (prefixes & PREFIX_LOCK)) {
-                goto illegal_op;
-            }
-            if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
-                gen_exception(s, EXCP07_PREX);
-                break;
-            }
-            gen_lea_modrm(env, s, modrm);
-            gen_helper_fxsave(tcg_env, s->A0);
-            break;
-
-        CASE_MODRM_MEM_OP(1): /* fxrstor */
-            if (!(s->cpuid_features & CPUID_FXSR)
-                || (prefixes & PREFIX_LOCK)) {
-                goto illegal_op;
-            }
-            if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) {
-                gen_exception(s, EXCP07_PREX);
-                break;
-            }
-            gen_lea_modrm(env, s, modrm);
-            gen_helper_fxrstor(tcg_env, s->A0);
-            break;
-
-        CASE_MODRM_MEM_OP(2): /* ldmxcsr */
-            if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
-                goto illegal_op;
-            }
-            if (s->flags & HF_TS_MASK) {
-                gen_exception(s, EXCP07_PREX);
-                break;
-            }
-            gen_lea_modrm(env, s, modrm);
-            tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL);
-            gen_helper_ldmxcsr(tcg_env, s->tmp2_i32);
-            break;
-
-        CASE_MODRM_MEM_OP(3): /* stmxcsr */
-            if ((s->flags & HF_EM_MASK) || !(s->flags & HF_OSFXSR_MASK)) {
-                goto illegal_op;
-            }
-            if (s->flags & HF_TS_MASK) {
-                gen_exception(s, EXCP07_PREX);
-                break;
-            }
-            gen_helper_update_mxcsr(tcg_env);
-            gen_lea_modrm(env, s, modrm);
-            tcg_gen_ld32u_tl(s->T0, tcg_env, offsetof(CPUX86State, mxcsr));
-            gen_op_st_v(s, MO_32, s->T0, s->A0);
-            break;
-
-        CASE_MODRM_MEM_OP(4): /* xsave */
-            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
-                || (prefixes & (PREFIX_LOCK | PREFIX_DATA
-                                | PREFIX_REPZ | PREFIX_REPNZ))) {
-                goto illegal_op;
-            }
-            gen_lea_modrm(env, s, modrm);
-            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
-                                  cpu_regs[R_EDX]);
-            gen_helper_xsave(tcg_env, s->A0, s->tmp1_i64);
-            break;
-
-        CASE_MODRM_MEM_OP(5): /* xrstor */
-            if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
-                || (prefixes & (PREFIX_LOCK | PREFIX_DATA
-                                | PREFIX_REPZ | PREFIX_REPNZ))) {
-                goto illegal_op;
-            }
-            gen_lea_modrm(env, s, modrm);
-            tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
-                                  cpu_regs[R_EDX]);
-            gen_helper_xrstor(tcg_env, s->A0, s->tmp1_i64);
-            /* XRSTOR is how MPX is enabled, which changes how
-               we translate.  Thus we need to end the TB.  */
-            s->base.is_jmp = DISAS_EOB_NEXT;
-            break;
-
-        CASE_MODRM_MEM_OP(6): /* xsaveopt / clwb */
-            if (prefixes & PREFIX_LOCK) {
-                goto illegal_op;
-            }
-            if (prefixes & PREFIX_DATA) {
-                /* clwb */
-                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLWB)) {
-                    goto illegal_op;
-                }
-                gen_nop_modrm(env, s, modrm);
-            } else {
-                /* xsaveopt */
-                if ((s->cpuid_ext_features & CPUID_EXT_XSAVE) == 0
-                    || (s->cpuid_xsave_features & CPUID_XSAVE_XSAVEOPT) == 0
-                    || (prefixes & (PREFIX_REPZ | PREFIX_REPNZ))) {
-                    goto illegal_op;
-                }
-                gen_lea_modrm(env, s, modrm);
-                tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX],
-                                      cpu_regs[R_EDX]);
-                gen_helper_xsaveopt(tcg_env, s->A0, s->tmp1_i64);
-            }
-            break;
-
-        CASE_MODRM_MEM_OP(7): /* clflush / clflushopt */
-            if (prefixes & PREFIX_LOCK) {
-                goto illegal_op;
-            }
-            if (prefixes & PREFIX_DATA) {
-                /* clflushopt */
-                if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_CLFLUSHOPT)) {
-                    goto illegal_op;
-                }
-            } else {
-                /* clflush */
-                if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))
-                    || !(s->cpuid_features & CPUID_CLFLUSH)) {
-                    goto illegal_op;
-                }
-            }
-            gen_nop_modrm(env, s, modrm);
-            break;
-
-        case 0xc0 ... 0xc7: /* rdfsbase (f3 0f ae /0) */
-        case 0xc8 ... 0xcf: /* rdgsbase (f3 0f ae /1) */
-        case 0xd0 ... 0xd7: /* wrfsbase (f3 0f ae /2) */
-        case 0xd8 ... 0xdf: /* wrgsbase (f3 0f ae /3) */
-            if (CODE64(s)
-                && (prefixes & PREFIX_REPZ)
-                && !(prefixes & PREFIX_LOCK)
-                && (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_FSGSBASE)) {
-                TCGv base, treg, src, dst;
-
-                /* Preserve hflags bits by testing CR4 at runtime.  */
-                tcg_gen_movi_i32(s->tmp2_i32, CR4_FSGSBASE_MASK);
-                gen_helper_cr4_testbit(tcg_env, s->tmp2_i32);
-
-                base = cpu_seg_base[modrm & 8 ? R_GS : R_FS];
-                treg = cpu_regs[(modrm & 7) | REX_B(s)];
-
-                if (modrm & 0x10) {
-                    /* wr*base */
-                    dst = base, src = treg;
-                } else {
-                    /* rd*base */
-                    dst = treg, src = base;
-                }
-
-                if (s->dflag == MO_32) {
-                    tcg_gen_ext32u_tl(dst, src);
-                } else {
-                    tcg_gen_mov_tl(dst, src);
-                }
-                break;
-            }
-            goto unknown_op;
-
-        case 0xf8 ... 0xff: /* sfence */
-            if (!(s->cpuid_features & CPUID_SSE)
-                || (prefixes & PREFIX_LOCK)) {
-                goto illegal_op;
-            }
-            tcg_gen_mb(TCG_MO_ST_ST | TCG_BAR_SC);
-            break;
-        case 0xe8 ... 0xef: /* lfence */
-            if (!(s->cpuid_features & CPUID_SSE)
-                || (prefixes & PREFIX_LOCK)) {
-                goto illegal_op;
-            }
-            tcg_gen_mb(TCG_MO_LD_LD | TCG_BAR_SC);
-            break;
-        case 0xf0 ... 0xf7: /* mfence */
-            if (!(s->cpuid_features & CPUID_SSE2)
-                || (prefixes & PREFIX_LOCK)) {
-                goto illegal_op;
-            }
-            tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
-            break;
-
-        default:
-            goto unknown_op;
-        }
-        break;
-
-    case 0x1aa: /* rsm */
-        gen_svm_check_intercept(s, SVM_EXIT_RSM);
-        if (!(s->flags & HF_SMM_MASK))
-            goto illegal_op;
-#ifdef CONFIG_USER_ONLY
-        /* we should not be in SMM mode */
-        g_assert_not_reached();
-#else
-        gen_helper_rsm(tcg_env);
-        assume_cc_op(s, CC_OP_EFLAGS);
-#endif /* CONFIG_USER_ONLY */
-        s->base.is_jmp = DISAS_EOB_ONLY;
-        break;
-    case 0x1b8: /* SSE4.2 popcnt */
-        if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) !=
-             PREFIX_REPZ)
-            goto illegal_op;
-        if (!(s->cpuid_ext_features & CPUID_EXT_POPCNT))
-            goto illegal_op;
-
-        modrm = x86_ldub_code(env, s);
-        reg = ((modrm >> 3) & 7) | REX_R(s);
-
-        ot = dflag;
-        gen_ld_modrm(env, s, modrm, ot);
-        gen_extu(ot, s->T0);
-        tcg_gen_mov_tl(cpu_cc_src, s->T0);
-        tcg_gen_ctpop_tl(s->T0, s->T0);
-        gen_op_mov_reg_v(s, ot, reg, s->T0);
-
-        set_cc_op(s, CC_OP_POPCNT);
-        break;
     default:
         g_assert_not_reached();
     }
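
The removed 0f bc/0f bd (bsf/bsr vs. tzcnt/lzcnt) case moves to the new decoder; gen_TZCNT shown earlier is the tzcnt half. The behavioural difference being preserved is worth spelling out: on a zero source, TZCNT/LZCNT return the operand size and set CF, while BSF/BSR only set ZF and, as the removed comment notes, real hardware leaves the destination register unchanged even though the manuals call the result undefined. A minimal standalone sketch (tzcnt32/bsf32 are made-up reference helpers, not QEMU code):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical reference helpers, not QEMU code. */
static uint32_t tzcnt32(uint32_t x)
{
    unsigned i;
    for (i = 0; i < 32; i++) {
        if (x & (1u << i)) {
            return i;
        }
    }
    return 32;                           /* zero source: operand size, CF set */
}

static uint32_t bsf32(uint32_t x, uint32_t old_dst)
{
    return x ? tzcnt32(x) : old_dst;     /* zero source: ZF set, dst unchanged */
}

int main(void)
{
    printf("tzcnt(0x50)  = %u\n", tzcnt32(0x50));  /* 4 */
    printf("tzcnt(0)     = %u\n", tzcnt32(0));     /* 32 */
    printf("bsf(0, old7) = %u\n", bsf32(0, 7));    /* 7: keeps old value */
    return 0;
}
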
diff --git a/tests/avocado/machine_aspeed.py b/tests/avocado/machine_aspeed.py
index cec0181..3a20644 100644
--- a/tests/avocado/machine_aspeed.py
+++ b/tests/avocado/machine_aspeed.py
@@ -311,6 +311,17 @@
             self, 'boot', '## Loading kernel from FIT Image')
         self.wait_for_console_pattern('Starting kernel ...')
 
+    def do_test_aarch64_aspeed_sdk_start(self, image):
+        self.vm.set_console()
+        self.vm.add_args('-drive', 'file=' + image + ',if=mtd,format=raw')
+
+        self.vm.launch()
+
+        self.wait_for_console_pattern('U-Boot 2023.10')
+        self.wait_for_console_pattern('## Loading kernel from FIT Image')
+        self.wait_for_console_pattern('Starting kernel ...')
+        self.wait_for_console_pattern("systemd[1]: Hostname set to")
+
     @skipUnless(os.getenv('QEMU_TEST_FLAKY_TESTS'), 'Test is unstable on GitLab')
 
     def test_arm_ast2500_evb_sdk(self):
@@ -375,3 +386,54 @@
              'i2c i2c-5: new_device: Instantiated device ds1307 at 0x32');
         year = time.strftime("%Y")
         self.ssh_command_output_contains('/sbin/hwclock -f /dev/rtc1', year);
+
+    def test_aarch64_ast2700_evb_sdk_v09_01(self):
+        """
+        :avocado: tags=arch:aarch64
+        :avocado: tags=machine:ast2700-evb
+        """
+
+        image_url = ('https://github.com/AspeedTech-BMC/openbmc/releases/'
+                     'download/v09.01/ast2700-default-obmc.tar.gz')
+        image_hash = 'b1cc0fd73c7650d34c9c8459a243f52a91e9e27144b8608b2645ab19461d1e07'
+        image_path = self.fetch_asset(image_url, asset_hash=image_hash,
+                                      algorithm='sha256')
+        archive.extract(image_path, self.workdir)
+
+        num_cpu = 4
+        image_dir = self.workdir + '/ast2700-default/'
+        uboot_size = os.path.getsize(image_dir + 'u-boot-nodtb.bin')
+        uboot_dtb_load_addr = hex(0x400000000 + uboot_size)
+
+        load_images_list = [
+            {
+                'addr': '0x400000000',
+                'file': image_dir + 'u-boot-nodtb.bin'
+            },
+            {
+                'addr': str(uboot_dtb_load_addr),
+                'file': image_dir + 'u-boot.dtb'
+            },
+            {
+                'addr': '0x430000000',
+                'file': image_dir + 'bl31.bin'
+            },
+            {
+                'addr': '0x430080000',
+                'file': image_dir + 'optee/tee-raw.bin'
+            }
+        ]
+
+        for load_image in load_images_list:
+            addr = load_image['addr']
+            file = load_image['file']
+            self.vm.add_args('-device',
+                             f'loader,force-raw=on,addr={addr},file={file}')
+
+        for i in range(num_cpu):
+            self.vm.add_args('-device',
+                             f'loader,addr=0x430000000,cpu-num={i}')
+
+        self.vm.add_args('-smp', str(num_cpu))
+        self.do_test_aarch64_aspeed_sdk_start(image_dir + 'image-bmc')
+