Merge remote-tracking branch 'remotes/xtensa/tags/20150307-xtensa' into staging

Xtensa updates:

- implement do_unassigned_access callback;
- fix ML605 xtfpga FLASH size.

# gpg: Signature made Sat Mar  7 12:35:05 2015 GMT using RSA key ID F83FA044
# gpg: Good signature from "Max Filippov <max.filippov@cogentembedded.com>"
# gpg:                 aka "Max Filippov <jcmvbkbc@gmail.com>"

* remotes/xtensa/tags/20150307-xtensa:
  target-xtensa: xtfpga: fix ml605 flash size
  target-xtensa: implement do_unassigned_access callback
  hw/xtensa: allow reads/writes in the system I/O region

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/bootdevice.c b/bootdevice.c
index eacd8c8..673bfef 100644
--- a/bootdevice.c
+++ b/bootdevice.c
@@ -105,7 +105,9 @@
         return;
     }
 
-    qemu_boot_set(normal_boot_order, NULL);
+    if (boot_set_handler) {
+        qemu_boot_set(normal_boot_order, &error_abort);
+    }
 
     qemu_unregister_reset(restore_boot_order, normal_boot_order);
     g_free(normal_boot_order);
diff --git a/docs/memory-hotplug.txt b/docs/memory-hotplug.txt
new file mode 100644
index 0000000..f70571d
--- /dev/null
+++ b/docs/memory-hotplug.txt
@@ -0,0 +1,76 @@
+QEMU memory hotplug
+===================
+
+This document explains how to use the memory hotplug feature in QEMU,
+which is present since v2.1.0.
+
+Please, note that memory hotunplug is not supported yet. This means
+that you're able to add memory, but you're not able to remove it.
+Also, proper guest support is required for memory hotplug to work.
+
+Basic RAM hotplug
+-----------------
+
+In order to be able to hotplug memory, QEMU has to be told how many
+hotpluggable memory slots to create and what is the maximum amount of
+memory the guest can grow. This is done at startup time by means of
+the -m command-line option, which has the following format:
+
+ -m [size=]megs[,slots=n,maxmem=size]
+
+Where,
+
+ - "megs" is the startup RAM. It is the RAM the guest will boot with
+ - "slots" is the number of hotpluggable memory slots
+ - "maxmem" is the maximum RAM size the guest can have
+
+For example, the following command-line:
+
+ qemu [...] 1G,slots=3,maxmem=4G
+
+Creates a guest with 1GB of memory and three hotpluggable memory slots.
+The hotpluggable memory slots are empty when the guest is booted, so all
+memory the guest will see after boot is 1GB. The maximum memory the
+guest can reach is 4GB. This means that three additional gigabytes can be
+hotplugged by using any combination of the available memory slots.
+
+Two monitor commands are used to hotplug memory:
+
+ - "object_add": creates a memory backend object
+ - "device_add": creates a front-end pc-dimm device and inserts it
+                 into the first empty slot
+
+For example, the following commands add another 1GB to the guest
+discussed earlier:
+
+  (qemu) object_add memory-backend-ram,id=mem1,size=1G
+  (qemu) device_add pc-dimm,id=dimm1,memdev=mem1
+
+Using the file backend
+----------------------
+
+Besides basic RAM hotplug, QEMU also supports using files as a memory
+backend. This is useful for using hugetlbfs in Linux, which provides
+access to bigger page sizes.
+
+For example, assuming that the host has 1GB hugepages available in
+the /mnt/hugepages-1GB directory, a 1GB hugepage could be hotplugged
+into the guest from the previous section with the following commands:
+
+  (qemu) object_add memory-backend-file,id=mem1,size=1G,mem-path=/mnt/hugepages-1GB
+  (qemu) device_add pc-dimm,id=dimm1,memdev=mem1
+
+It's also possible to start a guest with memory cold-plugged into the
+hotpluggable memory slots. This might seem counterintuitive at first,
+but this allows for a lot of flexibility when using the file backend.
+
+In the following command-line example, a 8GB guest is created where 6GB
+comes from regular RAM, 1GB is a 1GB hugepage page and 256MB is from
+2MB pages. Also, the guest has additional memory slots to hotplug more
+2GB if needed:
+
+ qemu [...] -m 6GB,slots=4,maxmem=10G \
+   -object memory-backend-file,id=mem1,size=1G,mem-path=/mnt/hugepages-1G \
+   -device pc-dimm,id=dimm1,memdev=mem1 \
+   -object memory-backend-file,id=mem2,size=256M,mem-path=/mnt/hugepages-2MB \
+   -device pc-dimm,id=dimm2,memdev=mem2
diff --git a/hmp.c b/hmp.c
index 735097c..71c28bc 100644
--- a/hmp.c
+++ b/hmp.c
@@ -29,6 +29,10 @@
 #include "block/qapi.h"
 #include "qemu-io.h"
 
+#ifdef CONFIG_SPICE
+#include <spice/enums.h>
+#endif
+
 static void hmp_handle_error(Monitor *mon, Error **errp)
 {
     assert(errp);
@@ -545,6 +549,25 @@
 {
     SpiceChannelList *chan;
     SpiceInfo *info;
+    const char *channel_name;
+    const char * const channel_names[] = {
+        [SPICE_CHANNEL_MAIN] = "main",
+        [SPICE_CHANNEL_DISPLAY] = "display",
+        [SPICE_CHANNEL_INPUTS] = "inputs",
+        [SPICE_CHANNEL_CURSOR] = "cursor",
+        [SPICE_CHANNEL_PLAYBACK] = "playback",
+        [SPICE_CHANNEL_RECORD] = "record",
+        [SPICE_CHANNEL_TUNNEL] = "tunnel",
+        [SPICE_CHANNEL_SMARTCARD] = "smartcard",
+        [SPICE_CHANNEL_USBREDIR] = "usbredir",
+        [SPICE_CHANNEL_PORT] = "port",
+#if 0
+        /* minimum spice-protocol is 0.12.3, webdav was added in 0.12.7,
+         * no easy way to #ifdef (SPICE_CHANNEL_* is a enum).  Disable
+         * as quick fix for build failures with older versions. */
+        [SPICE_CHANNEL_WEBDAV] = "webdav",
+#endif
+    };
 
     info = qmp_query_spice(NULL);
 
@@ -581,6 +604,15 @@
                            chan->value->connection_id);
             monitor_printf(mon, "     channel: %" PRId64 ":%" PRId64 "\n",
                            chan->value->channel_type, chan->value->channel_id);
+
+            channel_name = "unknown";
+            if (chan->value->channel_type > 0 &&
+                chan->value->channel_type < ARRAY_SIZE(channel_names) &&
+                channel_names[chan->value->channel_type]) {
+                channel_name = channel_names[chan->value->channel_type];
+            }
+
+            monitor_printf(mon, "     channel name: %s\n", channel_name);
         }
     }
 
diff --git a/hw/display/qxl.c b/hw/display/qxl.c
index 61df477..762f75d 100644
--- a/hw/display/qxl.c
+++ b/hw/display/qxl.c
@@ -120,9 +120,12 @@
     QXL_MODE_EX(2560, 2048),
     QXL_MODE_EX(2800, 2100),
     QXL_MODE_EX(3200, 2400),
+    /* these modes need more than 32 MB video memory */
     QXL_MODE_EX(3840, 2160), /* 4k mainstream */
     QXL_MODE_EX(4096, 2160), /* 4k            */
+    /* these modes need more than 64 MB video memory */
     QXL_MODE_EX(7680, 4320), /* 8k mainstream */
+    /* these modes need more than 128 MB video memory */
     QXL_MODE_EX(8192, 4320), /* 8k            */
 };
 
@@ -297,19 +300,6 @@
     qxl->ssd.cursor = cursor_builtin_hidden();
 }
 
-
-static inline uint32_t msb_mask(uint32_t val)
-{
-    uint32_t mask;
-
-    do {
-        mask = ~(val - 1) & val;
-        val &= ~mask;
-    } while (mask < val);
-
-    return mask;
-}
-
 static ram_addr_t qxl_rom_size(void)
 {
     uint32_t required_rom_size = sizeof(QXLRom) + sizeof(QXLModes) +
@@ -367,6 +357,8 @@
     num_pages         -= surface0_area_size;
     num_pages          = num_pages / QXL_PAGE_SIZE;
 
+    assert(ram_header_size + surface0_area_size <= d->vga.vram_size);
+
     rom->draw_area_offset   = cpu_to_le32(0);
     rom->surface0_area_size = cpu_to_le32(surface0_area_size);
     rom->pages_offset       = cpu_to_le32(surface0_area_size);
@@ -1155,7 +1147,6 @@
         qxl_enter_vga_mode(d);
     } else {
         d->mode = QXL_MODE_UNDEFINED;
-        update_displaychangelistener(&d->ssd.dcl, GUI_REFRESH_INTERVAL_IDLE);
     }
 }
 
@@ -1880,6 +1871,12 @@
     if (qxl->vgamem_size_mb < 8) {
         qxl->vgamem_size_mb = 8;
     }
+    /* XXX: we round vgamem_size_mb up to a nearest power of two and it must be
+     * less than vga_common_init()'s maximum on qxl->vga.vram_size (512 now).
+     */
+    if (qxl->vgamem_size_mb > 256) {
+        qxl->vgamem_size_mb = 256;
+    }
     qxl->vgamem_size = qxl->vgamem_size_mb * 1024 * 1024;
 
     /* vga ram (bar 0, total) */
@@ -1910,10 +1907,10 @@
         qxl->vram32_size = 4096;
         qxl->vram_size = 4096;
     }
-    qxl->vgamem_size = msb_mask(qxl->vgamem_size * 2 - 1);
-    qxl->vga.vram_size = msb_mask(qxl->vga.vram_size * 2 - 1);
-    qxl->vram32_size = msb_mask(qxl->vram32_size * 2 - 1);
-    qxl->vram_size = msb_mask(qxl->vram_size * 2 - 1);
+    qxl->vgamem_size = pow2ceil(qxl->vgamem_size);
+    qxl->vga.vram_size = pow2ceil(qxl->vga.vram_size);
+    qxl->vram32_size = pow2ceil(qxl->vram32_size);
+    qxl->vram_size = pow2ceil(qxl->vram_size);
 }
 
 static int qxl_init_common(PCIQXLDevice *qxl)
@@ -1945,7 +1942,7 @@
         break;
     case 4: /* qxl-4 */
         pci_device_rev = QXL_REVISION_STABLE_V12;
-        io_size = msb_mask(QXL_IO_RANGE_SIZE * 2 - 1);
+        io_size = pow2ceil(QXL_IO_RANGE_SIZE);
         break;
     default:
         error_report("Invalid revision %d for qxl device (max %d)",
diff --git a/hw/display/vga.c b/hw/display/vga.c
index c8c49ab..c0f7b34 100644
--- a/hw/display/vga.c
+++ b/hw/display/vga.c
@@ -2094,6 +2094,17 @@
     .text_update = vga_update_text,
 };
 
+static inline uint32_t uint_clamp(uint32_t val, uint32_t vmin, uint32_t vmax)
+{
+    if (val < vmin) {
+        return vmin;
+    }
+    if (val > vmax) {
+        return vmax;
+    }
+    return val;
+}
+
 void vga_common_init(VGACommonState *s, Object *obj, bool global_vmstate)
 {
     int i, j, v, b;
@@ -2121,13 +2132,10 @@
         expand4to8[i] = v;
     }
 
-    /* valid range: 1 MB -> 256 MB */
-    s->vram_size = 1024 * 1024;
-    while (s->vram_size < (s->vram_size_mb << 20) &&
-           s->vram_size < (256 << 20)) {
-        s->vram_size <<= 1;
-    }
-    s->vram_size_mb = s->vram_size >> 20;
+    s->vram_size_mb = uint_clamp(s->vram_size_mb, 1, 512);
+    s->vram_size_mb = pow2ceil(s->vram_size_mb);
+    s->vram_size = s->vram_size_mb << 20;
+
     if (!s->vbe_size) {
         s->vbe_size = s->vram_size;
     }
diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c
index f27a087..de81b9c 100644
--- a/hw/mem/pc-dimm.c
+++ b/hw/mem/pc-dimm.c
@@ -100,6 +100,32 @@
     return 0;
 }
 
+ram_addr_t get_current_ram_size(void)
+{
+    MemoryDeviceInfoList *info_list = NULL;
+    MemoryDeviceInfoList **prev = &info_list;
+    MemoryDeviceInfoList *info;
+    ram_addr_t size = ram_size;
+
+    qmp_pc_dimm_device_list(qdev_get_machine(), &prev);
+    for (info = info_list; info; info = info->next) {
+        MemoryDeviceInfo *value = info->value;
+
+        if (value) {
+            switch (value->kind) {
+            case MEMORY_DEVICE_INFO_KIND_DIMM:
+                size += value->dimm->size;
+                break;
+            default:
+                break;
+            }
+        }
+    }
+    qapi_free_MemoryDeviceInfoList(info_list);
+
+    return size;
+}
+
 static int pc_dimm_slot2bitmap(Object *obj, void *opaque)
 {
     unsigned long *bitmap = opaque;
diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
index 776699b..828c2a7 100644
--- a/hw/usb/hcd-xhci.c
+++ b/hw/usb/hcd-xhci.c
@@ -1767,9 +1767,18 @@
             break;
         }
 
-        if (!reported && ((trb->control & TRB_TR_IOC) ||
-                          (shortpkt && (trb->control & TRB_TR_ISP)) ||
-                          (xfer->status != CC_SUCCESS && left == 0))) {
+        /*
+         * XHCI 1.1, 4.11.3.1 Transfer Event TRB -- "each Transfer TRB
+         * encountered with its IOC flag set to '1' shall generate a Transfer
+         * Event."
+         *
+         * Otherwise, longer transfers can have multiple data TRBs (for scatter
+         * gather). Short transfers and errors should be reported once per
+         * transfer only.
+         */
+        if ((trb->control & TRB_TR_IOC) ||
+            (!reported && ((shortpkt && (trb->control & TRB_TR_ISP)) ||
+                           (xfer->status != CC_SUCCESS && left == 0)))) {
             event.slotid = xfer->slotid;
             event.epid = xfer->epid;
             event.length = (trb->status & 0x1ffff) - chunk;
diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index 7bfbb75..8a48d2a 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -25,6 +25,7 @@
 #include "exec/address-spaces.h"
 #include "qapi/visitor.h"
 #include "qapi-event.h"
+#include "trace.h"
 
 #if defined(__linux__)
 #include <sys/mman.h>
@@ -222,6 +223,8 @@
             if (!int128_nz(section.size) || !memory_region_is_ram(section.mr))
                 continue;
 
+            trace_virtio_balloon_handle_output(memory_region_name(section.mr),
+                                               pa);
             /* Using memory_region_get_ram_ptr is bending the rules a bit, but
                should be OK because we only want a single page.  */
             addr = section.offset_within_region;
@@ -285,6 +288,7 @@
     config.num_pages = cpu_to_le32(dev->num_pages);
     config.actual = cpu_to_le32(dev->actual);
 
+    trace_virtio_balloon_get_config(config.num_pages, config.actual);
     memcpy(config_data, &config, sizeof(struct virtio_balloon_config));
 }
 
@@ -294,13 +298,16 @@
     VirtIOBalloon *dev = VIRTIO_BALLOON(vdev);
     struct virtio_balloon_config config;
     uint32_t oldactual = dev->actual;
+    ram_addr_t vm_ram_size = get_current_ram_size();
+
     memcpy(&config, config_data, sizeof(struct virtio_balloon_config));
     dev->actual = le32_to_cpu(config.actual);
     if (dev->actual != oldactual) {
-        qapi_event_send_balloon_change(ram_size -
+        qapi_event_send_balloon_change(vm_ram_size -
                         ((ram_addr_t) dev->actual << VIRTIO_BALLOON_PFN_SHIFT),
                         &error_abort);
     }
+    trace_virtio_balloon_set_config(dev->actual, oldactual);
 }
 
 static uint32_t virtio_balloon_get_features(VirtIODevice *vdev, uint32_t f)
@@ -312,22 +319,24 @@
 static void virtio_balloon_stat(void *opaque, BalloonInfo *info)
 {
     VirtIOBalloon *dev = opaque;
-    info->actual = ram_size - ((uint64_t) dev->actual <<
-                               VIRTIO_BALLOON_PFN_SHIFT);
+    info->actual = get_current_ram_size() - ((uint64_t) dev->actual <<
+                                             VIRTIO_BALLOON_PFN_SHIFT);
 }
 
 static void virtio_balloon_to_target(void *opaque, ram_addr_t target)
 {
     VirtIOBalloon *dev = VIRTIO_BALLOON(opaque);
     VirtIODevice *vdev = VIRTIO_DEVICE(dev);
+    ram_addr_t vm_ram_size = get_current_ram_size();
 
-    if (target > ram_size) {
-        target = ram_size;
+    if (target > vm_ram_size) {
+        target = vm_ram_size;
     }
     if (target) {
-        dev->num_pages = (ram_size - target) >> VIRTIO_BALLOON_PFN_SHIFT;
+        dev->num_pages = (vm_ram_size - target) >> VIRTIO_BALLOON_PFN_SHIFT;
         virtio_notify_config(vdev);
     }
+    trace_virtio_balloon_to_target(target, dev->num_pages);
 }
 
 static void virtio_balloon_save(QEMUFile *f, void *opaque)
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 427b851..fcc3162 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -52,6 +52,7 @@
 #endif
 
 extern ram_addr_t ram_size;
+ram_addr_t get_current_ram_size(void);
 
 /* memory API */
 
diff --git a/include/qemu-common.h b/include/qemu-common.h
index 644b46d..1b5cffb 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -418,6 +418,9 @@
 /* round down to the nearest power of 2*/
 int64_t pow2floor(int64_t value);
 
+/* round up to the nearest power of 2 (0 if overflow) */
+uint64_t pow2ceil(uint64_t value);
+
 #include "qemu/module.h"
 
 /*
diff --git a/migration/rdma.c b/migration/rdma.c
index 6bee30c..17d0035 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -510,7 +510,7 @@
     return result;
 }
 
-static int __qemu_rdma_add_block(RDMAContext *rdma, void *host_addr,
+static int rdma_add_block(RDMAContext *rdma, void *host_addr,
                          ram_addr_t block_offset, uint64_t length)
 {
     RDMALocalBlocks *local = &rdma->local_ram_blocks;
@@ -551,13 +551,12 @@
 
     g_hash_table_insert(rdma->blockmap, (void *) block_offset, block);
 
-    trace___qemu_rdma_add_block(local->nb_blocks,
-                           (uint64_t) block->local_host_addr, block->offset,
-                           block->length,
-                           (uint64_t) (block->local_host_addr + block->length),
-                           BITS_TO_LONGS(block->nb_chunks) *
-                               sizeof(unsigned long) * 8,
-                           block->nb_chunks);
+    trace_rdma_add_block(local->nb_blocks, (uint64_t) block->local_host_addr,
+                         block->offset, block->length,
+                         (uint64_t) (block->local_host_addr + block->length),
+                         BITS_TO_LONGS(block->nb_chunks) *
+                             sizeof(unsigned long) * 8,
+                         block->nb_chunks);
 
     local->nb_blocks++;
 
@@ -572,7 +571,7 @@
 static void qemu_rdma_init_one_block(void *host_addr,
     ram_addr_t block_offset, ram_addr_t length, void *opaque)
 {
-    __qemu_rdma_add_block(opaque, host_addr, block_offset, length);
+    rdma_add_block(opaque, host_addr, block_offset, length);
 }
 
 /*
@@ -595,7 +594,7 @@
     return 0;
 }
 
-static int __qemu_rdma_delete_block(RDMAContext *rdma, ram_addr_t block_offset)
+static int rdma_delete_block(RDMAContext *rdma, ram_addr_t block_offset)
 {
     RDMALocalBlocks *local = &rdma->local_ram_blocks;
     RDMALocalBlock *block = g_hash_table_lookup(rdma->blockmap,
@@ -657,7 +656,7 @@
         local->block = NULL;
     }
 
-    trace___qemu_rdma_delete_block(local->nb_blocks,
+    trace_rdma_delete_block(local->nb_blocks,
                            (uint64_t)block->local_host_addr,
                            block->offset, block->length,
                            (uint64_t)(block->local_host_addr + block->length),
@@ -2187,8 +2186,7 @@
 
     if (rdma->local_ram_blocks.block) {
         while (rdma->local_ram_blocks.nb_blocks) {
-            __qemu_rdma_delete_block(rdma,
-                    rdma->local_ram_blocks.block->offset);
+            rdma_delete_block(rdma, rdma->local_ram_blocks.block->offset);
         }
     }
 
diff --git a/qemu-options.hx b/qemu-options.hx
index 85ca3ad..b0345ae 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -237,12 +237,24 @@
     "NOTE: Some architectures might enforce a specific granularity\n",
     QEMU_ARCH_ALL)
 STEXI
-@item -m [size=]@var{megs}
+@item -m [size=]@var{megs}[,slots=n,maxmem=size]
 @findex -m
-Set virtual RAM size to @var{megs} megabytes. Default is 128 MiB.  Optionally,
-a suffix of ``M'' or ``G'' can be used to signify a value in megabytes or
-gigabytes respectively. Optional pair @var{slots}, @var{maxmem} could be used
-to set amount of hotluggable memory slots and possible maximum amount of memory.
+Sets guest startup RAM size to @var{megs} megabytes. Default is 128 MiB.
+Optionally, a suffix of ``M'' or ``G'' can be used to signify a value in
+megabytes or gigabytes respectively. Optional pair @var{slots}, @var{maxmem}
+could be used to set amount of hotpluggable memory slots and maximum amount of
+memory. Note that @var{maxmem} must be aligned to the page size.
+
+For example, the following command-line sets the guest startup RAM size to
+1GB, creates 3 slots to hotplug additional memory and sets the maximum
+memory the guest can reach to 4GB:
+
+@example
+qemu-system-x86_64 -m 1G,slots=3,maxmem=4G
+@end example
+
+If @var{slots} and @var{maxmem} are not specified, memory hotplug won't
+be enabled and the guest startup RAM will never increase.
 ETEXI
 
 DEF("mem-path", HAS_ARG, QEMU_OPTION_mempath,
diff --git a/scripts/tracetool/format/d.py b/scripts/tracetool/format/d.py
index 46eebb1..c77d5b7 100644
--- a/scripts/tracetool/format/d.py
+++ b/scripts/tracetool/format/d.py
@@ -16,6 +16,19 @@
 from tracetool import out
 
 
+# Reserved keywords from
+# https://wikis.oracle.com/display/DTrace/Types,+Operators+and+Expressions
+RESERVED_WORDS = (
+    'auto', 'goto', 'sizeof', 'break', 'if', 'static', 'case', 'import',
+    'string', 'char', 'inline', 'stringof', 'const', 'int', 'struct',
+    'continue', 'long', 'switch', 'counter', 'offsetof', 'this',
+    'default', 'probe', 'translator', 'do', 'provider', 'typedef',
+    'double', 'register', 'union', 'else', 'restrict', 'unsigned',
+    'enum', 'return', 'void', 'extern', 'self', 'volatile', 'float',
+    'short', 'while', 'for', 'signed', 'xlate',
+)
+
+
 def generate(events, backend):
     events = [e for e in events
               if "disable" not in e.properties]
@@ -25,18 +38,17 @@
         'provider qemu {')
 
     for e in events:
-        args = str(e.args)
-
-        # DTrace provider syntax expects foo() for empty
-        # params, not foo(void)
-        if args == 'void':
-            args = ''
+        args = []
+        for type_, name in e.args:
+            if name in RESERVED_WORDS:
+                name += '_'
+            args.append(type_ + ' ' + name)
 
         # Define prototype for probe arguments
         out('',
             'probe %(name)s(%(args)s);',
             name=e.name,
-            args=args)
+            args=','.join(args))
 
     out('',
         '};')
diff --git a/stubs/qmp_pc_dimm_device_list.c b/stubs/qmp_pc_dimm_device_list.c
index 5cb220c..b584bd8 100644
--- a/stubs/qmp_pc_dimm_device_list.c
+++ b/stubs/qmp_pc_dimm_device_list.c
@@ -5,3 +5,8 @@
 {
    return 0;
 }
+
+ram_addr_t get_current_ram_size(void)
+{
+    return ram_size;
+}
diff --git a/target-tricore/helper.h b/target-tricore/helper.h
index 7405fee..4c82346 100644
--- a/target-tricore/helper.h
+++ b/target-tricore/helper.h
@@ -17,9 +17,12 @@
 
 /* Arithmetic */
 DEF_HELPER_3(add_ssov, i32, env, i32, i32)
+DEF_HELPER_3(add64_ssov, i64, env, i64, i64)
 DEF_HELPER_3(add_suov, i32, env, i32, i32)
 DEF_HELPER_3(add_h_ssov, i32, env, i32, i32)
 DEF_HELPER_3(add_h_suov, i32, env, i32, i32)
+DEF_HELPER_4(addr_h_ssov, i32, env, i64, i32, i32)
+DEF_HELPER_4(addsur_h_ssov, i32, env, i64, i32, i32)
 DEF_HELPER_3(sub_ssov, i32, env, i32, i32)
 DEF_HELPER_3(sub_suov, i32, env, i32, i32)
 DEF_HELPER_3(sub_h_ssov, i32, env, i32, i32)
@@ -31,6 +34,9 @@
 DEF_HELPER_4(madd32_ssov, i32, env, i32, i32, i32)
 DEF_HELPER_4(madd32_suov, i32, env, i32, i32, i32)
 DEF_HELPER_4(madd64_ssov, i64, env, i32, i64, i32)
+DEF_HELPER_5(madd64_q_ssov, i64, env, i64, i32, i32, i32)
+DEF_HELPER_3(madd32_q_add_ssov, i32, env, i64, i64)
+DEF_HELPER_5(maddr_q_ssov, i32, env, i32, i32, i32, i32)
 DEF_HELPER_4(madd64_suov, i64, env, i32, i64, i32)
 DEF_HELPER_4(msub32_ssov, i32, env, i32, i32, i32)
 DEF_HELPER_4(msub32_suov, i32, env, i32, i32, i32)
@@ -44,6 +50,9 @@
 DEF_HELPER_2(abs_h, i32, env, i32)
 DEF_HELPER_3(absdif_b, i32, env, i32, i32)
 DEF_HELPER_3(absdif_h, i32, env, i32, i32)
+DEF_HELPER_4(addr_h, i32, env, i64, i32, i32)
+DEF_HELPER_4(addsur_h, i32, env, i64, i32, i32)
+DEF_HELPER_5(maddr_q, i32, env, i32, i32, i32, i32)
 DEF_HELPER_3(add_b, i32, env, i32, i32)
 DEF_HELPER_3(add_h, i32, env, i32, i32)
 DEF_HELPER_3(sub_b, i32, env, i32, i32)
diff --git a/target-tricore/op_helper.c b/target-tricore/op_helper.c
index ed26b30..40d32af 100644
--- a/target-tricore/op_helper.c
+++ b/target-tricore/op_helper.c
@@ -184,6 +184,31 @@
     return ssov32(env, result);
 }
 
+uint64_t helper_add64_ssov(CPUTriCoreState *env, uint64_t r1, uint64_t r2)
+{
+    uint64_t result;
+    int64_t ovf;
+
+    result = r1 + r2;
+    ovf = (result ^ r1) & ~(r1 ^ r2);
+    env->PSW_USB_AV = (result ^ result * 2u) >> 32;
+    env->PSW_USB_SAV |= env->PSW_USB_AV;
+    if (ovf < 0) {
+        env->PSW_USB_V = (1 << 31);
+        env->PSW_USB_SV = (1 << 31);
+        /* ext_ret > MAX_INT */
+        if ((int64_t)r1 >= 0) {
+            result = INT64_MAX;
+        /* ext_ret < MIN_INT */
+        } else {
+            result = INT64_MIN;
+        }
+    } else {
+        env->PSW_USB_V = 0;
+    }
+    return result;
+}
+
 target_ulong helper_add_h_ssov(CPUTriCoreState *env, target_ulong r1,
                                target_ulong r2)
 {
@@ -194,6 +219,99 @@
     return ssov16(env, ret_hw0, ret_hw1);
 }
 
+uint32_t helper_addr_h_ssov(CPUTriCoreState *env, uint64_t r1, uint32_t r2_l,
+                            uint32_t r2_h)
+{
+    int64_t mul_res0 = sextract64(r1, 0, 32);
+    int64_t mul_res1 = sextract64(r1, 32, 32);
+    int64_t r2_low = sextract64(r2_l, 0, 32);
+    int64_t r2_high = sextract64(r2_h, 0, 32);
+    int64_t result0, result1;
+    uint32_t ovf0, ovf1;
+    uint32_t avf0, avf1;
+
+    ovf0 = ovf1 = 0;
+
+    result0 = r2_low + mul_res0 + 0x8000;
+    result1 = r2_high + mul_res1 + 0x8000;
+
+    avf0 = result0 * 2u;
+    avf0 = result0 ^ avf0;
+    avf1 = result1 * 2u;
+    avf1 = result1 ^ avf1;
+
+    if (result0 > INT32_MAX) {
+        ovf0 = (1 << 31);
+        result0 = INT32_MAX;
+    } else if (result0 < INT32_MIN) {
+        ovf0 = (1 << 31);
+        result0 = INT32_MIN;
+    }
+
+    if (result1 > INT32_MAX) {
+        ovf1 = (1 << 31);
+        result1 = INT32_MAX;
+    } else if (result1 < INT32_MIN) {
+        ovf1 = (1 << 31);
+        result1 = INT32_MIN;
+    }
+
+    env->PSW_USB_V = ovf0 | ovf1;
+    env->PSW_USB_SV |= env->PSW_USB_V;
+
+    env->PSW_USB_AV = avf0 | avf1;
+    env->PSW_USB_SAV |= env->PSW_USB_AV;
+
+    return (result1 & 0xffff0000ULL) | ((result0 >> 16) & 0xffffULL);
+}
+
+uint32_t helper_addsur_h_ssov(CPUTriCoreState *env, uint64_t r1, uint32_t r2_l,
+                              uint32_t r2_h)
+{
+    int64_t mul_res0 = sextract64(r1, 0, 32);
+    int64_t mul_res1 = sextract64(r1, 32, 32);
+    int64_t r2_low = sextract64(r2_l, 0, 32);
+    int64_t r2_high = sextract64(r2_h, 0, 32);
+    int64_t result0, result1;
+    uint32_t ovf0, ovf1;
+    uint32_t avf0, avf1;
+
+    ovf0 = ovf1 = 0;
+
+    result0 = r2_low - mul_res0 + 0x8000;
+    result1 = r2_high + mul_res1 + 0x8000;
+
+    avf0 = result0 * 2u;
+    avf0 = result0 ^ avf0;
+    avf1 = result1 * 2u;
+    avf1 = result1 ^ avf1;
+
+    if (result0 > INT32_MAX) {
+        ovf0 = (1 << 31);
+        result0 = INT32_MAX;
+    } else if (result0 < INT32_MIN) {
+        ovf0 = (1 << 31);
+        result0 = INT32_MIN;
+    }
+
+    if (result1 > INT32_MAX) {
+        ovf1 = (1 << 31);
+        result1 = INT32_MAX;
+    } else if (result1 < INT32_MIN) {
+        ovf1 = (1 << 31);
+        result1 = INT32_MIN;
+    }
+
+    env->PSW_USB_V = ovf0 | ovf1;
+    env->PSW_USB_SV |= env->PSW_USB_V;
+
+    env->PSW_USB_AV = avf0 | avf1;
+    env->PSW_USB_SAV |= env->PSW_USB_AV;
+
+    return (result1 & 0xffff0000ULL) | ((result0 >> 16) & 0xffffULL);
+}
+
+
 target_ulong helper_add_suov(CPUTriCoreState *env, target_ulong r1,
                              target_ulong r2)
 {
@@ -403,6 +521,131 @@
     return ret;
 }
 
+uint32_t
+helper_madd32_q_add_ssov(CPUTriCoreState *env, uint64_t r1, uint64_t r2)
+{
+    int64_t result;
+
+    result = (r1 + r2);
+
+    env->PSW_USB_AV = (result ^ result * 2u);
+    env->PSW_USB_SAV |= env->PSW_USB_AV;
+
+    /* we do the saturation by hand, since we produce an overflow on the host
+       if the mul before was (0x80000000 * 0x80000000) << 1). If this is the
+       case, we flip the saturated value. */
+    if (r2 == 0x8000000000000000LL) {
+        if (result > 0x7fffffffLL) {
+            env->PSW_USB_V = (1 << 31);
+            env->PSW_USB_SV = (1 << 31);
+            result = INT32_MIN;
+        } else if (result < -0x80000000LL) {
+            env->PSW_USB_V = (1 << 31);
+            env->PSW_USB_SV = (1 << 31);
+            result = INT32_MAX;
+        } else {
+            env->PSW_USB_V = 0;
+        }
+    } else {
+        if (result > 0x7fffffffLL) {
+            env->PSW_USB_V = (1 << 31);
+            env->PSW_USB_SV = (1 << 31);
+            result = INT32_MAX;
+        } else if (result < -0x80000000LL) {
+            env->PSW_USB_V = (1 << 31);
+            env->PSW_USB_SV = (1 << 31);
+            result = INT32_MIN;
+        } else {
+            env->PSW_USB_V = 0;
+        }
+    }
+    return (uint32_t)result;
+}
+
+uint64_t helper_madd64_q_ssov(CPUTriCoreState *env, uint64_t r1, uint32_t r2,
+                              uint32_t r3, uint32_t n)
+{
+    int64_t t1 = (int64_t)r1;
+    int64_t t2 = sextract64(r2, 0, 32);
+    int64_t t3 = sextract64(r3, 0, 32);
+    int64_t result, mul;
+    int64_t ovf;
+
+    mul = (t2 * t3) << n;
+    result = mul + t1;
+
+    env->PSW_USB_AV = (result ^ result * 2u) >> 32;
+    env->PSW_USB_SAV |= env->PSW_USB_AV;
+
+    ovf = (result ^ mul) & ~(mul ^ t1);
+    /* we do the saturation by hand, since we produce an overflow on the host
+       if the mul was (0x80000000 * 0x80000000) << 1). If this is the
+       case, we flip the saturated value. */
+    if ((r2 == 0x80000000) && (r3 == 0x80000000) && (n == 1)) {
+        if (ovf >= 0) {
+            env->PSW_USB_V = (1 << 31);
+            env->PSW_USB_SV = (1 << 31);
+            /* ext_ret > MAX_INT */
+            if (mul < 0) {
+                result = INT64_MAX;
+            /* ext_ret < MIN_INT */
+            } else {
+               result = INT64_MIN;
+            }
+        } else {
+            env->PSW_USB_V = 0;
+        }
+    } else {
+        if (ovf < 0) {
+            env->PSW_USB_V = (1 << 31);
+            env->PSW_USB_SV = (1 << 31);
+            /* ext_ret > MAX_INT */
+            if (mul >= 0) {
+                result = INT64_MAX;
+            /* ext_ret < MIN_INT */
+            } else {
+               result = INT64_MIN;
+            }
+        } else {
+            env->PSW_USB_V = 0;
+        }
+    }
+    return (uint64_t)result;
+}
+
+uint32_t helper_maddr_q_ssov(CPUTriCoreState *env, uint32_t r1, uint32_t r2,
+                             uint32_t r3, uint32_t n)
+{
+    int64_t t1 = sextract64(r1, 0, 32);
+    int64_t t2 = sextract64(r2, 0, 32);
+    int64_t t3 = sextract64(r3, 0, 32);
+    int64_t mul, ret;
+
+    if ((t2 == -0x8000ll) && (t3 == -0x8000ll) && (n == 1)) {
+        mul = 0x7fffffff;
+    } else {
+        mul = (t2 * t3) << n;
+    }
+
+    ret = t1 + mul + 0x8000;
+
+    env->PSW_USB_AV = ret ^ ret * 2u;
+    env->PSW_USB_SAV |= env->PSW_USB_AV;
+
+    if (ret > 0x7fffffffll) {
+        env->PSW_USB_V = (1 << 31);
+        env->PSW_USB_SV |= env->PSW_USB_V;
+        ret = INT32_MAX;
+    } else if (ret < -0x80000000ll) {
+        env->PSW_USB_V = (1 << 31);
+        env->PSW_USB_SV |= env->PSW_USB_V;
+        ret = INT32_MIN;
+    } else {
+        env->PSW_USB_V = 0;
+    }
+    return ret & 0xffff0000ll;
+}
+
 uint64_t helper_madd64_suov(CPUTriCoreState *env, target_ulong r1,
                             uint64_t r2, target_ulong r3)
 {
@@ -443,13 +686,28 @@
 target_ulong helper_msub32_suov(CPUTriCoreState *env, target_ulong r1,
                                 target_ulong r2, target_ulong r3)
 {
-    int64_t t1 = extract64(r1, 0, 32);
-    int64_t t2 = extract64(r2, 0, 32);
-    int64_t t3 = extract64(r3, 0, 32);
-    int64_t result;
+    uint64_t t1 = extract64(r1, 0, 32);
+    uint64_t t2 = extract64(r2, 0, 32);
+    uint64_t t3 = extract64(r3, 0, 32);
+    uint64_t result;
+    uint64_t mul;
 
-    result = t2 - (t1 * t3);
-    return suov32_neg(env, result);
+    mul = (t1 * t3);
+    result = t2 - mul;
+
+    env->PSW_USB_AV = result ^ result * 2u;
+    env->PSW_USB_SAV |= env->PSW_USB_AV;
+    /* we calculate ovf by hand here, because the multiplication can overflow on
+       the host, which would give false results if we compare to less than
+       zero */
+    if (mul > t2) {
+        env->PSW_USB_V = (1 << 31);
+        env->PSW_USB_SV = (1 << 31);
+        result = 0;
+    } else {
+        env->PSW_USB_V = 0;
+    }
+    return result;
 }
 
 uint64_t helper_msub64_ssov(CPUTriCoreState *env, target_ulong r1,
@@ -604,6 +862,110 @@
     return ret;
 }
 
+uint32_t helper_addr_h(CPUTriCoreState *env, uint64_t r1, uint32_t r2_l,
+                       uint32_t r2_h)
+{
+    int64_t mul_res0 = sextract64(r1, 0, 32);
+    int64_t mul_res1 = sextract64(r1, 32, 32);
+    int64_t r2_low = sextract64(r2_l, 0, 32);
+    int64_t r2_high = sextract64(r2_h, 0, 32);
+    int64_t result0, result1;
+    uint32_t ovf0, ovf1;
+    uint32_t avf0, avf1;
+
+    ovf0 = ovf1 = 0;
+
+    result0 = r2_low + mul_res0 + 0x8000;
+    result1 = r2_high + mul_res1 + 0x8000;
+
+    if ((result0 > INT32_MAX) || (result0 < INT32_MIN)) {
+        ovf0 = (1 << 31);
+    }
+
+    if ((result1 > INT32_MAX) || (result1 < INT32_MIN)) {
+        ovf1 = (1 << 31);
+    }
+
+    env->PSW_USB_V = ovf0 | ovf1;
+    env->PSW_USB_SV |= env->PSW_USB_V;
+
+    avf0 = result0 * 2u;
+    avf0 = result0 ^ avf0;
+    avf1 = result1 * 2u;
+    avf1 = result1 ^ avf1;
+
+    env->PSW_USB_AV = avf0 | avf1;
+    env->PSW_USB_SAV |= env->PSW_USB_AV;
+
+    return (result1 & 0xffff0000ULL) | ((result0 >> 16) & 0xffffULL);
+}
+
+uint32_t helper_addsur_h(CPUTriCoreState *env, uint64_t r1, uint32_t r2_l,
+                         uint32_t r2_h)
+{
+    int64_t mul_res0 = sextract64(r1, 0, 32);
+    int64_t mul_res1 = sextract64(r1, 32, 32);
+    int64_t r2_low = sextract64(r2_l, 0, 32);
+    int64_t r2_high = sextract64(r2_h, 0, 32);
+    int64_t result0, result1;
+    uint32_t ovf0, ovf1;
+    uint32_t avf0, avf1;
+
+    ovf0 = ovf1 = 0;
+
+    result0 = r2_low - mul_res0 + 0x8000;
+    result1 = r2_high + mul_res1 + 0x8000;
+
+    if ((result0 > INT32_MAX) || (result0 < INT32_MIN)) {
+        ovf0 = (1 << 31);
+    }
+
+    if ((result1 > INT32_MAX) || (result1 < INT32_MIN)) {
+        ovf1 = (1 << 31);
+    }
+
+    env->PSW_USB_V = ovf0 | ovf1;
+    env->PSW_USB_SV |= env->PSW_USB_V;
+
+    avf0 = result0 * 2u;
+    avf0 = result0 ^ avf0;
+    avf1 = result1 * 2u;
+    avf1 = result1 ^ avf1;
+
+    env->PSW_USB_AV = avf0 | avf1;
+    env->PSW_USB_SAV |= env->PSW_USB_AV;
+
+    return (result1 & 0xffff0000ULL) | ((result0 >> 16) & 0xffffULL);
+}
+
+uint32_t helper_maddr_q(CPUTriCoreState *env, uint32_t r1, uint32_t r2,
+                        uint32_t r3, uint32_t n)
+{
+    int64_t t1 = sextract64(r1, 0, 32);
+    int64_t t2 = sextract64(r2, 0, 32);
+    int64_t t3 = sextract64(r3, 0, 32);
+    int64_t mul, ret;
+
+    if ((t2 == -0x8000ll) && (t3 == -0x8000ll) && (n == 1)) {
+        mul = 0x7fffffff;
+    } else {
+        mul = (t2 * t3) << n;
+    }
+
+    ret = t1 + mul + 0x8000;
+
+    if ((ret > 0x7fffffffll) || (ret < -0x80000000ll)) {
+        env->PSW_USB_V = (1 << 31);
+        env->PSW_USB_SV |= env->PSW_USB_V;
+    } else {
+        env->PSW_USB_V = 0;
+    }
+    env->PSW_USB_AV = ret ^ ret * 2u;
+    env->PSW_USB_SAV |= env->PSW_USB_AV;
+
+    return ret & 0xffff0000ll;
+}
+
 uint32_t helper_add_b(CPUTriCoreState *env, target_ulong r1, target_ulong r2)
 {
     int32_t b, i;
diff --git a/target-tricore/translate.c b/target-tricore/translate.c
index 996435d..f720cd7 100644
--- a/target-tricore/translate.c
+++ b/target-tricore/translate.c
@@ -80,6 +80,13 @@
     BS_EXCP   = 3,
 };
 
+enum {
+    MODE_LL = 0,
+    MODE_LU = 1,
+    MODE_UL = 2,
+    MODE_UU = 3,
+};
+
 void tricore_cpu_dump_state(CPUState *cs, FILE *f,
                             fprintf_function cpu_fprintf, int flags)
 {
@@ -395,6 +402,88 @@
     tcg_temp_free(t0);
 }
 
+static inline void
+gen_add64_d(TCGv_i64 ret, TCGv_i64 r1, TCGv_i64 r2)
+{
+    TCGv temp = tcg_temp_new();
+    TCGv_i64 t0 = tcg_temp_new_i64();
+    TCGv_i64 t1 = tcg_temp_new_i64();
+    TCGv_i64 result = tcg_temp_new_i64();
+
+    tcg_gen_add_i64(result, r1, r2);
+    /* calc v bit */
+    tcg_gen_xor_i64(t1, result, r1);
+    tcg_gen_xor_i64(t0, r1, r2);
+    tcg_gen_andc_i64(t1, t1, t0);
+    tcg_gen_trunc_shr_i64_i32(cpu_PSW_V, t1, 32);
+    /* calc SV bit */
+    tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V);
+    /* calc AV/SAV bits */
+    tcg_gen_trunc_shr_i64_i32(temp, result, 32);
+    tcg_gen_add_tl(cpu_PSW_AV, temp, temp);
+    tcg_gen_xor_tl(cpu_PSW_AV, temp, cpu_PSW_AV);
+    /* calc SAV */
+    tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV);
+    /* write back result */
+    tcg_gen_mov_i64(ret, result);
+
+    tcg_temp_free(temp);
+    tcg_temp_free_i64(result);
+    tcg_temp_free_i64(t0);
+    tcg_temp_free_i64(t1);
+}
+
+static inline void
+gen_addsub64_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2,
+               TCGv r3, void(*op1)(TCGv, TCGv, TCGv),
+               void(*op2)(TCGv, TCGv, TCGv))
+{
+    TCGv temp = tcg_temp_new();
+    TCGv temp2 = tcg_temp_new();
+    TCGv temp3 = tcg_temp_new();
+    TCGv temp4 = tcg_temp_new();
+
+    (*op1)(temp, r1_low, r2);
+    /* calc V0 bit */
+    tcg_gen_xor_tl(temp2, temp, r1_low);
+    tcg_gen_xor_tl(temp3, r1_low, r2);
+    if (op1 == tcg_gen_add_tl) {
+        tcg_gen_andc_tl(temp2, temp2, temp3);
+    } else {
+        tcg_gen_and_tl(temp2, temp2, temp3);
+    }
+
+    (*op2)(temp3, r1_high, r3);
+    /* calc V1 bit */
+    tcg_gen_xor_tl(cpu_PSW_V, temp3, r1_high);
+    tcg_gen_xor_tl(temp4, r1_high, r3);
+    if (op2 == tcg_gen_add_tl) {
+        tcg_gen_andc_tl(cpu_PSW_V, cpu_PSW_V, temp4);
+    } else {
+        tcg_gen_and_tl(cpu_PSW_V, cpu_PSW_V, temp4);
+    }
+    /* combine V0/V1 bits */
+    tcg_gen_or_tl(cpu_PSW_V, cpu_PSW_V, temp2);
+    /* calc sv bit */
+    tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V);
+    /* write result */
+    tcg_gen_mov_tl(ret_low, temp);
+    tcg_gen_mov_tl(ret_high, temp3);
+    /* calc AV bit */
+    tcg_gen_add_tl(temp, ret_low, ret_low);
+    tcg_gen_xor_tl(temp, temp, ret_low);
+    tcg_gen_add_tl(cpu_PSW_AV, ret_high, ret_high);
+    tcg_gen_xor_tl(cpu_PSW_AV, cpu_PSW_AV, ret_high);
+    tcg_gen_or_tl(cpu_PSW_AV, cpu_PSW_AV, temp);
+    /* calc SAV bit */
+    tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV);
+
+    tcg_temp_free(temp);
+    tcg_temp_free(temp2);
+    tcg_temp_free(temp3);
+    tcg_temp_free(temp4);
+}
+
 /* ret = r2 + (r1 * r3); */
 static inline void gen_madd32_d(TCGv ret, TCGv r1, TCGv r2, TCGv r3)
 {
@@ -523,6 +612,694 @@
     tcg_temp_free(temp);
 }
 
+static inline void
+gen_madd_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2,
+           TCGv r3, uint32_t n, uint32_t mode)
+{
+    TCGv temp = tcg_const_i32(n);
+    TCGv temp2 = tcg_temp_new();
+    TCGv_i64 temp64 = tcg_temp_new_i64();
+    switch (mode) {
+    case MODE_LL:
+        GEN_HELPER_LL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_LU:
+        GEN_HELPER_LU(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UL:
+        GEN_HELPER_UL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UU:
+        GEN_HELPER_UU(mul_h, temp64, r2, r3, temp);
+        break;
+    }
+    tcg_gen_extr_i64_i32(temp, temp2, temp64);
+    gen_addsub64_h(ret_low, ret_high, r1_low, r1_high, temp, temp2,
+                   tcg_gen_add_tl, tcg_gen_add_tl);
+    tcg_temp_free(temp);
+    tcg_temp_free(temp2);
+    tcg_temp_free_i64(temp64);
+}
+
+static inline void
+gen_maddsu_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2,
+             TCGv r3, uint32_t n, uint32_t mode)
+{
+    TCGv temp = tcg_const_i32(n);
+    TCGv temp2 = tcg_temp_new();
+    TCGv_i64 temp64 = tcg_temp_new_i64();
+    switch (mode) {
+    case MODE_LL:
+        GEN_HELPER_LL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_LU:
+        GEN_HELPER_LU(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UL:
+        GEN_HELPER_UL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UU:
+        GEN_HELPER_UU(mul_h, temp64, r2, r3, temp);
+        break;
+    }
+    tcg_gen_extr_i64_i32(temp, temp2, temp64);
+    gen_addsub64_h(ret_low, ret_high, r1_low, r1_high, temp, temp2,
+                   tcg_gen_sub_tl, tcg_gen_add_tl);
+    tcg_temp_free(temp);
+    tcg_temp_free(temp2);
+    tcg_temp_free_i64(temp64);
+}
+
+static inline void
+gen_maddsum_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2,
+              TCGv r3, uint32_t n, uint32_t mode)
+{
+    TCGv temp = tcg_const_i32(n);
+    TCGv_i64 temp64 = tcg_temp_new_i64();
+    TCGv_i64 temp64_2 = tcg_temp_new_i64();
+    TCGv_i64 temp64_3 = tcg_temp_new_i64();
+    switch (mode) {
+    case MODE_LL:
+        GEN_HELPER_LL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_LU:
+        GEN_HELPER_LU(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UL:
+        GEN_HELPER_UL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UU:
+        GEN_HELPER_UU(mul_h, temp64, r2, r3, temp);
+        break;
+    }
+    tcg_gen_concat_i32_i64(temp64_3, r1_low, r1_high);
+    tcg_gen_sari_i64(temp64_2, temp64, 32); /* high */
+    tcg_gen_ext32s_i64(temp64, temp64); /* low */
+    tcg_gen_sub_i64(temp64, temp64_2, temp64);
+    tcg_gen_shli_i64(temp64, temp64, 16);
+
+    gen_add64_d(temp64_2, temp64_3, temp64);
+    /* write back result */
+    tcg_gen_extr_i64_i32(ret_low, ret_high, temp64_2);
+
+    tcg_temp_free(temp);
+    tcg_temp_free_i64(temp64);
+    tcg_temp_free_i64(temp64_2);
+    tcg_temp_free_i64(temp64_3);
+}
+
+static inline void gen_adds(TCGv ret, TCGv r1, TCGv r2);
+
+static inline void
+gen_madds_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2,
+           TCGv r3, uint32_t n, uint32_t mode)
+{
+    TCGv temp = tcg_const_i32(n);
+    TCGv temp2 = tcg_temp_new();
+    TCGv temp3 = tcg_temp_new();
+    TCGv_i64 temp64 = tcg_temp_new_i64();
+
+    switch (mode) {
+    case MODE_LL:
+        GEN_HELPER_LL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_LU:
+        GEN_HELPER_LU(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UL:
+        GEN_HELPER_UL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UU:
+        GEN_HELPER_UU(mul_h, temp64, r2, r3, temp);
+        break;
+    }
+    tcg_gen_extr_i64_i32(temp, temp2, temp64);
+    gen_adds(ret_low, r1_low, temp);
+    tcg_gen_mov_tl(temp, cpu_PSW_V);
+    tcg_gen_mov_tl(temp3, cpu_PSW_AV);
+    gen_adds(ret_high, r1_high, temp2);
+    /* combine v bits */
+    tcg_gen_or_tl(cpu_PSW_V, cpu_PSW_V, temp);
+    /* combine av bits */
+    tcg_gen_or_tl(cpu_PSW_AV, cpu_PSW_AV, temp3);
+
+    tcg_temp_free(temp);
+    tcg_temp_free(temp2);
+    tcg_temp_free(temp3);
+    tcg_temp_free_i64(temp64);
+
+}
+
+static inline void gen_subs(TCGv ret, TCGv r1, TCGv r2);
+
+static inline void
+gen_maddsus_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2,
+              TCGv r3, uint32_t n, uint32_t mode)
+{
+    TCGv temp = tcg_const_i32(n);
+    TCGv temp2 = tcg_temp_new();
+    TCGv temp3 = tcg_temp_new();
+    TCGv_i64 temp64 = tcg_temp_new_i64();
+
+    switch (mode) {
+    case MODE_LL:
+        GEN_HELPER_LL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_LU:
+        GEN_HELPER_LU(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UL:
+        GEN_HELPER_UL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UU:
+        GEN_HELPER_UU(mul_h, temp64, r2, r3, temp);
+        break;
+    }
+    tcg_gen_extr_i64_i32(temp, temp2, temp64);
+    gen_subs(ret_low, r1_low, temp);
+    tcg_gen_mov_tl(temp, cpu_PSW_V);
+    tcg_gen_mov_tl(temp3, cpu_PSW_AV);
+    gen_adds(ret_high, r1_high, temp2);
+    /* combine v bits */
+    tcg_gen_or_tl(cpu_PSW_V, cpu_PSW_V, temp);
+    /* combine av bits */
+    tcg_gen_or_tl(cpu_PSW_AV, cpu_PSW_AV, temp3);
+
+    tcg_temp_free(temp);
+    tcg_temp_free(temp2);
+    tcg_temp_free(temp3);
+    tcg_temp_free_i64(temp64);
+
+}
+
+static inline void
+gen_maddsums_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2,
+               TCGv r3, uint32_t n, uint32_t mode)
+{
+    TCGv temp = tcg_const_i32(n);
+    TCGv_i64 temp64 = tcg_temp_new_i64();
+    TCGv_i64 temp64_2 = tcg_temp_new_i64();
+
+    switch (mode) {
+    case MODE_LL:
+        GEN_HELPER_LL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_LU:
+        GEN_HELPER_LU(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UL:
+        GEN_HELPER_UL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UU:
+        GEN_HELPER_UU(mul_h, temp64, r2, r3, temp);
+        break;
+    }
+    tcg_gen_sari_i64(temp64_2, temp64, 32); /* high */
+    tcg_gen_ext32s_i64(temp64, temp64); /* low */
+    tcg_gen_sub_i64(temp64, temp64_2, temp64);
+    tcg_gen_shli_i64(temp64, temp64, 16);
+    tcg_gen_concat_i32_i64(temp64_2, r1_low, r1_high);
+
+    gen_helper_add64_ssov(temp64, cpu_env, temp64_2, temp64);
+    tcg_gen_extr_i64_i32(ret_low, ret_high, temp64);
+
+    tcg_temp_free(temp);
+    tcg_temp_free_i64(temp64);
+    tcg_temp_free_i64(temp64_2);
+}
+
+
+static inline void
+gen_maddm_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2,
+           TCGv r3, uint32_t n, uint32_t mode)
+{
+    TCGv temp = tcg_const_i32(n);
+    TCGv_i64 temp64 = tcg_temp_new_i64();
+    TCGv_i64 temp64_2 = tcg_temp_new_i64();
+    TCGv_i64 temp64_3 = tcg_temp_new_i64();
+    switch (mode) {
+    case MODE_LL:
+        GEN_HELPER_LL(mulm_h, temp64, r2, r3, temp);
+        break;
+    case MODE_LU:
+        GEN_HELPER_LU(mulm_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UL:
+        GEN_HELPER_UL(mulm_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UU:
+        GEN_HELPER_UU(mulm_h, temp64, r2, r3, temp);
+        break;
+    }
+    tcg_gen_concat_i32_i64(temp64_2, r1_low, r1_high);
+    gen_add64_d(temp64_3, temp64_2, temp64);
+    /* write back result */
+    tcg_gen_extr_i64_i32(ret_low, ret_high, temp64_3);
+
+    tcg_temp_free(temp);
+    tcg_temp_free_i64(temp64);
+    tcg_temp_free_i64(temp64_2);
+    tcg_temp_free_i64(temp64_3);
+}
+
+static inline void
+gen_maddms_h(TCGv ret_low, TCGv ret_high, TCGv r1_low, TCGv r1_high, TCGv r2,
+           TCGv r3, uint32_t n, uint32_t mode)
+{
+    TCGv temp = tcg_const_i32(n);
+    TCGv_i64 temp64 = tcg_temp_new_i64();
+    TCGv_i64 temp64_2 = tcg_temp_new_i64();
+    switch (mode) {
+    case MODE_LL:
+        GEN_HELPER_LL(mulm_h, temp64, r2, r3, temp);
+        break;
+    case MODE_LU:
+        GEN_HELPER_LU(mulm_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UL:
+        GEN_HELPER_UL(mulm_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UU:
+        GEN_HELPER_UU(mulm_h, temp64, r2, r3, temp);
+        break;
+    }
+    tcg_gen_concat_i32_i64(temp64_2, r1_low, r1_high);
+    gen_helper_add64_ssov(temp64, cpu_env, temp64_2, temp64);
+    tcg_gen_extr_i64_i32(ret_low, ret_high, temp64);
+
+    tcg_temp_free(temp);
+    tcg_temp_free_i64(temp64);
+    tcg_temp_free_i64(temp64_2);
+}
+
+static inline void
+gen_maddr64_h(TCGv ret, TCGv r1_low, TCGv r1_high, TCGv r2, TCGv r3, uint32_t n,
+              uint32_t mode)
+{
+    TCGv temp = tcg_const_i32(n);
+    TCGv_i64 temp64 = tcg_temp_new_i64();
+    switch (mode) {
+    case MODE_LL:
+        GEN_HELPER_LL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_LU:
+        GEN_HELPER_LU(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UL:
+        GEN_HELPER_UL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UU:
+        GEN_HELPER_UU(mul_h, temp64, r2, r3, temp);
+        break;
+    }
+    gen_helper_addr_h(ret, cpu_env, temp64, r1_low, r1_high);
+
+    tcg_temp_free(temp);
+    tcg_temp_free_i64(temp64);
+}
+
+static inline void
+gen_maddr32_h(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n, uint32_t mode)
+{
+    TCGv temp = tcg_temp_new();
+    TCGv temp2 = tcg_temp_new();
+
+    tcg_gen_andi_tl(temp2, r1, 0xffff0000);
+    tcg_gen_shli_tl(temp, r1, 16);
+    gen_maddr64_h(ret, temp, temp2, r2, r3, n, mode);
+
+    tcg_temp_free(temp);
+    tcg_temp_free(temp2);
+}
+
+static inline void
+gen_maddsur32_h(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n, uint32_t mode)
+{
+    TCGv temp = tcg_const_i32(n);
+    TCGv temp2 = tcg_temp_new();
+    TCGv_i64 temp64 = tcg_temp_new_i64();
+    switch (mode) {
+    case MODE_LL:
+        GEN_HELPER_LL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_LU:
+        GEN_HELPER_LU(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UL:
+        GEN_HELPER_UL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UU:
+        GEN_HELPER_UU(mul_h, temp64, r2, r3, temp);
+        break;
+    }
+    tcg_gen_andi_tl(temp2, r1, 0xffff0000);
+    tcg_gen_shli_tl(temp, r1, 16);
+    gen_helper_addsur_h(ret, cpu_env, temp64, temp, temp2);
+
+    tcg_temp_free(temp);
+    tcg_temp_free(temp2);
+    tcg_temp_free_i64(temp64);
+}
+
+
+static inline void
+gen_maddr64s_h(TCGv ret, TCGv r1_low, TCGv r1_high, TCGv r2, TCGv r3,
+               uint32_t n, uint32_t mode)
+{
+    TCGv temp = tcg_const_i32(n);
+    TCGv_i64 temp64 = tcg_temp_new_i64();
+    switch (mode) {
+    case MODE_LL:
+        GEN_HELPER_LL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_LU:
+        GEN_HELPER_LU(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UL:
+        GEN_HELPER_UL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UU:
+        GEN_HELPER_UU(mul_h, temp64, r2, r3, temp);
+        break;
+    }
+    gen_helper_addr_h_ssov(ret, cpu_env, temp64, r1_low, r1_high);
+
+    tcg_temp_free(temp);
+    tcg_temp_free_i64(temp64);
+}
+
+static inline void
+gen_maddr32s_h(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n, uint32_t mode)
+{
+    TCGv temp = tcg_temp_new();
+    TCGv temp2 = tcg_temp_new();
+
+    tcg_gen_andi_tl(temp2, r1, 0xffff0000);
+    tcg_gen_shli_tl(temp, r1, 16);
+    gen_maddr64s_h(ret, temp, temp2, r2, r3, n, mode);
+
+    tcg_temp_free(temp);
+    tcg_temp_free(temp2);
+}
+
+static inline void
+gen_maddsur32s_h(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n, uint32_t mode)
+{
+    TCGv temp = tcg_const_i32(n);
+    TCGv temp2 = tcg_temp_new();
+    TCGv_i64 temp64 = tcg_temp_new_i64();
+    switch (mode) {
+    case MODE_LL:
+        GEN_HELPER_LL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_LU:
+        GEN_HELPER_LU(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UL:
+        GEN_HELPER_UL(mul_h, temp64, r2, r3, temp);
+        break;
+    case MODE_UU:
+        GEN_HELPER_UU(mul_h, temp64, r2, r3, temp);
+        break;
+    }
+    tcg_gen_andi_tl(temp2, r1, 0xffff0000);
+    tcg_gen_shli_tl(temp, r1, 16);
+    gen_helper_addsur_h_ssov(ret, cpu_env, temp64, temp, temp2);
+
+    tcg_temp_free(temp);
+    tcg_temp_free(temp2);
+    tcg_temp_free_i64(temp64);
+}
+
+static inline void
+gen_maddr_q(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n)
+{
+    TCGv temp = tcg_const_i32(n);
+    gen_helper_maddr_q(ret, cpu_env, r1, r2, r3, temp);
+    tcg_temp_free(temp);
+}
+
+static inline void
+gen_maddrs_q(TCGv ret, TCGv r1, TCGv r2, TCGv r3, uint32_t n)
+{
+    TCGv temp = tcg_const_i32(n);
+    gen_helper_maddr_q_ssov(ret, cpu_env, r1, r2, r3, temp);
+    tcg_temp_free(temp);
+}
+
+static inline void
+gen_madd32_q(TCGv ret, TCGv arg1, TCGv arg2, TCGv arg3, uint32_t n,
+             uint32_t up_shift, CPUTriCoreState *env)
+{
+    TCGv temp = tcg_temp_new();
+    TCGv temp2 = tcg_temp_new();
+    TCGv temp3 = tcg_temp_new();
+    TCGv_i64 t1 = tcg_temp_new_i64();
+    TCGv_i64 t2 = tcg_temp_new_i64();
+    TCGv_i64 t3 = tcg_temp_new_i64();
+
+    tcg_gen_ext_i32_i64(t2, arg2);
+    tcg_gen_ext_i32_i64(t3, arg3);
+
+    tcg_gen_mul_i64(t2, t2, t3);
+    tcg_gen_shli_i64(t2, t2, n);
+
+    tcg_gen_ext_i32_i64(t1, arg1);
+    tcg_gen_sari_i64(t2, t2, up_shift);
+
+    tcg_gen_add_i64(t3, t1, t2);
+    tcg_gen_trunc_i64_i32(temp3, t3);
+    /* calc v bit */
+    tcg_gen_setcondi_i64(TCG_COND_GT, t1, t3, 0x7fffffffLL);
+    tcg_gen_setcondi_i64(TCG_COND_LT, t2, t3, -0x80000000LL);
+    tcg_gen_or_i64(t1, t1, t2);
+    tcg_gen_trunc_i64_i32(cpu_PSW_V, t1);
+    tcg_gen_shli_tl(cpu_PSW_V, cpu_PSW_V, 31);
+    /* We produce an overflow on the host if the mul before was
+       (0x80000000 * 0x80000000) << 1). If this is the
+       case, we negate the ovf. */
+    if (n == 1) {
+        tcg_gen_setcondi_tl(TCG_COND_EQ, temp, arg2, 0x80000000);
+        tcg_gen_setcond_tl(TCG_COND_EQ, temp2, arg2, arg3);
+        tcg_gen_and_tl(temp, temp, temp2);
+        tcg_gen_shli_tl(temp, temp, 31);
+        /* negate v bit, if special condition */
+        tcg_gen_xor_tl(cpu_PSW_V, cpu_PSW_V, temp);
+    }
+    /* Calc SV bit */
+    tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V);
+    /* Calc AV/SAV bits */
+    tcg_gen_add_tl(cpu_PSW_AV, temp3, temp3);
+    tcg_gen_xor_tl(cpu_PSW_AV, temp3, cpu_PSW_AV);
+    /* calc SAV */
+    tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV);
+    /* write back result */
+    tcg_gen_mov_tl(ret, temp3);
+
+    tcg_temp_free(temp);
+    tcg_temp_free(temp2);
+    tcg_temp_free(temp3);
+    tcg_temp_free_i64(t1);
+    tcg_temp_free_i64(t2);
+    tcg_temp_free_i64(t3);
+}
+
+static inline void
+gen_m16add32_q(TCGv ret, TCGv arg1, TCGv arg2, TCGv arg3, uint32_t n)
+{
+    TCGv temp = tcg_temp_new();
+    TCGv temp2 = tcg_temp_new();
+    if (n == 0) {
+        tcg_gen_mul_tl(temp, arg2, arg3);
+    } else { /* n is exspected to be 1 */
+        tcg_gen_mul_tl(temp, arg2, arg3);
+        tcg_gen_shli_tl(temp, temp, 1);
+        /* catch special case r1 = r2 = 0x8000 */
+        tcg_gen_setcondi_tl(TCG_COND_EQ, temp2, temp, 0x80000000);
+        tcg_gen_sub_tl(temp, temp, temp2);
+    }
+    gen_add_d(ret, arg1, temp);
+
+    tcg_temp_free(temp);
+    tcg_temp_free(temp2);
+}
+
+static inline void
+gen_m16adds32_q(TCGv ret, TCGv arg1, TCGv arg2, TCGv arg3, uint32_t n)
+{
+    TCGv temp = tcg_temp_new();
+    TCGv temp2 = tcg_temp_new();
+    if (n == 0) {
+        tcg_gen_mul_tl(temp, arg2, arg3);
+    } else { /* n is exspected to be 1 */
+        tcg_gen_mul_tl(temp, arg2, arg3);
+        tcg_gen_shli_tl(temp, temp, 1);
+        /* catch special case r1 = r2 = 0x8000 */
+        tcg_gen_setcondi_tl(TCG_COND_EQ, temp2, temp, 0x80000000);
+        tcg_gen_sub_tl(temp, temp, temp2);
+    }
+    gen_adds(ret, arg1, temp);
+
+    tcg_temp_free(temp);
+    tcg_temp_free(temp2);
+}
+
+static inline void
+gen_m16add64_q(TCGv rl, TCGv rh, TCGv arg1_low, TCGv arg1_high, TCGv arg2,
+               TCGv arg3, uint32_t n)
+{
+    TCGv temp = tcg_temp_new();
+    TCGv temp2 = tcg_temp_new();
+    TCGv_i64 t1 = tcg_temp_new_i64();
+    TCGv_i64 t2 = tcg_temp_new_i64();
+    TCGv_i64 t3 = tcg_temp_new_i64();
+
+    if (n == 0) {
+        tcg_gen_mul_tl(temp, arg2, arg3);
+    } else { /* n is exspected to be 1 */
+        tcg_gen_mul_tl(temp, arg2, arg3);
+        tcg_gen_shli_tl(temp, temp, 1);
+        /* catch special case r1 = r2 = 0x8000 */
+        tcg_gen_setcondi_tl(TCG_COND_EQ, temp2, temp, 0x80000000);
+        tcg_gen_sub_tl(temp, temp, temp2);
+    }
+    tcg_gen_ext_i32_i64(t2, temp);
+    tcg_gen_shli_i64(t2, t2, 16);
+    tcg_gen_concat_i32_i64(t1, arg1_low, arg1_high);
+    gen_add64_d(t3, t1, t2);
+    /* write back result */
+    tcg_gen_extr_i64_i32(rl, rh, t3);
+
+    tcg_temp_free_i64(t1);
+    tcg_temp_free_i64(t2);
+    tcg_temp_free_i64(t3);
+    tcg_temp_free(temp);
+    tcg_temp_free(temp2);
+}
+
+static inline void
+gen_m16adds64_q(TCGv rl, TCGv rh, TCGv arg1_low, TCGv arg1_high, TCGv arg2,
+               TCGv arg3, uint32_t n)
+{
+    TCGv temp = tcg_temp_new();
+    TCGv temp2 = tcg_temp_new();
+    TCGv_i64 t1 = tcg_temp_new_i64();
+    TCGv_i64 t2 = tcg_temp_new_i64();
+
+    if (n == 0) {
+        tcg_gen_mul_tl(temp, arg2, arg3);
+    } else { /* n is exspected to be 1 */
+        tcg_gen_mul_tl(temp, arg2, arg3);
+        tcg_gen_shli_tl(temp, temp, 1);
+        /* catch special case r1 = r2 = 0x8000 */
+        tcg_gen_setcondi_tl(TCG_COND_EQ, temp2, temp, 0x80000000);
+        tcg_gen_sub_tl(temp, temp, temp2);
+    }
+    tcg_gen_ext_i32_i64(t2, temp);
+    tcg_gen_shli_i64(t2, t2, 16);
+    tcg_gen_concat_i32_i64(t1, arg1_low, arg1_high);
+
+    gen_helper_add64_ssov(t1, cpu_env, t1, t2);
+    tcg_gen_extr_i64_i32(rl, rh, t1);
+
+    tcg_temp_free(temp);
+    tcg_temp_free(temp2);
+    tcg_temp_free_i64(t1);
+    tcg_temp_free_i64(t2);
+}
+
+static inline void
+gen_madd64_q(TCGv rl, TCGv rh, TCGv arg1_low, TCGv arg1_high, TCGv arg2,
+             TCGv arg3, uint32_t n, CPUTriCoreState *env)
+{
+    TCGv_i64 t1 = tcg_temp_new_i64();
+    TCGv_i64 t2 = tcg_temp_new_i64();
+    TCGv_i64 t3 = tcg_temp_new_i64();
+    TCGv_i64 t4 = tcg_temp_new_i64();
+    TCGv temp, temp2;
+
+    tcg_gen_concat_i32_i64(t1, arg1_low, arg1_high);
+    tcg_gen_ext_i32_i64(t2, arg2);
+    tcg_gen_ext_i32_i64(t3, arg3);
+
+    tcg_gen_mul_i64(t2, t2, t3);
+    if (n != 0) {
+        tcg_gen_shli_i64(t2, t2, 1);
+    }
+    tcg_gen_add_i64(t4, t1, t2);
+    /* calc v bit */
+    tcg_gen_xor_i64(t3, t4, t1);
+    tcg_gen_xor_i64(t2, t1, t2);
+    tcg_gen_andc_i64(t3, t3, t2);
+    tcg_gen_trunc_shr_i64_i32(cpu_PSW_V, t3, 32);
+    /* We produce an overflow on the host if the mul before was
+       (0x80000000 * 0x80000000) << 1). If this is the
+       case, we negate the ovf. */
+    if (n == 1) {
+        temp = tcg_temp_new();
+        temp2 = tcg_temp_new();
+        tcg_gen_setcondi_tl(TCG_COND_EQ, temp, arg2, 0x80000000);
+        tcg_gen_setcond_tl(TCG_COND_EQ, temp2, arg2, arg3);
+        tcg_gen_and_tl(temp, temp, temp2);
+        tcg_gen_shli_tl(temp, temp, 31);
+        /* negate v bit, if special condition */
+        tcg_gen_xor_tl(cpu_PSW_V, cpu_PSW_V, temp);
+
+        tcg_temp_free(temp);
+        tcg_temp_free(temp2);
+    }
+    /* write back result */
+    tcg_gen_extr_i64_i32(rl, rh, t4);
+    /* Calc SV bit */
+    tcg_gen_or_tl(cpu_PSW_SV, cpu_PSW_SV, cpu_PSW_V);
+    /* Calc AV/SAV bits */
+    tcg_gen_add_tl(cpu_PSW_AV, rh, rh);
+    tcg_gen_xor_tl(cpu_PSW_AV, rh, cpu_PSW_AV);
+    /* calc SAV */
+    tcg_gen_or_tl(cpu_PSW_SAV, cpu_PSW_SAV, cpu_PSW_AV);
+
+    tcg_temp_free_i64(t1);
+    tcg_temp_free_i64(t2);
+    tcg_temp_free_i64(t3);
+    tcg_temp_free_i64(t4);
+}
+
+static inline void
+gen_madds32_q(TCGv ret, TCGv arg1, TCGv arg2, TCGv arg3, uint32_t n,
+              uint32_t up_shift)
+{
+    TCGv_i64 t1 = tcg_temp_new_i64();
+    TCGv_i64 t2 = tcg_temp_new_i64();
+    TCGv_i64 t3 = tcg_temp_new_i64();
+
+    tcg_gen_ext_i32_i64(t1, arg1);
+    tcg_gen_ext_i32_i64(t2, arg2);
+    tcg_gen_ext_i32_i64(t3, arg3);
+
+    tcg_gen_mul_i64(t2, t2, t3);
+    tcg_gen_sari_i64(t2, t2, up_shift - n);
+
+    gen_helper_madd32_q_add_ssov(ret, cpu_env, t1, t2);
+
+    tcg_temp_free_i64(t1);
+    tcg_temp_free_i64(t2);
+    tcg_temp_free_i64(t3);
+}
+
+static inline void
+gen_madds64_q(TCGv rl, TCGv rh, TCGv arg1_low, TCGv arg1_high, TCGv arg2,
+             TCGv arg3, uint32_t n)
+{
+    TCGv_i64 r1 = tcg_temp_new_i64();
+    TCGv temp = tcg_const_i32(n);
+
+    tcg_gen_concat_i32_i64(r1, arg1_low, arg1_high);
+    gen_helper_madd64_q_ssov(r1, cpu_env, r1, arg2, arg3, temp);
+    tcg_gen_extr_i64_i32(rl, rh, r1);
+
+    tcg_temp_free_i64(r1);
+    tcg_temp_free(temp);
+}
 /* ret = r2 - (r1 * r3); */
 static inline void gen_msub32_d(TCGv ret, TCGv r1, TCGv r2, TCGv r3)
 {
@@ -1152,15 +1929,33 @@
 }
 
 static inline void
+gen_madds_64(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high,
+             TCGv r3)
+{
+    TCGv_i64 temp64 = tcg_temp_new_i64();
+    tcg_gen_concat_i32_i64(temp64, r2_low, r2_high);
+    gen_helper_madd64_ssov(temp64, cpu_env, r1, temp64, r3);
+    tcg_gen_extr_i64_i32(ret_low, ret_high, temp64);
+    tcg_temp_free_i64(temp64);
+}
+
+static inline void
 gen_maddsi_64(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high,
               int32_t con)
 {
     TCGv temp = tcg_const_i32(con);
+    gen_madds_64(ret_low, ret_high, r1, r2_low, r2_high, temp);
+    tcg_temp_free(temp);
+}
+
+static inline void
+gen_maddsu_64(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high,
+             TCGv r3)
+{
     TCGv_i64 temp64 = tcg_temp_new_i64();
     tcg_gen_concat_i32_i64(temp64, r2_low, r2_high);
-    gen_helper_madd64_ssov(temp64, cpu_env, r1, temp64, temp);
+    gen_helper_madd64_suov(temp64, cpu_env, r1, temp64, r3);
     tcg_gen_extr_i64_i32(ret_low, ret_high, temp64);
-    tcg_temp_free(temp);
     tcg_temp_free_i64(temp64);
 }
 
@@ -1169,12 +1964,8 @@
                int32_t con)
 {
     TCGv temp = tcg_const_i32(con);
-    TCGv_i64 temp64 = tcg_temp_new_i64();
-    tcg_gen_concat_i32_i64(temp64, r2_low, r2_high);
-    gen_helper_madd64_suov(temp64, cpu_env, r1, temp64, temp);
-    tcg_gen_extr_i64_i32(ret_low, ret_high, temp64);
+    gen_maddsu_64(ret_low, ret_high, r1, r2_low, r2_high, temp);
     tcg_temp_free(temp);
-    tcg_temp_free_i64(temp64);
 }
 
 static inline void gen_msubsi_32(TCGv ret, TCGv r1, TCGv r2, int32_t con)
@@ -1192,15 +1983,33 @@
 }
 
 static inline void
+gen_msubs_64(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high,
+             TCGv r3)
+{
+    TCGv_i64 temp64 = tcg_temp_new_i64();
+    tcg_gen_concat_i32_i64(temp64, r2_low, r2_high);
+    gen_helper_msub64_ssov(temp64, cpu_env, r1, temp64, r3);
+    tcg_gen_extr_i64_i32(ret_low, ret_high, temp64);
+    tcg_temp_free_i64(temp64);
+}
+
+static inline void
 gen_msubsi_64(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high,
               int32_t con)
 {
     TCGv temp = tcg_const_i32(con);
+    gen_msubs_64(ret_low, ret_high, r1, r2_low, r2_high, temp);
+    tcg_temp_free(temp);
+}
+
+static inline void
+gen_msubsu_64(TCGv ret_low, TCGv ret_high, TCGv r1, TCGv r2_low, TCGv r2_high,
+             TCGv r3)
+{
     TCGv_i64 temp64 = tcg_temp_new_i64();
     tcg_gen_concat_i32_i64(temp64, r2_low, r2_high);
-    gen_helper_msub64_ssov(temp64, cpu_env, r1, temp64, temp);
+    gen_helper_msub64_suov(temp64, cpu_env, r1, temp64, r3);
     tcg_gen_extr_i64_i32(ret_low, ret_high, temp64);
-    tcg_temp_free(temp);
     tcg_temp_free_i64(temp64);
 }
 
@@ -1209,12 +2018,8 @@
                int32_t con)
 {
     TCGv temp = tcg_const_i32(con);
-    TCGv_i64 temp64 = tcg_temp_new_i64();
-    tcg_gen_concat_i32_i64(temp64, r2_low, r2_high);
-    gen_helper_msub64_suov(temp64, cpu_env, r1, temp64, temp);
-    tcg_gen_extr_i64_i32(ret_low, ret_high, temp64);
+    gen_msubsu_64(ret_low, ret_high, r1, r2_low, r2_high, temp);
     tcg_temp_free(temp);
-    tcg_temp_free_i64(temp64);
 }
 
 static void gen_saturate(TCGv ret, TCGv arg, int32_t up, int32_t low)
@@ -4183,10 +4988,10 @@
 
     switch (op1) {
     case OPC1_32_RLC_ADDI:
-        gen_addi_CC(cpu_gpr_d[r2], cpu_gpr_d[r1], const16);
+        gen_addi_d(cpu_gpr_d[r2], cpu_gpr_d[r1], const16);
         break;
     case OPC1_32_RLC_ADDIH:
-        gen_addi_CC(cpu_gpr_d[r2], cpu_gpr_d[r1], const16 << 16);
+        gen_addi_d(cpu_gpr_d[r2], cpu_gpr_d[r1], const16 << 16);
         break;
     case OPC1_32_RLC_ADDIH_A:
         tcg_gen_addi_tl(cpu_gpr_a[r2], cpu_gpr_a[r1], const16 << 16);
@@ -5186,6 +5991,485 @@
     }
 }
 
+/* RRR2 format */
+static void decode_rrr2_madd(CPUTriCoreState *env, DisasContext *ctx)
+{
+    uint32_t op2;
+    uint32_t r1, r2, r3, r4;
+
+    op2 = MASK_OP_RRR2_OP2(ctx->opcode);
+    r1 = MASK_OP_RRR2_S1(ctx->opcode);
+    r2 = MASK_OP_RRR2_S2(ctx->opcode);
+    r3 = MASK_OP_RRR2_S3(ctx->opcode);
+    r4 = MASK_OP_RRR2_D(ctx->opcode);
+    switch (op2) {
+    case OPC2_32_RRR2_MADD_32:
+        gen_madd32_d(cpu_gpr_d[r4], cpu_gpr_d[r1], cpu_gpr_d[r3],
+                     cpu_gpr_d[r2]);
+        break;
+    case OPC2_32_RRR2_MADD_64:
+        gen_madd64_d(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r1],
+                     cpu_gpr_d[r3], cpu_gpr_d[r3+1], cpu_gpr_d[r2]);
+        break;
+    case OPC2_32_RRR2_MADDS_32:
+        gen_helper_madd32_ssov(cpu_gpr_d[r4], cpu_env, cpu_gpr_d[r1],
+                               cpu_gpr_d[r3], cpu_gpr_d[r2]);
+        break;
+    case OPC2_32_RRR2_MADDS_64:
+        gen_madds_64(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r1],
+                     cpu_gpr_d[r3], cpu_gpr_d[r3+1], cpu_gpr_d[r2]);
+        break;
+    case OPC2_32_RRR2_MADD_U_64:
+        gen_maddu64_d(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r1],
+                      cpu_gpr_d[r3], cpu_gpr_d[r3+1], cpu_gpr_d[r2]);
+        break;
+    case OPC2_32_RRR2_MADDS_U_32:
+        gen_helper_madd32_suov(cpu_gpr_d[r4], cpu_env, cpu_gpr_d[r1],
+                               cpu_gpr_d[r3], cpu_gpr_d[r2]);
+        break;
+    case OPC2_32_RRR2_MADDS_U_64:
+        gen_maddsu_64(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r1],
+                      cpu_gpr_d[r3], cpu_gpr_d[r3+1], cpu_gpr_d[r2]);
+        break;
+    }
+}
+
+static void decode_rrr2_msub(CPUTriCoreState *env, DisasContext *ctx)
+{
+    uint32_t op2;
+    uint32_t r1, r2, r3, r4;
+
+    op2 = MASK_OP_RRR2_OP2(ctx->opcode);
+    r1 = MASK_OP_RRR2_S1(ctx->opcode);
+    r2 = MASK_OP_RRR2_S2(ctx->opcode);
+    r3 = MASK_OP_RRR2_S3(ctx->opcode);
+    r4 = MASK_OP_RRR2_D(ctx->opcode);
+
+    switch (op2) {
+    case OPC2_32_RRR2_MSUB_32:
+        gen_msub32_d(cpu_gpr_d[r4], cpu_gpr_d[r1], cpu_gpr_d[r3],
+                      cpu_gpr_d[r2]);
+        break;
+    case OPC2_32_RRR2_MSUB_64:
+        gen_msub64_d(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r1],
+                     cpu_gpr_d[r3], cpu_gpr_d[r3+1], cpu_gpr_d[r2]);
+        break;
+    case OPC2_32_RRR2_MSUBS_32:
+        gen_helper_msub32_ssov(cpu_gpr_d[r4], cpu_env, cpu_gpr_d[r1],
+                               cpu_gpr_d[r3], cpu_gpr_d[r2]);
+        break;
+    case OPC2_32_RRR2_MSUBS_64:
+        gen_msubs_64(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r1],
+                     cpu_gpr_d[r3], cpu_gpr_d[r3+1], cpu_gpr_d[r2]);
+        break;
+    case OPC2_32_RRR2_MSUB_U_64:
+        gen_msubu64_d(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r1],
+                      cpu_gpr_d[r3], cpu_gpr_d[r3+1], cpu_gpr_d[r2]);
+        break;
+    case OPC2_32_RRR2_MSUBS_U_32:
+        gen_helper_msub32_suov(cpu_gpr_d[r4], cpu_env, cpu_gpr_d[r1],
+                               cpu_gpr_d[r3], cpu_gpr_d[r2]);
+        break;
+    case OPC2_32_RRR2_MSUBS_U_64:
+        gen_msubsu_64(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r1],
+                      cpu_gpr_d[r3], cpu_gpr_d[r3+1], cpu_gpr_d[r2]);
+        break;
+    }
+}
+
+/* RRR1 format */
+static void decode_rrr1_madd(CPUTriCoreState *env, DisasContext *ctx)
+{
+    uint32_t op2;
+    uint32_t r1, r2, r3, r4, n;
+
+    op2 = MASK_OP_RRR1_OP2(ctx->opcode);
+    r1 = MASK_OP_RRR1_S1(ctx->opcode);
+    r2 = MASK_OP_RRR1_S2(ctx->opcode);
+    r3 = MASK_OP_RRR1_S3(ctx->opcode);
+    r4 = MASK_OP_RRR1_D(ctx->opcode);
+    n = MASK_OP_RRR1_N(ctx->opcode);
+
+    switch (op2) {
+    case OPC2_32_RRR1_MADD_H_LL:
+        gen_madd_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                   cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LL);
+        break;
+    case OPC2_32_RRR1_MADD_H_LU:
+        gen_madd_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                   cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LU);
+        break;
+    case OPC2_32_RRR1_MADD_H_UL:
+        gen_madd_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                   cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UL);
+        break;
+    case OPC2_32_RRR1_MADD_H_UU:
+        gen_madd_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                   cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UU);
+        break;
+    case OPC2_32_RRR1_MADDS_H_LL:
+        gen_madds_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                    cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LL);
+        break;
+    case OPC2_32_RRR1_MADDS_H_LU:
+        gen_madds_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                    cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LU);
+        break;
+    case OPC2_32_RRR1_MADDS_H_UL:
+        gen_madds_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                    cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UL);
+        break;
+    case OPC2_32_RRR1_MADDS_H_UU:
+        gen_madds_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                    cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UU);
+        break;
+    case OPC2_32_RRR1_MADDM_H_LL:
+        gen_maddm_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                    cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LL);
+        break;
+    case OPC2_32_RRR1_MADDM_H_LU:
+        gen_maddm_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                    cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LU);
+        break;
+    case OPC2_32_RRR1_MADDM_H_UL:
+        gen_maddm_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                    cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UL);
+        break;
+    case OPC2_32_RRR1_MADDM_H_UU:
+        gen_maddm_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                    cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UU);
+        break;
+    case OPC2_32_RRR1_MADDMS_H_LL:
+        gen_maddms_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                     cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LL);
+        break;
+    case OPC2_32_RRR1_MADDMS_H_LU:
+        gen_maddms_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                     cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LU);
+        break;
+    case OPC2_32_RRR1_MADDMS_H_UL:
+        gen_maddms_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                     cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UL);
+        break;
+    case OPC2_32_RRR1_MADDMS_H_UU:
+        gen_maddms_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                     cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UU);
+        break;
+    case OPC2_32_RRR1_MADDR_H_LL:
+        gen_maddr32_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                      cpu_gpr_d[r2], n, MODE_LL);
+        break;
+    case OPC2_32_RRR1_MADDR_H_LU:
+        gen_maddr32_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                      cpu_gpr_d[r2], n, MODE_LU);
+        break;
+    case OPC2_32_RRR1_MADDR_H_UL:
+        gen_maddr32_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                      cpu_gpr_d[r2], n, MODE_UL);
+        break;
+    case OPC2_32_RRR1_MADDR_H_UU:
+        gen_maddr32_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                      cpu_gpr_d[r2], n, MODE_UU);
+        break;
+    case OPC2_32_RRR1_MADDRS_H_LL:
+        gen_maddr32s_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                       cpu_gpr_d[r2], n, MODE_LL);
+        break;
+    case OPC2_32_RRR1_MADDRS_H_LU:
+        gen_maddr32s_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                       cpu_gpr_d[r2], n, MODE_LU);
+        break;
+    case OPC2_32_RRR1_MADDRS_H_UL:
+        gen_maddr32s_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                       cpu_gpr_d[r2], n, MODE_UL);
+        break;
+    case OPC2_32_RRR1_MADDRS_H_UU:
+        gen_maddr32s_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                       cpu_gpr_d[r2], n, MODE_UU);
+        break;
+    }
+}
+
+static void decode_rrr1_maddq_h(CPUTriCoreState *env, DisasContext *ctx)
+{
+    uint32_t op2;
+    uint32_t r1, r2, r3, r4, n;
+    TCGv temp, temp2;
+
+    op2 = MASK_OP_RRR1_OP2(ctx->opcode);
+    r1 = MASK_OP_RRR1_S1(ctx->opcode);
+    r2 = MASK_OP_RRR1_S2(ctx->opcode);
+    r3 = MASK_OP_RRR1_S3(ctx->opcode);
+    r4 = MASK_OP_RRR1_D(ctx->opcode);
+    n = MASK_OP_RRR1_N(ctx->opcode);
+
+    temp = tcg_const_i32(n);
+    temp2 = tcg_temp_new();
+
+    switch (op2) {
+    case OPC2_32_RRR1_MADD_Q_32:
+        gen_madd32_q(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                     cpu_gpr_d[r2], n, 32, env);
+        break;
+    case OPC2_32_RRR1_MADD_Q_64:
+        gen_madd64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                     cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2],
+                     n, env);
+        break;
+    case OPC2_32_RRR1_MADD_Q_32_L:
+        tcg_gen_ext16s_tl(temp, cpu_gpr_d[r2]);
+        gen_madd32_q(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                     temp, n, 16, env);
+        break;
+    case OPC2_32_RRR1_MADD_Q_64_L:
+        tcg_gen_ext16s_tl(temp, cpu_gpr_d[r2]);
+        gen_madd64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                     cpu_gpr_d[r3+1], cpu_gpr_d[r1], temp,
+                     n, env);
+        break;
+    case OPC2_32_RRR1_MADD_Q_32_U:
+        tcg_gen_sari_tl(temp, cpu_gpr_d[r2], 16);
+        gen_madd32_q(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                     temp, n, 16, env);
+        break;
+    case OPC2_32_RRR1_MADD_Q_64_U:
+        tcg_gen_sari_tl(temp, cpu_gpr_d[r2], 16);
+        gen_madd64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                     cpu_gpr_d[r3+1], cpu_gpr_d[r1], temp,
+                     n, env);
+        break;
+    case OPC2_32_RRR1_MADD_Q_32_LL:
+        tcg_gen_ext16s_tl(temp, cpu_gpr_d[r1]);
+        tcg_gen_ext16s_tl(temp2, cpu_gpr_d[r2]);
+        gen_m16add32_q(cpu_gpr_d[r4], cpu_gpr_d[r3], temp, temp2, n);
+        break;
+    case OPC2_32_RRR1_MADD_Q_64_LL:
+        tcg_gen_ext16s_tl(temp, cpu_gpr_d[r1]);
+        tcg_gen_ext16s_tl(temp2, cpu_gpr_d[r2]);
+        gen_m16add64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                       cpu_gpr_d[r3+1], temp, temp2, n);
+        break;
+    case OPC2_32_RRR1_MADD_Q_32_UU:
+        tcg_gen_sari_tl(temp, cpu_gpr_d[r1], 16);
+        tcg_gen_sari_tl(temp2, cpu_gpr_d[r2], 16);
+        gen_m16add32_q(cpu_gpr_d[r4], cpu_gpr_d[r3], temp, temp2, n);
+        break;
+    case OPC2_32_RRR1_MADD_Q_64_UU:
+        tcg_gen_sari_tl(temp, cpu_gpr_d[r1], 16);
+        tcg_gen_sari_tl(temp2, cpu_gpr_d[r2], 16);
+        gen_m16add64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                       cpu_gpr_d[r3+1], temp, temp2, n);
+        break;
+    case OPC2_32_RRR1_MADDS_Q_32:
+        gen_madds32_q(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                      cpu_gpr_d[r2], n, 32);
+        break;
+    case OPC2_32_RRR1_MADDS_Q_64:
+        gen_madds64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                      cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2],
+                      n);
+        break;
+    case OPC2_32_RRR1_MADDS_Q_32_L:
+        tcg_gen_ext16s_tl(temp, cpu_gpr_d[r2]);
+        gen_madds32_q(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                      temp, n, 16);
+        break;
+    case OPC2_32_RRR1_MADDS_Q_64_L:
+        tcg_gen_ext16s_tl(temp, cpu_gpr_d[r2]);
+        gen_madds64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                      cpu_gpr_d[r3+1], cpu_gpr_d[r1], temp,
+                      n);
+        break;
+    case OPC2_32_RRR1_MADDS_Q_32_U:
+        tcg_gen_sari_tl(temp, cpu_gpr_d[r2], 16);
+        gen_madds32_q(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                      temp, n, 16);
+        break;
+    case OPC2_32_RRR1_MADDS_Q_64_U:
+        tcg_gen_sari_tl(temp, cpu_gpr_d[r2], 16);
+        gen_madds64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                      cpu_gpr_d[r3+1], cpu_gpr_d[r1], temp,
+                      n);
+        break;
+    case OPC2_32_RRR1_MADDS_Q_32_LL:
+        tcg_gen_ext16s_tl(temp, cpu_gpr_d[r1]);
+        tcg_gen_ext16s_tl(temp2, cpu_gpr_d[r2]);
+        gen_m16adds32_q(cpu_gpr_d[r4], cpu_gpr_d[r3], temp, temp2, n);
+        break;
+    case OPC2_32_RRR1_MADDS_Q_64_LL:
+        tcg_gen_ext16s_tl(temp, cpu_gpr_d[r1]);
+        tcg_gen_ext16s_tl(temp2, cpu_gpr_d[r2]);
+        gen_m16adds64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                        cpu_gpr_d[r3+1], temp, temp2, n);
+        break;
+    case OPC2_32_RRR1_MADDS_Q_32_UU:
+        tcg_gen_sari_tl(temp, cpu_gpr_d[r1], 16);
+        tcg_gen_sari_tl(temp2, cpu_gpr_d[r2], 16);
+        gen_m16adds32_q(cpu_gpr_d[r4], cpu_gpr_d[r3], temp, temp2, n);
+        break;
+    case OPC2_32_RRR1_MADDS_Q_64_UU:
+        tcg_gen_sari_tl(temp, cpu_gpr_d[r1], 16);
+        tcg_gen_sari_tl(temp2, cpu_gpr_d[r2], 16);
+        gen_m16adds64_q(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                        cpu_gpr_d[r3+1], temp, temp2, n);
+        break;
+    case OPC2_32_RRR1_MADDR_H_64_UL:
+        gen_maddr64_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r3+1],
+                      cpu_gpr_d[r1], cpu_gpr_d[r2], n, 2);
+        break;
+    case OPC2_32_RRR1_MADDRS_H_64_UL:
+        gen_maddr64s_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r3+1],
+                       cpu_gpr_d[r1], cpu_gpr_d[r2], n, 2);
+        break;
+    case OPC2_32_RRR1_MADDR_Q_32_LL:
+        tcg_gen_ext16s_tl(temp, cpu_gpr_d[r1]);
+        tcg_gen_ext16s_tl(temp2, cpu_gpr_d[r2]);
+        gen_maddr_q(cpu_gpr_d[r4], cpu_gpr_d[r3], temp, temp2, n);
+        break;
+    case OPC2_32_RRR1_MADDR_Q_32_UU:
+        tcg_gen_sari_tl(temp, cpu_gpr_d[r1], 16);
+        tcg_gen_sari_tl(temp2, cpu_gpr_d[r2], 16);
+        gen_maddr_q(cpu_gpr_d[r4], cpu_gpr_d[r3], temp, temp2, n);
+        break;
+    case OPC2_32_RRR1_MADDRS_Q_32_LL:
+        tcg_gen_ext16s_tl(temp, cpu_gpr_d[r1]);
+        tcg_gen_ext16s_tl(temp2, cpu_gpr_d[r2]);
+        gen_maddrs_q(cpu_gpr_d[r4], cpu_gpr_d[r3], temp, temp2, n);
+        break;
+    case OPC2_32_RRR1_MADDRS_Q_32_UU:
+        tcg_gen_sari_tl(temp, cpu_gpr_d[r1], 16);
+        tcg_gen_sari_tl(temp2, cpu_gpr_d[r2], 16);
+        gen_maddrs_q(cpu_gpr_d[r4], cpu_gpr_d[r3], temp, temp2, n);
+        break;
+    }
+    tcg_temp_free(temp);
+    tcg_temp_free(temp2);
+}
+
+static void decode_rrr1_maddsu_h(CPUTriCoreState *env, DisasContext *ctx)
+{
+    uint32_t op2;
+    uint32_t r1, r2, r3, r4, n;
+
+    op2 = MASK_OP_RRR1_OP2(ctx->opcode);
+    r1 = MASK_OP_RRR1_S1(ctx->opcode);
+    r2 = MASK_OP_RRR1_S2(ctx->opcode);
+    r3 = MASK_OP_RRR1_S3(ctx->opcode);
+    r4 = MASK_OP_RRR1_D(ctx->opcode);
+    n = MASK_OP_RRR1_N(ctx->opcode);
+
+    switch (op2) {
+    case OPC2_32_RRR1_MADDSU_H_32_LL:
+        gen_maddsu_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                     cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LL);
+        break;
+    case OPC2_32_RRR1_MADDSU_H_32_LU:
+        gen_maddsu_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                     cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_LU);
+        break;
+    case OPC2_32_RRR1_MADDSU_H_32_UL:
+        gen_maddsu_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                     cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UL);
+        break;
+    case OPC2_32_RRR1_MADDSU_H_32_UU:
+        gen_maddsu_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                     cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2], n, MODE_UU);
+        break;
+    case OPC2_32_RRR1_MADDSUS_H_32_LL:
+        gen_maddsus_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                      cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2],
+                      n, MODE_LL);
+        break;
+    case OPC2_32_RRR1_MADDSUS_H_32_LU:
+        gen_maddsus_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                      cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2],
+                      n, MODE_LU);
+        break;
+    case OPC2_32_RRR1_MADDSUS_H_32_UL:
+        gen_maddsus_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                      cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2],
+                      n, MODE_UL);
+        break;
+    case OPC2_32_RRR1_MADDSUS_H_32_UU:
+        gen_maddsus_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                      cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2],
+                      n, MODE_UU);
+        break;
+    case OPC2_32_RRR1_MADDSUM_H_64_LL:
+        gen_maddsum_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                      cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2],
+                      n, MODE_LL);
+        break;
+    case OPC2_32_RRR1_MADDSUM_H_64_LU:
+        gen_maddsum_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                      cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2],
+                      n, MODE_LU);
+        break;
+    case OPC2_32_RRR1_MADDSUM_H_64_UL:
+        gen_maddsum_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                      cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2],
+                      n, MODE_UL);
+        break;
+    case OPC2_32_RRR1_MADDSUM_H_64_UU:
+        gen_maddsum_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                      cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2],
+                      n, MODE_UU);
+        break;
+    case OPC2_32_RRR1_MADDSUMS_H_64_LL:
+        gen_maddsums_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                       cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2],
+                       n, MODE_LL);
+        break;
+    case OPC2_32_RRR1_MADDSUMS_H_64_LU:
+        gen_maddsums_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                       cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2],
+                       n, MODE_LU);
+        break;
+    case OPC2_32_RRR1_MADDSUMS_H_64_UL:
+        gen_maddsums_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                       cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2],
+                       n, MODE_UL);
+        break;
+    case OPC2_32_RRR1_MADDSUMS_H_64_UU:
+        gen_maddsums_h(cpu_gpr_d[r4], cpu_gpr_d[r4+1], cpu_gpr_d[r3],
+                       cpu_gpr_d[r3+1], cpu_gpr_d[r1], cpu_gpr_d[r2],
+                       n, MODE_UU);
+        break;
+    case OPC2_32_RRR1_MADDSUR_H_16_LL:
+        gen_maddsur32_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                        cpu_gpr_d[r2], n, MODE_LL);
+        break;
+    case OPC2_32_RRR1_MADDSUR_H_16_LU:
+        gen_maddsur32_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                        cpu_gpr_d[r2], n, MODE_LU);
+        break;
+    case OPC2_32_RRR1_MADDSUR_H_16_UL:
+        gen_maddsur32_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                        cpu_gpr_d[r2], n, MODE_UL);
+        break;
+    case OPC2_32_RRR1_MADDSUR_H_16_UU:
+        gen_maddsur32_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                        cpu_gpr_d[r2], n, MODE_UU);
+        break;
+    case OPC2_32_RRR1_MADDSURS_H_16_LL:
+        gen_maddsur32s_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                         cpu_gpr_d[r2], n, MODE_LL);
+        break;
+    case OPC2_32_RRR1_MADDSURS_H_16_LU:
+        gen_maddsur32s_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                         cpu_gpr_d[r2], n, MODE_LU);
+        break;
+    case OPC2_32_RRR1_MADDSURS_H_16_UL:
+        gen_maddsur32s_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                         cpu_gpr_d[r2], n, MODE_UL);
+        break;
+    case OPC2_32_RRR1_MADDSURS_H_16_UU:
+        gen_maddsur32s_h(cpu_gpr_d[r4], cpu_gpr_d[r3], cpu_gpr_d[r1],
+                         cpu_gpr_d[r2], n, MODE_UU);
+        break;
+    }
+}
+
 static void decode_32Bit_opc(CPUTriCoreState *env, DisasContext *ctx)
 {
     int op1;
@@ -5475,6 +6759,23 @@
         break;
     case OPCM_32_RRR_DIVIDE:
         decode_rrr_divide(env, ctx);
+/* RRR2 Format */
+    case OPCM_32_RRR2_MADD:
+        decode_rrr2_madd(env, ctx);
+        break;
+    case OPCM_32_RRR2_MSUB:
+        decode_rrr2_msub(env, ctx);
+        break;
+/* RRR1 format */
+    case OPCM_32_RRR1_MADD:
+        decode_rrr1_madd(env, ctx);
+        break;
+    case OPCM_32_RRR1_MADDQ_H:
+        decode_rrr1_maddq_h(env, ctx);
+        break;
+    case OPCM_32_RRR1_MADDSU_H:
+        decode_rrr1_maddsu_h(env, ctx);
+        break;
     }
 }
 
diff --git a/target-tricore/tricore-opcodes.h b/target-tricore/tricore-opcodes.h
index baf537f..41c9ef6 100644
--- a/target-tricore/tricore-opcodes.h
+++ b/target-tricore/tricore-opcodes.h
@@ -1245,10 +1245,10 @@
     OPC2_32_RRR1_MADDS_Q_64_LL                   = 0x3d,
     OPC2_32_RRR1_MADDS_Q_32_UU                   = 0x24,
     OPC2_32_RRR1_MADDS_Q_64_UU                   = 0x3c,
-    OPC2_32_RRR1_MADDR_H_16_UL                   = 0x1e,
-    OPC2_32_RRR1_MADDRS_H_16_UL                  = 0x3e,
-    OPC2_32_RRR1_MADDR_Q_32_L                    = 0x07,
-    OPC2_32_RRR1_MADDR_Q_32_U                    = 0x06,
+    OPC2_32_RRR1_MADDR_H_64_UL                   = 0x1e,
+    OPC2_32_RRR1_MADDRS_H_64_UL                  = 0x3e,
+    OPC2_32_RRR1_MADDR_Q_32_LL                   = 0x07,
+    OPC2_32_RRR1_MADDR_Q_32_UU                   = 0x06,
     OPC2_32_RRR1_MADDRS_Q_32_LL                  = 0x27,
     OPC2_32_RRR1_MADDRS_Q_32_UU                  = 0x26,
 };
@@ -1371,7 +1371,7 @@
     OPC2_32_RRR2_MADD_64                         = 0x6a,
     OPC2_32_RRR2_MADDS_32                        = 0x8a,
     OPC2_32_RRR2_MADDS_64                        = 0xea,
-    OPC2_32_RRR2_MADD_U_32                       = 0x68,
+    OPC2_32_RRR2_MADD_U_64                       = 0x68,
     OPC2_32_RRR2_MADDS_U_32                      = 0x88,
     OPC2_32_RRR2_MADDS_U_64                      = 0xe8,
 };
diff --git a/trace-events b/trace-events
index f87b077..4ac588c 100644
--- a/trace-events
+++ b/trace-events
@@ -143,6 +143,10 @@
 # balloon.c
 # Since requests are raised via monitor, not many tracepoints are needed.
 balloon_event(void *opaque, unsigned long addr) "opaque %p addr %lu"
+virtio_balloon_handle_output(const char *name, uint64_t gpa) "setion name: %s gpa: %"PRIx64""
+virtio_balloon_get_config(uint32_t num_pages, uint32_t acutal) "num_pages: %d acutal: %d"
+virtio_balloon_set_config(uint32_t acutal, uint32_t oldacutal) "acutal: %d oldacutal: %d"
+virtio_balloon_to_target(uint64_t target, uint32_t num_pages) "balloon target: %"PRIx64" num_pages: %d"
 
 # hw/intc/apic_common.c
 cpu_set_apic_base(uint64_t val) "%016"PRIx64
@@ -1378,8 +1382,6 @@
 migrate_transferred(uint64_t tranferred, uint64_t time_spent, double bandwidth, uint64_t size) "transferred %" PRIu64 " time_spent %" PRIu64 " bandwidth %g max_size %" PRId64
 
 # migration/rdma.c
-__qemu_rdma_add_block(int block, uint64_t addr, uint64_t offset, uint64_t len, uint64_t end, uint64_t bits, int chunks) "Added Block: %d, addr: %" PRIu64 ", offset: %" PRIu64 " length: %" PRIu64 " end: %" PRIu64 " bits %" PRIu64 " chunks %d"
-__qemu_rdma_delete_block(int block, uint64_t addr, uint64_t offset, uint64_t len, uint64_t end, uint64_t bits, int chunks) "Deleted Block: %d, addr: %" PRIu64 ", offset: %" PRIu64 " length: %" PRIu64 " end: %" PRIu64 " bits %" PRIu64 " chunks %d"
 qemu_dma_accept_incoming_migration(void) ""
 qemu_dma_accept_incoming_migration_accepted(void) ""
 qemu_rdma_accept_pin_state(bool pin) "%d"
@@ -1433,6 +1435,8 @@
 qemu_rdma_write_one_sendreg(uint64_t chunk, int len, int index, int64_t offset) "Sending registration request chunk %" PRIu64 " for %d bytes, index: %d, offset: %" PRId64
 qemu_rdma_write_one_top(uint64_t chunks, uint64_t size) "Writing %" PRIu64 " chunks, (%" PRIu64 " MB)"
 qemu_rdma_write_one_zero(uint64_t chunk, int len, int index, int64_t offset) "Entire chunk is zero, sending compress: %" PRIu64 " for %d bytes, index: %d, offset: %" PRId64
+rdma_add_block(int block, uint64_t addr, uint64_t offset, uint64_t len, uint64_t end, uint64_t bits, int chunks) "Added Block: %d, addr: %" PRIu64 ", offset: %" PRIu64 " length: %" PRIu64 " end: %" PRIu64 " bits %" PRIu64 " chunks %d"
+rdma_delete_block(int block, uint64_t addr, uint64_t offset, uint64_t len, uint64_t end, uint64_t bits, int chunks) "Deleted Block: %d, addr: %" PRIu64 ", offset: %" PRIu64 " length: %" PRIu64 " end: %" PRIu64 " bits %" PRIu64 " chunks %d"
 rdma_start_incoming_migration(void) ""
 rdma_start_incoming_migration_after_dest_init(void) ""
 rdma_start_incoming_migration_after_rdma_listen(void) ""
diff --git a/util/cutils.c b/util/cutils.c
index dbe7412..c2250d1 100644
--- a/util/cutils.c
+++ b/util/cutils.c
@@ -483,6 +483,20 @@
     return value;
 }
 
+/* round up to the nearest power of 2 (0 if overflow) */
+uint64_t pow2ceil(uint64_t value)
+{
+    uint8_t nlz = clz64(value);
+
+    if (is_power_of_2(value)) {
+        return value;
+    }
+    if (!nlz) {
+        return 0;
+    }
+    return 1ULL << (64 - nlz);
+}
+
 /*
  * Implementation of  ULEB128 (http://en.wikipedia.org/wiki/LEB128)
  * Input is limited to 14-bit numbers
diff --git a/vl.c b/vl.c
index 801d487..4063c8e 100644
--- a/vl.c
+++ b/vl.c
@@ -2733,7 +2733,8 @@
     int snapshot, linux_boot;
     const char *initrd_filename;
     const char *kernel_filename, *kernel_cmdline;
-    const char *boot_order;
+    const char *boot_order = NULL;
+    const char *boot_once = NULL;
     DisplayState *ds;
     int cyls, heads, secs, translation;
     QemuOpts *hda_opts = NULL, *opts, *machine_opts, *icount_opts = NULL;
@@ -4048,39 +4049,36 @@
     kernel_cmdline = qemu_opt_get(machine_opts, "append");
     bios_name = qemu_opt_get(machine_opts, "firmware");
 
-    boot_order = machine_class->default_boot_order;
     opts = qemu_opts_find(qemu_find_opts("boot-opts"), NULL);
     if (opts) {
-        char *normal_boot_order;
-        const char *order, *once;
         Error *local_err = NULL;
 
-        order = qemu_opt_get(opts, "order");
-        if (order) {
-            validate_bootdevices(order, &local_err);
+        boot_order = qemu_opt_get(opts, "order");
+        if (boot_order) {
+            validate_bootdevices(boot_order, &local_err);
             if (local_err) {
                 error_report_err(local_err);
                 exit(1);
             }
-            boot_order = order;
         }
 
-        once = qemu_opt_get(opts, "once");
-        if (once) {
-            validate_bootdevices(once, &local_err);
+        boot_once = qemu_opt_get(opts, "once");
+        if (boot_once) {
+            validate_bootdevices(boot_once, &local_err);
             if (local_err) {
                 error_report_err(local_err);
                 exit(1);
             }
-            normal_boot_order = g_strdup(boot_order);
-            boot_order = once;
-            qemu_register_reset(restore_boot_order, normal_boot_order);
         }
 
         boot_menu = qemu_opt_get_bool(opts, "menu", boot_menu);
         boot_strict = qemu_opt_get_bool(opts, "strict", false);
     }
 
+    if (!boot_order) {
+        boot_order = machine_class->default_boot_order;
+    }
+
     if (!kernel_cmdline) {
         kernel_cmdline = "";
         current_machine->kernel_cmdline = (char *)kernel_cmdline;
@@ -4244,6 +4242,16 @@
 
     net_check_clients();
 
+    if (boot_once) {
+        Error *local_err = NULL;
+        qemu_boot_set(boot_once, &local_err);
+        if (local_err) {
+            error_report("%s", error_get_pretty(local_err));
+            exit(1);
+        }
+        qemu_register_reset(restore_boot_order, g_strdup(boot_order));
+    }
+
     ds = init_displaystate();
 
     /* init local displays */