Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging

* scsi-disk: Don't silently truncate serial number
* backends/hostmem: Report error on unavailable qemu_madvise() features or unaligned memory sizes
* target/i386: fixes and documentation for INHIBIT_IRQ/TF/RF and debugging
* i386/hvf: Adds support for INVTSC cpuid bit
* i386/hvf: Fixes for dirty memory tracking
* i386/hvf: Use hv_vcpu_interrupt() and hv_vcpu_run_until()
* hvf: Cleanups
* stubs: fixes for --disable-system build
* i386/kvm: support for FRED
* i386/kvm: fix MCE handling on AMD hosts

# -----BEGIN PGP SIGNATURE-----
#
# iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmZkF2oUHHBib256aW5p
# QHJlZGhhdC5jb20ACgkQv/vSX3jHroPNlQf+N9y6Eh0nMEEQ69twtV8ytglTY+uX
# FsogvnsXHNMVubOWmmeItM6kFXTAkR9cmFaL8dqI1Gs03xEQdQXbF1KejJZOAZVl
# RQMOW8Fg2Afr+0lwqCXHvhsmZ4hr5yUkRndyucA/E9AO2uGrtgwsWGDBGaHJOZIA
# lAsEMOZgKjXHZnefXjhMrvpk/QNovjEV6f1RHX3oKZjKSI5/G4IqGSmwNYToot8p
# 2fgs4Qti4+1gNyM2oBLq7cCMjMS61tSxOMH4uqVoIisjyckPlAFRvc+DXtKsUAAs
# 9AgM++pNgpB0IXv67czRUNdRoK7OI8I0ULhI4qHXi6Yg2QYAHqpQ6WL4Lg==
# =RP7U
# -----END PGP SIGNATURE-----
# gpg: Signature made Sat 08 Jun 2024 01:33:46 AM PDT
# gpg:                using RSA key F13338574B662389866C7682BFFBD25F78C7AE83
# gpg:                issuer "pbonzini@redhat.com"
# gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>" [full]
# gpg:                 aka "Paolo Bonzini <pbonzini@redhat.com>" [full]

* tag 'for-upstream' of https://gitlab.com/bonzini/qemu: (42 commits)
  python: mkvenv: remove ensure command
  Revert "python: use vendored tomli"
  i386: Add support for overflow recovery
  i386: Add support for SUCCOR feature
  i386: Fix MCE support for AMD hosts
  docs: i386: pc: Avoid mentioning limit of maximum vCPUs
  target/i386: Add get/set/migrate support for FRED MSRs
  target/i386: enumerate VMX nested-exception support
  vmxcap: add support for VMX FRED controls
  target/i386: mark CR4.FRED not reserved
  target/i386: add support for FRED in CPUID enumeration
  hvf: Makes assert_hvf_ok report failed expression
  i386/hvf: Updates API usage to use modern vCPU run function
  i386/hvf: In kick_vcpu use hv_vcpu_interrupt to force exit
  i386/hvf: Fixes dirty memory tracking by page granularity RX->RWX change
  hvf: Consistent types for vCPU handles
  i386/hvf: Fixes some compilation warnings
  i386/hvf: Adds support for INVTSC cpuid bit
  stubs/meson: Fix qemuutil build when --disable-system
  scsi-disk: Don't silently truncate serial number
  ...

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
diff --git a/.gitlab-ci.d/buildtest.yml b/.gitlab-ci.d/buildtest.yml
index 91c57ef..0eec570 100644
--- a/.gitlab-ci.d/buildtest.yml
+++ b/.gitlab-ci.d/buildtest.yml
@@ -432,6 +432,7 @@
     IMAGE: fedora
     CONFIGURE_ARGS: --cc=clang --cxx=clang++
       --extra-cflags=-fsanitize=undefined --extra-cflags=-fno-sanitize-recover=undefined
+      --extra-cflags=-fno-sanitize=function
     TARGETS: alpha-softmmu arm-softmmu m68k-softmmu mips64-softmmu s390x-softmmu
     MAKE_CHECK_ARGS: check-qtest check-tcg
 
diff --git a/.gitlab-ci.d/custom-runners.yml b/.gitlab-ci.d/custom-runners.yml
index 29e52df..1aa3c60 100644
--- a/.gitlab-ci.d/custom-runners.yml
+++ b/.gitlab-ci.d/custom-runners.yml
@@ -32,4 +32,3 @@
   - local: '/.gitlab-ci.d/custom-runners/ubuntu-22.04-s390x.yml'
   - local: '/.gitlab-ci.d/custom-runners/ubuntu-22.04-aarch64.yml'
   - local: '/.gitlab-ci.d/custom-runners/ubuntu-22.04-aarch32.yml'
-  - local: '/.gitlab-ci.d/custom-runners/centos-stream-8-x86_64.yml'
diff --git a/.gitlab-ci.d/custom-runners/centos-stream-8-x86_64.yml b/.gitlab-ci.d/custom-runners/centos-stream-8-x86_64.yml
deleted file mode 100644
index 367424d..0000000
--- a/.gitlab-ci.d/custom-runners/centos-stream-8-x86_64.yml
+++ /dev/null
@@ -1,24 +0,0 @@
-# All centos-stream-8 jobs should run successfully in an environment
-# setup by the scripts/ci/setup/stream/8/build-environment.yml task
-# "Installation of extra packages to build QEMU"
-
-centos-stream-8-x86_64:
- extends: .custom_runner_template
- allow_failure: true
- needs: []
- stage: build
- tags:
- - centos_stream_8
- - x86_64
- rules:
- - if: '$CI_PROJECT_NAMESPACE == "qemu-project" && $CI_COMMIT_BRANCH =~ /^staging/'
- - if: "$CENTOS_STREAM_8_x86_64_RUNNER_AVAILABLE"
- before_script:
- - JOBS=$(expr $(nproc) + 1)
- script:
- - mkdir build
- - cd build
- - ../scripts/ci/org.centos/stream/8/x86_64/configure
-   || { cat config.log meson-logs/meson-log.txt; exit 1; }
- - make -j"$JOBS"
- - make NINJA=":" check check-avocado
diff --git a/.gitlab-ci.d/custom-runners/ubuntu-22.04-aarch32.yml b/.gitlab-ci.d/custom-runners/ubuntu-22.04-aarch32.yml
index b8a0d751..8727687 100644
--- a/.gitlab-ci.d/custom-runners/ubuntu-22.04-aarch32.yml
+++ b/.gitlab-ci.d/custom-runners/ubuntu-22.04-aarch32.yml
@@ -1,5 +1,5 @@
 # All ubuntu-22.04 jobs should run successfully in an environment
-# setup by the scripts/ci/setup/qemu/build-environment.yml task
+# setup by the scripts/ci/setup/ubuntu/build-environment.yml task
 # "Install basic packages to build QEMU on Ubuntu 22.04"
 
 ubuntu-22.04-aarch32-all:
diff --git a/.gitlab-ci.d/custom-runners/ubuntu-22.04-aarch64.yml b/.gitlab-ci.d/custom-runners/ubuntu-22.04-aarch64.yml
index 374b095..263a3c2 100644
--- a/.gitlab-ci.d/custom-runners/ubuntu-22.04-aarch64.yml
+++ b/.gitlab-ci.d/custom-runners/ubuntu-22.04-aarch64.yml
@@ -1,5 +1,5 @@
 # All ubuntu-22.04 jobs should run successfully in an environment
-# setup by the scripts/ci/setup/qemu/build-environment.yml task
+# setup by the scripts/ci/setup/ubuntu/build-environment.yml task
 # "Install basic packages to build QEMU on Ubuntu 22.04"
 
 ubuntu-22.04-aarch64-all-linux-static:
diff --git a/.gitlab-ci.d/custom-runners/ubuntu-22.04-s390x.yml b/.gitlab-ci.d/custom-runners/ubuntu-22.04-s390x.yml
index 2593504..69ddd3e 100644
--- a/.gitlab-ci.d/custom-runners/ubuntu-22.04-s390x.yml
+++ b/.gitlab-ci.d/custom-runners/ubuntu-22.04-s390x.yml
@@ -1,5 +1,5 @@
 # All ubuntu-22.04 jobs should run successfully in an environment
-# setup by the scripts/ci/setup/build-environment.yml task
+# setup by the scripts/ci/setup/ubuntu/build-environment.yml task
 # "Install basic packages to build QEMU on Ubuntu 22.04"
 
 ubuntu-22.04-s390x-all-linux:
diff --git a/disas/microblaze.c b/disas/microblaze.c
index 49a4c0f..197327f 100644
--- a/disas/microblaze.c
+++ b/disas/microblaze.c
@@ -563,10 +563,7 @@
 };
 
 /* prefix for register names */
-static const char register_prefix[] = "r";
-static const char fsl_register_prefix[] = "rfsl";
-static const char pvr_register_prefix[] = "rpvr";
-
+#define register_prefix "r"
 
 /* #defines for valid immediate range */
 #define MIN_IMM  ((int) 0x80000000)
@@ -579,156 +576,64 @@
 
 #include "disas/dis-asm.h"
 
-#define get_field_rd(instr) get_field(instr, RD_MASK, RD_LOW)
-#define get_field_r1(instr) get_field(instr, RA_MASK, RA_LOW)
-#define get_field_r2(instr) get_field(instr, RB_MASK, RB_LOW)
+#define PRIreg    register_prefix "%ld"
+#define PRIrfsl   register_prefix "fsl%ld"
+#define PRIpvr    register_prefix "pvr%d"
+#define PRIimm    "%d"
+
+#define get_field_rd(instr)      ((instr & RD_MASK) >> RD_LOW)
+#define get_field_r1(instr)      ((instr & RA_MASK) >> RA_LOW)
+#define get_field_r2(instr)      ((instr & RB_MASK) >> RB_LOW)
+#define get_field_rfsl(instr)    (instr & RFSL_MASK)
+#define get_field_imm(instr)     ((int16_t)instr)
+#define get_field_imm5(instr)    ((int)instr & IMM5_MASK)
+#define get_field_imm15(instr)   ((int)instr & IMM15_MASK)
+
 #define get_int_field_imm(instr) ((instr & IMM_MASK) >> IMM_LOW)
 #define get_int_field_r1(instr) ((instr & RA_MASK) >> RA_LOW)
 
-/* Local function prototypes. */
-
-static char * get_field (long instr, long mask, unsigned short low);
-static char * get_field_imm (long instr);
-static char * get_field_imm5 (long instr);
-static char * get_field_rfsl (long instr);
-static char * get_field_imm15 (long instr);
-#if 0
-static char * get_field_unsigned_imm (long instr);
-#endif
-
-static char *
-get_field (long instr, long mask, unsigned short low)
+static int get_field_special(long instr, const struct op_code_struct *op)
 {
-  char tmpstr[25];
-  snprintf(tmpstr, sizeof(tmpstr), "%s%d", register_prefix,
-           (int)((instr & mask) >> low));
-  return(strdup(tmpstr));
+    return ((instr & IMM_MASK) >> IMM_LOW) ^ op->immval_mask;
 }
 
-static char *
-get_field_imm (long instr)
+/* Returns NULL for PVR registers, which should be rendered differently. */
+static const char *get_special_name(int special)
 {
-  char tmpstr[25];
-  snprintf(tmpstr, sizeof(tmpstr), "%d",
-           (short)((instr & IMM_MASK) >> IMM_LOW));
-  return(strdup(tmpstr));
-}
-
-static char *
-get_field_imm5 (long instr)
-{
-  char tmpstr[25];
-  snprintf(tmpstr, sizeof(tmpstr), "%d",
-           (short)((instr & IMM5_MASK) >> IMM_LOW));
-  return(strdup(tmpstr));
-}
-
-static char *
-get_field_rfsl (long instr)
-{
-  char tmpstr[25];
-  snprintf(tmpstr, sizeof(tmpstr), "%s%d", fsl_register_prefix,
-           (short)((instr & RFSL_MASK) >> IMM_LOW));
-  return(strdup(tmpstr));
-}
-
-static char *
-get_field_imm15 (long instr)
-{
-  char tmpstr[25];
-  snprintf(tmpstr, sizeof(tmpstr), "%d",
-           (short)((instr & IMM15_MASK) >> IMM_LOW));
-  return(strdup(tmpstr));
-}
-
-#if 0
-static char *
-get_field_unsigned_imm (long instr)
-{
-  char tmpstr[25];
-  snprintf(tmpstr, sizeof(tmpstr), "%d",
-           (int)((instr & IMM_MASK) >> IMM_LOW));
-  return(strdup(tmpstr));
-}
-#endif
-
-/*
-  char *
-  get_field_special (instr) 
-  long instr;
-  {
-  char tmpstr[25];
-  
-  snprintf(tmpstr, sizeof(tmpstr), "%s%s", register_prefix,
-          (((instr & IMM_MASK) >> IMM_LOW) & REG_MSR_MASK) == 0 ? "pc" : "msr");
-  
-  return(strdup(tmpstr));
-  }
-*/
-
-static char *
-get_field_special(long instr, const struct op_code_struct *op)
-{
-   char tmpstr[25];
-   char spr[6];
-
-   switch ( (((instr & IMM_MASK) >> IMM_LOW) ^ op->immval_mask) ) {
-
-   case REG_MSR_MASK :
-      strcpy(spr, "msr");
-      break;
-   case REG_PC_MASK :
-      strcpy(spr, "pc");
-      break;
-   case REG_EAR_MASK :
-      strcpy(spr, "ear");
-      break;
-   case REG_ESR_MASK :
-      strcpy(spr, "esr");
-      break;
-   case REG_FSR_MASK :
-      strcpy(spr, "fsr");
-      break;
-   case REG_BTR_MASK :
-      strcpy(spr, "btr");
-      break;      
-   case REG_EDR_MASK :
-      strcpy(spr, "edr");
-      break;
-   case REG_PID_MASK :
-      strcpy(spr, "pid");
-      break;
-   case REG_ZPR_MASK :
-      strcpy(spr, "zpr");
-      break;
-   case REG_TLBX_MASK :
-      strcpy(spr, "tlbx");
-      break;
-   case REG_TLBLO_MASK :
-      strcpy(spr, "tlblo");
-      break;
-   case REG_TLBHI_MASK :
-      strcpy(spr, "tlbhi");
-      break;
-   case REG_TLBSX_MASK :
-      strcpy(spr, "tlbsx");
-      break;
-   default :
-     {
-       if ( ((((instr & IMM_MASK) >> IMM_LOW) ^ op->immval_mask) & 0xE000) == REG_PVR_MASK) {
-          snprintf(tmpstr, sizeof(tmpstr), "%s%u", pvr_register_prefix,
-                 (unsigned short)(((instr & IMM_MASK) >> IMM_LOW) ^
-                                  op->immval_mask) ^ REG_PVR_MASK);
-	 return(strdup(tmpstr));
-       } else {
-	 strcpy(spr, "pc");
-       }
-     }
-     break;
-   }
-   
-   snprintf(tmpstr, sizeof(tmpstr), "%s%s", register_prefix, spr);
-   return(strdup(tmpstr));
+    switch (special) {
+    case REG_MSR_MASK:
+        return register_prefix "msr";
+    case REG_PC_MASK:
+        return register_prefix "pc";
+    case REG_EAR_MASK:
+        return register_prefix "ear";
+    case REG_ESR_MASK:
+        return register_prefix "esr";
+    case REG_FSR_MASK:
+        return register_prefix "fsr";
+    case REG_BTR_MASK:
+        return register_prefix "btr";
+    case REG_EDR_MASK:
+        return register_prefix "edr";
+    case REG_PID_MASK:
+        return register_prefix "pid";
+    case REG_ZPR_MASK:
+        return register_prefix "zpr";
+    case REG_TLBX_MASK:
+        return register_prefix "tlbx";
+    case REG_TLBLO_MASK:
+        return register_prefix "tlblo";
+    case REG_TLBHI_MASK:
+        return register_prefix "tlbhi";
+    case REG_TLBSX_MASK:
+        return register_prefix "tlbsx";
+    default:
+        if ((special & 0xE000) == REG_PVR_MASK) {
+            /* pvr register */
+            return NULL;
+        }
+        return register_prefix "pc";
+    }
 }
 
 static unsigned long
@@ -767,185 +672,189 @@
   return inst;
 }
 
+static void print_immval_addr(struct disassemble_info *info, bool immfound,
+                              int immval, unsigned inst, int addend)
+{
+    if (info->print_address_func && info->symbol_at_address_func) {
+        if (immfound) {
+            immval |= get_int_field_imm(inst) & 0x0000ffff;
+        } else {
+            immval = (int16_t)get_int_field_imm(inst);
+        }
+        immval += addend;
+        if (immval != 0 && info->symbol_at_address_func(immval, info)) {
+            info->fprintf_func(info->stream, "\t// ");
+            info->print_address_func (immval, info);
+        } else if (addend) {
+            info->fprintf_func(info->stream, "\t// %x", immval);
+        }
+    }
+}
 
 int 
-print_insn_microblaze (bfd_vma memaddr, struct disassemble_info * info)
+print_insn_microblaze(bfd_vma memaddr, struct disassemble_info *info)
 {
-  fprintf_function    fprintf_func = info->fprintf_func;
-  void *              stream = info->stream;
-  unsigned long       inst, prev_inst;
-  const struct op_code_struct *op, *pop;
-  int                 immval = 0;
-  bfd_boolean         immfound = FALSE;
-  static bfd_vma prev_insn_addr = -1; /*init the prev insn addr */
-  static int     prev_insn_vma = -1;  /*init the prev insn vma */
-  int            curr_insn_vma = info->buffer_vma;
+    fprintf_function fprintf_func = info->fprintf_func;
+    void *stream = info->stream;
+    unsigned long inst, prev_inst;
+    const struct op_code_struct *op, *pop;
+    int immval = 0;
+    bool immfound = false;
+    static bfd_vma prev_insn_addr = -1; /*init the prev insn addr */
+    static int prev_insn_vma = -1;  /*init the prev insn vma */
+    int curr_insn_vma = info->buffer_vma;
+    int special;
+    const char *special_name;
 
-  info->bytes_per_chunk = 4;
+    info->bytes_per_chunk = 4;
 
-  inst = read_insn_microblaze (memaddr, info, &op);
-  if (inst == 0) {
-    return -1;
-  }
-  
-  if (prev_insn_vma == curr_insn_vma) {
-  if (memaddr-(info->bytes_per_chunk) == prev_insn_addr) {
-    prev_inst = read_insn_microblaze (prev_insn_addr, info, &pop);
-    if (prev_inst == 0)
-      return -1;
-    if (pop->instr == imm) {
-      immval = (get_int_field_imm(prev_inst) << 16) & 0xffff0000;
-      immfound = TRUE;
-    }
-    else {
-      immval = 0;
-      immfound = FALSE;
-    }
-  }
-  }
-  /* make curr insn as prev insn */
-  prev_insn_addr = memaddr;
-  prev_insn_vma = curr_insn_vma;
-
-  if (op->name == 0) {
-    fprintf_func (stream, ".short 0x%04lx", inst);
-  }
-  else
-    {
-      fprintf_func (stream, "%s", op->name);
-      
-      switch (op->inst_type)
-	{
-  case INST_TYPE_RD_R1_R2:
-     fprintf_func(stream, "\t%s, %s, %s", get_field_rd(inst), get_field_r1(inst), get_field_r2(inst));
-     break;
-        case INST_TYPE_RD_R1_IMM:
-	  fprintf_func(stream, "\t%s, %s, %s", get_field_rd(inst), get_field_r1(inst), get_field_imm(inst));
-	  if (info->print_address_func && get_int_field_r1(inst) == 0 && info->symbol_at_address_func) {
-	    if (immfound)
-	      immval |= (get_int_field_imm(inst) & 0x0000ffff);
-	    else {
-	      immval = get_int_field_imm(inst);
-	      if (immval & 0x8000)
-		immval |= 0xFFFF0000;
-	    }
-	    if (immval > 0 && info->symbol_at_address_func(immval, info)) {
-	      fprintf_func (stream, "\t// ");
-	      info->print_address_func (immval, info);
-	    }
-	  }
-	  break;
-	case INST_TYPE_RD_R1_IMM5:
-	  fprintf_func(stream, "\t%s, %s, %s", get_field_rd(inst), get_field_r1(inst), get_field_imm5(inst));
-	  break;
-	case INST_TYPE_RD_RFSL:
-	  fprintf_func(stream, "\t%s, %s", get_field_rd(inst), get_field_rfsl(inst));
-	  break;
-	case INST_TYPE_R1_RFSL:
-	  fprintf_func(stream, "\t%s, %s", get_field_r1(inst), get_field_rfsl(inst));
-	  break;
-	case INST_TYPE_RD_SPECIAL:
-	  fprintf_func(stream, "\t%s, %s", get_field_rd(inst), get_field_special(inst, op));
-	  break;
-	case INST_TYPE_SPECIAL_R1:
-	  fprintf_func(stream, "\t%s, %s", get_field_special(inst, op), get_field_r1(inst));
-	  break;
-	case INST_TYPE_RD_R1:
-	  fprintf_func(stream, "\t%s, %s", get_field_rd(inst), get_field_r1(inst));
-	  break;
-	case INST_TYPE_R1_R2:
-	  fprintf_func(stream, "\t%s, %s", get_field_r1(inst), get_field_r2(inst));
-	  break;
-	case INST_TYPE_R1_IMM:
-	  fprintf_func(stream, "\t%s, %s", get_field_r1(inst), get_field_imm(inst));
-	  /* The non-pc relative instructions are returns, which shouldn't 
-	     have a label printed */
-	  if (info->print_address_func && op->inst_offset_type == INST_PC_OFFSET && info->symbol_at_address_func) {
-	    if (immfound)
-	      immval |= (get_int_field_imm(inst) & 0x0000ffff);
-	    else {
-	      immval = get_int_field_imm(inst);
-	      if (immval & 0x8000)
-		immval |= 0xFFFF0000;
-	    }
-	    immval += memaddr;
-	    if (immval > 0 && info->symbol_at_address_func(immval, info)) {
-	      fprintf_func (stream, "\t// ");
-	      info->print_address_func (immval, info);
-	    } else {
-	      fprintf_func (stream, "\t\t// ");
-	      fprintf_func (stream, "%x", immval);
-	    }
-	  }
-	  break;
-        case INST_TYPE_RD_IMM:
-	  fprintf_func(stream, "\t%s, %s", get_field_rd(inst), get_field_imm(inst));
-	  if (info->print_address_func && info->symbol_at_address_func) {
-	    if (immfound)
-	      immval |= (get_int_field_imm(inst) & 0x0000ffff);
-	    else {
-	      immval = get_int_field_imm(inst);
-	      if (immval & 0x8000)
-		immval |= 0xFFFF0000;
-	    }
-	    if (op->inst_offset_type == INST_PC_OFFSET)
-	      immval += (int) memaddr;
-	    if (info->symbol_at_address_func(immval, info)) {
-	      fprintf_func (stream, "\t// ");
-	      info->print_address_func (immval, info);
-	    } 
-	  }
-	  break;
-        case INST_TYPE_IMM:
-	  fprintf_func(stream, "\t%s", get_field_imm(inst));
-	  if (info->print_address_func && info->symbol_at_address_func && op->instr != imm) {
-	    if (immfound)
-	      immval |= (get_int_field_imm(inst) & 0x0000ffff);
-	    else {
-	      immval = get_int_field_imm(inst);
-	      if (immval & 0x8000)
-		immval |= 0xFFFF0000;
-	    }
-	    if (op->inst_offset_type == INST_PC_OFFSET)
-	      immval += (int) memaddr;
-	    if (immval > 0 && info->symbol_at_address_func(immval, info)) {
-	      fprintf_func (stream, "\t// ");
-	      info->print_address_func (immval, info);
-	    } else if (op->inst_offset_type == INST_PC_OFFSET) {
-	      fprintf_func (stream, "\t\t// ");
-	      fprintf_func (stream, "%x", immval);
-	    }
-	  }
-	  break;
-        case INST_TYPE_RD_R2:
-	  fprintf_func(stream, "\t%s, %s", get_field_rd(inst), get_field_r2(inst));
-	  break;
-  case INST_TYPE_R2:
-     fprintf_func(stream, "\t%s", get_field_r2(inst));
-     break;
-  case INST_TYPE_R1:
-     fprintf_func(stream, "\t%s", get_field_r1(inst));
-     break;
-  case INST_TYPE_RD_R1_SPECIAL:
-     fprintf_func(stream, "\t%s, %s", get_field_rd(inst), get_field_r2(inst));
-     break;
-  case INST_TYPE_RD_IMM15:
-     fprintf_func(stream, "\t%s, %s", get_field_rd(inst), get_field_imm15(inst));
-     break;
-     /* For tuqula instruction */
-  case INST_TYPE_RD:
-     fprintf_func(stream, "\t%s", get_field_rd(inst));
-     break;
-  case INST_TYPE_RFSL:
-     fprintf_func(stream, "\t%s", get_field_rfsl(inst));
-     break;
-  default:
-	  /* if the disassembler lags the instruction set */
-	  fprintf_func (stream, "\tundecoded operands, inst is 0x%04lx", inst);
-	  break;
-	}
+    inst = read_insn_microblaze (memaddr, info, &op);
+    if (inst == 0) {
+        return -1;
     }
   
-  /* Say how many bytes we consumed? */
-  return 4;
+    if (prev_insn_vma == curr_insn_vma) {
+        if (memaddr - info->bytes_per_chunk == prev_insn_addr) {
+            prev_inst = read_insn_microblaze (prev_insn_addr, info, &pop);
+            if (prev_inst == 0)
+                return -1;
+            if (pop->instr == imm) {
+                immval = (get_int_field_imm(prev_inst) << 16) & 0xffff0000;
+                immfound = TRUE;
+            }
+            else {
+                immval = 0;
+                immfound = FALSE;
+            }
+        }
+    }
+    /* make curr insn as prev insn */
+    prev_insn_addr = memaddr;
+    prev_insn_vma = curr_insn_vma;
+
+    if (op->name == 0) {
+        fprintf_func (stream, ".short 0x%04lx", inst);
+        return 4;
+    }
+
+    switch (op->inst_type) {
+    case INST_TYPE_RD_R1_R2:
+        fprintf_func(stream, "%s\t" PRIreg ", " PRIreg ", " PRIreg,
+                     op->name, get_field_rd(inst), get_field_r1(inst),
+                     get_field_r2(inst));
+        break;
+    case INST_TYPE_RD_R1_IMM:
+        fprintf_func(stream, "%s\t" PRIreg ", " PRIreg ", " PRIimm,
+                     op->name, get_field_rd(inst), get_field_r1(inst),
+                     get_field_imm(inst));
+        if (get_int_field_r1(inst) == 0) {
+            print_immval_addr(info, immfound, immval, inst, 0);
+        }
+        break;
+    case INST_TYPE_RD_R1_IMM5:
+        fprintf_func(stream, "%s\t" PRIreg ", " PRIreg ", " PRIimm,
+                     op->name, get_field_rd(inst), get_field_r1(inst),
+                     get_field_imm5(inst));
+        break;
+    case INST_TYPE_RD_RFSL:
+        fprintf_func(stream, "%s\t" PRIreg ", " PRIrfsl,
+                     op->name, get_field_rd(inst), get_field_rfsl(inst));
+        break;
+    case INST_TYPE_R1_RFSL:
+        fprintf_func(stream, "%s\t" PRIreg ", " PRIrfsl,
+                     op->name, get_field_r1(inst), get_field_rfsl(inst));
+        break;
+    case INST_TYPE_RD_SPECIAL:
+        special = get_field_special(inst, op);
+        special_name = get_special_name(special);
+        if (special_name) {
+            fprintf_func(stream, "%s\t" PRIreg ", %s",
+                         op->name, get_field_rd(inst), special_name);
+        } else {
+            fprintf_func(stream, "%s\t" PRIreg ", " PRIpvr,
+                         op->name, get_field_rd(inst), special ^ REG_PVR_MASK);
+        }
+        break;
+    case INST_TYPE_SPECIAL_R1:
+        special = get_field_special(inst, op);
+        special_name = get_special_name(special);
+        if (special_name) {
+            fprintf_func(stream, "%s\t%s, " PRIreg,
+                         op->name, special_name, get_field_r1(inst));
+        } else {
+            fprintf_func(stream, "%s\t" PRIpvr ", " PRIreg,
+                         op->name, special ^ REG_PVR_MASK, get_field_r1(inst));
+        }
+        break;
+    case INST_TYPE_RD_R1:
+        fprintf_func(stream, "%s\t" PRIreg ", " PRIreg,
+                     op->name, get_field_rd(inst), get_field_r1(inst));
+        break;
+    case INST_TYPE_R1_R2:
+        fprintf_func(stream, "%s\t" PRIreg ", " PRIreg,
+                     op->name, get_field_r1(inst), get_field_r2(inst));
+        break;
+    case INST_TYPE_R1_IMM:
+        fprintf_func(stream, "%s\t" PRIreg ", " PRIimm,
+                     op->name, get_field_r1(inst), get_field_imm(inst));
+        /*
+         * The non-pc relative instructions are returns,
+         * which shouldn't have a label printed.
+         */
+        if (op->inst_offset_type == INST_PC_OFFSET) {
+            print_immval_addr(info, immfound, immval, inst, memaddr);
+        }
+        break;
+    case INST_TYPE_RD_IMM:
+        fprintf_func(stream, "%s\t" PRIreg ", " PRIimm,
+                     op->name, get_field_rd(inst), get_field_imm(inst));
+        print_immval_addr(info, immfound, immval, inst,
+                          op->inst_offset_type == INST_PC_OFFSET
+                          ? memaddr : 0);
+        break;
+    case INST_TYPE_IMM:
+        fprintf_func(stream, "%s\t" PRIimm,
+                     op->name, get_field_imm(inst));
+        if (op->instr != imm) {
+            print_immval_addr(info, immfound, immval, inst,
+                              op->inst_offset_type == INST_PC_OFFSET
+                              ? memaddr : 0);
+        }
+        break;
+    case INST_TYPE_RD_R2:
+        fprintf_func(stream, "%s\t" PRIreg ", " PRIreg,
+                     op->name, get_field_rd(inst), get_field_r2(inst));
+        break;
+    case INST_TYPE_R2:
+        fprintf_func(stream, "%s\t" PRIreg,
+                     op->name, get_field_r2(inst));
+        break;
+    case INST_TYPE_R1:
+        fprintf_func(stream, "%s\t" PRIreg,
+                     op->name, get_field_r1(inst));
+        break;
+    case INST_TYPE_RD_R1_SPECIAL:
+        fprintf_func(stream, "%s\t" PRIreg ", " PRIreg,
+                     op->name, get_field_rd(inst), get_field_r2(inst));
+        break;
+    case INST_TYPE_RD_IMM15:
+        fprintf_func(stream, "%s\t" PRIreg ", " PRIimm,
+                     op->name, get_field_rd(inst), get_field_imm15(inst));
+        break;
+        /* For tuqula instruction */
+    case INST_TYPE_RD:
+        fprintf_func(stream, "%s\t" PRIreg,
+                     op->name, get_field_rd(inst));
+        break;
+    case INST_TYPE_RFSL:
+        fprintf_func(stream, "%s\t" PRIrfsl,
+                     op->name, get_field_rfsl(inst));
+        break;
+    default:
+        /* if the disassembler lags the instruction set */
+        fprintf_func(stream, "%s\tundecoded operands, inst is 0x%04lx",
+                     op->name, inst);
+        break;
+    }
+    return 4;
 }
diff --git a/disas/riscv.c b/disas/riscv.c
index 297cfa2..90d6b26 100644
--- a/disas/riscv.c
+++ b/disas/riscv.c
@@ -4820,272 +4820,249 @@
 
 /* format instruction */
 
-static void append(char *s1, const char *s2, size_t n)
-{
-    size_t l1 = strlen(s1);
-    if (n - l1 - 1 > 0) {
-        strncat(s1, s2, n - l1);
-    }
-}
-
-static void format_inst(char *buf, size_t buflen, size_t tab, rv_decode *dec)
+static GString *format_inst(size_t tab, rv_decode *dec)
 {
     const rv_opcode_data *opcode_data = dec->opcode_data;
-    char tmp[64];
+    GString *buf = g_string_sized_new(64);
     const char *fmt;
 
     fmt = opcode_data[dec->op].format;
     while (*fmt) {
         switch (*fmt) {
         case 'O':
-            append(buf, opcode_data[dec->op].name, buflen);
+            g_string_append(buf, opcode_data[dec->op].name);
             break;
         case '(':
-            append(buf, "(", buflen);
-            break;
         case ',':
-            append(buf, ",", buflen);
-            break;
         case ')':
-            append(buf, ")", buflen);
-            break;
         case '-':
-            append(buf, "-", buflen);
+            g_string_append_c(buf, *fmt);
             break;
         case 'b':
-            snprintf(tmp, sizeof(tmp), "%d", dec->bs);
-            append(buf, tmp, buflen);
+            g_string_append_printf(buf, "%d", dec->bs);
             break;
         case 'n':
-            snprintf(tmp, sizeof(tmp), "%d", dec->rnum);
-            append(buf, tmp, buflen);
+            g_string_append_printf(buf, "%d", dec->rnum);
             break;
         case '0':
-            append(buf, rv_ireg_name_sym[dec->rd], buflen);
+            g_string_append(buf, rv_ireg_name_sym[dec->rd]);
             break;
         case '1':
-            append(buf, rv_ireg_name_sym[dec->rs1], buflen);
+            g_string_append(buf, rv_ireg_name_sym[dec->rs1]);
             break;
         case '2':
-            append(buf, rv_ireg_name_sym[dec->rs2], buflen);
+            g_string_append(buf, rv_ireg_name_sym[dec->rs2]);
             break;
         case '3':
-            append(buf, dec->cfg->ext_zfinx ? rv_ireg_name_sym[dec->rd] :
-                                              rv_freg_name_sym[dec->rd],
-                   buflen);
+            if (dec->cfg->ext_zfinx) {
+                g_string_append(buf, rv_ireg_name_sym[dec->rd]);
+            } else {
+                g_string_append(buf, rv_freg_name_sym[dec->rd]);
+            }
             break;
         case '4':
-            append(buf, dec->cfg->ext_zfinx ? rv_ireg_name_sym[dec->rs1] :
-                                              rv_freg_name_sym[dec->rs1],
-                   buflen);
+            if (dec->cfg->ext_zfinx) {
+                g_string_append(buf, rv_ireg_name_sym[dec->rs1]);
+            } else {
+                g_string_append(buf, rv_freg_name_sym[dec->rs1]);
+            }
             break;
         case '5':
-            append(buf, dec->cfg->ext_zfinx ? rv_ireg_name_sym[dec->rs2] :
-                                              rv_freg_name_sym[dec->rs2],
-                   buflen);
+            if (dec->cfg->ext_zfinx) {
+                g_string_append(buf, rv_ireg_name_sym[dec->rs2]);
+            } else {
+                g_string_append(buf, rv_freg_name_sym[dec->rs2]);
+            }
             break;
         case '6':
-            append(buf, dec->cfg->ext_zfinx ? rv_ireg_name_sym[dec->rs3] :
-                                              rv_freg_name_sym[dec->rs3],
-                   buflen);
+            if (dec->cfg->ext_zfinx) {
+                g_string_append(buf, rv_ireg_name_sym[dec->rs3]);
+            } else {
+                g_string_append(buf, rv_freg_name_sym[dec->rs3]);
+            }
             break;
         case '7':
-            snprintf(tmp, sizeof(tmp), "%d", dec->rs1);
-            append(buf, tmp, buflen);
+            g_string_append_printf(buf, "%d", dec->rs1);
             break;
         case 'i':
-            snprintf(tmp, sizeof(tmp), "%d", dec->imm);
-            append(buf, tmp, buflen);
+            g_string_append_printf(buf, "%d", dec->imm);
             break;
         case 'u':
-            snprintf(tmp, sizeof(tmp), "%u", ((uint32_t)dec->imm & 0b111111));
-            append(buf, tmp, buflen);
+            g_string_append_printf(buf, "%u", ((uint32_t)dec->imm & 0b111111));
             break;
         case 'j':
-            snprintf(tmp, sizeof(tmp), "%d", dec->imm1);
-            append(buf, tmp, buflen);
+            g_string_append_printf(buf, "%d", dec->imm1);
             break;
         case 'o':
-            snprintf(tmp, sizeof(tmp), "%d", dec->imm);
-            append(buf, tmp, buflen);
-            while (strlen(buf) < tab * 2) {
-                append(buf, " ", buflen);
+            g_string_append_printf(buf, "%d", dec->imm);
+            while (buf->len < tab * 2) {
+                g_string_append_c(buf, ' ');
             }
-            snprintf(tmp, sizeof(tmp), "# 0x%" PRIx64,
-                dec->pc + dec->imm);
-            append(buf, tmp, buflen);
+            g_string_append_printf(buf, "# 0x%" PRIx64, dec->pc + dec->imm);
             break;
         case 'U':
             fmt++;
-            snprintf(tmp, sizeof(tmp), "%d", dec->imm >> 12);
-            append(buf, tmp, buflen);
+            g_string_append_printf(buf, "%d", dec->imm >> 12);
             if (*fmt == 'o') {
-                while (strlen(buf) < tab * 2) {
-                    append(buf, " ", buflen);
+                while (buf->len < tab * 2) {
+                    g_string_append_c(buf, ' ');
                 }
-                snprintf(tmp, sizeof(tmp), "# 0x%" PRIx64,
-                    dec->pc + dec->imm);
-                append(buf, tmp, buflen);
+                g_string_append_printf(buf, "# 0x%" PRIx64, dec->pc + dec->imm);
             }
             break;
         case 'c': {
             const char *name = csr_name(dec->imm & 0xfff);
             if (name) {
-                append(buf, name, buflen);
+                g_string_append(buf, name);
             } else {
-                snprintf(tmp, sizeof(tmp), "0x%03x", dec->imm & 0xfff);
-                append(buf, tmp, buflen);
+                g_string_append_printf(buf, "0x%03x", dec->imm & 0xfff);
             }
             break;
         }
         case 'r':
             switch (dec->rm) {
             case rv_rm_rne:
-                append(buf, "rne", buflen);
+                g_string_append(buf, "rne");
                 break;
             case rv_rm_rtz:
-                append(buf, "rtz", buflen);
+                g_string_append(buf, "rtz");
                 break;
             case rv_rm_rdn:
-                append(buf, "rdn", buflen);
+                g_string_append(buf, "rdn");
                 break;
             case rv_rm_rup:
-                append(buf, "rup", buflen);
+                g_string_append(buf, "rup");
                 break;
             case rv_rm_rmm:
-                append(buf, "rmm", buflen);
+                g_string_append(buf, "rmm");
                 break;
             case rv_rm_dyn:
-                append(buf, "dyn", buflen);
+                g_string_append(buf, "dyn");
                 break;
             default:
-                append(buf, "inv", buflen);
+                g_string_append(buf, "inv");
                 break;
             }
             break;
         case 'p':
             if (dec->pred & rv_fence_i) {
-                append(buf, "i", buflen);
+                g_string_append_c(buf, 'i');
             }
             if (dec->pred & rv_fence_o) {
-                append(buf, "o", buflen);
+                g_string_append_c(buf, 'o');
             }
             if (dec->pred & rv_fence_r) {
-                append(buf, "r", buflen);
+                g_string_append_c(buf, 'r');
             }
             if (dec->pred & rv_fence_w) {
-                append(buf, "w", buflen);
+                g_string_append_c(buf, 'w');
             }
             break;
         case 's':
             if (dec->succ & rv_fence_i) {
-                append(buf, "i", buflen);
+                g_string_append_c(buf, 'i');
             }
             if (dec->succ & rv_fence_o) {
-                append(buf, "o", buflen);
+                g_string_append_c(buf, 'o');
             }
             if (dec->succ & rv_fence_r) {
-                append(buf, "r", buflen);
+                g_string_append_c(buf, 'r');
             }
             if (dec->succ & rv_fence_w) {
-                append(buf, "w", buflen);
+                g_string_append_c(buf, 'w');
             }
             break;
         case '\t':
-            while (strlen(buf) < tab) {
-                append(buf, " ", buflen);
+            while (buf->len < tab) {
+                g_string_append_c(buf, ' ');
             }
             break;
         case 'A':
             if (dec->aq) {
-                append(buf, ".aq", buflen);
+                g_string_append(buf, ".aq");
             }
             break;
         case 'R':
             if (dec->rl) {
-                append(buf, ".rl", buflen);
+                g_string_append(buf, ".rl");
             }
             break;
         case 'l':
-            append(buf, ",v0", buflen);
+            g_string_append(buf, ",v0");
             break;
         case 'm':
             if (dec->vm == 0) {
-                append(buf, ",v0.t", buflen);
+                g_string_append(buf, ",v0.t");
             }
             break;
         case 'D':
-            append(buf, rv_vreg_name_sym[dec->rd], buflen);
+            g_string_append(buf, rv_vreg_name_sym[dec->rd]);
             break;
         case 'E':
-            append(buf, rv_vreg_name_sym[dec->rs1], buflen);
+            g_string_append(buf, rv_vreg_name_sym[dec->rs1]);
             break;
         case 'F':
-            append(buf, rv_vreg_name_sym[dec->rs2], buflen);
+            g_string_append(buf, rv_vreg_name_sym[dec->rs2]);
             break;
         case 'G':
-            append(buf, rv_vreg_name_sym[dec->rs3], buflen);
+            g_string_append(buf, rv_vreg_name_sym[dec->rs3]);
             break;
         case 'v': {
-            char nbuf[32] = {0};
             const int sew = 1 << (((dec->vzimm >> 3) & 0b111) + 3);
-            sprintf(nbuf, "%d", sew);
             const int lmul = dec->vzimm & 0b11;
             const int flmul = (dec->vzimm >> 2) & 1;
             const char *vta = (dec->vzimm >> 6) & 1 ? "ta" : "tu";
             const char *vma = (dec->vzimm >> 7) & 1 ? "ma" : "mu";
-            append(buf, "e", buflen);
-            append(buf, nbuf, buflen);
-            append(buf, ",m", buflen);
+
+            g_string_append_printf(buf, "e%d,m", sew);
             if (flmul) {
                 switch (lmul) {
                 case 3:
-                    sprintf(nbuf, "f2");
+                    g_string_append(buf, "f2");
                     break;
                 case 2:
-                    sprintf(nbuf, "f4");
+                    g_string_append(buf, "f4");
                     break;
                 case 1:
-                    sprintf(nbuf, "f8");
-                break;
+                    g_string_append(buf, "f8");
+                    break;
                 }
-                append(buf, nbuf, buflen);
             } else {
-                sprintf(nbuf, "%d", 1 << lmul);
-                append(buf, nbuf, buflen);
+                g_string_append_printf(buf, "%d", 1 << lmul);
             }
-            append(buf, ",", buflen);
-            append(buf, vta, buflen);
-            append(buf, ",", buflen);
-            append(buf, vma, buflen);
+            g_string_append_c(buf, ',');
+            g_string_append(buf, vta);
+            g_string_append_c(buf, ',');
+            g_string_append(buf, vma);
             break;
         }
         case 'x': {
             switch (dec->rlist) {
             case 4:
-                snprintf(tmp, sizeof(tmp), "{ra}");
+                g_string_append(buf, "{ra}");
                 break;
             case 5:
-                snprintf(tmp, sizeof(tmp), "{ra, s0}");
+                g_string_append(buf, "{ra, s0}");
                 break;
             case 15:
-                snprintf(tmp, sizeof(tmp), "{ra, s0-s11}");
+                g_string_append(buf, "{ra, s0-s11}");
                 break;
             default:
-                snprintf(tmp, sizeof(tmp), "{ra, s0-s%d}", dec->rlist - 5);
+                g_string_append_printf(buf, "{ra, s0-s%d}", dec->rlist - 5);
                 break;
             }
-            append(buf, tmp, buflen);
             break;
         }
         case 'h':
-            append(buf, rv_fli_name_const[dec->imm], buflen);
+            g_string_append(buf, rv_fli_name_const[dec->imm]);
             break;
         default:
             break;
         }
         fmt++;
     }
+
+    return buf;
 }
 
 /* lift instruction to pseudo-instruction */
@@ -5171,9 +5148,8 @@
 
 /* disassemble instruction */
 
-static void
-disasm_inst(char *buf, size_t buflen, rv_isa isa, uint64_t pc, rv_inst inst,
-            RISCVCPUConfig *cfg)
+static GString *disasm_inst(rv_isa isa, uint64_t pc, rv_inst inst,
+                            RISCVCPUConfig *cfg)
 {
     rv_decode dec = { 0 };
     dec.pc = pc;
@@ -5220,7 +5196,7 @@
     decode_inst_operands(&dec, isa);
     decode_inst_decompress(&dec, isa);
     decode_inst_lift_pseudo(&dec);
-    format_inst(buf, buflen, 24, &dec);
+    return format_inst(24, &dec);
 }
 
 #define INST_FMT_2 "%04" PRIx64 "              "
@@ -5231,7 +5207,6 @@
 static int
 print_insn_riscv(bfd_vma memaddr, struct disassemble_info *info, rv_isa isa)
 {
-    char buf[128] = { 0 };
     bfd_byte packet[2];
     rv_inst inst = 0;
     size_t len = 2;
@@ -5272,9 +5247,9 @@
         }
     }
 
-    disasm_inst(buf, sizeof(buf), isa, memaddr, inst,
-                (RISCVCPUConfig *)info->target_info);
-    (*info->fprintf_func)(info->stream, "%s", buf);
+    g_autoptr(GString) str =
+        disasm_inst(isa, memaddr, inst, (RISCVCPUConfig *)info->target_info);
+    (*info->fprintf_func)(info->stream, "%s", str->str);
 
     return len;
 }
diff --git a/docs/devel/ci-jobs.rst.inc b/docs/devel/ci-jobs.rst.inc
index be06322..3756bbe 100644
--- a/docs/devel/ci-jobs.rst.inc
+++ b/docs/devel/ci-jobs.rst.inc
@@ -182,13 +182,6 @@
 runner, you can set this variable to enable the tests that require this
 kind of host. The runner should be tagged with "s390x".
 
-CENTOS_STREAM_8_x86_64_RUNNER_AVAILABLE
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-If you've got access to a CentOS Stream 8 x86_64 host that can be
-used as a gitlab-CI runner, you can set this variable to enable the
-tests that require this kind of host. The runner should be tagged with
-both "centos_stream_8" and "x86_64".
-
 CCACHE_DISABLE
 ~~~~~~~~~~~~~~
 The jobs are configured to use "ccache" by default since this typically
diff --git a/docs/devel/ci-runners.rst.inc b/docs/devel/ci-runners.rst.inc
index 7817001..67b23d3 100644
--- a/docs/devel/ci-runners.rst.inc
+++ b/docs/devel/ci-runners.rst.inc
@@ -41,19 +41,18 @@
 Build environment
 ~~~~~~~~~~~~~~~~~
 
-The ``scripts/ci/setup/build-environment.yml`` Ansible playbook will
-set up machines with the environment needed to perform builds and run
-QEMU tests.  This playbook consists on the installation of various
-required packages (and a general package update while at it).  It
-currently covers a number of different Linux distributions, but it can
-be expanded to cover other systems.
+The ``scripts/ci/setup/$DISTRO/build-environment.yml`` Ansible
+playbook will set up machines with the environment needed to perform
+builds and run QEMU tests. This playbook consists on the installation
+of various required packages (and a general package update while at
+it).
 
 The minimum required version of Ansible successfully tested in this
 playbook is 2.8.0 (a version check is embedded within the playbook
 itself).  To run the playbook, execute::
 
   cd scripts/ci/setup
-  ansible-playbook -i inventory build-environment.yml
+  ansible-playbook -i inventory $DISTRO/build-environment.yml
 
 Please note that most of the tasks in the playbook require superuser
 privileges, such as those from the ``root`` account or those obtained
diff --git a/docs/devel/testing.rst b/docs/devel/testing.rst
index fa28e3e..23d3f44 100644
--- a/docs/devel/testing.rst
+++ b/docs/devel/testing.rst
@@ -387,9 +387,9 @@
 
 .. code::
 
-  make docker-test-build@centos8
+  make docker-test-build@debian
 
-This will create a container instance using the ``centos8`` image (the image
+This will create a container instance using the ``debian`` image (the image
 is downloaded and initialized automatically), in which the ``test-build`` job
 is executed.
 
@@ -410,8 +410,8 @@
 Images
 ~~~~~~
 
-Along with many other images, the ``centos8`` image is defined in a Dockerfile
-in ``tests/docker/dockerfiles/``, called ``centos8.docker``. ``make docker-help``
+Along with many other images, the ``debian`` image is defined in a Dockerfile
+in ``tests/docker/dockerfiles/``, called ``debian.docker``. ``make docker-help``
 command will list all the available images.
 
 A ``.pre`` script can be added beside the ``.docker`` file, which will be
diff --git a/fpu/softfloat-specialize.c.inc b/fpu/softfloat-specialize.c.inc
index f573014..8f3b97d 100644
--- a/fpu/softfloat-specialize.c.inc
+++ b/fpu/softfloat-specialize.c.inc
@@ -447,6 +447,17 @@
     } else {
         return 1;
     }
+#elif defined(TARGET_SPARC)
+    /* Prefer SNaN over QNaN, order B then A. */
+    if (is_snan(b_cls)) {
+        return 1;
+    } else if (is_snan(a_cls)) {
+        return 0;
+    } else if (is_qnan(b_cls)) {
+        return 1;
+    } else {
+        return 0;
+    }
 #elif defined(TARGET_XTENSA)
     /*
      * Xtensa has two NaN propagation modes.
@@ -624,6 +635,26 @@
         float_raise(float_flag_invalid | float_flag_invalid_imz, status);
     }
     return 3; /* default NaN */
+#elif defined(TARGET_SPARC)
+    /* For (inf,0,nan) return c. */
+    if (infzero) {
+        float_raise(float_flag_invalid | float_flag_invalid_imz, status);
+        return 2;
+    }
+    /* Prefer SNaN over QNaN, order C, B, A. */
+    if (is_snan(c_cls)) {
+        return 2;
+    } else if (is_snan(b_cls)) {
+        return 1;
+    } else if (is_snan(a_cls)) {
+        return 0;
+    } else if (is_qnan(c_cls)) {
+        return 2;
+    } else if (is_qnan(b_cls)) {
+        return 1;
+    } else {
+        return 0;
+    }
 #elif defined(TARGET_XTENSA)
     /*
      * For Xtensa, the (inf,zero,nan) case sets InvalidOp and returns
diff --git a/hw/dma/pl330.c b/hw/dma/pl330.c
index 70a502d..5f89295 100644
--- a/hw/dma/pl330.c
+++ b/hw/dma/pl330.c
@@ -15,6 +15,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/cutils.h"
 #include "hw/irq.h"
 #include "hw/qdev-properties.h"
 #include "hw/sysbus.h"
@@ -317,22 +318,14 @@
 
 static void pl330_hexdump(uint8_t *buf, size_t size)
 {
-    unsigned int b, i, len;
-    char tmpbuf[80];
+    g_autoptr(GString) str = g_string_sized_new(64);
+    size_t b, len;
 
-    for (b = 0; b < size; b += 16) {
-        len = size - b;
-        if (len > 16) {
-            len = 16;
-        }
-        tmpbuf[0] = '\0';
-        for (i = 0; i < len; i++) {
-            if ((i % 4) == 0) {
-                strcat(tmpbuf, " ");
-            }
-            sprintf(tmpbuf + strlen(tmpbuf), " %02x", buf[b + i]);
-        }
-        trace_pl330_hexdump(b, tmpbuf);
+    for (b = 0; b < size; b += len) {
+        len = MIN(16, size - b);
+        g_string_truncate(str, 0);
+        qemu_hexdump_line(str, buf + b, len, 1, 4);
+        trace_pl330_hexdump(b, str->str);
     }
 }
 
diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
index 73ec373..fcb6cca 100644
--- a/hw/ide/atapi.c
+++ b/hw/ide/atapi.c
@@ -24,6 +24,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/cutils.h"
 #include "hw/scsi/scsi.h"
 #include "sysemu/block-backend.h"
 #include "scsi/constants.h"
@@ -1309,14 +1310,9 @@
     trace_ide_atapi_cmd(s, s->io_buffer[0]);
 
     if (trace_event_get_state_backends(TRACE_IDE_ATAPI_CMD_PACKET)) {
-        /* Each pretty-printed byte needs two bytes and a space; */
-        char *ppacket = g_malloc(ATAPI_PACKET_SIZE * 3 + 1);
-        int i;
-        for (i = 0; i < ATAPI_PACKET_SIZE; i++) {
-            sprintf(ppacket + (i * 3), "%02x ", buf[i]);
-        }
-        trace_ide_atapi_cmd_packet(s, s->lcyl | (s->hcyl << 8), ppacket);
-        g_free(ppacket);
+        g_autoptr(GString) str =
+            qemu_hexdump_line(NULL, buf, ATAPI_PACKET_SIZE, 1, 0);
+        trace_ide_atapi_cmd_packet(s, s->lcyl | (s->hcyl << 8), str->str);
     }
 
     /*
diff --git a/hw/intc/loongarch_extioi.c b/hw/intc/loongarch_extioi.c
index 0b35854..1e8e011 100644
--- a/hw/intc/loongarch_extioi.c
+++ b/hw/intc/loongarch_extioi.c
@@ -143,10 +143,13 @@
 
     for (i = 0; i < 4; i++) {
         cpu = val & 0xff;
-        cpu = ctz32(cpu);
-        cpu = (cpu >= 4) ? 0 : cpu;
         val = val >> 8;
 
+        if (!(s->status & BIT(EXTIOI_ENABLE_CPU_ENCODE))) {
+            cpu = ctz32(cpu);
+            cpu = (cpu >= 4) ? 0 : cpu;
+        }
+
         if (s->sw_coremap[irq + i] == cpu) {
             continue;
         }
@@ -265,6 +268,61 @@
     .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
+static MemTxResult extioi_virt_readw(void *opaque, hwaddr addr, uint64_t *data,
+                                     unsigned size, MemTxAttrs attrs)
+{
+    LoongArchExtIOI *s = LOONGARCH_EXTIOI(opaque);
+
+    switch (addr) {
+    case EXTIOI_VIRT_FEATURES:
+        *data = s->features;
+        break;
+    case EXTIOI_VIRT_CONFIG:
+        *data = s->status;
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    return MEMTX_OK;
+}
+
+static MemTxResult extioi_virt_writew(void *opaque, hwaddr addr,
+                          uint64_t val, unsigned size,
+                          MemTxAttrs attrs)
+{
+    LoongArchExtIOI *s = LOONGARCH_EXTIOI(opaque);
+
+    switch (addr) {
+    case EXTIOI_VIRT_FEATURES:
+        return MEMTX_ACCESS_ERROR;
+
+    case EXTIOI_VIRT_CONFIG:
+        /*
+         * extioi features can only be set at disabled status
+         */
+        if ((s->status & BIT(EXTIOI_ENABLE)) && val) {
+            return MEMTX_ACCESS_ERROR;
+        }
+
+        s->status = val & s->features;
+        break;
+    default:
+        g_assert_not_reached();
+    }
+    return MEMTX_OK;
+}
+
+static const MemoryRegionOps extioi_virt_ops = {
+    .read_with_attrs = extioi_virt_readw,
+    .write_with_attrs = extioi_virt_writew,
+    .impl.min_access_size = 4,
+    .impl.max_access_size = 4,
+    .valid.min_access_size = 4,
+    .valid.max_access_size = 8,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
 static void loongarch_extioi_realize(DeviceState *dev, Error **errp)
 {
     LoongArchExtIOI *s = LOONGARCH_EXTIOI(dev);
@@ -284,6 +342,16 @@
     memory_region_init_io(&s->extioi_system_mem, OBJECT(s), &extioi_ops,
                           s, "extioi_system_mem", 0x900);
     sysbus_init_mmio(sbd, &s->extioi_system_mem);
+
+    if (s->features & BIT(EXTIOI_HAS_VIRT_EXTENSION)) {
+        memory_region_init_io(&s->virt_extend, OBJECT(s), &extioi_virt_ops,
+                              s, "extioi_virt", EXTIOI_VIRT_SIZE);
+        sysbus_init_mmio(sbd, &s->virt_extend);
+        s->features |= EXTIOI_VIRT_HAS_FEATURES;
+    } else {
+        s->status |= BIT(EXTIOI_ENABLE);
+    }
+
     s->cpu = g_new0(ExtIOICore, s->num_cpu);
     if (s->cpu == NULL) {
         error_setg(errp, "Memory allocation for ExtIOICore faile");
@@ -304,6 +372,13 @@
     g_free(s->cpu);
 }
 
+static void loongarch_extioi_reset(DeviceState *d)
+{
+    LoongArchExtIOI *s = LOONGARCH_EXTIOI(d);
+
+    s->status = 0;
+}
+
 static int vmstate_extioi_post_load(void *opaque, int version_id)
 {
     LoongArchExtIOI *s = LOONGARCH_EXTIOI(opaque);
@@ -333,8 +408,8 @@
 
 static const VMStateDescription vmstate_loongarch_extioi = {
     .name = TYPE_LOONGARCH_EXTIOI,
-    .version_id = 2,
-    .minimum_version_id = 2,
+    .version_id = 3,
+    .minimum_version_id = 3,
     .post_load = vmstate_extioi_post_load,
     .fields = (const VMStateField[]) {
         VMSTATE_UINT32_ARRAY(bounce, LoongArchExtIOI, EXTIOI_IRQS_GROUP_COUNT),
@@ -347,12 +422,16 @@
 
         VMSTATE_STRUCT_VARRAY_POINTER_UINT32(cpu, LoongArchExtIOI, num_cpu,
                          vmstate_extioi_core, ExtIOICore),
+        VMSTATE_UINT32(features, LoongArchExtIOI),
+        VMSTATE_UINT32(status, LoongArchExtIOI),
         VMSTATE_END_OF_LIST()
     }
 };
 
 static Property extioi_properties[] = {
     DEFINE_PROP_UINT32("num-cpu", LoongArchExtIOI, num_cpu, 1),
+    DEFINE_PROP_BIT("has-virtualization-extension", LoongArchExtIOI, features,
+                    EXTIOI_HAS_VIRT_EXTENSION, 0),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -361,6 +440,7 @@
     DeviceClass *dc = DEVICE_CLASS(klass);
 
     dc->realize = loongarch_extioi_realize;
+    dc->reset   = loongarch_extioi_reset;
     device_class_set_props(dc, extioi_properties);
     dc->vmsd = &vmstate_loongarch_extioi;
 }
diff --git a/hw/loongarch/virt.c b/hw/loongarch/virt.c
index 3e6e93e..66cef20 100644
--- a/hw/loongarch/virt.c
+++ b/hw/loongarch/virt.c
@@ -11,6 +11,7 @@
 #include "hw/boards.h"
 #include "hw/char/serial.h"
 #include "sysemu/kvm.h"
+#include "sysemu/tcg.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/qtest.h"
 #include "sysemu/runstate.h"
@@ -45,8 +46,34 @@
 #include "sysemu/tpm.h"
 #include "sysemu/block-backend.h"
 #include "hw/block/flash.h"
+#include "hw/virtio/virtio-iommu.h"
 #include "qemu/error-report.h"
 
+static bool virt_is_veiointc_enabled(LoongArchVirtMachineState *lvms)
+{
+    if (lvms->veiointc == ON_OFF_AUTO_OFF) {
+        return false;
+    }
+    return true;
+}
+
+static void virt_get_veiointc(Object *obj, Visitor *v, const char *name,
+                              void *opaque, Error **errp)
+{
+    LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(obj);
+    OnOffAuto veiointc = lvms->veiointc;
+
+    visit_type_OnOffAuto(v, name, &veiointc, errp);
+}
+
+static void virt_set_veiointc(Object *obj, Visitor *v, const char *name,
+                              void *opaque, Error **errp)
+{
+    LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(obj);
+
+    visit_type_OnOffAuto(v, name, &lvms->veiointc, errp);
+}
+
 static PFlashCFI01 *virt_flash_create1(LoongArchVirtMachineState *lvms,
                                        const char *name,
                                        const char *alias_prop_name)
@@ -717,25 +744,47 @@
     uint32_t cpuintc_phandle, eiointc_phandle, pch_pic_phandle, pch_msi_phandle;
 
     /*
-     * The connection of interrupts:
-     *   +-----+    +---------+     +-------+
-     *   | IPI |--> | CPUINTC | <-- | Timer |
-     *   +-----+    +---------+     +-------+
-     *                  ^
-     *                  |
-     *            +---------+
-     *            | EIOINTC |
-     *            +---------+
-     *             ^       ^
-     *             |       |
-     *      +---------+ +---------+
-     *      | PCH-PIC | | PCH-MSI |
-     *      +---------+ +---------+
-     *        ^      ^          ^
-     *        |      |          |
-     * +--------+ +---------+ +---------+
-     * | UARTs  | | Devices | | Devices |
-     * +--------+ +---------+ +---------+
+     * Extended IRQ model.
+     *                                 |
+     * +-----------+     +-------------|--------+     +-----------+
+     * | IPI/Timer | --> | CPUINTC(0-3)|(4-255) | <-- | IPI/Timer |
+     * +-----------+     +-------------|--------+     +-----------+
+     *                         ^       |
+     *                         |
+     *                    +---------+
+     *                    | EIOINTC |
+     *                    +---------+
+     *                     ^       ^
+     *                     |       |
+     *              +---------+ +---------+
+     *              | PCH-PIC | | PCH-MSI |
+     *              +---------+ +---------+
+     *                ^      ^          ^
+     *                |      |          |
+     *         +--------+ +---------+ +---------+
+     *         | UARTs  | | Devices | | Devices |
+     *         +--------+ +---------+ +---------+
+     *
+     * Virt extended IRQ model.
+     *
+     *   +-----+    +---------------+     +-------+
+     *   | IPI |--> | CPUINTC(0-255)| <-- | Timer |
+     *   +-----+    +---------------+     +-------+
+     *                     ^
+     *                     |
+     *               +-----------+
+     *               | V-EIOINTC |
+     *               +-----------+
+     *                ^         ^
+     *                |         |
+     *         +---------+ +---------+
+     *         | PCH-PIC | | PCH-MSI |
+     *         +---------+ +---------+
+     *           ^      ^          ^
+     *           |      |          |
+     *    +--------+ +---------+ +---------+
+     *    | UARTs  | | Devices | | Devices |
+     *    +--------+ +---------+ +---------+
      */
 
     /* Create IPI device */
@@ -767,9 +816,16 @@
     /* Create EXTIOI device */
     extioi = qdev_new(TYPE_LOONGARCH_EXTIOI);
     qdev_prop_set_uint32(extioi, "num-cpu", ms->smp.cpus);
+    if (virt_is_veiointc_enabled(lvms)) {
+        qdev_prop_set_bit(extioi, "has-virtualization-extension", true);
+    }
     sysbus_realize_and_unref(SYS_BUS_DEVICE(extioi), &error_fatal);
     memory_region_add_subregion(&lvms->system_iocsr, APIC_BASE,
-                   sysbus_mmio_get_region(SYS_BUS_DEVICE(extioi), 0));
+                    sysbus_mmio_get_region(SYS_BUS_DEVICE(extioi), 0));
+    if (virt_is_veiointc_enabled(lvms)) {
+        memory_region_add_subregion(&lvms->system_iocsr, EXTIOI_VIRT_BASE,
+                    sysbus_mmio_get_region(SYS_BUS_DEVICE(extioi), 1));
+    }
 
     /*
      * connect ext irq to the cpu irq
@@ -876,38 +932,91 @@
     }
 }
 
-
-static void virt_iocsr_misc_write(void *opaque, hwaddr addr,
-                                  uint64_t val, unsigned size)
+static MemTxResult virt_iocsr_misc_write(void *opaque, hwaddr addr,
+                                         uint64_t val, unsigned size,
+                                         MemTxAttrs attrs)
 {
+    LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(opaque);
+    uint64_t features;
+
+    switch (addr) {
+    case MISC_FUNC_REG:
+        if (!virt_is_veiointc_enabled(lvms)) {
+            return MEMTX_OK;
+        }
+
+        features = address_space_ldl(&lvms->as_iocsr,
+                                     EXTIOI_VIRT_BASE + EXTIOI_VIRT_CONFIG,
+                                     attrs, NULL);
+        if (val & BIT_ULL(IOCSRM_EXTIOI_EN)) {
+            features |= BIT(EXTIOI_ENABLE);
+        }
+        if (val & BIT_ULL(IOCSRM_EXTIOI_INT_ENCODE)) {
+            features |= BIT(EXTIOI_ENABLE_INT_ENCODE);
+        }
+
+        address_space_stl(&lvms->as_iocsr,
+                          EXTIOI_VIRT_BASE + EXTIOI_VIRT_CONFIG,
+                          features, attrs, NULL);
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    return MEMTX_OK;
 }
 
-static uint64_t virt_iocsr_misc_read(void *opaque, hwaddr addr, unsigned size)
+static MemTxResult virt_iocsr_misc_read(void *opaque, hwaddr addr,
+                                        uint64_t *data,
+                                        unsigned size, MemTxAttrs attrs)
 {
-    uint64_t ret;
+    LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(opaque);
+    uint64_t ret = 0;
+    int features;
 
     switch (addr) {
     case VERSION_REG:
-        return 0x11ULL;
+        ret = 0x11ULL;
+        break;
     case FEATURE_REG:
         ret = BIT(IOCSRF_MSI) | BIT(IOCSRF_EXTIOI) | BIT(IOCSRF_CSRIPI);
         if (kvm_enabled()) {
             ret |= BIT(IOCSRF_VM);
         }
-        return ret;
+        break;
     case VENDOR_REG:
-        return 0x6e6f73676e6f6f4cULL; /* "Loongson" */
+        ret = 0x6e6f73676e6f6f4cULL; /* "Loongson" */
+        break;
     case CPUNAME_REG:
-        return 0x303030354133ULL;     /* "3A5000" */
+        ret = 0x303030354133ULL;     /* "3A5000" */
+        break;
     case MISC_FUNC_REG:
-        return BIT_ULL(IOCSRM_EXTIOI_EN);
+        if (!virt_is_veiointc_enabled(lvms)) {
+            ret |= BIT_ULL(IOCSRM_EXTIOI_EN);
+            break;
+        }
+
+        features = address_space_ldl(&lvms->as_iocsr,
+                                     EXTIOI_VIRT_BASE + EXTIOI_VIRT_CONFIG,
+                                     attrs, NULL);
+        if (features & BIT(EXTIOI_ENABLE)) {
+            ret |= BIT_ULL(IOCSRM_EXTIOI_EN);
+        }
+        if (features & BIT(EXTIOI_ENABLE_INT_ENCODE)) {
+            ret |= BIT_ULL(IOCSRM_EXTIOI_INT_ENCODE);
+        }
+        break;
+    default:
+        g_assert_not_reached();
     }
-    return 0ULL;
+
+    *data = ret;
+    return MEMTX_OK;
 }
 
 static const MemoryRegionOps virt_iocsr_misc_ops = {
-    .read  = virt_iocsr_misc_read,
-    .write = virt_iocsr_misc_write,
+    .read_with_attrs  = virt_iocsr_misc_read,
+    .write_with_attrs = virt_iocsr_misc_write,
     .endianness = DEVICE_LITTLE_ENDIAN,
     .valid = {
         .min_access_size = 4,
@@ -1117,6 +1226,9 @@
 {
     LoongArchVirtMachineState *lvms = LOONGARCH_VIRT_MACHINE(obj);
 
+    if (tcg_enabled()) {
+        lvms->veiointc = ON_OFF_AUTO_OFF;
+    }
     lvms->acpi = ON_OFF_AUTO_AUTO;
     lvms->oem_id = g_strndup(ACPI_BUILD_APPNAME6, 6);
     lvms->oem_table_id = g_strndup(ACPI_BUILD_APPNAME8, 8);
@@ -1213,6 +1325,7 @@
     MachineClass *mc = MACHINE_GET_CLASS(machine);
 
     if (device_is_dynamic_sysbus(mc, dev) ||
+        object_dynamic_cast(OBJECT(dev), TYPE_VIRTIO_IOMMU_PCI) ||
         memhp_type_supported(dev)) {
         return HOTPLUG_HANDLER(machine);
     }
@@ -1302,6 +1415,11 @@
         NULL, NULL);
     object_class_property_set_description(oc, "acpi",
         "Enable ACPI");
+    object_class_property_add(oc, "v-eiointc", "OnOffAuto",
+        virt_get_veiointc, virt_set_veiointc,
+        NULL, NULL);
+    object_class_property_set_description(oc, "v-eiointc",
+                            "Enable Virt Extend I/O Interrupt Controller.");
     machine_class_allow_dynamic_sysbus_dev(mc, TYPE_RAMFB_DEVICE);
 #ifdef CONFIG_TPM
     machine_class_allow_dynamic_sysbus_dev(mc, TYPE_TPM_TIS_SYSBUS);
diff --git a/hw/mips/malta.c b/hw/mips/malta.c
index af74008..664a2ae 100644
--- a/hw/mips/malta.c
+++ b/hw/mips/malta.c
@@ -26,6 +26,7 @@
 #include "qemu/units.h"
 #include "qemu/bitops.h"
 #include "qemu/datadir.h"
+#include "qemu/cutils.h"
 #include "qemu/guest-random.h"
 #include "hw/clock.h"
 #include "hw/southbridge/piix.h"
@@ -850,15 +851,18 @@
     va_end(ap);
 }
 
-static void reinitialize_rng_seed(void *opaque)
+static GString *rng_seed_hex_new(void)
 {
-    char *rng_seed_hex = opaque;
     uint8_t rng_seed[32];
 
     qemu_guest_getrandom_nofail(rng_seed, sizeof(rng_seed));
-    for (size_t i = 0; i < sizeof(rng_seed); ++i) {
-        sprintf(rng_seed_hex + i * 2, "%02x", rng_seed[i]);
-    }
+    return qemu_hexdump_line(NULL, rng_seed, sizeof(rng_seed), 0, 0);
+}
+
+static void reinitialize_rng_seed(void *opaque)
+{
+    g_autoptr(GString) hex = rng_seed_hex_new();
+    memcpy(opaque, hex->str, hex->len);
 }
 
 /* Kernel */
@@ -870,8 +874,6 @@
     uint32_t *prom_buf;
     long prom_size;
     int prom_index = 0;
-    uint8_t rng_seed[32];
-    char rng_seed_hex[sizeof(rng_seed) * 2 + 1];
     size_t rng_seed_prom_offset;
 
     kernel_size = load_elf(loaderparams.kernel_filename, NULL,
@@ -946,14 +948,13 @@
     prom_set(prom_buf, prom_index++, "modetty0");
     prom_set(prom_buf, prom_index++, "38400n8r");
 
-    qemu_guest_getrandom_nofail(rng_seed, sizeof(rng_seed));
-    for (size_t i = 0; i < sizeof(rng_seed); ++i) {
-        sprintf(rng_seed_hex + i * 2, "%02x", rng_seed[i]);
-    }
     prom_set(prom_buf, prom_index++, "rngseed");
     rng_seed_prom_offset = prom_index * ENVP_ENTRY_SIZE +
                            sizeof(uint32_t) * ENVP_NB_ENTRIES;
-    prom_set(prom_buf, prom_index++, "%s", rng_seed_hex);
+    {
+        g_autoptr(GString) hex = rng_seed_hex_new();
+        prom_set(prom_buf, prom_index++, "%s", hex->str);
+    }
 
     prom_set(prom_buf, prom_index++, NULL);
 
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index 5f55ae5..0812d39 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -2662,19 +2662,12 @@
 
 static void scsi_disk_new_request_dump(uint32_t lun, uint32_t tag, uint8_t *buf)
 {
-    int i;
     int len = scsi_cdb_length(buf);
-    char *line_buffer, *p;
+    g_autoptr(GString) str = NULL;
 
     assert(len > 0 && len <= 16);
-    line_buffer = g_malloc(len * 5 + 1);
-
-    for (i = 0, p = line_buffer; i < len; i++) {
-        p += sprintf(p, " 0x%02x", buf[i]);
-    }
-    trace_scsi_disk_new_request(lun, tag, line_buffer);
-
-    g_free(line_buffer);
+    str = qemu_hexdump_line(NULL, buf, len, 1, 0);
+    trace_scsi_disk_new_request(lun, tag, str->str);
 }
 
 static SCSIRequest *scsi_new_request(SCSIDevice *d, uint32_t tag, uint32_t lun,
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 7368b71..3cdaa12 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -944,13 +944,15 @@
 static void vhost_vdpa_dump_config(struct vhost_dev *dev, const uint8_t *config,
                                    uint32_t config_len)
 {
-    int b, len;
-    char line[QEMU_HEXDUMP_LINE_LEN];
+    g_autoptr(GString) str = g_string_sized_new(4 * 16);
+    size_t b, len;
 
-    for (b = 0; b < config_len; b += 16) {
-        len = config_len - b;
-        qemu_hexdump_line(line, config + b, len);
-        trace_vhost_vdpa_dump_config(dev, b, line);
+    for (b = 0; b < config_len; b += len) {
+        len = MIN(config_len - b, 16);
+
+        g_string_truncate(str, 0);
+        qemu_hexdump_line(str, config + b, len, 1, 4);
+        trace_vhost_vdpa_dump_config(dev, b, str->str);
     }
 }
 
diff --git a/include/hw/intc/loongarch_extioi.h b/include/hw/intc/loongarch_extioi.h
index 410c6e1..eccc2e0 100644
--- a/include/hw/intc/loongarch_extioi.h
+++ b/include/hw/intc/loongarch_extioi.h
@@ -41,6 +41,24 @@
 #define EXTIOI_COREMAP_END           (0xD00 - APIC_OFFSET)
 #define EXTIOI_SIZE                  0x800
 
+#define EXTIOI_VIRT_BASE             (0x40000000)
+#define EXTIOI_VIRT_SIZE             (0x1000)
+#define EXTIOI_VIRT_FEATURES         (0x0)
+#define  EXTIOI_HAS_VIRT_EXTENSION   (0)
+#define  EXTIOI_HAS_ENABLE_OPTION    (1)
+#define  EXTIOI_HAS_INT_ENCODE       (2)
+#define  EXTIOI_HAS_CPU_ENCODE       (3)
+#define  EXTIOI_VIRT_HAS_FEATURES    (BIT(EXTIOI_HAS_VIRT_EXTENSION)  \
+                                      | BIT(EXTIOI_HAS_ENABLE_OPTION) \
+                                      | BIT(EXTIOI_HAS_INT_ENCODE)    \
+                                      | BIT(EXTIOI_HAS_CPU_ENCODE))
+#define EXTIOI_VIRT_CONFIG           (0x4)
+#define  EXTIOI_ENABLE               (1)
+#define  EXTIOI_ENABLE_INT_ENCODE    (2)
+#define  EXTIOI_ENABLE_CPU_ENCODE    (3)
+#define EXTIOI_VIRT_COREMAP_START    (0x40)
+#define EXTIOI_VIRT_COREMAP_END      (0x240)
+
 typedef struct ExtIOICore {
     uint32_t coreisr[EXTIOI_IRQS_GROUP_COUNT];
     DECLARE_BITMAP(sw_isr[LS3A_INTC_IP], EXTIOI_IRQS);
@@ -52,6 +70,8 @@
 struct LoongArchExtIOI {
     SysBusDevice parent_obj;
     uint32_t num_cpu;
+    uint32_t features;
+    uint32_t status;
     /* hardware state */
     uint32_t nodetype[EXTIOI_IRQS_NODETYPE_COUNT / 2];
     uint32_t bounce[EXTIOI_IRQS_GROUP_COUNT];
@@ -65,5 +85,6 @@
     qemu_irq irq[EXTIOI_IRQS];
     ExtIOICore *cpu;
     MemoryRegion extioi_system_mem;
+    MemoryRegion virt_extend;
 };
 #endif /* LOONGARCH_EXTIOI_H */
diff --git a/include/hw/loongarch/virt.h b/include/hw/loongarch/virt.h
index 2c4f5cf..8fdfacf 100644
--- a/include/hw/loongarch/virt.h
+++ b/include/hw/loongarch/virt.h
@@ -50,6 +50,7 @@
     Notifier     machine_done;
     Notifier     powerdown_notifier;
     OnOffAuto    acpi;
+    OnOffAuto    veiointc;
     char         *oem_id;
     char         *oem_table_id;
     DeviceState  *acpi_ged;
diff --git a/include/qemu/cutils.h b/include/qemu/cutils.h
index c5dea63..da15547 100644
--- a/include/qemu/cutils.h
+++ b/include/qemu/cutils.h
@@ -282,12 +282,21 @@
  */
 int parse_debug_env(const char *name, int max, int initial);
 
-/*
- * Hexdump a line of a byte buffer into a hexadecimal/ASCII buffer
+/**
+ * qemu_hexdump_line:
+ * @str: GString into which to append
+ * @buf: buffer to dump
+ * @len: number of bytes to dump
+ * @unit_len: add a space between every @unit_len bytes
+ * @block_len: add an extra space between every @block_len bytes
+ *
+ * Append @len bytes of @buf as hexadecimal into @str.
+ * Add spaces between every @unit_len and @block_len bytes.
+ * If @str is NULL, allocate a new string and return it;
+ * otherwise return @str.
  */
-#define QEMU_HEXDUMP_LINE_BYTES 16 /* Number of bytes to dump */
-#define QEMU_HEXDUMP_LINE_LEN 75   /* Number of characters in line */
-void qemu_hexdump_line(char *line, const void *bufptr, size_t len);
+GString *qemu_hexdump_line(GString *str, const void *buf, size_t len,
+                           size_t unit_len, size_t block_len);
 
 /*
  * Hexdump a buffer to a file. An optional string prefix is added to every line
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index c1e1511..0d4dc1f 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -1003,6 +1003,9 @@
     r |= features & CPU_FEATURE_FSMULD ? HWCAP_SPARC_FSMULD : 0;
     r |= features & CPU_FEATURE_VIS1 ? HWCAP_SPARC_VIS : 0;
     r |= features & CPU_FEATURE_VIS2 ? HWCAP_SPARC_VIS2 : 0;
+    r |= features & CPU_FEATURE_FMAF ? HWCAP_SPARC_FMAF : 0;
+    r |= features & CPU_FEATURE_VIS3 ? HWCAP_SPARC_VIS3 : 0;
+    r |= features & CPU_FEATURE_IMA ? HWCAP_SPARC_IMA : 0;
 #endif
 
     return r;
diff --git a/linux-user/ioctls.h b/linux-user/ioctls.h
index d508d0c..3b41128 100644
--- a/linux-user/ioctls.h
+++ b/linux-user/ioctls.h
@@ -102,6 +102,7 @@
      IOCTL(BLKRAGET, IOC_R, MK_PTR(TYPE_LONG))
      IOCTL(BLKSSZGET, IOC_R, MK_PTR(TYPE_INT))
      IOCTL(BLKBSZGET, IOC_R, MK_PTR(TYPE_INT))
+     IOCTL(BLKBSZSET, IOC_W, MK_PTR(TYPE_INT))
      IOCTL_SPECIAL(BLKPG, IOC_W, do_ioctl_blkpg,
                    MK_PTR(MK_STRUCT(STRUCT_blkpg_ioctl_arg)))
 
diff --git a/scripts/ci/org.centos/stream/8/build-environment.yml b/scripts/ci/org.centos/stream/8/build-environment.yml
deleted file mode 100644
index 1ead77e..0000000
--- a/scripts/ci/org.centos/stream/8/build-environment.yml
+++ /dev/null
@@ -1,82 +0,0 @@
----
-- name: Installation of extra packages to build QEMU
-  hosts: all
-  tasks:
-    - name: Extra check for CentOS Stream 8
-      lineinfile:
-        path: /etc/redhat-release
-        line: CentOS Stream release 8
-        state: present
-      check_mode: yes
-      register: centos_stream_8
-
-    - name: Enable EPEL repo on CentOS Stream 8
-      dnf:
-        name:
-          - epel-release
-        state: present
-      when:
-        - centos_stream_8
-
-    - name: Enable PowerTools repo on CentOS Stream 8
-      ini_file:
-        path: /etc/yum.repos.d/CentOS-Stream-PowerTools.repo
-        section: powertools
-        option: enabled
-        value: "1"
-      when:
-        - centos_stream_8
-
-    - name: Install basic packages to build QEMU on CentOS Stream 8
-      dnf:
-        name:
-          - bzip2
-          - bzip2-devel
-          - capstone-devel
-          - dbus-daemon
-          - device-mapper-multipath-devel
-          - diffutils
-          - gcc
-          - gcc-c++
-          - genisoimage
-          - gettext
-          - git
-          - glib2-devel
-          - glusterfs-api-devel
-          - gnutls-devel
-          - libaio-devel
-          - libcap-ng-devel
-          - libcurl-devel
-          - libepoxy-devel
-          - libfdt-devel
-          - libgcrypt-devel
-          - libiscsi-devel
-          - libpmem-devel
-          - librados-devel
-          - librbd-devel
-          - libseccomp-devel
-          - libslirp-devel
-          - libssh-devel
-          - libxkbcommon-devel
-          - lzo-devel
-          - make
-          - mesa-libEGL-devel
-          - nettle-devel
-          - ninja-build
-          - nmap-ncat
-          - numactl-devel
-          - pixman-devel
-          - python38
-          - python3-sphinx
-          - rdma-core-devel
-          - redhat-rpm-config
-          - snappy-devel
-          - spice-glib-devel
-          - spice-server-devel
-          - systemd-devel
-          - systemtap-sdt-devel
-          - tar
-          - zlib-devel
-        state: present
-      when:
-        - centos_stream_8
diff --git a/scripts/ci/org.centos/stream/8/x86_64/configure b/scripts/ci/org.centos/stream/8/x86_64/configure
deleted file mode 100755
index 868db66..0000000
--- a/scripts/ci/org.centos/stream/8/x86_64/configure
+++ /dev/null
@@ -1,198 +0,0 @@
-#!/bin/sh -e
-#
-# Configuration for QEMU based on CentOS Stream 8 x86_64 builds
-#
-# The "configure" command line is based on:
-#
-# https://git.centos.org/rpms/qemu-kvm/blob/c8s-stream-rhel/f/SPECS/qemu-kvm.spec
-#
-# But, because the SPEC file contains a number of conditionals and
-# variable and expansions only available at RPM build time, this version
-# was initially generated from an actual RPM build on an x86_64 platform.
-#
-# From that initial version, options that are required or are a
-# consequence of non-upstream patches have been adapted.  One example
-# is "--without-default-devices" which is *not* present here, given
-# that patches adding downstream specific devices are not available.
-#
-../configure \
---python=/usr/bin/python3.8 \
---prefix="/usr" \
---libdir="/usr/lib64" \
---datadir="/usr/share" \
---sysconfdir="/etc" \
---interp-prefix=/usr/qemu-%M \
---localstatedir="/var" \
---docdir="/usr/share/doc" \
---libexecdir="/usr/libexec" \
---extra-ldflags="-Wl,--build-id -Wl,-z,relro -Wl,-z,now" \
---extra-cflags="-O2 -g -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fexceptions -fstack-protector-strong -grecord-gcc-switches -specs=/usr/lib/rpm/redhat/redhat-hardened-cc1 -specs=/usr/lib/rpm/redhat/redhat-annobin-cc1 -m64 -mtune=generic -fasynchronous-unwind-tables -fstack-clash-protection -fcf-protection" \
---with-suffix="qemu-kvm" \
---firmwarepath=/usr/share/qemu-firmware \
---target-list="x86_64-softmmu" \
---block-drv-rw-whitelist="qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,gluster" \
---audio-drv-list="" \
---block-drv-ro-whitelist="vmdk,vhdx,vpc,https,ssh" \
---with-coroutine=ucontext \
---tls-priority=@QEMU,SYSTEM \
---disable-af-xdp \
---disable-attr \
---disable-auth-pam \
---disable-avx2 \
---disable-avx512f \
---disable-bochs \
---disable-bpf \
---disable-brlapi \
---disable-bsd-user \
---disable-bzip2 \
---disable-cap-ng \
---disable-capstone \
---disable-cfi \
---disable-cfi-debug \
---disable-cloop \
---disable-cocoa \
---disable-coroutine-pool \
---disable-crypto-afalg \
---disable-curl \
---disable-curses \
---disable-debug-info \
---disable-debug-mutex \
---disable-debug-tcg \
---disable-dmg \
---disable-docs \
---disable-fuse \
---disable-fuse-lseek \
---disable-gcrypt \
---disable-gio \
---disable-glusterfs \
---disable-gnutls \
---disable-gtk \
---disable-guest-agent \
---disable-guest-agent-msi \
---disable-hvf \
---disable-iconv \
---disable-kvm \
---disable-libdaxctl \
---disable-libiscsi \
---disable-libnfs \
---disable-libpmem \
---disable-libssh \
---disable-libudev \
---disable-libusb \
---disable-linux-aio \
---disable-linux-io-uring \
---disable-linux-user \
---disable-live-block-migration \
---disable-lto \
---disable-lzfse \
---disable-lzo \
---disable-malloc-trim \
---disable-membarrier \
---disable-modules \
---disable-module-upgrades \
---disable-mpath \
---disable-multiprocess \
---disable-netmap \
---disable-nettle \
---disable-numa \
---disable-nvmm \
---disable-opengl \
---disable-parallels \
---disable-pie \
---disable-qcow1 \
---disable-qed \
---disable-qom-cast-debug \
---disable-rbd \
---disable-rdma \
---disable-replication \
---disable-rng-none \
---disable-safe-stack \
---disable-sanitizers \
---disable-sdl \
---disable-sdl-image \
---disable-seccomp \
---disable-slirp-smbd \
---disable-smartcard \
---disable-snappy \
---disable-sparse \
---disable-spice \
---disable-strip \
---disable-system \
---disable-tcg \
---disable-tools \
---disable-tpm \
---disable-u2f \
---disable-usb-redir \
---disable-user \
---disable-vde \
---disable-vdi \
---disable-vhost-crypto \
---disable-vhost-kernel \
---disable-vhost-net \
---disable-vhost-user \
---disable-vhost-user-blk-server \
---disable-vhost-vdpa \
---disable-virglrenderer \
---disable-virtfs \
---disable-vnc \
---disable-vnc-jpeg \
---disable-png \
---disable-vnc-sasl \
---disable-vte \
---disable-vvfat \
---disable-werror \
---disable-whpx \
---disable-xen \
---disable-xen-pci-passthrough \
---disable-xkbcommon \
---disable-zstd \
---enable-attr \
---enable-avx2 \
---enable-cap-ng \
---enable-capstone \
---enable-coroutine-pool \
---enable-curl \
---enable-debug-info \
---enable-docs \
---enable-fdt \
---enable-gcrypt \
---enable-glusterfs \
---enable-gnutls \
---enable-guest-agent \
---enable-iconv \
---enable-kvm \
---enable-libiscsi \
---enable-libpmem \
---enable-libssh \
---enable-libusb \
---enable-libudev \
---enable-linux-aio \
---enable-lzo \
---enable-malloc-trim \
---enable-modules \
---enable-mpath \
---enable-numa \
---enable-opengl \
---enable-pie \
---enable-rbd \
---enable-rdma \
---enable-seccomp \
---enable-snappy \
---enable-smartcard \
---enable-spice \
---enable-system \
---enable-tcg \
---enable-tools \
---enable-tpm \
---enable-trace-backends=dtrace \
---enable-usb-redir \
---enable-vhost-kernel \
---enable-vhost-net \
---enable-vhost-user \
---enable-vhost-user-blk-server \
---enable-vhost-vdpa \
---enable-vnc \
---enable-png \
---enable-vnc-sasl \
---enable-werror \
---enable-xkbcommon
diff --git a/scripts/ci/org.centos/stream/8/x86_64/test-avocado b/scripts/ci/org.centos/stream/8/x86_64/test-avocado
deleted file mode 100755
index 73e7a1a..0000000
--- a/scripts/ci/org.centos/stream/8/x86_64/test-avocado
+++ /dev/null
@@ -1,65 +0,0 @@
-#!/bin/sh -e
-#
-# Runs a previously vetted list of tests, either marked explicitly for
-# KVM and x86_64, or tests that are generic enough to be valid for all
-# targets. Such a test list can be generated with:
-#
-# ./pyvenv/bin/avocado list --filter-by-tags-include-empty \
-#   --filter-by-tags-include-empty-key -t accel:kvm,arch:x86_64 \
-#   tests/avocado/
-#
-# This is almost the complete list of avocado based tests available at
-# the time this was compile, with the following exceptions:
-#
-# * Require machine type "x-remote":
-#   - tests/avocado/multiprocess.py:Multiprocess.test_multiprocess_x86_64
-#
-# * Requires display type "egl-headless":
-#   - tests/avocado/virtio-gpu.py:VirtioGPUx86.test_virtio_vga_virgl
-#   - tests/avocado/virtio-gpu.py:VirtioGPUx86.test_vhost_user_vga_virgl
-#
-#  * Test is marked (unconditionally) to be skipped:
-#   - tests/avocado/virtio_check_params.py:VirtioMaxSegSettingsCheck.test_machine_types
-#
-make get-vm-images
-./pyvenv/bin/avocado run \
-    --job-results-dir=tests/results/ \
-    tests/avocado/boot_linux.py:BootLinuxX8664.test_pc_i440fx_kvm \
-    tests/avocado/boot_linux.py:BootLinuxX8664.test_pc_q35_kvm \
-    tests/avocado/boot_linux_console.py:BootLinuxConsole.test_x86_64_pc \
-    tests/avocado/cpu_queries.py:QueryCPUModelExpansion.test \
-    tests/avocado/empty_cpu_model.py:EmptyCPUModel.test \
-    tests/avocado/hotplug_cpu.py:HotPlugCPU.test \
-    tests/avocado/netdev-ethtool.py:NetDevEthtool.test_igb \
-    tests/avocado/netdev-ethtool.py:NetDevEthtool.test_igb_nomsi \
-    tests/avocado/info_usernet.py:InfoUsernet.test_hostfwd \
-    tests/avocado/intel_iommu.py:IntelIOMMU.test_intel_iommu \
-    tests/avocado/intel_iommu.py:IntelIOMMU.test_intel_iommu_pt \
-    tests/avocado/intel_iommu.py:IntelIOMMU.test_intel_iommu_strict \
-    tests/avocado/intel_iommu.py:IntelIOMMU.test_intel_iommu_strict_cm \
-    tests/avocado/linux_initrd.py:LinuxInitrd.test_with_2gib_file_should_exit_error_msg_with_linux_v3_6 \
-    tests/avocado/linux_initrd.py:LinuxInitrd.test_with_2gib_file_should_work_with_linux_v4_16 \
-    tests/avocado/migration.py:Migration.test_migration_with_exec \
-    tests/avocado/migration.py:Migration.test_migration_with_tcp_localhost \
-    tests/avocado/migration.py:Migration.test_migration_with_unix \
-    tests/avocado/pc_cpu_hotplug_props.py:OmittedCPUProps.test_no_die_id \
-    tests/avocado/replay_kernel.py:ReplayKernelNormal.test_x86_64_pc \
-    tests/avocado/reverse_debugging.py:ReverseDebugging_X86_64.test_x86_64_pc \
-    tests/avocado/version.py:Version.test_qmp_human_info_version \
-    tests/avocado/virtio_version.py:VirtioVersionCheck.test_conventional_devs \
-    tests/avocado/virtio_version.py:VirtioVersionCheck.test_modern_only_devs \
-    tests/avocado/vnc.py:Vnc.test_change_password \
-    tests/avocado/vnc.py:Vnc.test_change_password_requires_a_password \
-    tests/avocado/vnc.py:Vnc.test_no_vnc \
-    tests/avocado/vnc.py:Vnc.test_no_vnc_change_password \
-    tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_4_0 \
-    tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_4_1 \
-    tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_set_4_0 \
-    tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_unset_4_1 \
-    tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_v1_4_0 \
-    tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_v1_set_4_0 \
-    tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_v2_4_0 \
-    tests/avocado/x86_cpu_model_versions.py:CascadelakeArchCapabilities.test_v2_unset_4_1 \
-    tests/avocado/x86_cpu_model_versions.py:X86CPUModelAliases.test_4_0_alias_compatibility \
-    tests/avocado/x86_cpu_model_versions.py:X86CPUModelAliases.test_4_1_alias \
-    tests/avocado/x86_cpu_model_versions.py:X86CPUModelAliases.test_none_alias
diff --git a/scripts/ci/org.centos/stream/README b/scripts/ci/org.centos/stream/README
deleted file mode 100644
index e3eadfe..0000000
--- a/scripts/ci/org.centos/stream/README
+++ /dev/null
@@ -1,17 +0,0 @@
-This directory contains scripts for generating a build of QEMU that
-closely matches the CentOS Stream[1] builds of the qemu-kvm package.
-
-To have the environment ready to configure, build QEMU and run tests,
-please start with a CentOS Stream machine and:
-
- * apply the generic "build-environment.yml" playbook located at
-   scripts/ci/setup
-
- * apply the "build-environment.yml" in the directory following the
-   CentOS Stream version (such as "8").
-
-This currently only covers CentOS Stream 8 environments and
-packages[2].
-
-[1] https://www.centos.org/centos-stream/
-[2] https://git.centos.org/rpms/qemu-kvm/commits/c8s-stream-rhel
diff --git a/scripts/ci/setup/build-environment.yml b/scripts/ci/setup/build-environment.yml
deleted file mode 100644
index de0d866..0000000
--- a/scripts/ci/setup/build-environment.yml
+++ /dev/null
@@ -1,284 +0,0 @@
-# Copyright (c) 2021 Red Hat, Inc.
-#
-# Author:
-#  Cleber Rosa <crosa@redhat.com>
-#
-# This work is licensed under the terms of the GNU GPL, version 2 or
-# later.  See the COPYING file in the top-level directory.
-#
-# This is an ansible playbook file.  Run it to set up systems with the
-# environment needed to build QEMU.
----
-- name: Installation of basic packages to build QEMU
-  hosts: all
-  tasks:
-    - name: Check for suitable ansible version
-      delegate_to: localhost
-      assert:
-        that:
-          - '((ansible_version.major == 2) and (ansible_version.minor >= 8)) or (ansible_version.major >= 3)'
-        msg: "Unsuitable ansible version, please use version 2.8.0 or later"
-
-    - name: Add armhf foreign architecture to aarch64 hosts
-      command: dpkg --add-architecture armhf
-      when:
-        - ansible_facts['distribution'] == 'Ubuntu'
-        - ansible_facts['architecture'] == 'aarch64'
-
-    - name: Update apt cache / upgrade packages via apt
-      apt:
-        update_cache: yes
-        upgrade: yes
-      when:
-        - ansible_facts['distribution'] == 'Ubuntu'
-
-    # lcitool variables -f json ubuntu-2204 qemu | jq -r '.pkgs[]' | xargs -n 1 echo "-"
-    - name: Install basic packages to build QEMU on Ubuntu 22.04
-      package:
-        name:
-          - bash
-          - bc
-          - bison
-          - bsdextrautils
-          - bzip2
-          - ca-certificates
-          - ccache
-          - clang
-          - dbus
-          - debianutils
-          - diffutils
-          - exuberant-ctags
-          - findutils
-          - flex
-          - g++
-          - gcc
-          - gcovr
-          - genisoimage
-          - gettext
-          - git
-          - hostname
-          - libaio-dev
-          - libasan5
-          - libasound2-dev
-          - libattr1-dev
-          - libbpf-dev
-          - libbrlapi-dev
-          - libbz2-dev
-          - libc6-dev
-          - libcacard-dev
-          - libcap-ng-dev
-          - libcapstone-dev
-          - libcmocka-dev
-          - libcurl4-gnutls-dev
-          - libdaxctl-dev
-          - libdrm-dev
-          - libepoxy-dev
-          - libfdt-dev
-          - libffi-dev
-          - libgbm-dev
-          - libgcrypt20-dev
-          - libglib2.0-dev
-          - libglusterfs-dev
-          - libgnutls28-dev
-          - libgtk-3-dev
-          - libibumad-dev
-          - libibverbs-dev
-          - libiscsi-dev
-          - libjemalloc-dev
-          - libjpeg-turbo8-dev
-          - libjson-c-dev
-          - liblttng-ust-dev
-          - liblzo2-dev
-          - libncursesw5-dev
-          - libnfs-dev
-          - libnuma-dev
-          - libpam0g-dev
-          - libpcre2-dev
-          - libpixman-1-dev
-          - libpng-dev
-          - libpulse-dev
-          - librbd-dev
-          - librdmacm-dev
-          - libsasl2-dev
-          - libsdl2-dev
-          - libsdl2-image-dev
-          - libseccomp-dev
-          - libslirp-dev
-          - libsnappy-dev
-          - libspice-protocol-dev
-          - libssh-dev
-          - libsystemd-dev
-          - libtasn1-6-dev
-          - libubsan1
-          - libudev-dev
-          - liburing-dev
-          - libusb-1.0-0-dev
-          - libusbredirhost-dev
-          - libvdeplug-dev
-          - libvirglrenderer-dev
-          - libvte-2.91-dev
-          - libxml2-dev
-          - libzstd-dev
-          - llvm
-          - locales
-          - make
-          - meson
-          - multipath-tools
-          - ncat
-          - nettle-dev
-          - ninja-build
-          - openssh-client
-          - pkgconf
-          - python3
-          - python3-numpy
-          - python3-opencv
-          - python3-pillow
-          - python3-pip
-          - python3-sphinx
-          - python3-sphinx-rtd-theme
-          - python3-venv
-          - python3-yaml
-          - rpm2cpio
-          - sed
-          - sparse
-          - systemtap-sdt-dev
-          - tar
-          - tesseract-ocr
-          - tesseract-ocr-eng
-          - texinfo
-          - xfslibs-dev
-          - zlib1g-dev
-        state: present
-      when:
-        - ansible_facts['distribution'] == 'Ubuntu'
-        - ansible_facts['distribution_version'] == '22.04'
-
-    # not all packages are available for all architectures
-    - name: Install additional packages to build QEMU on Ubuntu 22.04
-      package:
-        name:
-          - libpmem-dev
-          - libspice-server-dev
-          - libxen-dev
-        state: present
-      when:
-        - ansible_facts['distribution'] == 'Ubuntu'
-        - ansible_facts['distribution_version'] == '22.04'
-        - ansible_facts['architecture'] == 'aarch64' or ansible_facts['architecture'] == 'x86_64'
-
-    - name: Install armhf cross-compile packages to build QEMU on AArch64 Ubuntu 22.04
-      package:
-        name:
-          - binutils-arm-linux-gnueabihf
-          - gcc-arm-linux-gnueabihf
-          - libblkid-dev:armhf
-          - libc6-dev:armhf
-          - libffi-dev:armhf
-          - libglib2.0-dev:armhf
-          - libmount-dev:armhf
-          - libpcre2-dev:armhf
-          - libpixman-1-dev:armhf
-          - zlib1g-dev:armhf
-      when:
-        - ansible_facts['distribution'] == 'Ubuntu'
-        - ansible_facts['distribution_version'] == '22.04'
-        - ansible_facts['architecture'] == 'aarch64'
-
-    - name: Enable EPEL repo on EL8
-      dnf:
-        name:
-          - epel-release
-        state: present
-      when:
-        - ansible_facts['distribution_file_variety'] in ['RedHat', 'CentOS']
-        - ansible_facts['distribution_major_version'] == '8'
-
-    - name: Enable PowerTools repo on CentOS 8
-      ini_file:
-        path: /etc/yum.repos.d/CentOS-Stream-PowerTools.repo
-        section: powertools
-        option: enabled
-        value: "1"
-      when:
-        - ansible_facts['distribution_file_variety'] == 'CentOS'
-        - ansible_facts['distribution_major_version'] == '8'
-
-    - name: Install basic packages to build QEMU on EL8
-      dnf:
-        # This list of packages start with tests/docker/dockerfiles/centos8.docker
-        # but only include files that are common to all distro variants and present
-        # in the standard repos (no add-ons)
-        name:
-          - bzip2
-          - bzip2-devel
-          - capstone-devel
-          - dbus-daemon
-          - device-mapper-multipath-devel
-          - diffutils
-          - gcc
-          - gcc-c++
-          - genisoimage
-          - gettext
-          - git
-          - glib2-devel
-          - glusterfs-api-devel
-          - gnutls-devel
-          - libaio-devel
-          - libcap-ng-devel
-          - libcurl-devel
-          - libepoxy-devel
-          - libfdt-devel
-          - libgcrypt-devel
-          - libiscsi-devel
-          - libpmem-devel
-          - librados-devel
-          - librbd-devel
-          - libseccomp-devel
-          - libssh-devel
-          - libxkbcommon-devel
-          - lzo-devel
-          - make
-          - mesa-libEGL-devel
-          - nettle-devel
-          - ninja-build
-          - nmap-ncat
-          - numactl-devel
-          - pixman-devel
-          - python38
-          - python3-sphinx
-          - rdma-core-devel
-          - redhat-rpm-config
-          - snappy-devel
-          - spice-glib-devel
-          - systemd-devel
-          - systemtap-sdt-devel
-          - tar
-          - zlib-devel
-        state: present
-      when:
-        - ansible_facts['distribution_file_variety'] in ['RedHat', 'CentOS']
-        - ansible_facts['distribution_version'] == '8'
-
-    - name: Install packages only available on x86 and aarch64
-      dnf:
-        # Spice server not available in ppc64le
-        name:
-          - spice-server
-          - spice-server-devel
-        state: present
-      when:
-        - ansible_facts['distribution_file_variety'] in ['RedHat', 'CentOS']
-        - ansible_facts['distribution_version'] == '8'
-        - ansible_facts['architecture'] == 'aarch64' or ansible_facts['architecture'] == 'x86_64'
-
-    - name: Check whether the Python runtime version is managed by alternatives
-      stat:
-        path: /etc/alternatives/python3
-      register: python3
-
-    - name: Set default Python runtime to 3.8 on EL8
-      command: alternatives --set python3 /usr/bin/python3.8
-      when:
-        - ansible_facts['distribution_file_variety'] in ['RedHat', 'CentOS']
-        - ansible_facts['distribution_version'] == '8'
-        - python3.stat.islnk and python3.stat.lnk_target != '/usr/bin/python3.8'
diff --git a/scripts/ci/setup/ubuntu/build-environment.yml b/scripts/ci/setup/ubuntu/build-environment.yml
new file mode 100644
index 0000000..edf1900
--- /dev/null
+++ b/scripts/ci/setup/ubuntu/build-environment.yml
@@ -0,0 +1,69 @@
+# Copyright (c) 2021 Red Hat, Inc.
+#
+# Author:
+#  Cleber Rosa <crosa@redhat.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# later.  See the COPYING file in the top-level directory.
+#
+# This is an ansible playbook file.  Run it to set up systems with the
+# environment needed to build QEMU.
+---
+- name: Installation of basic packages to build QEMU
+  hosts: all
+  tasks:
+    - name: Check for suitable ansible version
+      delegate_to: localhost
+      assert:
+        that:
+          - '((ansible_version.major == 2) and (ansible_version.minor >= 8)) or (ansible_version.major >= 3)'
+        msg: "Unsuitable ansible version, please use version 2.8.0 or later"
+
+    - name: Add armhf foreign architecture to aarch64 hosts
+      command: dpkg --add-architecture armhf
+      when:
+        - ansible_facts['distribution'] == 'Ubuntu'
+        - ansible_facts['architecture'] == 'aarch64'
+
+    - name: Update apt cache / upgrade packages via apt
+      apt:
+        update_cache: yes
+        upgrade: yes
+      when:
+        - ansible_facts['distribution'] == 'Ubuntu'
+
+    # the package lists are updated by "make lcitool-refresh"
+    - name: Include package lists based on OS and architecture
+      include_vars:
+        file: "ubuntu-2204-{{ ansible_facts['architecture'] }}.yaml"
+      when:
+        - ansible_facts['distribution'] == 'Ubuntu'
+        - ansible_facts['distribution_version'] == '22.04'
+        - ansible_facts['architecture'] == 'aarch64' or ansible_facts['architecture'] == 'x86_64'
+
+    - name: Install packages for QEMU on Ubuntu 22.04
+      package:
+        name: "{{ packages }}"
+      when:
+        - ansible_facts['distribution'] == 'Ubuntu'
+        - ansible_facts['distribution_version'] == '22.04'
+        - ansible_facts['architecture'] == 'aarch64' or ansible_facts['architecture'] == 'x86_64'
+
+    - name: Install armhf cross-compile packages to build QEMU on AArch64 Ubuntu 22.04
+      package:
+        name:
+          - binutils-arm-linux-gnueabihf
+          - gcc-arm-linux-gnueabihf
+          - libblkid-dev:armhf
+          - libc6-dev:armhf
+          - libffi-dev:armhf
+          - libglib2.0-dev:armhf
+          - libmount-dev:armhf
+          - libpcre2-dev:armhf
+          - libpixman-1-dev:armhf
+          - zlib1g-dev:armhf
+      when:
+        - ansible_facts['distribution'] == 'Ubuntu'
+        - ansible_facts['distribution_version'] == '22.04'
+        - ansible_facts['architecture'] == 'aarch64'
+
diff --git a/scripts/ci/setup/ubuntu/ubuntu-2204-aarch64.yaml b/scripts/ci/setup/ubuntu/ubuntu-2204-aarch64.yaml
new file mode 100644
index 0000000..8d7d872
--- /dev/null
+++ b/scripts/ci/setup/ubuntu/ubuntu-2204-aarch64.yaml
@@ -0,0 +1,127 @@
+# THIS FILE WAS AUTO-GENERATED
+#
+#  $ lcitool variables --host-arch aarch64 ubuntu-2204 qemu
+#
+# https://gitlab.com/libvirt/libvirt-ci
+
+packages:
+  - bash
+  - bc
+  - bison
+  - bsdextrautils
+  - bzip2
+  - ca-certificates
+  - ccache
+  - clang
+  - dbus
+  - debianutils
+  - diffutils
+  - exuberant-ctags
+  - findutils
+  - flex
+  - gcc
+  - gcovr
+  - gettext
+  - git
+  - hostname
+  - libaio-dev
+  - libasan6
+  - libasound2-dev
+  - libattr1-dev
+  - libbpf-dev
+  - libbrlapi-dev
+  - libbz2-dev
+  - libc6-dev
+  - libcacard-dev
+  - libcap-ng-dev
+  - libcapstone-dev
+  - libcmocka-dev
+  - libcurl4-gnutls-dev
+  - libdaxctl-dev
+  - libdrm-dev
+  - libepoxy-dev
+  - libfdt-dev
+  - libffi-dev
+  - libfuse3-dev
+  - libgbm-dev
+  - libgcrypt20-dev
+  - libglib2.0-dev
+  - libglusterfs-dev
+  - libgnutls28-dev
+  - libgtk-3-dev
+  - libibumad-dev
+  - libibverbs-dev
+  - libiscsi-dev
+  - libjemalloc-dev
+  - libjpeg-turbo8-dev
+  - libjson-c-dev
+  - liblttng-ust-dev
+  - liblzo2-dev
+  - libncursesw5-dev
+  - libnfs-dev
+  - libnuma-dev
+  - libpam0g-dev
+  - libpcre2-dev
+  - libpipewire-0.3-dev
+  - libpixman-1-dev
+  - libpng-dev
+  - libpulse-dev
+  - librbd-dev
+  - librdmacm-dev
+  - libsasl2-dev
+  - libsdl2-dev
+  - libsdl2-image-dev
+  - libseccomp-dev
+  - libselinux1-dev
+  - libslirp-dev
+  - libsnappy-dev
+  - libsndio-dev
+  - libspice-protocol-dev
+  - libspice-server-dev
+  - libssh-dev
+  - libsystemd-dev
+  - libtasn1-6-dev
+  - libubsan1
+  - libudev-dev
+  - liburing-dev
+  - libusb-1.0-0-dev
+  - libusbredirhost-dev
+  - libvdeplug-dev
+  - libvirglrenderer-dev
+  - libvte-2.91-dev
+  - libxen-dev
+  - libzstd-dev
+  - llvm
+  - locales
+  - make
+  - meson
+  - mtools
+  - multipath-tools
+  - ncat
+  - nettle-dev
+  - ninja-build
+  - openssh-client
+  - pkgconf
+  - python3
+  - python3-numpy
+  - python3-opencv
+  - python3-pillow
+  - python3-pip
+  - python3-sphinx
+  - python3-sphinx-rtd-theme
+  - python3-tomli
+  - python3-venv
+  - python3-yaml
+  - rpm2cpio
+  - sed
+  - socat
+  - sparse
+  - swtpm
+  - systemtap-sdt-dev
+  - tar
+  - tesseract-ocr
+  - tesseract-ocr-eng
+  - xorriso
+  - zlib1g-dev
+  - zstd
+
diff --git a/scripts/ci/setup/ubuntu/ubuntu-2204-armhf-cross.yml b/scripts/ci/setup/ubuntu/ubuntu-2204-armhf-cross.yml
new file mode 100644
index 0000000..0cc34cd
--- /dev/null
+++ b/scripts/ci/setup/ubuntu/ubuntu-2204-armhf-cross.yml
@@ -0,0 +1,127 @@
+# THIS FILE WAS AUTO-GENERATED
+#
+#  $ lcitool variables --cross-arch armv7l ubuntu-2204 qemu
+#
+# https://gitlab.com/libvirt/libvirt-ci
+
+packages:
+  - bash
+  - bc
+  - bison
+  - bsdextrautils
+  - bzip2
+  - ca-certificates
+  - ccache
+  - dbus
+  - debianutils
+  - diffutils
+  - exuberant-ctags
+  - findutils
+  - flex
+  - gcc
+  - gcovr
+  - gettext
+  - git
+  - hostname
+  - libglib2.0-dev
+  - libpcre2-dev
+  - libsndio-dev
+  - libspice-protocol-dev
+  - llvm
+  - locales
+  - make
+  - meson
+  - mtools
+  - ncat
+  - ninja-build
+  - openssh-client
+  - pkgconf
+  - python3
+  - python3-numpy
+  - python3-opencv
+  - python3-pillow
+  - python3-pip
+  - python3-sphinx
+  - python3-sphinx-rtd-theme
+  - python3-tomli
+  - python3-venv
+  - python3-yaml
+  - rpm2cpio
+  - sed
+  - socat
+  - sparse
+  - swtpm
+  - tar
+  - tesseract-ocr
+  - tesseract-ocr-eng
+  - xorriso
+  - zstd
+  - gcc-arm-linux-gnueabihf
+  - libaio-dev:armhf
+  - libasan6:armhf
+  - libasound2-dev:armhf
+  - libattr1-dev:armhf
+  - libbpf-dev:armhf
+  - libbrlapi-dev:armhf
+  - libbz2-dev:armhf
+  - libc6-dev:armhf
+  - libcacard-dev:armhf
+  - libcap-ng-dev:armhf
+  - libcapstone-dev:armhf
+  - libcmocka-dev:armhf
+  - libcurl4-gnutls-dev:armhf
+  - libdaxctl-dev:armhf
+  - libdrm-dev:armhf
+  - libepoxy-dev:armhf
+  - libfdt-dev:armhf
+  - libffi-dev:armhf
+  - libfuse3-dev:armhf
+  - libgbm-dev:armhf
+  - libgcrypt20-dev:armhf
+  - libglib2.0-dev:armhf
+  - libglusterfs-dev:armhf
+  - libgnutls28-dev:armhf
+  - libgtk-3-dev:armhf
+  - libibumad-dev:armhf
+  - libibverbs-dev:armhf
+  - libiscsi-dev:armhf
+  - libjemalloc-dev:armhf
+  - libjpeg-turbo8-dev:armhf
+  - libjson-c-dev:armhf
+  - liblttng-ust-dev:armhf
+  - liblzo2-dev:armhf
+  - libncursesw5-dev:armhf
+  - libnfs-dev:armhf
+  - libnuma-dev:armhf
+  - libpam0g-dev:armhf
+  - libpipewire-0.3-dev:armhf
+  - libpixman-1-dev:armhf
+  - libpng-dev:armhf
+  - libpulse-dev:armhf
+  - librbd-dev:armhf
+  - librdmacm-dev:armhf
+  - libsasl2-dev:armhf
+  - libsdl2-dev:armhf
+  - libsdl2-image-dev:armhf
+  - libseccomp-dev:armhf
+  - libselinux1-dev:armhf
+  - libslirp-dev:armhf
+  - libsnappy-dev:armhf
+  - libspice-server-dev:armhf
+  - libssh-dev:armhf
+  - libsystemd-dev:armhf
+  - libtasn1-6-dev:armhf
+  - libubsan1:armhf
+  - libudev-dev:armhf
+  - liburing-dev:armhf
+  - libusb-1.0-0-dev:armhf
+  - libusbredirhost-dev:armhf
+  - libvdeplug-dev:armhf
+  - libvirglrenderer-dev:armhf
+  - libvte-2.91-dev:armhf
+  - libxen-dev:armhf
+  - libzstd-dev:armhf
+  - nettle-dev:armhf
+  - systemtap-sdt-dev:armhf
+  - zlib1g-dev:armhf
+
diff --git a/scripts/ci/setup/ubuntu/ubuntu-2204-s390x.yaml b/scripts/ci/setup/ubuntu/ubuntu-2204-s390x.yaml
new file mode 100644
index 0000000..16050a5
--- /dev/null
+++ b/scripts/ci/setup/ubuntu/ubuntu-2204-s390x.yaml
@@ -0,0 +1,125 @@
+# THIS FILE WAS AUTO-GENERATED
+#
+#  $ lcitool variables --host-arch s390x ubuntu-2204 qemu
+#
+# https://gitlab.com/libvirt/libvirt-ci
+
+packages:
+  - bash
+  - bc
+  - bison
+  - bsdextrautils
+  - bzip2
+  - ca-certificates
+  - ccache
+  - clang
+  - dbus
+  - debianutils
+  - diffutils
+  - exuberant-ctags
+  - findutils
+  - flex
+  - gcc
+  - gcovr
+  - gettext
+  - git
+  - hostname
+  - libaio-dev
+  - libasan6
+  - libasound2-dev
+  - libattr1-dev
+  - libbpf-dev
+  - libbrlapi-dev
+  - libbz2-dev
+  - libc6-dev
+  - libcacard-dev
+  - libcap-ng-dev
+  - libcapstone-dev
+  - libcmocka-dev
+  - libcurl4-gnutls-dev
+  - libdaxctl-dev
+  - libdrm-dev
+  - libepoxy-dev
+  - libfdt-dev
+  - libffi-dev
+  - libfuse3-dev
+  - libgbm-dev
+  - libgcrypt20-dev
+  - libglib2.0-dev
+  - libglusterfs-dev
+  - libgnutls28-dev
+  - libgtk-3-dev
+  - libibumad-dev
+  - libibverbs-dev
+  - libiscsi-dev
+  - libjemalloc-dev
+  - libjpeg-turbo8-dev
+  - libjson-c-dev
+  - liblttng-ust-dev
+  - liblzo2-dev
+  - libncursesw5-dev
+  - libnfs-dev
+  - libnuma-dev
+  - libpam0g-dev
+  - libpcre2-dev
+  - libpipewire-0.3-dev
+  - libpixman-1-dev
+  - libpng-dev
+  - libpulse-dev
+  - librbd-dev
+  - librdmacm-dev
+  - libsasl2-dev
+  - libsdl2-dev
+  - libsdl2-image-dev
+  - libseccomp-dev
+  - libselinux1-dev
+  - libslirp-dev
+  - libsnappy-dev
+  - libsndio-dev
+  - libspice-protocol-dev
+  - libssh-dev
+  - libsystemd-dev
+  - libtasn1-6-dev
+  - libubsan1
+  - libudev-dev
+  - liburing-dev
+  - libusb-1.0-0-dev
+  - libusbredirhost-dev
+  - libvdeplug-dev
+  - libvirglrenderer-dev
+  - libvte-2.91-dev
+  - libzstd-dev
+  - llvm
+  - locales
+  - make
+  - meson
+  - mtools
+  - multipath-tools
+  - ncat
+  - nettle-dev
+  - ninja-build
+  - openssh-client
+  - pkgconf
+  - python3
+  - python3-numpy
+  - python3-opencv
+  - python3-pillow
+  - python3-pip
+  - python3-sphinx
+  - python3-sphinx-rtd-theme
+  - python3-tomli
+  - python3-venv
+  - python3-yaml
+  - rpm2cpio
+  - sed
+  - socat
+  - sparse
+  - swtpm
+  - systemtap-sdt-dev
+  - tar
+  - tesseract-ocr
+  - tesseract-ocr-eng
+  - xorriso
+  - zlib1g-dev
+  - zstd
+
diff --git a/system/qtest.c b/system/qtest.c
index 6da58b3..507a358 100644
--- a/system/qtest.c
+++ b/system/qtest.c
@@ -601,9 +601,9 @@
         qtest_send_prefix(chr);
         qtest_sendf(chr, "OK 0x%016" PRIx64 "\n", value);
     } else if (strcmp(words[0], "read") == 0) {
-        uint64_t addr, len, i;
+        g_autoptr(GString) enc = NULL;
+        uint64_t addr, len;
         uint8_t *data;
-        char *enc;
         int ret;
 
         g_assert(words[1] && words[2]);
@@ -618,16 +618,12 @@
         address_space_read(first_cpu->as, addr, MEMTXATTRS_UNSPECIFIED, data,
                            len);
 
-        enc = g_malloc(2 * len + 1);
-        for (i = 0; i < len; i++) {
-            sprintf(&enc[i * 2], "%02x", data[i]);
-        }
+        enc = qemu_hexdump_line(NULL, data, len, 0, 0);
 
         qtest_send_prefix(chr);
-        qtest_sendf(chr, "OK 0x%s\n", enc);
+        qtest_sendf(chr, "OK 0x%s\n", enc->str);
 
         g_free(data);
-        g_free(enc);
     } else if (strcmp(words[0], "b64read") == 0) {
         uint64_t addr, len;
         uint8_t *data;
diff --git a/target/loongarch/cpu.c b/target/loongarch/cpu.c
index b5c1ec9..270f711 100644
--- a/target/loongarch/cpu.c
+++ b/target/loongarch/cpu.c
@@ -707,7 +707,7 @@
     qemu_fprintf(f, "EENTRY=%016" PRIx64 "\n", env->CSR_EENTRY);
     qemu_fprintf(f, "PRCFG1=%016" PRIx64 ", PRCFG2=%016" PRIx64 ","
                  " PRCFG3=%016" PRIx64 "\n",
-                 env->CSR_PRCFG1, env->CSR_PRCFG3, env->CSR_PRCFG3);
+                 env->CSR_PRCFG1, env->CSR_PRCFG2, env->CSR_PRCFG3);
     qemu_fprintf(f, "TLBRENTRY=%016" PRIx64 "\n", env->CSR_TLBRENTRY);
     qemu_fprintf(f, "TLBRBADV=%016" PRIx64 "\n", env->CSR_TLBRBADV);
     qemu_fprintf(f, "TLBRERA=%016" PRIx64 "\n", env->CSR_TLBRERA);
diff --git a/target/loongarch/cpu.h b/target/loongarch/cpu.h
index 41b8e6d..6c41faf 100644
--- a/target/loongarch/cpu.h
+++ b/target/loongarch/cpu.h
@@ -36,6 +36,7 @@
 #define CPUNAME_REG             0x20
 #define MISC_FUNC_REG           0x420
 #define IOCSRM_EXTIOI_EN        48
+#define IOCSRM_EXTIOI_INT_ENCODE 49
 
 #define IOCSR_MEM_SIZE          0x428
 
diff --git a/target/sparc/asi.h b/target/sparc/asi.h
index a668296..14ffaa3 100644
--- a/target/sparc/asi.h
+++ b/target/sparc/asi.h
@@ -144,6 +144,8 @@
  * ASIs, "(4V)" designates SUN4V specific ASIs.  "(NG4)" designates SPARC-T4
  * and later ASIs.
  */
+#define ASI_MON_AIUP            0x12 /* (VIS4) Primary, user, monitor   */
+#define ASI_MON_AIUS            0x13 /* (VIS4) Secondary, user, monitor */
 #define ASI_REAL                0x14 /* Real address, cacheable          */
 #define ASI_PHYS_USE_EC		0x14 /* PADDR, E-cacheable		*/
 #define ASI_REAL_IO             0x15 /* Real address, non-cacheable      */
@@ -257,6 +259,8 @@
 #define ASI_UDBL_CONTROL_R	0x7f /* External UDB control regs rd low*/
 #define ASI_INTR_R		0x7f /* IRQ vector dispatch read	*/
 #define ASI_INTR_DATAN_R	0x7f /* (III) In irq vector data reg N	*/
+#define ASI_MON_P               0x84 /* (VIS4) Primary, monitor         */
+#define ASI_MON_S               0x85 /* (VIS4) Secondary, monitor       */
 #define ASI_PIC			0xb0 /* (NG4) PIC registers		*/
 #define ASI_PST8_P		0xc0 /* Primary, 8 8-bit, partial	*/
 #define ASI_PST8_S		0xc1 /* Secondary, 8 8-bit, partial	*/
diff --git a/target/sparc/cpu-feature.h.inc b/target/sparc/cpu-feature.h.inc
index d800f18..be81005 100644
--- a/target/sparc/cpu-feature.h.inc
+++ b/target/sparc/cpu-feature.h.inc
@@ -12,3 +12,7 @@
 FEATURE(CACHE_CTRL)
 FEATURE(POWERDOWN)
 FEATURE(CASA)
+FEATURE(FMAF)
+FEATURE(VIS3)
+FEATURE(IMA)
+FEATURE(VIS4)
diff --git a/target/sparc/cpu.c b/target/sparc/cpu.c
index 5be1592..9bacfb6 100644
--- a/target/sparc/cpu.c
+++ b/target/sparc/cpu.c
@@ -549,6 +549,10 @@
     [CPU_FEATURE_BIT_HYPV] = "hypv",
     [CPU_FEATURE_BIT_VIS1] = "vis1",
     [CPU_FEATURE_BIT_VIS2] = "vis2",
+    [CPU_FEATURE_BIT_FMAF] = "fmaf",
+    [CPU_FEATURE_BIT_VIS3] = "vis3",
+    [CPU_FEATURE_BIT_IMA] = "ima",
+    [CPU_FEATURE_BIT_VIS4] = "vis4",
 #else
     [CPU_FEATURE_BIT_MUL] = "mul",
     [CPU_FEATURE_BIT_DIV] = "div",
@@ -877,6 +881,14 @@
                     CPU_FEATURE_BIT_VIS1, false),
     DEFINE_PROP_BIT("vis2",     SPARCCPU, env.def.features,
                     CPU_FEATURE_BIT_VIS2, false),
+    DEFINE_PROP_BIT("fmaf",     SPARCCPU, env.def.features,
+                    CPU_FEATURE_BIT_FMAF, false),
+    DEFINE_PROP_BIT("vis3",     SPARCCPU, env.def.features,
+                    CPU_FEATURE_BIT_VIS3, false),
+    DEFINE_PROP_BIT("ima",      SPARCCPU, env.def.features,
+                    CPU_FEATURE_BIT_IMA, false),
+    DEFINE_PROP_BIT("vis4",     SPARCCPU, env.def.features,
+                    CPU_FEATURE_BIT_VIS4, false),
 #else
     DEFINE_PROP_BIT("mul",      SPARCCPU, env.def.features,
                     CPU_FEATURE_BIT_MUL, false),
diff --git a/target/sparc/fop_helper.c b/target/sparc/fop_helper.c
index 1205a59..0b30665 100644
--- a/target/sparc/fop_helper.c
+++ b/target/sparc/fop_helper.c
@@ -343,6 +343,90 @@
     return f128_ret(ret);
 }
 
+float32 helper_fmadds(CPUSPARCState *env, float32 s1,
+                      float32 s2, float32 s3, uint32_t op)
+{
+    float32 ret = float32_muladd(s1, s2, s3, op, &env->fp_status);
+    check_ieee_exceptions(env, GETPC());
+    return ret;
+}
+
+float64 helper_fmaddd(CPUSPARCState *env, float64 s1,
+                      float64 s2, float64 s3, uint32_t op)
+{
+    float64 ret = float64_muladd(s1, s2, s3, op, &env->fp_status);
+    check_ieee_exceptions(env, GETPC());
+    return ret;
+}
+
+float32 helper_fnadds(CPUSPARCState *env, float32 src1, float32 src2)
+{
+    float32 ret = float32_add(src1, src2, &env->fp_status);
+
+    /*
+     * NaN inputs or result do not get a sign change.
+     * Nor, apparently, does zero: on hardware, -(x + -x) yields +0.
+     */
+    if (!float32_is_any_nan(ret) && !float32_is_zero(ret)) {
+        ret = float32_chs(ret);
+    }
+    check_ieee_exceptions(env, GETPC());
+    return ret;
+}
+
+float32 helper_fnmuls(CPUSPARCState *env, float32 src1, float32 src2)
+{
+    float32 ret = float32_mul(src1, src2, &env->fp_status);
+
+    /* NaN inputs or result do not get a sign change. */
+    if (!float32_is_any_nan(ret)) {
+        ret = float32_chs(ret);
+    }
+    check_ieee_exceptions(env, GETPC());
+    return ret;
+}
+
+float64 helper_fnaddd(CPUSPARCState *env, float64 src1, float64 src2)
+{
+    float64 ret = float64_add(src1, src2, &env->fp_status);
+
+    /*
+     * NaN inputs or result do not get a sign change.
+     * Nor, apparently, does zero: on hardware, -(x + -x) yields +0.
+     */
+    if (!float64_is_any_nan(ret) && !float64_is_zero(ret)) {
+        ret = float64_chs(ret);
+    }
+    check_ieee_exceptions(env, GETPC());
+    return ret;
+}
+
+float64 helper_fnmuld(CPUSPARCState *env, float64 src1, float64 src2)
+{
+    float64 ret = float64_mul(src1, src2, &env->fp_status);
+
+    /* NaN inputs or result do not get a sign change. */
+    if (!float64_is_any_nan(ret)) {
+        ret = float64_chs(ret);
+    }
+    check_ieee_exceptions(env, GETPC());
+    return ret;
+}
+
+float64 helper_fnsmuld(CPUSPARCState *env, float32 src1, float32 src2)
+{
+    float64 ret = float64_mul(float32_to_float64(src1, &env->fp_status),
+                              float32_to_float64(src2, &env->fp_status),
+                              &env->fp_status);
+
+    /* NaN inputs or result do not get a sign change. */
+    if (!float64_is_any_nan(ret)) {
+        ret = float64_chs(ret);
+    }
+    check_ieee_exceptions(env, GETPC());
+    return ret;
+}
+
 static uint32_t finish_fcmp(CPUSPARCState *env, FloatRelation r, uintptr_t ra)
 {
     check_ieee_exceptions(env, ra);
@@ -406,6 +490,52 @@
     return finish_fcmp(env, r, GETPC());
 }
 
+uint32_t helper_flcmps(float32 src1, float32 src2)
+{
+    /*
+     * FLCMP never raises an exception nor modifies any FSR fields.
+     * Perform the comparison with a dummy fp environment.
+     */
+    float_status discard = { };
+    FloatRelation r = float32_compare_quiet(src1, src2, &discard);
+
+    switch (r) {
+    case float_relation_equal:
+        if (src2 == float32_zero && src1 != float32_zero) {
+            return 1;  /* -0.0 < +0.0 */
+        }
+        return 0;
+    case float_relation_less:
+        return 1;
+    case float_relation_greater:
+        return 0;
+    case float_relation_unordered:
+        return float32_is_any_nan(src2) ? 3 : 2;
+    }
+    g_assert_not_reached();
+}
+
+uint32_t helper_flcmpd(float64 src1, float64 src2)
+{
+    float_status discard = { };
+    FloatRelation r = float64_compare_quiet(src1, src2, &discard);
+
+    switch (r) {
+    case float_relation_equal:
+        if (src2 == float64_zero && src1 != float64_zero) {
+            return 1;  /* -0.0 < +0.0 */
+        }
+        return 0;
+    case float_relation_less:
+        return 1;
+    case float_relation_greater:
+        return 0;
+    case float_relation_unordered:
+        return float64_is_any_nan(src2) ? 3 : 2;
+    }
+    g_assert_not_reached();
+}
+
 target_ulong cpu_get_fsr(CPUSPARCState *env)
 {
     target_ulong fsr = env->fsr | env->fsr_cexc_ftt;
@@ -472,3 +602,9 @@
     env->fsr_cexc_ftt |= fsr & FSR_CEXC_MASK;
     set_fsr_nonsplit(env, fsr);
 }
+
+void helper_set_fsr_nofcc(CPUSPARCState *env, uint32_t fsr)
+{
+    env->fsr_cexc_ftt = fsr & (FSR_CEXC_MASK | FSR_FTT_MASK);
+    set_fsr_nonsplit(env, fsr);
+}
diff --git a/target/sparc/helper.h b/target/sparc/helper.h
index 97fbf6f..134e519 100644
--- a/target/sparc/helper.h
+++ b/target/sparc/helper.h
@@ -40,6 +40,7 @@
 DEF_HELPER_FLAGS_5(st_asi, TCG_CALL_NO_WG, void, env, tl, i64, int, i32)
 #endif
 DEF_HELPER_FLAGS_1(get_fsr, TCG_CALL_NO_WG_SE, tl, env)
+DEF_HELPER_FLAGS_2(set_fsr_nofcc, TCG_CALL_NO_RWG, void, env, i32)
 DEF_HELPER_FLAGS_2(set_fsr_nofcc_noftt, TCG_CALL_NO_RWG, void, env, i32)
 DEF_HELPER_FLAGS_2(fsqrts, TCG_CALL_NO_WG, f32, env, f32)
 DEF_HELPER_FLAGS_2(fsqrtd, TCG_CALL_NO_WG, f64, env, f64)
@@ -50,12 +51,17 @@
 DEF_HELPER_FLAGS_3(fcmped, TCG_CALL_NO_WG, i32, env, f64, f64)
 DEF_HELPER_FLAGS_3(fcmpq, TCG_CALL_NO_WG, i32, env, i128, i128)
 DEF_HELPER_FLAGS_3(fcmpeq, TCG_CALL_NO_WG, i32, env, i128, i128)
+DEF_HELPER_FLAGS_2(flcmps, TCG_CALL_NO_RWG_SE, i32, f32, f32)
+DEF_HELPER_FLAGS_2(flcmpd, TCG_CALL_NO_RWG_SE, i32, f64, f64)
 DEF_HELPER_2(raise_exception, noreturn, env, int)
 
 DEF_HELPER_FLAGS_3(faddd, TCG_CALL_NO_WG, f64, env, f64, f64)
 DEF_HELPER_FLAGS_3(fsubd, TCG_CALL_NO_WG, f64, env, f64, f64)
 DEF_HELPER_FLAGS_3(fmuld, TCG_CALL_NO_WG, f64, env, f64, f64)
 DEF_HELPER_FLAGS_3(fdivd, TCG_CALL_NO_WG, f64, env, f64, f64)
+DEF_HELPER_FLAGS_5(fmaddd, TCG_CALL_NO_WG, f64, env, f64, f64, f64, i32)
+DEF_HELPER_FLAGS_3(fnaddd, TCG_CALL_NO_WG, f64, env, f64, f64)
+DEF_HELPER_FLAGS_3(fnmuld, TCG_CALL_NO_WG, f64, env, f64, f64)
 
 DEF_HELPER_FLAGS_3(faddq, TCG_CALL_NO_WG, i128, env, i128, i128)
 DEF_HELPER_FLAGS_3(fsubq, TCG_CALL_NO_WG, i128, env, i128, i128)
@@ -66,8 +72,12 @@
 DEF_HELPER_FLAGS_3(fsubs, TCG_CALL_NO_WG, f32, env, f32, f32)
 DEF_HELPER_FLAGS_3(fmuls, TCG_CALL_NO_WG, f32, env, f32, f32)
 DEF_HELPER_FLAGS_3(fdivs, TCG_CALL_NO_WG, f32, env, f32, f32)
+DEF_HELPER_FLAGS_5(fmadds, TCG_CALL_NO_WG, f32, env, f32, f32, f32, i32)
+DEF_HELPER_FLAGS_3(fnadds, TCG_CALL_NO_WG, f32, env, f32, f32)
+DEF_HELPER_FLAGS_3(fnmuls, TCG_CALL_NO_WG, f32, env, f32, f32)
 
 DEF_HELPER_FLAGS_3(fsmuld, TCG_CALL_NO_WG, f64, env, f32, f32)
+DEF_HELPER_FLAGS_3(fnsmuld, TCG_CALL_NO_WG, f64, env, f32, f32)
 DEF_HELPER_FLAGS_3(fdmulq, TCG_CALL_NO_WG, i128, env, f64, f64)
 
 DEF_HELPER_FLAGS_2(fitod, TCG_CALL_NO_WG, f64, env, s32)
@@ -105,15 +115,28 @@
 DEF_HELPER_FLAGS_3(fpack32, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64)
 DEF_HELPER_FLAGS_2(fpackfix, TCG_CALL_NO_RWG_SE, i32, i64, i64)
 DEF_HELPER_FLAGS_3(bshuffle, TCG_CALL_NO_RWG_SE, i64, i64, i64, i64)
-#define VIS_CMPHELPER(name)                                              \
+DEF_HELPER_FLAGS_2(cmask8, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(cmask16, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(cmask32, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fchksm16, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fmean16, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fslas16, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(fslas32, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+#define VIS_CMPHELPER(name)                                  \
+    DEF_HELPER_FLAGS_2(f##name##8, TCG_CALL_NO_RWG_SE,       \
+                       i64, i64, i64)                        \
     DEF_HELPER_FLAGS_2(f##name##16, TCG_CALL_NO_RWG_SE,      \
-                       i64, i64, i64)                                    \
+                       i64, i64, i64)                        \
     DEF_HELPER_FLAGS_2(f##name##32, TCG_CALL_NO_RWG_SE,      \
                        i64, i64, i64)
 VIS_CMPHELPER(cmpgt)
 VIS_CMPHELPER(cmpeq)
 VIS_CMPHELPER(cmple)
 VIS_CMPHELPER(cmpne)
+VIS_CMPHELPER(cmpugt)
+VIS_CMPHELPER(cmpule)
+DEF_HELPER_FLAGS_2(xmulx, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(xmulxhi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 #endif
 #undef VIS_HELPER
 #undef VIS_CMPHELPER
diff --git a/target/sparc/insns.decode b/target/sparc/insns.decode
index e2d8a07..fbcb4f7 100644
--- a/target/sparc/insns.decode
+++ b/target/sparc/insns.decode
@@ -26,6 +26,15 @@
 ## Major Opcode 10 -- integer, floating-point, vis, and system insns.
 ##
 
+%dfp_rd     25:5 !function=extract_dfpreg
+%dfp_rs1    14:5 !function=extract_dfpreg
+%dfp_rs2     0:5 !function=extract_dfpreg
+%dfp_rs3     9:5 !function=extract_dfpreg
+
+%qfp_rd     25:5 !function=extract_qfpreg
+%qfp_rs1    14:5 !function=extract_qfpreg
+%qfp_rs2     0:5 !function=extract_qfpreg
+
 &r_r_ri     rd rs1 rs2_or_imm imm:bool
 @n_r_ri     .. ..... ...... rs1:5 imm:1 rs2_or_imm:s13     &r_r_ri rd=0
 @r_r_ri     .. rd:5  ...... rs1:5 imm:1 rs2_or_imm:s13     &r_r_ri
@@ -37,11 +46,45 @@
 
 &r_r_r      rd rs1 rs2
 @r_r_r      .. rd:5  ...... rs1:5 . ........ rs2:5         &r_r_r
+@d_r_r      .. ..... ...... rs1:5 . ........ rs2:5         \
+            &r_r_r rd=%dfp_rd
+@r_d_d      .. rd:5  ...... ..... . ........ .....         \
+            &r_r_r rs1=%dfp_rs1 rs2=%dfp_rs2
+@d_r_d      .. ..... ...... rs1:5 . ........ .....         \
+            &r_r_r rd=%dfp_rd rs2=%dfp_rs2
+@d_d_d      .. ..... ...... ..... . ........ .....         \
+            &r_r_r rd=%dfp_rd rs1=%dfp_rs1 rs2=%dfp_rs2
+@q_q_q      .. ..... ...... ..... . ........ .....         \
+            &r_r_r rd=%qfp_rd rs1=%qfp_rs1 rs2=%qfp_rs2
+@q_d_d      .. ..... ...... ..... . ........ .....         \
+            &r_r_r rd=%qfp_rd rs1=%dfp_rs1 rs2=%dfp_rs2
+
 @r_r_r_swap .. rd:5  ...... rs2:5 . ........ rs1:5         &r_r_r
+@d_d_d_swap .. ..... ...... ..... . ........ .....         \
+            &r_r_r rd=%dfp_rd rs1=%dfp_rs2 rs2=%dfp_rs1
 
 &r_r        rd rs
 @r_r1       .. rd:5  ...... rs:5  . ........ .....         &r_r
 @r_r2       .. rd:5  ...... ..... . ........ rs:5          &r_r
+@r_d2       .. rd:5  ...... ..... . ........ .....         &r_r rs=%dfp_rs2
+@r_q2       .. rd:5  ...... ..... . ........ .....         &r_r rs=%qfp_rs2
+@d_r2       .. ..... ...... ..... . ........ rs:5          &r_r rd=%dfp_rd
+@q_r2       .. ..... ...... ..... . ........ rs:5          &r_r rd=%qfp_rd
+@d_d1       .. ..... ...... ..... . ........ .....         \
+            &r_r rd=%dfp_rd rs=%dfp_rs1
+@d_d2       .. ..... ...... ..... . ........ .....         \
+            &r_r rd=%dfp_rd rs=%dfp_rs2
+@d_q2       .. ..... ...... ..... . ........ .....         \
+            &r_r rd=%dfp_rd rs=%qfp_rs2
+@q_q2       .. ..... ...... ..... . ........ .....         \
+            &r_r rd=%qfp_rd rs=%qfp_rs2
+@q_d2       .. ..... ...... ..... . ........ .....         \
+            &r_r rd=%qfp_rd rs=%dfp_rs2
+
+&r_r_r_r    rd rs1 rs2 rs3
+@r_r_r_r    .. rd:5  ...... rs1:5 rs3:5 .... rs2:5         &r_r_r_r
+@d_d_d_d    .. ..... ...... ..... ..... .... .....         \
+            &r_r_r_r rd=%dfp_rd rs1=%dfp_rs1 rs2=%dfp_rs2 rs3=%dfp_rs3
 
 {
   [
@@ -81,6 +124,7 @@
     WRTICK_CMPR     10 10111 110000 ..... . .............  @n_r_ri
     WRSTICK         10 11000 110000 ..... . .............  @n_r_ri
     WRSTICK_CMPR    10 11001 110000 ..... . .............  @n_r_ri
+    WRMWAIT         10 11100 110000 ..... . .............  @n_r_ri
   ]
   # Before v8, rs1==0 was WRY, and the rest executed as nop.
   [
@@ -241,68 +285,89 @@
 RETRY       10 00001 111110 00000 0 0000000000000
 
 FMOVs       10 ..... 110100 00000 0 0000 0001 .....        @r_r2
-FMOVd       10 ..... 110100 00000 0 0000 0010 .....        @r_r2
-FMOVq       10 ..... 110100 00000 0 0000 0011 .....        @r_r2
+FMOVd       10 ..... 110100 00000 0 0000 0010 .....        @d_d2
+FMOVq       10 ..... 110100 00000 0 0000 0011 .....        @q_q2
 FNEGs       10 ..... 110100 00000 0 0000 0101 .....        @r_r2
-FNEGd       10 ..... 110100 00000 0 0000 0110 .....        @r_r2
-FNEGq       10 ..... 110100 00000 0 0000 0111 .....        @r_r2
+FNEGd       10 ..... 110100 00000 0 0000 0110 .....        @d_d2
+FNEGq       10 ..... 110100 00000 0 0000 0111 .....        @q_q2
 FABSs       10 ..... 110100 00000 0 0000 1001 .....        @r_r2
-FABSd       10 ..... 110100 00000 0 0000 1010 .....        @r_r2
-FABSq       10 ..... 110100 00000 0 0000 1011 .....        @r_r2
+FABSd       10 ..... 110100 00000 0 0000 1010 .....        @d_d2
+FABSq       10 ..... 110100 00000 0 0000 1011 .....        @q_q2
 FSQRTs      10 ..... 110100 00000 0 0010 1001 .....        @r_r2
-FSQRTd      10 ..... 110100 00000 0 0010 1010 .....        @r_r2
-FSQRTq      10 ..... 110100 00000 0 0010 1011 .....        @r_r2
+FSQRTd      10 ..... 110100 00000 0 0010 1010 .....        @d_d2
+FSQRTq      10 ..... 110100 00000 0 0010 1011 .....        @q_q2
 FADDs       10 ..... 110100 ..... 0 0100 0001 .....        @r_r_r
-FADDd       10 ..... 110100 ..... 0 0100 0010 .....        @r_r_r
-FADDq       10 ..... 110100 ..... 0 0100 0011 .....        @r_r_r
+FADDd       10 ..... 110100 ..... 0 0100 0010 .....        @d_d_d
+FADDq       10 ..... 110100 ..... 0 0100 0011 .....        @q_q_q
 FSUBs       10 ..... 110100 ..... 0 0100 0101 .....        @r_r_r
-FSUBd       10 ..... 110100 ..... 0 0100 0110 .....        @r_r_r
-FSUBq       10 ..... 110100 ..... 0 0100 0111 .....        @r_r_r
+FSUBd       10 ..... 110100 ..... 0 0100 0110 .....        @d_d_d
+FSUBq       10 ..... 110100 ..... 0 0100 0111 .....        @q_q_q
 FMULs       10 ..... 110100 ..... 0 0100 1001 .....        @r_r_r
-FMULd       10 ..... 110100 ..... 0 0100 1010 .....        @r_r_r
-FMULq       10 ..... 110100 ..... 0 0100 1011 .....        @r_r_r
+FMULd       10 ..... 110100 ..... 0 0100 1010 .....        @d_d_d
+FMULq       10 ..... 110100 ..... 0 0100 1011 .....        @q_q_q
 FDIVs       10 ..... 110100 ..... 0 0100 1101 .....        @r_r_r
-FDIVd       10 ..... 110100 ..... 0 0100 1110 .....        @r_r_r
-FDIVq       10 ..... 110100 ..... 0 0100 1111 .....        @r_r_r
-FsMULd      10 ..... 110100 ..... 0 0110 1001 .....        @r_r_r
-FdMULq      10 ..... 110100 ..... 0 0110 1110 .....        @r_r_r
+FDIVd       10 ..... 110100 ..... 0 0100 1110 .....        @d_d_d
+FDIVq       10 ..... 110100 ..... 0 0100 1111 .....        @q_q_q
+FNADDs      10 ..... 110100 ..... 0 0101 0001 .....        @r_r_r
+FNADDd      10 ..... 110100 ..... 0 0101 0010 .....        @d_d_d
+FNMULs      10 ..... 110100 ..... 0 0101 1001 .....        @r_r_r
+FNMULd      10 ..... 110100 ..... 0 0101 1010 .....        @d_d_d
+FHADDs      10 ..... 110100 ..... 0 0110 0001 .....        @r_r_r
+FHADDd      10 ..... 110100 ..... 0 0110 0010 .....        @d_d_d
+FHSUBs      10 ..... 110100 ..... 0 0110 0101 .....        @r_r_r
+FHSUBd      10 ..... 110100 ..... 0 0110 0110 .....        @d_d_d
+FsMULd      10 ..... 110100 ..... 0 0110 1001 .....        @d_r_r
+FdMULq      10 ..... 110100 ..... 0 0110 1110 .....        @q_d_d
+FNHADDs     10 ..... 110100 ..... 0 0111 0001 .....        @r_r_r
+FNHADDd     10 ..... 110100 ..... 0 0111 0010 .....        @d_d_d
+FNsMULd     10 ..... 110100 ..... 0 0111 1001 .....        @d_r_r
 FsTOx       10 ..... 110100 00000 0 1000 0001 .....        @r_r2
-FdTOx       10 ..... 110100 00000 0 1000 0010 .....        @r_r2
-FqTOx       10 ..... 110100 00000 0 1000 0011 .....        @r_r2
+FdTOx       10 ..... 110100 00000 0 1000 0010 .....        @r_d2
+FqTOx       10 ..... 110100 00000 0 1000 0011 .....        @r_q2
 FxTOs       10 ..... 110100 00000 0 1000 0100 .....        @r_r2
-FxTOd       10 ..... 110100 00000 0 1000 1000 .....        @r_r2
-FxTOq       10 ..... 110100 00000 0 1000 1100 .....        @r_r2
+FxTOd       10 ..... 110100 00000 0 1000 1000 .....        @d_r2
+FxTOq       10 ..... 110100 00000 0 1000 1100 .....        @q_r2
 FiTOs       10 ..... 110100 00000 0 1100 0100 .....        @r_r2
-FdTOs       10 ..... 110100 00000 0 1100 0110 .....        @r_r2
-FqTOs       10 ..... 110100 00000 0 1100 0111 .....        @r_r2
-FiTOd       10 ..... 110100 00000 0 1100 1000 .....        @r_r2
-FsTOd       10 ..... 110100 00000 0 1100 1001 .....        @r_r2
-FqTOd       10 ..... 110100 00000 0 1100 1011 .....        @r_r2
-FiTOq       10 ..... 110100 00000 0 1100 1100 .....        @r_r2
-FsTOq       10 ..... 110100 00000 0 1100 1101 .....        @r_r2
-FdTOq       10 ..... 110100 00000 0 1100 1110 .....        @r_r2
+FdTOs       10 ..... 110100 00000 0 1100 0110 .....        @r_d2
+FqTOs       10 ..... 110100 00000 0 1100 0111 .....        @r_q2
+FiTOd       10 ..... 110100 00000 0 1100 1000 .....        @d_r2
+FsTOd       10 ..... 110100 00000 0 1100 1001 .....        @d_r2
+FqTOd       10 ..... 110100 00000 0 1100 1011 .....        @d_q2
+FiTOq       10 ..... 110100 00000 0 1100 1100 .....        @q_r2
+FsTOq       10 ..... 110100 00000 0 1100 1101 .....        @q_r2
+FdTOq       10 ..... 110100 00000 0 1100 1110 .....        @q_d2
 FsTOi       10 ..... 110100 00000 0 1101 0001 .....        @r_r2
-FdTOi       10 ..... 110100 00000 0 1101 0010 .....        @r_r2
-FqTOi       10 ..... 110100 00000 0 1101 0011 .....        @r_r2
+FdTOi       10 ..... 110100 00000 0 1101 0010 .....        @r_d2
+FqTOi       10 ..... 110100 00000 0 1101 0011 .....        @r_q2
 
 FMOVscc     10 rd:5  110101 0 cond:4 1 cc:1 0 000001 rs2:5
-FMOVdcc     10 rd:5  110101 0 cond:4 1 cc:1 0 000010 rs2:5
-FMOVqcc     10 rd:5  110101 0 cond:4 1 cc:1 0 000011 rs2:5
+FMOVdcc     10 ..... 110101 0 cond:4 1 cc:1 0 000010 ..... \
+            rd=%dfp_rd rs2=%dfp_rs2
+FMOVqcc     10 ..... 110101 0 cond:4 1 cc:1 0 000011 ..... \
+            rd=%qfp_rd rs2=%qfp_rs2
 
 FMOVsfcc    10 rd:5  110101 0 cond:4 0 cc:2   000001 rs2:5
-FMOVdfcc    10 rd:5  110101 0 cond:4 0 cc:2   000010 rs2:5
-FMOVqfcc    10 rd:5  110101 0 cond:4 0 cc:2   000011 rs2:5
+FMOVdfcc    10 ..... 110101 0 cond:4 0 cc:2   000010 ..... \
+            rd=%dfp_rd rs2=%dfp_rs2
+FMOVqfcc    10 ..... 110101 0 cond:4 0 cc:2   000011 ..... \
+            rd=%qfp_rd rs2=%qfp_rs2
 
 FMOVRs      10 rd:5  110101 rs1:5    0 cond:3  00101 rs2:5
-FMOVRd      10 rd:5  110101 rs1:5    0 cond:3  00110 rs2:5
-FMOVRq      10 rd:5  110101 rs1:5    0 cond:3  00111 rs2:5
+FMOVRd      10 ..... 110101 rs1:5    0 cond:3  00110 ..... \
+            rd=%dfp_rd rs2=%dfp_rs2
+FMOVRq      10 ..... 110101 rs1:5    0 cond:3  00111 ..... \
+            rd=%qfp_rd rs2=%qfp_rs2
 
 FCMPs       10 000 cc:2 110101 rs1:5  0 0101 0001 rs2:5
-FCMPd       10 000 cc:2 110101 rs1:5  0 0101 0010 rs2:5
-FCMPq       10 000 cc:2 110101 rs1:5  0 0101 0011 rs2:5
+FCMPd       10 000 cc:2 110101 .....  0 0101 0010 .....    \
+            rs1=%dfp_rs1 rs2=%dfp_rs2
+FCMPq       10 000 cc:2 110101 .....  0 0101 0011 .....    \
+            rs1=%qfp_rs1 rs2=%qfp_rs2
 FCMPEs      10 000 cc:2 110101 rs1:5  0 0101 0101 rs2:5
-FCMPEd      10 000 cc:2 110101 rs1:5  0 0101 0110 rs2:5
-FCMPEq      10 000 cc:2 110101 rs1:5  0 0101 0111 rs2:5
+FCMPEd      10 000 cc:2 110101 .....  0 0101 0110 .....    \
+            rs1=%dfp_rs1 rs2=%dfp_rs2
+FCMPEq      10 000 cc:2 110101 .....  0 0101 0111 .....    \
+            rs1=%qfp_rs1 rs2=%qfp_rs2
 
 {
   [
@@ -323,93 +388,187 @@
     ARRAY16     10 ..... 110110 ..... 0 0001 0010 .....    @r_r_r
     ARRAY32     10 ..... 110110 ..... 0 0001 0100 .....    @r_r_r
 
+    ADDXC       10 ..... 110110 ..... 0 0001 0001 .....    @r_r_r
+    ADDXCcc     10 ..... 110110 ..... 0 0001 0011 .....    @r_r_r
+    UMULXHI     10 ..... 110110 ..... 0 0001 0110 .....    @r_r_r
+    LZCNT       10 ..... 110110 00000 0 0001 0111 .....    @r_r2
+    XMULX       10 ..... 110110 ..... 1 0001 0101 .....    @r_r_r
+    XMULXHI     10 ..... 110110 ..... 1 0001 0110 .....    @r_r_r
+
     ALIGNADDR   10 ..... 110110 ..... 0 0001 1000 .....    @r_r_r
     ALIGNADDRL  10 ..... 110110 ..... 0 0001 1010 .....    @r_r_r
 
     BMASK       10 ..... 110110 ..... 0 0001 1001 .....    @r_r_r
 
-    FPCMPLE16   10 ..... 110110 ..... 0 0010 0000 .....    @r_r_r
-    FPCMPNE16   10 ..... 110110 ..... 0 0010 0010 .....    @r_r_r
-    FPCMPGT16   10 ..... 110110 ..... 0 0010 1000 .....    @r_r_r
-    FPCMPEQ16   10 ..... 110110 ..... 0 0010 1010 .....    @r_r_r
-    FPCMPLE32   10 ..... 110110 ..... 0 0010 0100 .....    @r_r_r
-    FPCMPNE32   10 ..... 110110 ..... 0 0010 0110 .....    @r_r_r
-    FPCMPGT32   10 ..... 110110 ..... 0 0010 1100 .....    @r_r_r
-    FPCMPEQ32   10 ..... 110110 ..... 0 0010 1110 .....    @r_r_r
+    CMASK8      10 00000 110110 00000 0 0001 1011 rs2:5
+    CMASK16     10 00000 110110 00000 0 0001 1101 rs2:5
+    CMASK32     10 00000 110110 00000 0 0001 1111 rs2:5
 
-    FMUL8x16    10 ..... 110110 ..... 0 0011 0001 .....    @r_r_r
-    FMUL8x16AU  10 ..... 110110 ..... 0 0011 0011 .....    @r_r_r
-    FMUL8x16AL  10 ..... 110110 ..... 0 0011 0101 .....    @r_r_r
-    FMUL8SUx16  10 ..... 110110 ..... 0 0011 0110 .....    @r_r_r
-    FMUL8ULx16  10 ..... 110110 ..... 0 0011 0111 .....    @r_r_r
-    FMULD8SUx16 10 ..... 110110 ..... 0 0011 1000 .....    @r_r_r
-    FMULD8ULx16 10 ..... 110110 ..... 0 0011 1001 .....    @r_r_r
-    FPACK32     10 ..... 110110 ..... 0 0011 1010 .....    @r_r_r
-    FPACK16     10 ..... 110110 00000 0 0011 1011 .....    @r_r2
-    FPACKFIX    10 ..... 110110 00000 0 0011 1101 .....    @r_r2
-    PDIST       10 ..... 110110 ..... 0 0011 1110 .....    @r_r_r
+    FPCMPLE16   10 ..... 110110 ..... 0 0010 0000 .....    @r_d_d
+    FPCMPNE16   10 ..... 110110 ..... 0 0010 0010 .....    @r_d_d
+    FPCMPGT16   10 ..... 110110 ..... 0 0010 1000 .....    @r_d_d
+    FPCMPEQ16   10 ..... 110110 ..... 0 0010 1010 .....    @r_d_d
+    FPCMPLE32   10 ..... 110110 ..... 0 0010 0100 .....    @r_d_d
+    FPCMPNE32   10 ..... 110110 ..... 0 0010 0110 .....    @r_d_d
+    FPCMPGT32   10 ..... 110110 ..... 0 0010 1100 .....    @r_d_d
+    FPCMPEQ32   10 ..... 110110 ..... 0 0010 1110 .....    @r_d_d
 
-    FALIGNDATAg 10 ..... 110110 ..... 0 0100 1000 .....    @r_r_r
-    FPMERGE     10 ..... 110110 ..... 0 0100 1011 .....    @r_r_r
-    BSHUFFLE    10 ..... 110110 ..... 0 0100 1100 .....    @r_r_r
-    FEXPAND     10 ..... 110110 00000 0 0100 1101 .....    @r_r2
+    FSLL16      10 ..... 110110 ..... 0 0010 0001 .....    @d_d_d
+    FSRL16      10 ..... 110110 ..... 0 0010 0011 .....    @d_d_d
+    FSLAS16     10 ..... 110110 ..... 0 0010 1001 .....    @d_d_d
+    FSRA16      10 ..... 110110 ..... 0 0010 1011 .....    @d_d_d
+    FSLL32      10 ..... 110110 ..... 0 0010 0101 .....    @d_d_d
+    FSRL32      10 ..... 110110 ..... 0 0010 0111 .....    @d_d_d
+    FSLAS32     10 ..... 110110 ..... 0 0010 1101 .....    @d_d_d
+    FSRA32      10 ..... 110110 ..... 0 0010 1111 .....    @d_d_d
 
-    FSRCd       10 ..... 110110 ..... 0 0111 0100 00000    @r_r1  # FSRC1d
+    FPCMPULE8   10 ..... 110110 ..... 1 0010 0000 .....    @r_d_d
+    FPCMPUGT8   10 ..... 110110 ..... 1 0010 1000 .....    @r_d_d
+    FPCMPNE8    10 ..... 110110 ..... 1 0010 0010 .....    @r_d_d
+    FPCMPEQ8    10 ..... 110110 ..... 1 0010 1010 .....    @r_d_d
+    FPCMPLE8    10 ..... 110110 ..... 0 0011 0100 .....    @r_d_d
+    FPCMPGT8    10 ..... 110110 ..... 0 0011 1100 .....    @r_d_d
+    FPCMPULE16  10 ..... 110110 ..... 1 0010 1110 .....    @r_d_d
+    FPCMPUGT16  10 ..... 110110 ..... 1 0010 1011 .....    @r_d_d
+    FPCMPULE32  10 ..... 110110 ..... 1 0010 1111 .....    @r_d_d
+    FPCMPUGT32  10 ..... 110110 ..... 1 0010 1100 .....    @r_d_d
+
+    FMUL8x16    10 ..... 110110 ..... 0 0011 0001 .....    @d_r_d
+    FMUL8x16AU  10 ..... 110110 ..... 0 0011 0011 .....    @d_r_r
+    FMUL8x16AL  10 ..... 110110 ..... 0 0011 0101 .....    @d_r_r
+    FMUL8SUx16  10 ..... 110110 ..... 0 0011 0110 .....    @d_d_d
+    FMUL8ULx16  10 ..... 110110 ..... 0 0011 0111 .....    @d_d_d
+    FMULD8SUx16 10 ..... 110110 ..... 0 0011 1000 .....    @d_r_r
+    FMULD8ULx16 10 ..... 110110 ..... 0 0011 1001 .....    @d_r_r
+    FPACK32     10 ..... 110110 ..... 0 0011 1010 .....    @d_d_d
+    FPACK16     10 ..... 110110 00000 0 0011 1011 .....    @r_d2
+    FPACKFIX    10 ..... 110110 00000 0 0011 1101 .....    @r_d2
+    PDIST       10 ..... 110110 ..... 0 0011 1110 .....    \
+                &r_r_r_r rd=%dfp_rd rs1=%dfp_rd rs2=%dfp_rs1 rs3=%dfp_rs2
+    PDISTN      10 ..... 110110 ..... 0 0011 1111 .....    @r_d_d
+
+    FMEAN16     10 ..... 110110 ..... 0 0100 0000 .....    @d_d_d
+    SUBXC       10 ..... 110110 ..... 0 0100 0001 .....    @r_r_r
+    SUBXCcc     10 ..... 110110 ..... 0 0100 0011 .....    @r_r_r
+    FCHKSM16    10 ..... 110110 ..... 0 0100 0100 .....    @d_d_d
+    FALIGNDATAg 10 ..... 110110 ..... 0 0100 1000 .....    @d_d_d
+    FPMERGE     10 ..... 110110 ..... 0 0100 1011 .....    @d_r_r
+    BSHUFFLE    10 ..... 110110 ..... 0 0100 1100 .....    @d_d_d
+    FEXPAND     10 ..... 110110 00000 0 0100 1101 .....    @d_r2
+    FALIGNDATAi 10 ..... 110110 ..... 0 0100 1001 .....    @d_r_d
+
+    FSRCd       10 ..... 110110 ..... 0 0111 0100 00000    @d_d1  # FSRC1d
     FSRCs       10 ..... 110110 ..... 0 0111 0101 00000    @r_r1  # FSRC1s
-    FSRCd       10 ..... 110110 00000 0 0111 1000 .....    @r_r2  # FSRC2d
+    FSRCd       10 ..... 110110 00000 0 0111 1000 .....    @d_d2  # FSRC2d
     FSRCs       10 ..... 110110 00000 0 0111 1001 .....    @r_r2  # FSRC2s
-    FNOTd       10 ..... 110110 ..... 0 0110 1010 00000    @r_r1  # FNOT1d
+    FNOTd       10 ..... 110110 ..... 0 0110 1010 00000    @d_d1  # FNOT1d
     FNOTs       10 ..... 110110 ..... 0 0110 1011 00000    @r_r1  # FNOT1s
-    FNOTd       10 ..... 110110 00000 0 0110 0110 .....    @r_r2  # FNOT2d
+    FNOTd       10 ..... 110110 00000 0 0110 0110 .....    @d_d2  # FNOT2d
     FNOTs       10 ..... 110110 00000 0 0110 0111 .....    @r_r2  # FNOT2s
 
-    FPADD16     10 ..... 110110 ..... 0 0101 0000 .....    @r_r_r
+    FPADD16     10 ..... 110110 ..... 0 0101 0000 .....    @d_d_d
     FPADD16s    10 ..... 110110 ..... 0 0101 0001 .....    @r_r_r
-    FPADD32     10 ..... 110110 ..... 0 0101 0010 .....    @r_r_r
+    FPADD32     10 ..... 110110 ..... 0 0101 0010 .....    @d_d_d
     FPADD32s    10 ..... 110110 ..... 0 0101 0011 .....    @r_r_r
-    FPSUB16     10 ..... 110110 ..... 0 0101 0100 .....    @r_r_r
+    FPADD64     10 ..... 110110 ..... 0 0100 0010 .....    @d_d_d
+    FPSUB16     10 ..... 110110 ..... 0 0101 0100 .....    @d_d_d
     FPSUB16s    10 ..... 110110 ..... 0 0101 0101 .....    @r_r_r
-    FPSUB32     10 ..... 110110 ..... 0 0101 0110 .....    @r_r_r
+    FPSUB32     10 ..... 110110 ..... 0 0101 0110 .....    @d_d_d
     FPSUB32s    10 ..... 110110 ..... 0 0101 0111 .....    @r_r_r
+    FPSUB64     10 ..... 110110 ..... 0 0100 0110 .....    @d_d_d
 
-    FNORd       10 ..... 110110 ..... 0 0110 0010 .....    @r_r_r
+    FPADDS16    10 ..... 110110 ..... 0 0101 1000 .....    @d_d_d
+    FPADDS16s   10 ..... 110110 ..... 0 0101 1001 .....    @r_r_r
+    FPADDS32    10 ..... 110110 ..... 0 0101 1010 .....    @d_d_d
+    FPADDS32s   10 ..... 110110 ..... 0 0101 1011 .....    @r_r_r
+    FPSUBS16    10 ..... 110110 ..... 0 0101 1100 .....    @d_d_d
+    FPSUBS16s   10 ..... 110110 ..... 0 0101 1101 .....    @r_r_r
+    FPSUBS32    10 ..... 110110 ..... 0 0101 1110 .....    @d_d_d
+    FPSUBS32s   10 ..... 110110 ..... 0 0101 1111 .....    @r_r_r
+
+    FNORd       10 ..... 110110 ..... 0 0110 0010 .....    @d_d_d
     FNORs       10 ..... 110110 ..... 0 0110 0011 .....    @r_r_r
-    FANDNOTd    10 ..... 110110 ..... 0 0110 0100 .....    @r_r_r   # FANDNOT2d
+    FANDNOTd    10 ..... 110110 ..... 0 0110 0100 .....    @d_d_d   # FANDNOT2d
     FANDNOTs    10 ..... 110110 ..... 0 0110 0101 .....    @r_r_r   # FANDNOT2s
-    FANDNOTd    10 ..... 110110 ..... 0 0110 1000 .....    @r_r_r_swap # ... 1d
+    FANDNOTd    10 ..... 110110 ..... 0 0110 1000 .....    @d_d_d_swap # ... 1d
     FANDNOTs    10 ..... 110110 ..... 0 0110 1001 .....    @r_r_r_swap # ... 1s
-    FXORd       10 ..... 110110 ..... 0 0110 1100 .....    @r_r_r
+    FXORd       10 ..... 110110 ..... 0 0110 1100 .....    @d_d_d
     FXORs       10 ..... 110110 ..... 0 0110 1101 .....    @r_r_r
-    FNANDd      10 ..... 110110 ..... 0 0110 1110 .....    @r_r_r
+    FNANDd      10 ..... 110110 ..... 0 0110 1110 .....    @d_d_d
     FNANDs      10 ..... 110110 ..... 0 0110 1111 .....    @r_r_r
-    FANDd       10 ..... 110110 ..... 0 0111 0000 .....    @r_r_r
+    FANDd       10 ..... 110110 ..... 0 0111 0000 .....    @d_d_d
     FANDs       10 ..... 110110 ..... 0 0111 0001 .....    @r_r_r
-    FXNORd      10 ..... 110110 ..... 0 0111 0010 .....    @r_r_r
+    FXNORd      10 ..... 110110 ..... 0 0111 0010 .....    @d_d_d
     FXNORs      10 ..... 110110 ..... 0 0111 0011 .....    @r_r_r
-    FORNOTd     10 ..... 110110 ..... 0 0111 0110 .....    @r_r_r    # FORNOT2d
+    FORNOTd     10 ..... 110110 ..... 0 0111 0110 .....    @d_d_d    # FORNOT2d
     FORNOTs     10 ..... 110110 ..... 0 0111 0111 .....    @r_r_r    # FORNOT2s
-    FORNOTd     10 ..... 110110 ..... 0 0111 1010 .....    @r_r_r_swap # ... 1d
+    FORNOTd     10 ..... 110110 ..... 0 0111 1010 .....    @d_d_d_swap # ... 1d
     FORNOTs     10 ..... 110110 ..... 0 0111 1011 .....    @r_r_r_swap # ... 1s
-    FORd        10 ..... 110110 ..... 0 0111 1100 .....    @r_r_r
+    FORd        10 ..... 110110 ..... 0 0111 1100 .....    @d_d_d
     FORs        10 ..... 110110 ..... 0 0111 1101 .....    @r_r_r
 
-    FZEROd      10 rd:5  110110 00000 0 0110 0000 00000
+    FZEROd      10 ..... 110110 00000 0 0110 0000 00000    rd=%dfp_rd
     FZEROs      10 rd:5  110110 00000 0 0110 0001 00000
-    FONEd       10 rd:5  110110 00000 0 0111 1110 00000
+    FONEd       10 ..... 110110 00000 0 0111 1110 00000    rd=%dfp_rd
     FONEs       10 rd:5  110110 00000 0 0111 1111 00000
+
+    MOVsTOuw    10 ..... 110110 00000 1 0001 0001 .....    @r_r2
+    MOVsTOsw    10 ..... 110110 00000 1 0001 0011 .....    @r_r2
+    MOVwTOs     10 ..... 110110 00000 1 0001 1001 .....    @r_r2
+    MOVdTOx     10 ..... 110110 00000 1 0001 0000 .....    @r_d2
+    MOVxTOd     10 ..... 110110 00000 1 0001 1000 .....    @d_r2
+
+    FPADD8      10 ..... 110110 ..... 1 0010 0100 .....    @d_d_d
+    FPADDS8     10 ..... 110110 ..... 1 0010 0110 .....    @d_d_d
+    FPADDUS8    10 ..... 110110 ..... 1 0010 0111 .....    @d_d_d
+    FPADDUS16   10 ..... 110110 ..... 1 0010 0011 .....    @d_d_d
+    FPSUB8      10 ..... 110110 ..... 1 0101 0100 .....    @d_d_d
+    FPSUBS8     10 ..... 110110 ..... 1 0101 0110 .....    @d_d_d
+    FPSUBUS8    10 ..... 110110 ..... 1 0101 0111 .....    @d_d_d
+    FPSUBUS16   10 ..... 110110 ..... 1 0101 0011 .....    @d_d_d
+
+    FPMIN8      10 ..... 110110 ..... 1 0001 1010 .....    @d_d_d
+    FPMIN16     10 ..... 110110 ..... 1 0001 1011 .....    @d_d_d
+    FPMIN32     10 ..... 110110 ..... 1 0001 1100 .....    @d_d_d
+    FPMINU8     10 ..... 110110 ..... 1 0101 1010 .....    @d_d_d
+    FPMINU16    10 ..... 110110 ..... 1 0101 1011 .....    @d_d_d
+    FPMINU32    10 ..... 110110 ..... 1 0101 1100 .....    @d_d_d
+
+    FPMAX8      10 ..... 110110 ..... 1 0001 1101 .....    @d_d_d
+    FPMAX16     10 ..... 110110 ..... 1 0001 1110 .....    @d_d_d
+    FPMAX32     10 ..... 110110 ..... 1 0001 1111 .....    @d_d_d
+    FPMAXU8     10 ..... 110110 ..... 1 0101 1101 .....    @d_d_d
+    FPMAXU16    10 ..... 110110 ..... 1 0101 1110 .....    @d_d_d
+    FPMAXU32    10 ..... 110110 ..... 1 0101 1111 .....    @d_d_d
+
+    FLCMPs      10 000 cc:2 110110 rs1:5 1 0101 0001 rs2:5
+    FLCMPd      10 000 cc:2 110110 ..... 1 0101 0010 ..... \
+                rs1=%dfp_rs1 rs2=%dfp_rs2
   ]
   NCP           10 ----- 110110 ----- --------- -----      # v8 CPop1
 }
 
-NCP         10 ----- 110111 ----- --------- -----          # v8 CPop2
+{
+  [
+    FMADDs      10 ..... 110111 ..... ..... 0001 .....     @r_r_r_r
+    FMADDd      10 ..... 110111 ..... ..... 0010 .....     @d_d_d_d
+    FMSUBs      10 ..... 110111 ..... ..... 0101 .....     @r_r_r_r
+    FMSUBd      10 ..... 110111 ..... ..... 0110 .....     @d_d_d_d
+    FNMSUBs     10 ..... 110111 ..... ..... 1001 .....     @r_r_r_r
+    FNMSUBd     10 ..... 110111 ..... ..... 1010 .....     @d_d_d_d
+    FNMADDs     10 ..... 110111 ..... ..... 1101 .....     @r_r_r_r
+    FNMADDd     10 ..... 110111 ..... ..... 1110 .....     @d_d_d_d
+
+    FPMADDX     10 ..... 110111 ..... ..... 0000 .....     @d_d_d_d
+    FPMADDXHI   10 ..... 110111 ..... ..... 0100 .....     @d_d_d_d
+  ]
+  NCP           10 ----- 110111 ----- --------- -----      # v8 CPop2
+}
 
 ##
 ## Major Opcode 11 -- load and store instructions
 ##
 
-%dfp_rd     25:5 !function=extract_dfpreg
-%qfp_rd     25:5 !function=extract_qfpreg
-
 &r_r_ri_asi rd rs1 rs2_or_imm asi imm:bool
 @r_r_ri_na  .. rd:5  ...... rs1:5 imm:1 rs2_or_imm:s13     &r_r_ri_asi asi=-1
 @d_r_ri_na  .. ..... ...... rs1:5 imm:1 rs2_or_imm:s13     \
@@ -477,6 +636,7 @@
 LDF         11 ..... 100000 ..... . .............          @r_r_ri_na
 LDFSR       11 00000 100001 ..... . .............          @n_r_ri
 LDXFSR      11 00001 100001 ..... . .............          @n_r_ri
+LDXEFSR     11 00011 100001 ..... . .............          @n_r_ri
 LDQF        11 ..... 100010 ..... . .............          @q_r_ri_na
 LDDF        11 ..... 100011 ..... . .............          @d_r_ri_na
 
diff --git a/target/sparc/ldst_helper.c b/target/sparc/ldst_helper.c
index 7bdf99e..2d48e98 100644
--- a/target/sparc/ldst_helper.c
+++ b/target/sparc/ldst_helper.c
@@ -1395,6 +1395,10 @@
     case ASI_TWINX_PL: /* Primary, twinx, LE */
     case ASI_TWINX_S:  /* Secondary, twinx */
     case ASI_TWINX_SL: /* Secondary, twinx, LE */
+    case ASI_MON_P:
+    case ASI_MON_S:
+    case ASI_MON_AIUP:
+    case ASI_MON_AIUS:
         /* These are always handled inline.  */
         g_assert_not_reached();
 
diff --git a/target/sparc/translate.c b/target/sparc/translate.c
index dca0728..1136390 100644
--- a/target/sparc/translate.c
+++ b/target/sparc/translate.c
@@ -28,6 +28,7 @@
 #include "exec/helper-gen.h"
 #include "exec/translator.h"
 #include "exec/log.h"
+#include "fpu/softfloat.h"
 #include "asi.h"
 
 #define HELPER_H "helper.h"
@@ -60,14 +61,27 @@
 # define gen_helper_write_softint(E, S)         qemu_build_not_reached()
 # define gen_helper_wrpil(E, S)                 qemu_build_not_reached()
 # define gen_helper_wrpstate(E, S)              qemu_build_not_reached()
+# define gen_helper_cmask8               ({ qemu_build_not_reached(); NULL; })
+# define gen_helper_cmask16              ({ qemu_build_not_reached(); NULL; })
+# define gen_helper_cmask32              ({ qemu_build_not_reached(); NULL; })
+# define gen_helper_fcmpeq8              ({ qemu_build_not_reached(); NULL; })
 # define gen_helper_fcmpeq16             ({ qemu_build_not_reached(); NULL; })
 # define gen_helper_fcmpeq32             ({ qemu_build_not_reached(); NULL; })
+# define gen_helper_fcmpgt8              ({ qemu_build_not_reached(); NULL; })
 # define gen_helper_fcmpgt16             ({ qemu_build_not_reached(); NULL; })
 # define gen_helper_fcmpgt32             ({ qemu_build_not_reached(); NULL; })
+# define gen_helper_fcmple8              ({ qemu_build_not_reached(); NULL; })
 # define gen_helper_fcmple16             ({ qemu_build_not_reached(); NULL; })
 # define gen_helper_fcmple32             ({ qemu_build_not_reached(); NULL; })
+# define gen_helper_fcmpne8              ({ qemu_build_not_reached(); NULL; })
 # define gen_helper_fcmpne16             ({ qemu_build_not_reached(); NULL; })
 # define gen_helper_fcmpne32             ({ qemu_build_not_reached(); NULL; })
+# define gen_helper_fcmpule8             ({ qemu_build_not_reached(); NULL; })
+# define gen_helper_fcmpule16            ({ qemu_build_not_reached(); NULL; })
+# define gen_helper_fcmpule32            ({ qemu_build_not_reached(); NULL; })
+# define gen_helper_fcmpugt8             ({ qemu_build_not_reached(); NULL; })
+# define gen_helper_fcmpugt16            ({ qemu_build_not_reached(); NULL; })
+# define gen_helper_fcmpugt32            ({ qemu_build_not_reached(); NULL; })
 # define gen_helper_fdtox                ({ qemu_build_not_reached(); NULL; })
 # define gen_helper_fexpand              ({ qemu_build_not_reached(); NULL; })
 # define gen_helper_fmul8sux16           ({ qemu_build_not_reached(); NULL; })
@@ -75,11 +89,15 @@
 # define gen_helper_fmul8x16             ({ qemu_build_not_reached(); NULL; })
 # define gen_helper_fpmerge              ({ qemu_build_not_reached(); NULL; })
 # define gen_helper_fqtox                ({ qemu_build_not_reached(); NULL; })
+# define gen_helper_fslas16              ({ qemu_build_not_reached(); NULL; })
+# define gen_helper_fslas32              ({ qemu_build_not_reached(); NULL; })
 # define gen_helper_fstox                ({ qemu_build_not_reached(); NULL; })
 # define gen_helper_fxtod                ({ qemu_build_not_reached(); NULL; })
 # define gen_helper_fxtoq                ({ qemu_build_not_reached(); NULL; })
 # define gen_helper_fxtos                ({ qemu_build_not_reached(); NULL; })
 # define gen_helper_pdist                ({ qemu_build_not_reached(); NULL; })
+# define gen_helper_xmulx                ({ qemu_build_not_reached(); NULL; })
+# define gen_helper_xmulxhi              ({ qemu_build_not_reached(); NULL; })
 # define MAXTL_MASK                             0
 #endif
 
@@ -123,8 +141,7 @@
 #define cpu_xcc_C ({ qemu_build_not_reached(); NULL; })
 #endif
 
-/* Floating point registers */
-static TCGv_i64 cpu_fpr[TARGET_DPREGS];
+/* Floating point comparison registers */
 static TCGv_i32 cpu_fcc[TARGET_FCCREGS];
 
 #define env_field_offsetof(X)     offsetof(CPUSPARCState, X)
@@ -190,14 +207,6 @@
 #define GET_FIELDs(x,a,b) sign_extend (GET_FIELD(x,a,b), (b) - (a) + 1)
 #define GET_FIELD_SPs(x,a,b) sign_extend (GET_FIELD_SP(x,a,b), ((b) - (a) + 1))
 
-#ifdef TARGET_SPARC64
-#define DFPREG(r) (((r & 1) << 5) | (r & 0x1e))
-#define QFPREG(r) (((r & 1) << 5) | (r & 0x1c))
-#else
-#define DFPREG(r) (r & 0x1e)
-#define QFPREG(r) (r & 0x1c)
-#endif
-
 #define UA2005_HTRAP_MASK 0xff
 #define V8_TRAP_MASK 0x7f
 
@@ -217,59 +226,72 @@
 }
 
 /* floating point registers moves */
+
+static int gen_offset_fpr_F(unsigned int reg)
+{
+    int ret;
+
+    tcg_debug_assert(reg < 32);
+    ret= offsetof(CPUSPARCState, fpr[reg / 2]);
+    if (reg & 1) {
+        ret += offsetof(CPU_DoubleU, l.lower);
+    } else {
+        ret += offsetof(CPU_DoubleU, l.upper);
+    }
+    return ret;
+}
+
 static TCGv_i32 gen_load_fpr_F(DisasContext *dc, unsigned int src)
 {
     TCGv_i32 ret = tcg_temp_new_i32();
-    if (src & 1) {
-        tcg_gen_extrl_i64_i32(ret, cpu_fpr[src / 2]);
-    } else {
-        tcg_gen_extrh_i64_i32(ret, cpu_fpr[src / 2]);
-    }
+    tcg_gen_ld_i32(ret, tcg_env, gen_offset_fpr_F(src));
     return ret;
 }
 
 static void gen_store_fpr_F(DisasContext *dc, unsigned int dst, TCGv_i32 v)
 {
-    TCGv_i64 t = tcg_temp_new_i64();
-
-    tcg_gen_extu_i32_i64(t, v);
-    tcg_gen_deposit_i64(cpu_fpr[dst / 2], cpu_fpr[dst / 2], t,
-                        (dst & 1 ? 0 : 32), 32);
+    tcg_gen_st_i32(v, tcg_env, gen_offset_fpr_F(dst));
     gen_update_fprs_dirty(dc, dst);
 }
 
+static int gen_offset_fpr_D(unsigned int reg)
+{
+    tcg_debug_assert(reg < 64);
+    tcg_debug_assert(reg % 2 == 0);
+    return offsetof(CPUSPARCState, fpr[reg / 2]);
+}
+
 static TCGv_i64 gen_load_fpr_D(DisasContext *dc, unsigned int src)
 {
-    src = DFPREG(src);
-    return cpu_fpr[src / 2];
+    TCGv_i64 ret = tcg_temp_new_i64();
+    tcg_gen_ld_i64(ret, tcg_env, gen_offset_fpr_D(src));
+    return ret;
 }
 
 static void gen_store_fpr_D(DisasContext *dc, unsigned int dst, TCGv_i64 v)
 {
-    dst = DFPREG(dst);
-    tcg_gen_mov_i64(cpu_fpr[dst / 2], v);
+    tcg_gen_st_i64(v, tcg_env, gen_offset_fpr_D(dst));
     gen_update_fprs_dirty(dc, dst);
 }
 
-static TCGv_i64 gen_dest_fpr_D(DisasContext *dc, unsigned int dst)
-{
-    return cpu_fpr[DFPREG(dst) / 2];
-}
-
 static TCGv_i128 gen_load_fpr_Q(DisasContext *dc, unsigned int src)
 {
     TCGv_i128 ret = tcg_temp_new_i128();
+    TCGv_i64 h = gen_load_fpr_D(dc, src);
+    TCGv_i64 l = gen_load_fpr_D(dc, src + 2);
 
-    src = QFPREG(src);
-    tcg_gen_concat_i64_i128(ret, cpu_fpr[src / 2 + 1], cpu_fpr[src / 2]);
+    tcg_gen_concat_i64_i128(ret, l, h);
     return ret;
 }
 
 static void gen_store_fpr_Q(DisasContext *dc, unsigned int dst, TCGv_i128 v)
 {
-    dst = DFPREG(dst);
-    tcg_gen_extr_i128_i64(cpu_fpr[dst / 2 + 1], cpu_fpr[dst / 2], v);
-    gen_update_fprs_dirty(dc, dst);
+    TCGv_i64 h = tcg_temp_new_i64();
+    TCGv_i64 l = tcg_temp_new_i64();
+
+    tcg_gen_extr_i128_i64(l, h, v);
+    gen_store_fpr_D(dc, dst, h);
+    gen_store_fpr_D(dc, dst + 2, l);
 }
 
 /* moves */
@@ -428,6 +450,17 @@
     gen_op_addcc_int(dst, src1, src2, gen_carry32());
 }
 
+static void gen_op_addxc(TCGv dst, TCGv src1, TCGv src2)
+{
+    tcg_gen_add_tl(dst, src1, src2);
+    tcg_gen_add_tl(dst, dst, cpu_cc_C);
+}
+
+static void gen_op_addxccc(TCGv dst, TCGv src1, TCGv src2)
+{
+    gen_op_addcc_int(dst, src1, src2, cpu_cc_C);
+}
+
 static void gen_op_subcc_int(TCGv dst, TCGv src1, TCGv src2, TCGv cin)
 {
     TCGv z = tcg_constant_tl(0);
@@ -482,6 +515,17 @@
     gen_op_subcc_int(dst, src1, src2, gen_carry32());
 }
 
+static void gen_op_subxc(TCGv dst, TCGv src1, TCGv src2)
+{
+    tcg_gen_sub_tl(dst, src1, src2);
+    tcg_gen_sub_tl(dst, dst, cpu_cc_C);
+}
+
+static void gen_op_subxccc(TCGv dst, TCGv src1, TCGv src2)
+{
+    gen_op_subcc_int(dst, src1, src2, cpu_cc_C);
+}
+
 static void gen_op_mulscc(TCGv dst, TCGv src1, TCGv src2)
 {
     TCGv zero = tcg_constant_tl(0);
@@ -556,6 +600,32 @@
     gen_op_multiply(dst, src1, src2, 1);
 }
 
+static void gen_op_umulxhi(TCGv dst, TCGv src1, TCGv src2)
+{
+    TCGv discard = tcg_temp_new();
+    tcg_gen_mulu2_tl(discard, dst, src1, src2);
+}
+
+static void gen_op_fpmaddx(TCGv_i64 dst, TCGv_i64 src1,
+                           TCGv_i64 src2, TCGv_i64 src3)
+{
+    TCGv_i64 t = tcg_temp_new_i64();
+
+    tcg_gen_mul_i64(t, src1, src2);
+    tcg_gen_add_i64(dst, src3, t);
+}
+
+static void gen_op_fpmaddxhi(TCGv_i64 dst, TCGv_i64 src1,
+                             TCGv_i64 src2, TCGv_i64 src3)
+{
+    TCGv_i64 l = tcg_temp_new_i64();
+    TCGv_i64 h = tcg_temp_new_i64();
+    TCGv_i64 z = tcg_constant_i64(0);
+
+    tcg_gen_mulu2_i64(l, h, src1, src2);
+    tcg_gen_add2_i64(l, dst, l, h, src3, z);
+}
+
 static void gen_op_sdiv(TCGv dst, TCGv src1, TCGv src2)
 {
 #ifdef TARGET_SPARC64
@@ -633,6 +703,11 @@
     tcg_gen_ctpop_tl(dst, src2);
 }
 
+static void gen_op_lzcnt(TCGv dst, TCGv src)
+{
+    tcg_gen_clzi_tl(dst, src, TARGET_LONG_BITS);
+}
+
 #ifndef TARGET_SPARC64
 static void gen_helper_array8(TCGv dst, TCGv src1, TCGv src2)
 {
@@ -679,7 +754,80 @@
 #endif
 }
 
-static void gen_op_faligndata(TCGv_i64 dst, TCGv_i64 s1, TCGv_i64 s2)
+static void gen_op_fpadds16s(TCGv_i32 d, TCGv_i32 src1, TCGv_i32 src2)
+{
+    TCGv_i32 t[2];
+
+    for (int i = 0; i < 2; i++) {
+        TCGv_i32 u = tcg_temp_new_i32();
+        TCGv_i32 v = tcg_temp_new_i32();
+
+        tcg_gen_sextract_i32(u, src1, i * 16, 16);
+        tcg_gen_sextract_i32(v, src2, i * 16, 16);
+        tcg_gen_add_i32(u, u, v);
+        tcg_gen_smax_i32(u, u, tcg_constant_i32(INT16_MIN));
+        tcg_gen_smin_i32(u, u, tcg_constant_i32(INT16_MAX));
+        t[i] = u;
+    }
+    tcg_gen_deposit_i32(d, t[0], t[1], 16, 16);
+}
+
+static void gen_op_fpsubs16s(TCGv_i32 d, TCGv_i32 src1, TCGv_i32 src2)
+{
+    TCGv_i32 t[2];
+
+    for (int i = 0; i < 2; i++) {
+        TCGv_i32 u = tcg_temp_new_i32();
+        TCGv_i32 v = tcg_temp_new_i32();
+
+        tcg_gen_sextract_i32(u, src1, i * 16, 16);
+        tcg_gen_sextract_i32(v, src2, i * 16, 16);
+        tcg_gen_sub_i32(u, u, v);
+        tcg_gen_smax_i32(u, u, tcg_constant_i32(INT16_MIN));
+        tcg_gen_smin_i32(u, u, tcg_constant_i32(INT16_MAX));
+        t[i] = u;
+    }
+    tcg_gen_deposit_i32(d, t[0], t[1], 16, 16);
+}
+
+static void gen_op_fpadds32s(TCGv_i32 d, TCGv_i32 src1, TCGv_i32 src2)
+{
+    TCGv_i32 r = tcg_temp_new_i32();
+    TCGv_i32 t = tcg_temp_new_i32();
+    TCGv_i32 v = tcg_temp_new_i32();
+    TCGv_i32 z = tcg_constant_i32(0);
+
+    tcg_gen_add_i32(r, src1, src2);
+    tcg_gen_xor_i32(t, src1, src2);
+    tcg_gen_xor_i32(v, r, src2);
+    tcg_gen_andc_i32(v, v, t);
+
+    tcg_gen_setcond_i32(TCG_COND_GE, t, r, z);
+    tcg_gen_addi_i32(t, t, INT32_MAX);
+
+    tcg_gen_movcond_i32(TCG_COND_LT, d, v, z, t, r);
+}
+
+static void gen_op_fpsubs32s(TCGv_i32 d, TCGv_i32 src1, TCGv_i32 src2)
+{
+    TCGv_i32 r = tcg_temp_new_i32();
+    TCGv_i32 t = tcg_temp_new_i32();
+    TCGv_i32 v = tcg_temp_new_i32();
+    TCGv_i32 z = tcg_constant_i32(0);
+
+    tcg_gen_sub_i32(r, src1, src2);
+    tcg_gen_xor_i32(t, src1, src2);
+    tcg_gen_xor_i32(v, r, src1);
+    tcg_gen_and_i32(v, v, t);
+
+    tcg_gen_setcond_i32(TCG_COND_GE, t, r, z);
+    tcg_gen_addi_i32(t, t, INT32_MAX);
+
+    tcg_gen_movcond_i32(TCG_COND_LT, d, v, z, t, r);
+}
+
+static void gen_op_faligndata_i(TCGv_i64 dst, TCGv_i64 s1,
+                                TCGv_i64 s2, TCGv gsr)
 {
 #ifdef TARGET_SPARC64
     TCGv t1, t2, shift;
@@ -688,7 +836,7 @@
     t2 = tcg_temp_new();
     shift = tcg_temp_new();
 
-    tcg_gen_andi_tl(shift, cpu_gsr, 7);
+    tcg_gen_andi_tl(shift, gsr, 7);
     tcg_gen_shli_tl(shift, shift, 3);
     tcg_gen_shl_tl(t1, s1, shift);
 
@@ -706,6 +854,11 @@
 #endif
 }
 
+static void gen_op_faligndata_g(TCGv_i64 dst, TCGv_i64 s1, TCGv_i64 s2)
+{
+    gen_op_faligndata_i(dst, s1, s2, cpu_gsr);
+}
+
 static void gen_op_bshuffle(TCGv_i64 dst, TCGv_i64 src1, TCGv_i64 src2)
 {
 #ifdef TARGET_SPARC64
@@ -715,6 +868,15 @@
 #endif
 }
 
+static void gen_op_pdistn(TCGv dst, TCGv_i64 src1, TCGv_i64 src2)
+{
+#ifdef TARGET_SPARC64
+    gen_helper_pdist(dst, tcg_constant_i64(0), src1, src2);
+#else
+    g_assert_not_reached();
+#endif
+}
+
 static void gen_op_fmul8x16al(TCGv_i64 dst, TCGv_i32 src1, TCGv_i32 src2)
 {
     tcg_gen_ext16s_i32(src2, src2);
@@ -769,6 +931,66 @@
     tcg_gen_concat_i32_i64(dst, t0, t1);
 }
 
+#ifdef TARGET_SPARC64
+static void gen_vec_fchksm16(unsigned vece, TCGv_vec dst,
+                             TCGv_vec src1, TCGv_vec src2)
+{
+    TCGv_vec a = tcg_temp_new_vec_matching(dst);
+    TCGv_vec c = tcg_temp_new_vec_matching(dst);
+
+    tcg_gen_add_vec(vece, a, src1, src2);
+    tcg_gen_cmp_vec(TCG_COND_LTU, vece, c, a, src1);
+    /* Vector cmp produces -1 for true, so subtract to add carry. */
+    tcg_gen_sub_vec(vece, dst, a, c);
+}
+
+static void gen_op_fchksm16(unsigned vece, uint32_t dofs, uint32_t aofs,
+                            uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+    static const TCGOpcode vecop_list[] = {
+        INDEX_op_cmp_vec, INDEX_op_add_vec, INDEX_op_sub_vec,
+    };
+    static const GVecGen3 op = {
+        .fni8 = gen_helper_fchksm16,
+        .fniv = gen_vec_fchksm16,
+        .opt_opc = vecop_list,
+        .vece = MO_16,
+    };
+    tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &op);
+}
+
+static void gen_vec_fmean16(unsigned vece, TCGv_vec dst,
+                            TCGv_vec src1, TCGv_vec src2)
+{
+    TCGv_vec t = tcg_temp_new_vec_matching(dst);
+
+    tcg_gen_or_vec(vece, t, src1, src2);
+    tcg_gen_and_vec(vece, t, t, tcg_constant_vec_matching(dst, vece, 1));
+    tcg_gen_sari_vec(vece, src1, src1, 1);
+    tcg_gen_sari_vec(vece, src2, src2, 1);
+    tcg_gen_add_vec(vece, dst, src1, src2);
+    tcg_gen_add_vec(vece, dst, dst, t);
+}
+
+static void gen_op_fmean16(unsigned vece, uint32_t dofs, uint32_t aofs,
+                           uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
+{
+    static const TCGOpcode vecop_list[] = {
+        INDEX_op_add_vec, INDEX_op_sari_vec,
+    };
+    static const GVecGen3 op = {
+        .fni8 = gen_helper_fmean16,
+        .fniv = gen_vec_fmean16,
+        .opt_opc = vecop_list,
+        .vece = MO_16,
+    };
+    tcg_gen_gvec_3(dofs, aofs, bofs, oprsz, maxsz, &op);
+}
+#else
+#define gen_op_fchksm16   ({ qemu_build_not_reached(); NULL; })
+#define gen_op_fmean16    ({ qemu_build_not_reached(); NULL; })
+#endif
+
 static void finishing_insn(DisasContext *dc)
 {
     /*
@@ -1138,6 +1360,97 @@
     tcg_gen_concat_i64_i128(dst, l, h);
 }
 
+static void gen_op_fmadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
+{
+    gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(0));
+}
+
+static void gen_op_fmaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
+{
+    gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(0));
+}
+
+static void gen_op_fmsubs(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
+{
+    int op = float_muladd_negate_c;
+    gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
+}
+
+static void gen_op_fmsubd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
+{
+    int op = float_muladd_negate_c;
+    gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
+}
+
+static void gen_op_fnmsubs(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
+{
+    int op = float_muladd_negate_c | float_muladd_negate_result;
+    gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
+}
+
+static void gen_op_fnmsubd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
+{
+    int op = float_muladd_negate_c | float_muladd_negate_result;
+    gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
+}
+
+static void gen_op_fnmadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
+{
+    int op = float_muladd_negate_result;
+    gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
+}
+
+static void gen_op_fnmaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
+{
+    int op = float_muladd_negate_result;
+    gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
+}
+
+/* Use muladd to compute (1 * src1) + src2 / 2 with one rounding. */
+static void gen_op_fhadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2)
+{
+    TCGv_i32 one = tcg_constant_i32(float32_one);
+    int op = float_muladd_halve_result;
+    gen_helper_fmadds(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
+}
+
+static void gen_op_fhaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2)
+{
+    TCGv_i64 one = tcg_constant_i64(float64_one);
+    int op = float_muladd_halve_result;
+    gen_helper_fmaddd(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
+}
+
+/* Use muladd to compute (1 * src1) - src2 / 2 with one rounding. */
+static void gen_op_fhsubs(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2)
+{
+    TCGv_i32 one = tcg_constant_i32(float32_one);
+    int op = float_muladd_negate_c | float_muladd_halve_result;
+    gen_helper_fmadds(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
+}
+
+static void gen_op_fhsubd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2)
+{
+    TCGv_i64 one = tcg_constant_i64(float64_one);
+    int op = float_muladd_negate_c | float_muladd_halve_result;
+    gen_helper_fmaddd(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
+}
+
+/* Use muladd to compute -((1 * src1) + src2 / 2) with one rounding. */
+static void gen_op_fnhadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2)
+{
+    TCGv_i32 one = tcg_constant_i32(float32_one);
+    int op = float_muladd_negate_result | float_muladd_halve_result;
+    gen_helper_fmadds(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
+}
+
+static void gen_op_fnhaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2)
+{
+    TCGv_i64 one = tcg_constant_i64(float64_one);
+    int op = float_muladd_negate_result | float_muladd_halve_result;
+    gen_helper_fmaddd(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
+}
+
 static void gen_op_fpexception_im(DisasContext *dc, int ftt)
 {
     /*
@@ -1294,6 +1607,7 @@
         case ASI_BLK_AIUP_L_4V:
         case ASI_BLK_AIUP:
         case ASI_BLK_AIUPL:
+        case ASI_MON_AIUP:
             mem_idx = MMU_USER_IDX;
             break;
         case ASI_AIUS:  /* As if user secondary */
@@ -1304,6 +1618,7 @@
         case ASI_BLK_AIUS_L_4V:
         case ASI_BLK_AIUS:
         case ASI_BLK_AIUSL:
+        case ASI_MON_AIUS:
             mem_idx = MMU_USER_SECONDARY_IDX;
             break;
         case ASI_S:  /* Secondary */
@@ -1317,6 +1632,7 @@
         case ASI_FL8_SL:
         case ASI_FL16_S:
         case ASI_FL16_SL:
+        case ASI_MON_S:
             if (mem_idx == MMU_USER_IDX) {
                 mem_idx = MMU_USER_SECONDARY_IDX;
             } else if (mem_idx == MMU_KERNEL_IDX) {
@@ -1334,6 +1650,7 @@
         case ASI_FL8_PL:
         case ASI_FL16_P:
         case ASI_FL16_PL:
+        case ASI_MON_P:
             break;
         }
         switch (asi) {
@@ -1351,6 +1668,10 @@
         case ASI_SL:
         case ASI_P:
         case ASI_PL:
+        case ASI_MON_P:
+        case ASI_MON_S:
+        case ASI_MON_AIUP:
+        case ASI_MON_AIUS:
             type = GET_ASI_DIRECT;
             break;
         case ASI_TWINX_REAL:
@@ -1627,7 +1948,7 @@
     MemOp memop = da->memop;
     MemOp size = memop & MO_SIZE;
     TCGv_i32 d32;
-    TCGv_i64 d64;
+    TCGv_i64 d64, l64;
     TCGv addr_tmp;
 
     /* TODO: Use 128-bit load/store below. */
@@ -1649,16 +1970,20 @@
             break;
 
         case MO_64:
-            tcg_gen_qemu_ld_i64(cpu_fpr[rd / 2], addr, da->mem_idx, memop);
+            d64 = tcg_temp_new_i64();
+            tcg_gen_qemu_ld_i64(d64, addr, da->mem_idx, memop);
+            gen_store_fpr_D(dc, rd, d64);
             break;
 
         case MO_128:
             d64 = tcg_temp_new_i64();
+            l64 = tcg_temp_new_i64();
             tcg_gen_qemu_ld_i64(d64, addr, da->mem_idx, memop);
             addr_tmp = tcg_temp_new();
             tcg_gen_addi_tl(addr_tmp, addr, 8);
-            tcg_gen_qemu_ld_i64(cpu_fpr[rd / 2 + 1], addr_tmp, da->mem_idx, memop);
-            tcg_gen_mov_i64(cpu_fpr[rd / 2], d64);
+            tcg_gen_qemu_ld_i64(l64, addr_tmp, da->mem_idx, memop);
+            gen_store_fpr_D(dc, rd, d64);
+            gen_store_fpr_D(dc, rd + 2, l64);
             break;
         default:
             g_assert_not_reached();
@@ -1670,9 +1995,11 @@
         if (orig_size == MO_64 && (rd & 7) == 0) {
             /* The first operation checks required alignment.  */
             addr_tmp = tcg_temp_new();
+            d64 = tcg_temp_new_i64();
             for (int i = 0; ; ++i) {
-                tcg_gen_qemu_ld_i64(cpu_fpr[rd / 2 + i], addr, da->mem_idx,
+                tcg_gen_qemu_ld_i64(d64, addr, da->mem_idx,
                                     memop | (i == 0 ? MO_ALIGN_64 : 0));
+                gen_store_fpr_D(dc, rd + 2 * i, d64);
                 if (i == 7) {
                     break;
                 }
@@ -1687,8 +2014,9 @@
     case GET_ASI_SHORT:
         /* Valid for lddfa only.  */
         if (orig_size == MO_64) {
-            tcg_gen_qemu_ld_i64(cpu_fpr[rd / 2], addr, da->mem_idx,
-                                memop | MO_ALIGN);
+            d64 = tcg_temp_new_i64();
+            tcg_gen_qemu_ld_i64(d64, addr, da->mem_idx, memop | MO_ALIGN);
+            gen_store_fpr_D(dc, rd, d64);
         } else {
             gen_exception(dc, TT_ILL_INSN);
         }
@@ -1713,17 +2041,19 @@
                 gen_store_fpr_F(dc, rd, d32);
                 break;
             case MO_64:
-                gen_helper_ld_asi(cpu_fpr[rd / 2], tcg_env, addr,
-                                  r_asi, r_mop);
+                d64 = tcg_temp_new_i64();
+                gen_helper_ld_asi(d64, tcg_env, addr, r_asi, r_mop);
+                gen_store_fpr_D(dc, rd, d64);
                 break;
             case MO_128:
                 d64 = tcg_temp_new_i64();
+                l64 = tcg_temp_new_i64();
                 gen_helper_ld_asi(d64, tcg_env, addr, r_asi, r_mop);
                 addr_tmp = tcg_temp_new();
                 tcg_gen_addi_tl(addr_tmp, addr, 8);
-                gen_helper_ld_asi(cpu_fpr[rd / 2 + 1], tcg_env, addr_tmp,
-                                  r_asi, r_mop);
-                tcg_gen_mov_i64(cpu_fpr[rd / 2], d64);
+                gen_helper_ld_asi(l64, tcg_env, addr_tmp, r_asi, r_mop);
+                gen_store_fpr_D(dc, rd, d64);
+                gen_store_fpr_D(dc, rd + 2, l64);
                 break;
             default:
                 g_assert_not_reached();
@@ -1739,6 +2069,7 @@
     MemOp memop = da->memop;
     MemOp size = memop & MO_SIZE;
     TCGv_i32 d32;
+    TCGv_i64 d64;
     TCGv addr_tmp;
 
     /* TODO: Use 128-bit load/store below. */
@@ -1758,8 +2089,8 @@
             tcg_gen_qemu_st_i32(d32, addr, da->mem_idx, memop | MO_ALIGN);
             break;
         case MO_64:
-            tcg_gen_qemu_st_i64(cpu_fpr[rd / 2], addr, da->mem_idx,
-                                memop | MO_ALIGN_4);
+            d64 = gen_load_fpr_D(dc, rd);
+            tcg_gen_qemu_st_i64(d64, addr, da->mem_idx, memop | MO_ALIGN_4);
             break;
         case MO_128:
             /* Only 4-byte alignment required.  However, it is legal for the
@@ -1767,11 +2098,12 @@
                required to fix it up.  Requiring 16-byte alignment here avoids
                having to probe the second page before performing the first
                write.  */
-            tcg_gen_qemu_st_i64(cpu_fpr[rd / 2], addr, da->mem_idx,
-                                memop | MO_ALIGN_16);
+            d64 = gen_load_fpr_D(dc, rd);
+            tcg_gen_qemu_st_i64(d64, addr, da->mem_idx, memop | MO_ALIGN_16);
             addr_tmp = tcg_temp_new();
             tcg_gen_addi_tl(addr_tmp, addr, 8);
-            tcg_gen_qemu_st_i64(cpu_fpr[rd / 2 + 1], addr_tmp, da->mem_idx, memop);
+            d64 = gen_load_fpr_D(dc, rd + 2);
+            tcg_gen_qemu_st_i64(d64, addr_tmp, da->mem_idx, memop);
             break;
         default:
             g_assert_not_reached();
@@ -1784,7 +2116,8 @@
             /* The first operation checks required alignment.  */
             addr_tmp = tcg_temp_new();
             for (int i = 0; ; ++i) {
-                tcg_gen_qemu_st_i64(cpu_fpr[rd / 2 + i], addr, da->mem_idx,
+                d64 = gen_load_fpr_D(dc, rd + 2 * i);
+                tcg_gen_qemu_st_i64(d64, addr, da->mem_idx,
                                     memop | (i == 0 ? MO_ALIGN_64 : 0));
                 if (i == 7) {
                     break;
@@ -1800,8 +2133,8 @@
     case GET_ASI_SHORT:
         /* Valid for stdfa only.  */
         if (orig_size == MO_64) {
-            tcg_gen_qemu_st_i64(cpu_fpr[rd / 2], addr, da->mem_idx,
-                                memop | MO_ALIGN);
+            d64 = gen_load_fpr_D(dc, rd);
+            tcg_gen_qemu_st_i64(d64, addr, da->mem_idx, memop | MO_ALIGN);
         } else {
             gen_exception(dc, TT_ILL_INSN);
         }
@@ -2032,7 +2365,7 @@
 static void gen_fmovd(DisasContext *dc, DisasCompare *cmp, int rd, int rs)
 {
 #ifdef TARGET_SPARC64
-    TCGv_i64 dst = gen_dest_fpr_D(dc, rd);
+    TCGv_i64 dst = tcg_temp_new_i64();
     tcg_gen_movcond_i64(cmp->cond, dst, cmp->c1, tcg_constant_tl(cmp->c2),
                         gen_load_fpr_D(dc, rs),
                         gen_load_fpr_D(dc, rd));
@@ -2045,16 +2378,18 @@
 static void gen_fmovq(DisasContext *dc, DisasCompare *cmp, int rd, int rs)
 {
 #ifdef TARGET_SPARC64
-    int qd = QFPREG(rd);
-    int qs = QFPREG(rs);
     TCGv c2 = tcg_constant_tl(cmp->c2);
+    TCGv_i64 h = tcg_temp_new_i64();
+    TCGv_i64 l = tcg_temp_new_i64();
 
-    tcg_gen_movcond_i64(cmp->cond, cpu_fpr[qd / 2], cmp->c1, c2,
-                        cpu_fpr[qs / 2], cpu_fpr[qd / 2]);
-    tcg_gen_movcond_i64(cmp->cond, cpu_fpr[qd / 2 + 1], cmp->c1, c2,
-                        cpu_fpr[qs / 2 + 1], cpu_fpr[qd / 2 + 1]);
-
-    gen_update_fprs_dirty(dc, qd);
+    tcg_gen_movcond_i64(cmp->cond, h, cmp->c1, c2,
+                        gen_load_fpr_D(dc, rs),
+                        gen_load_fpr_D(dc, rd));
+    tcg_gen_movcond_i64(cmp->cond, l, cmp->c1, c2,
+                        gen_load_fpr_D(dc, rs + 2),
+                        gen_load_fpr_D(dc, rd + 2));
+    gen_store_fpr_D(dc, rd, h);
+    gen_store_fpr_D(dc, rd + 2, l);
 #else
     qemu_build_not_reached();
 #endif
@@ -2086,12 +2421,20 @@
 
 static int extract_dfpreg(DisasContext *dc, int x)
 {
-    return DFPREG(x);
+    int r = x & 0x1e;
+#ifdef TARGET_SPARC64
+    r |= (x & 1) << 5;
+#endif
+    return r;
 }
 
 static int extract_qfpreg(DisasContext *dc, int x)
 {
-    return QFPREG(x);
+    int r = x & 0x1c;
+#ifdef TARGET_SPARC64
+    r |= (x & 1) << 5;
+#endif
+    return r;
 }
 
 /* Include the auto-generated decoder.  */
@@ -2110,10 +2453,15 @@
 # define avail_MUL(C)     true
 # define avail_POWERDOWN(C) false
 # define avail_64(C)      true
+# define avail_FMAF(C)    ((C)->def->features & CPU_FEATURE_FMAF)
 # define avail_GL(C)      ((C)->def->features & CPU_FEATURE_GL)
 # define avail_HYPV(C)    ((C)->def->features & CPU_FEATURE_HYPV)
+# define avail_IMA(C)     ((C)->def->features & CPU_FEATURE_IMA)
 # define avail_VIS1(C)    ((C)->def->features & CPU_FEATURE_VIS1)
 # define avail_VIS2(C)    ((C)->def->features & CPU_FEATURE_VIS2)
+# define avail_VIS3(C)    ((C)->def->features & CPU_FEATURE_VIS3)
+# define avail_VIS3B(C)   avail_VIS3(C)
+# define avail_VIS4(C)    ((C)->def->features & CPU_FEATURE_VIS4)
 #else
 # define avail_32(C)      true
 # define avail_ASR17(C)   ((C)->def->features & CPU_FEATURE_ASR17)
@@ -2122,10 +2470,15 @@
 # define avail_MUL(C)     ((C)->def->features & CPU_FEATURE_MUL)
 # define avail_POWERDOWN(C) ((C)->def->features & CPU_FEATURE_POWERDOWN)
 # define avail_64(C)      false
+# define avail_FMAF(C)    false
 # define avail_GL(C)      false
 # define avail_HYPV(C)    false
+# define avail_IMA(C)     false
 # define avail_VIS1(C)    false
 # define avail_VIS2(C)    false
+# define avail_VIS3(C)    false
+# define avail_VIS3B(C)   false
+# define avail_VIS4(C)    false
 #endif
 
 /* Default case for non jump instructions. */
@@ -2999,6 +3352,17 @@
 
 TRANS(WRPOWERDOWN, POWERDOWN, do_wr_special, a, supervisor(dc), do_wrpowerdown)
 
+static void do_wrmwait(DisasContext *dc, TCGv src)
+{
+    /*
+     * TODO: This is a stub version of mwait, which merely recognizes
+     * interrupts immediately and does not wait.
+     */
+    dc->base.is_jmp = DISAS_EXIT;
+}
+
+TRANS(WRMWAIT, VIS4, do_wr_special, a, true, do_wrmwait)
+
 static void do_wrpsr(DisasContext *dc, TCGv src)
 {
     gen_helper_wrpsr(tcg_env, src);
@@ -3519,11 +3883,10 @@
 }
 
 static bool gen_edge(DisasContext *dc, arg_r_r_r *a,
-                     int width, bool cc, bool left)
+                     int width, bool cc, bool little_endian)
 {
-    TCGv dst, s1, s2, lo1, lo2;
-    uint64_t amask, tabl, tabr;
-    int shift, imask, omask;
+    TCGv dst, s1, s2, l, r, t, m;
+    uint64_t amask = address_mask_i(dc, -8);
 
     dst = gen_dest_gpr(dc, a->rd);
     s1 = gen_load_gpr(dc, a->rs1);
@@ -3533,75 +3896,52 @@
         gen_op_subcc(cpu_cc_N, s1, s2);
     }
 
-    /*
-     * Theory of operation: there are two tables, left and right (not to
-     * be confused with the left and right versions of the opcode).  These
-     * are indexed by the low 3 bits of the inputs.  To make things "easy",
-     * these tables are loaded into two constants, TABL and TABR below.
-     * The operation index = (input & imask) << shift calculates the index
-     * into the constant, while val = (table >> index) & omask calculates
-     * the value we're looking for.
-     */
+    l = tcg_temp_new();
+    r = tcg_temp_new();
+    t = tcg_temp_new();
+
     switch (width) {
     case 8:
-        imask = 0x7;
-        shift = 3;
-        omask = 0xff;
-        if (left) {
-            tabl = 0x80c0e0f0f8fcfeffULL;
-            tabr = 0xff7f3f1f0f070301ULL;
-        } else {
-            tabl = 0x0103070f1f3f7fffULL;
-            tabr = 0xfffefcf8f0e0c080ULL;
-        }
+        tcg_gen_andi_tl(l, s1, 7);
+        tcg_gen_andi_tl(r, s2, 7);
+        tcg_gen_xori_tl(r, r, 7);
+        m = tcg_constant_tl(0xff);
         break;
     case 16:
-        imask = 0x6;
-        shift = 1;
-        omask = 0xf;
-        if (left) {
-            tabl = 0x8cef;
-            tabr = 0xf731;
-        } else {
-            tabl = 0x137f;
-            tabr = 0xfec8;
-        }
+        tcg_gen_extract_tl(l, s1, 1, 2);
+        tcg_gen_extract_tl(r, s2, 1, 2);
+        tcg_gen_xori_tl(r, r, 3);
+        m = tcg_constant_tl(0xf);
         break;
     case 32:
-        imask = 0x4;
-        shift = 0;
-        omask = 0x3;
-        if (left) {
-            tabl = (2 << 2) | 3;
-            tabr = (3 << 2) | 1;
-        } else {
-            tabl = (1 << 2) | 3;
-            tabr = (3 << 2) | 2;
-        }
+        tcg_gen_extract_tl(l, s1, 2, 1);
+        tcg_gen_extract_tl(r, s2, 2, 1);
+        tcg_gen_xori_tl(r, r, 1);
+        m = tcg_constant_tl(0x3);
         break;
     default:
         abort();
     }
 
-    lo1 = tcg_temp_new();
-    lo2 = tcg_temp_new();
-    tcg_gen_andi_tl(lo1, s1, imask);
-    tcg_gen_andi_tl(lo2, s2, imask);
-    tcg_gen_shli_tl(lo1, lo1, shift);
-    tcg_gen_shli_tl(lo2, lo2, shift);
+    /* Compute Left Edge */
+    if (little_endian) {
+        tcg_gen_shl_tl(l, m, l);
+        tcg_gen_and_tl(l, l, m);
+    } else {
+        tcg_gen_shr_tl(l, m, l);
+    }
+    /* Compute Right Edge */
+    if (little_endian) {
+        tcg_gen_shr_tl(r, m, r);
+    } else {
+        tcg_gen_shl_tl(r, m, r);
+        tcg_gen_and_tl(r, r, m);
+    }
 
-    tcg_gen_shr_tl(lo1, tcg_constant_tl(tabl), lo1);
-    tcg_gen_shr_tl(lo2, tcg_constant_tl(tabr), lo2);
-    tcg_gen_andi_tl(lo1, lo1, omask);
-    tcg_gen_andi_tl(lo2, lo2, omask);
-
-    amask = address_mask_i(dc, -8);
-    tcg_gen_andi_tl(s1, s1, amask);
-    tcg_gen_andi_tl(s2, s2, amask);
-
-    /* Compute dst = (s1 == s2 ? lo1 : lo1 & lo2). */
-    tcg_gen_and_tl(lo2, lo2, lo1);
-    tcg_gen_movcond_tl(TCG_COND_EQ, dst, s1, s2, lo1, lo2);
+    /* Compute dst = (s1 == s2 under amask ? l : l & r) */
+    tcg_gen_xor_tl(t, s1, s2);
+    tcg_gen_and_tl(r, r, l);
+    tcg_gen_movcond_tl(TCG_COND_TSTEQ, dst, t, tcg_constant_tl(amask), r, l);
 
     gen_store_gpr(dc, a->rd, dst);
     return advance_pc(dc);
@@ -3621,6 +3961,19 @@
 TRANS(EDGE32N, VIS2, gen_edge, a, 32, 0, 0)
 TRANS(EDGE32LN, VIS2, gen_edge, a, 32, 0, 1)
 
+static bool do_rr(DisasContext *dc, arg_r_r *a,
+                  void (*func)(TCGv, TCGv))
+{
+    TCGv dst = gen_dest_gpr(dc, a->rd);
+    TCGv src = gen_load_gpr(dc, a->rs);
+
+    func(dst, src);
+    gen_store_gpr(dc, a->rd, dst);
+    return advance_pc(dc);
+}
+
+TRANS(LZCNT, VIS3, do_rr, a, gen_op_lzcnt)
+
 static bool do_rrr(DisasContext *dc, arg_r_r_r *a,
                    void (*func)(TCGv, TCGv, TCGv))
 {
@@ -3637,6 +3990,14 @@
 TRANS(ARRAY16, VIS1, do_rrr, a, gen_op_array16)
 TRANS(ARRAY32, VIS1, do_rrr, a, gen_op_array32)
 
+TRANS(ADDXC, VIS3, do_rrr, a, gen_op_addxc)
+TRANS(ADDXCcc, VIS3, do_rrr, a, gen_op_addxccc)
+
+TRANS(SUBXC, VIS4, do_rrr, a, gen_op_subxc)
+TRANS(SUBXCcc, VIS4, do_rrr, a, gen_op_subxccc)
+
+TRANS(UMULXHI, VIS3, do_rrr, a, gen_op_umulxhi)
+
 static void gen_op_alignaddr(TCGv dst, TCGv s1, TCGv s2)
 {
 #ifdef TARGET_SPARC64
@@ -3679,6 +4040,16 @@
 
 TRANS(BMASK, VIS2, do_rrr, a, gen_op_bmask)
 
+static bool do_cmask(DisasContext *dc, int rs2, void (*func)(TCGv, TCGv, TCGv))
+{
+    func(cpu_gsr, cpu_gsr, gen_load_gpr(dc, rs2));
+    return true;
+}
+
+TRANS(CMASK8, VIS3, do_cmask, a->rs2, gen_helper_cmask8)
+TRANS(CMASK16, VIS3, do_cmask, a->rs2, gen_helper_cmask16)
+TRANS(CMASK32, VIS3, do_cmask, a->rs2, gen_helper_cmask32)
+
 static bool do_shift_r(DisasContext *dc, arg_shiftr *a, bool l, bool u)
 {
     TCGv dst, src1, src2;
@@ -4193,7 +4564,7 @@
     return advance_pc(dc);
 }
 
-static bool trans_LDXFSR(DisasContext *dc, arg_r_r_ri *a)
+static bool do_ldxfsr(DisasContext *dc, arg_r_r_ri *a, bool entire)
 {
 #ifdef TARGET_SPARC64
     TCGv addr = gen_ldst_addr(dc, a->rs1, a->imm, a->rs2_or_imm);
@@ -4218,13 +4589,20 @@
     tcg_gen_extract_i32(cpu_fcc[2], hi, FSR_FCC2_SHIFT - 32, 2);
     tcg_gen_extract_i32(cpu_fcc[3], hi, FSR_FCC3_SHIFT - 32, 2);
 
-    gen_helper_set_fsr_nofcc_noftt(tcg_env, lo);
+    if (entire) {
+        gen_helper_set_fsr_nofcc(tcg_env, lo);
+    } else {
+        gen_helper_set_fsr_nofcc_noftt(tcg_env, lo);
+    }
     return advance_pc(dc);
 #else
     return false;
 #endif
 }
 
+TRANS(LDXFSR, 64, do_ldxfsr, a, false)
+TRANS(LDXEFSR, VIS3B, do_ldxfsr, a, true)
+
 static bool do_stfsr(DisasContext *dc, arg_r_r_ri *a, MemOp mop)
 {
     TCGv addr = gen_ldst_addr(dc, a->rs1, a->imm, a->rs2_or_imm);
@@ -4246,39 +4624,24 @@
 TRANS(STFSR, ALL, do_stfsr, a, MO_TEUL)
 TRANS(STXFSR, 64, do_stfsr, a, MO_TEUQ)
 
-static bool do_fc(DisasContext *dc, int rd, bool c)
+static bool do_fc(DisasContext *dc, int rd, int32_t c)
 {
-    uint64_t mask;
-
     if (gen_trap_ifnofpu(dc)) {
         return true;
     }
-
-    if (rd & 1) {
-        mask = MAKE_64BIT_MASK(0, 32);
-    } else {
-        mask = MAKE_64BIT_MASK(32, 32);
-    }
-    if (c) {
-        tcg_gen_ori_i64(cpu_fpr[rd / 2], cpu_fpr[rd / 2], mask);
-    } else {
-        tcg_gen_andi_i64(cpu_fpr[rd / 2], cpu_fpr[rd / 2], ~mask);
-    }
-    gen_update_fprs_dirty(dc, rd);
+    gen_store_fpr_F(dc, rd, tcg_constant_i32(c));
     return advance_pc(dc);
 }
 
 TRANS(FZEROs, VIS1, do_fc, a->rd, 0)
-TRANS(FONEs, VIS1, do_fc, a->rd, 1)
+TRANS(FONEs, VIS1, do_fc, a->rd, -1)
 
 static bool do_dc(DisasContext *dc, int rd, int64_t c)
 {
     if (gen_trap_ifnofpu(dc)) {
         return true;
     }
-
-    tcg_gen_movi_i64(cpu_fpr[rd / 2], c);
-    gen_update_fprs_dirty(dc, rd);
+    gen_store_fpr_D(dc, rd, tcg_constant_i64(c));
     return advance_pc(dc);
 }
 
@@ -4375,7 +4738,7 @@
         return true;
     }
 
-    dst = gen_dest_fpr_D(dc, a->rd);
+    dst = tcg_temp_new_i64();
     src = gen_load_fpr_D(dc, a->rs);
     func(dst, src);
     gen_store_fpr_D(dc, a->rd, dst);
@@ -4397,7 +4760,7 @@
         return true;
     }
 
-    dst = gen_dest_fpr_D(dc, a->rd);
+    dst = tcg_temp_new_i64();
     src = gen_load_fpr_D(dc, a->rs);
     func(dst, tcg_env, src);
     gen_store_fpr_D(dc, a->rd, dst);
@@ -4437,7 +4800,7 @@
         return true;
     }
 
-    dst = gen_dest_fpr_D(dc, a->rd);
+    dst = tcg_temp_new_i64();
     src = gen_load_fpr_F(dc, a->rs);
     func(dst, tcg_env, src);
     gen_store_fpr_D(dc, a->rd, dst);
@@ -4528,7 +4891,7 @@
     }
 
     src = gen_load_fpr_Q(dc, a->rs);
-    dst = gen_dest_fpr_D(dc, a->rd);
+    dst = tcg_temp_new_i64();
     func(dst, tcg_env, src);
     gen_store_fpr_D(dc, a->rd, dst);
     return advance_pc(dc);
@@ -4612,6 +4975,15 @@
 TRANS(FORNOTs, VIS1, do_fff, a, tcg_gen_orc_i32)
 TRANS(FORs, VIS1, do_fff, a, tcg_gen_or_i32)
 
+TRANS(FHADDs, VIS3, do_fff, a, gen_op_fhadds)
+TRANS(FHSUBs, VIS3, do_fff, a, gen_op_fhsubs)
+TRANS(FNHADDs, VIS3, do_fff, a, gen_op_fnhadds)
+
+TRANS(FPADDS16s, VIS3, do_fff, a, gen_op_fpadds16s)
+TRANS(FPSUBS16s, VIS3, do_fff, a, gen_op_fpsubs16s)
+TRANS(FPADDS32s, VIS3, do_fff, a, gen_op_fpadds32s)
+TRANS(FPSUBS32s, VIS3, do_fff, a, gen_op_fpsubs32s)
+
 static bool do_env_fff(DisasContext *dc, arg_r_r_r *a,
                        void (*func)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
 {
@@ -4632,6 +5004,8 @@
 TRANS(FSUBs, ALL, do_env_fff, a, gen_helper_fsubs)
 TRANS(FMULs, ALL, do_env_fff, a, gen_helper_fmuls)
 TRANS(FDIVs, ALL, do_env_fff, a, gen_helper_fdivs)
+TRANS(FNADDs, VIS3, do_env_fff, a, gen_helper_fnadds)
+TRANS(FNMULs, VIS3, do_env_fff, a, gen_helper_fnmuls)
 
 static bool do_dff(DisasContext *dc, arg_r_r_r *a,
                    void (*func)(TCGv_i64, TCGv_i32, TCGv_i32))
@@ -4643,7 +5017,7 @@
         return true;
     }
 
-    dst = gen_dest_fpr_D(dc, a->rd);
+    dst = tcg_temp_new_i64();
     src1 = gen_load_fpr_F(dc, a->rs1);
     src2 = gen_load_fpr_F(dc, a->rs2);
     func(dst, src1, src2);
@@ -4667,7 +5041,7 @@
         return true;
     }
 
-    dst = gen_dest_fpr_D(dc, a->rd);
+    dst = tcg_temp_new_i64();
     src1 = gen_load_fpr_F(dc, a->rs1);
     src2 = gen_load_fpr_D(dc, a->rs2);
     func(dst, src1, src2);
@@ -4677,6 +5051,63 @@
 
 TRANS(FMUL8x16, VIS1, do_dfd, a, gen_helper_fmul8x16)
 
+static bool do_gvec_ddd(DisasContext *dc, arg_r_r_r *a, MemOp vece,
+                        void (*func)(unsigned, uint32_t, uint32_t,
+                                     uint32_t, uint32_t, uint32_t))
+{
+    if (gen_trap_ifnofpu(dc)) {
+        return true;
+    }
+
+    func(vece, gen_offset_fpr_D(a->rd), gen_offset_fpr_D(a->rs1),
+         gen_offset_fpr_D(a->rs2), 8, 8);
+    return advance_pc(dc);
+}
+
+TRANS(FPADD8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_add)
+TRANS(FPADD16, VIS1, do_gvec_ddd, a, MO_16, tcg_gen_gvec_add)
+TRANS(FPADD32, VIS1, do_gvec_ddd, a, MO_32, tcg_gen_gvec_add)
+
+TRANS(FPSUB8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_sub)
+TRANS(FPSUB16, VIS1, do_gvec_ddd, a, MO_16, tcg_gen_gvec_sub)
+TRANS(FPSUB32, VIS1, do_gvec_ddd, a, MO_32, tcg_gen_gvec_sub)
+
+TRANS(FCHKSM16, VIS3, do_gvec_ddd, a, MO_16, gen_op_fchksm16)
+TRANS(FMEAN16, VIS3, do_gvec_ddd, a, MO_16, gen_op_fmean16)
+
+TRANS(FPADDS8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_ssadd)
+TRANS(FPADDS16, VIS3, do_gvec_ddd, a, MO_16, tcg_gen_gvec_ssadd)
+TRANS(FPADDS32, VIS3, do_gvec_ddd, a, MO_32, tcg_gen_gvec_ssadd)
+TRANS(FPADDUS8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_usadd)
+TRANS(FPADDUS16, VIS4, do_gvec_ddd, a, MO_16, tcg_gen_gvec_usadd)
+
+TRANS(FPSUBS8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_sssub)
+TRANS(FPSUBS16, VIS3, do_gvec_ddd, a, MO_16, tcg_gen_gvec_sssub)
+TRANS(FPSUBS32, VIS3, do_gvec_ddd, a, MO_32, tcg_gen_gvec_sssub)
+TRANS(FPSUBUS8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_ussub)
+TRANS(FPSUBUS16, VIS4, do_gvec_ddd, a, MO_16, tcg_gen_gvec_ussub)
+
+TRANS(FSLL16, VIS3, do_gvec_ddd, a, MO_16, tcg_gen_gvec_shlv)
+TRANS(FSLL32, VIS3, do_gvec_ddd, a, MO_32, tcg_gen_gvec_shlv)
+TRANS(FSRL16, VIS3, do_gvec_ddd, a, MO_16, tcg_gen_gvec_shrv)
+TRANS(FSRL32, VIS3, do_gvec_ddd, a, MO_32, tcg_gen_gvec_shrv)
+TRANS(FSRA16, VIS3, do_gvec_ddd, a, MO_16, tcg_gen_gvec_sarv)
+TRANS(FSRA32, VIS3, do_gvec_ddd, a, MO_32, tcg_gen_gvec_sarv)
+
+TRANS(FPMIN8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_smin)
+TRANS(FPMIN16, VIS4, do_gvec_ddd, a, MO_16, tcg_gen_gvec_smin)
+TRANS(FPMIN32, VIS4, do_gvec_ddd, a, MO_32, tcg_gen_gvec_smin)
+TRANS(FPMINU8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_umin)
+TRANS(FPMINU16, VIS4, do_gvec_ddd, a, MO_16, tcg_gen_gvec_umin)
+TRANS(FPMINU32, VIS4, do_gvec_ddd, a, MO_32, tcg_gen_gvec_umin)
+
+TRANS(FPMAX8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_smax)
+TRANS(FPMAX16, VIS4, do_gvec_ddd, a, MO_16, tcg_gen_gvec_smax)
+TRANS(FPMAX32, VIS4, do_gvec_ddd, a, MO_32, tcg_gen_gvec_smax)
+TRANS(FPMAXU8, VIS4, do_gvec_ddd, a, MO_8, tcg_gen_gvec_umax)
+TRANS(FPMAXU16, VIS4, do_gvec_ddd, a, MO_16, tcg_gen_gvec_umax)
+TRANS(FPMAXU32, VIS4, do_gvec_ddd, a, MO_32, tcg_gen_gvec_umax)
+
 static bool do_ddd(DisasContext *dc, arg_r_r_r *a,
                    void (*func)(TCGv_i64, TCGv_i64, TCGv_i64))
 {
@@ -4686,7 +5117,7 @@
         return true;
     }
 
-    dst = gen_dest_fpr_D(dc, a->rd);
+    dst = tcg_temp_new_i64();
     src1 = gen_load_fpr_D(dc, a->rs1);
     src2 = gen_load_fpr_D(dc, a->rs2);
     func(dst, src1, src2);
@@ -4697,10 +5128,6 @@
 TRANS(FMUL8SUx16, VIS1, do_ddd, a, gen_helper_fmul8sux16)
 TRANS(FMUL8ULx16, VIS1, do_ddd, a, gen_helper_fmul8ulx16)
 
-TRANS(FPADD16, VIS1, do_ddd, a, tcg_gen_vec_add16_i64)
-TRANS(FPADD32, VIS1, do_ddd, a, tcg_gen_vec_add32_i64)
-TRANS(FPSUB16, VIS1, do_ddd, a, tcg_gen_vec_sub16_i64)
-TRANS(FPSUB32, VIS1, do_ddd, a, tcg_gen_vec_sub32_i64)
 TRANS(FNORd, VIS1, do_ddd, a, tcg_gen_nor_i64)
 TRANS(FANDNOTd, VIS1, do_ddd, a, tcg_gen_andc_i64)
 TRANS(FXORd, VIS1, do_ddd, a, tcg_gen_xor_i64)
@@ -4711,9 +5138,18 @@
 TRANS(FORd, VIS1, do_ddd, a, tcg_gen_or_i64)
 
 TRANS(FPACK32, VIS1, do_ddd, a, gen_op_fpack32)
-TRANS(FALIGNDATAg, VIS1, do_ddd, a, gen_op_faligndata)
+TRANS(FALIGNDATAg, VIS1, do_ddd, a, gen_op_faligndata_g)
 TRANS(BSHUFFLE, VIS2, do_ddd, a, gen_op_bshuffle)
 
+TRANS(FHADDd, VIS3, do_ddd, a, gen_op_fhaddd)
+TRANS(FHSUBd, VIS3, do_ddd, a, gen_op_fhsubd)
+TRANS(FNHADDd, VIS3, do_ddd, a, gen_op_fnhaddd)
+
+TRANS(FPADD64, VIS3B, do_ddd, a, tcg_gen_add_i64)
+TRANS(FPSUB64, VIS3B, do_ddd, a, tcg_gen_sub_i64)
+TRANS(FSLAS16, VIS3, do_ddd, a, gen_helper_fslas16)
+TRANS(FSLAS32, VIS3, do_ddd, a, gen_helper_fslas32)
+
 static bool do_rdd(DisasContext *dc, arg_r_r_r *a,
                    void (*func)(TCGv, TCGv_i64, TCGv_i64))
 {
@@ -4736,11 +5172,26 @@
 TRANS(FPCMPNE16, VIS1, do_rdd, a, gen_helper_fcmpne16)
 TRANS(FPCMPGT16, VIS1, do_rdd, a, gen_helper_fcmpgt16)
 TRANS(FPCMPEQ16, VIS1, do_rdd, a, gen_helper_fcmpeq16)
+TRANS(FPCMPULE16, VIS4, do_rdd, a, gen_helper_fcmpule16)
+TRANS(FPCMPUGT16, VIS4, do_rdd, a, gen_helper_fcmpugt16)
 
 TRANS(FPCMPLE32, VIS1, do_rdd, a, gen_helper_fcmple32)
 TRANS(FPCMPNE32, VIS1, do_rdd, a, gen_helper_fcmpne32)
 TRANS(FPCMPGT32, VIS1, do_rdd, a, gen_helper_fcmpgt32)
 TRANS(FPCMPEQ32, VIS1, do_rdd, a, gen_helper_fcmpeq32)
+TRANS(FPCMPULE32, VIS4, do_rdd, a, gen_helper_fcmpule32)
+TRANS(FPCMPUGT32, VIS4, do_rdd, a, gen_helper_fcmpugt32)
+
+TRANS(FPCMPEQ8, VIS3B, do_rdd, a, gen_helper_fcmpeq8)
+TRANS(FPCMPNE8, VIS3B, do_rdd, a, gen_helper_fcmpne8)
+TRANS(FPCMPULE8, VIS3B, do_rdd, a, gen_helper_fcmpule8)
+TRANS(FPCMPUGT8, VIS3B, do_rdd, a, gen_helper_fcmpugt8)
+TRANS(FPCMPLE8, VIS4, do_rdd, a, gen_helper_fcmple8)
+TRANS(FPCMPGT8, VIS4, do_rdd, a, gen_helper_fcmpgt8)
+
+TRANS(PDISTN, VIS3, do_rdd, a, gen_op_pdistn)
+TRANS(XMULX, VIS3, do_rrr, a, gen_helper_xmulx)
+TRANS(XMULXHI, VIS3, do_rrr, a, gen_helper_xmulxhi)
 
 static bool do_env_ddd(DisasContext *dc, arg_r_r_r *a,
                        void (*func)(TCGv_i64, TCGv_env, TCGv_i64, TCGv_i64))
@@ -4751,7 +5202,7 @@
         return true;
     }
 
-    dst = gen_dest_fpr_D(dc, a->rd);
+    dst = tcg_temp_new_i64();
     src1 = gen_load_fpr_D(dc, a->rs1);
     src2 = gen_load_fpr_D(dc, a->rs2);
     func(dst, tcg_env, src1, src2);
@@ -4763,6 +5214,8 @@
 TRANS(FSUBd, ALL, do_env_ddd, a, gen_helper_fsubd)
 TRANS(FMULd, ALL, do_env_ddd, a, gen_helper_fmuld)
 TRANS(FDIVd, ALL, do_env_ddd, a, gen_helper_fdivd)
+TRANS(FNADDd, VIS3, do_env_ddd, a, gen_helper_fnaddd)
+TRANS(FNMULd, VIS3, do_env_ddd, a, gen_helper_fnmuld)
 
 static bool trans_FsMULd(DisasContext *dc, arg_r_r_r *a)
 {
@@ -4776,7 +5229,7 @@
         return raise_unimpfpop(dc);
     }
 
-    dst = gen_dest_fpr_D(dc, a->rd);
+    dst = tcg_temp_new_i64();
     src1 = gen_load_fpr_F(dc, a->rs1);
     src2 = gen_load_fpr_F(dc, a->rs2);
     gen_helper_fsmuld(dst, tcg_env, src1, src2);
@@ -4784,25 +5237,94 @@
     return advance_pc(dc);
 }
 
-static bool do_dddd(DisasContext *dc, arg_r_r_r *a,
-                    void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
+static bool trans_FNsMULd(DisasContext *dc, arg_r_r_r *a)
 {
-    TCGv_i64 dst, src0, src1, src2;
+    TCGv_i64 dst;
+    TCGv_i32 src1, src2;
+
+    if (!avail_VIS3(dc)) {
+        return false;
+    }
+    if (gen_trap_ifnofpu(dc)) {
+        return true;
+    }
+    dst = tcg_temp_new_i64();
+    src1 = gen_load_fpr_F(dc, a->rs1);
+    src2 = gen_load_fpr_F(dc, a->rs2);
+    gen_helper_fnsmuld(dst, tcg_env, src1, src2);
+    gen_store_fpr_D(dc, a->rd, dst);
+    return advance_pc(dc);
+}
+
+static bool do_ffff(DisasContext *dc, arg_r_r_r_r *a,
+                    void (*func)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_i32))
+{
+    TCGv_i32 dst, src1, src2, src3;
 
     if (gen_trap_ifnofpu(dc)) {
         return true;
     }
 
-    dst  = gen_dest_fpr_D(dc, a->rd);
-    src0 = gen_load_fpr_D(dc, a->rd);
+    src1 = gen_load_fpr_F(dc, a->rs1);
+    src2 = gen_load_fpr_F(dc, a->rs2);
+    src3 = gen_load_fpr_F(dc, a->rs3);
+    dst = tcg_temp_new_i32();
+    func(dst, src1, src2, src3);
+    gen_store_fpr_F(dc, a->rd, dst);
+    return advance_pc(dc);
+}
+
+TRANS(FMADDs, FMAF, do_ffff, a, gen_op_fmadds)
+TRANS(FMSUBs, FMAF, do_ffff, a, gen_op_fmsubs)
+TRANS(FNMSUBs, FMAF, do_ffff, a, gen_op_fnmsubs)
+TRANS(FNMADDs, FMAF, do_ffff, a, gen_op_fnmadds)
+
+static bool do_dddd(DisasContext *dc, arg_r_r_r_r *a,
+                    void (*func)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64))
+{
+    TCGv_i64 dst, src1, src2, src3;
+
+    if (gen_trap_ifnofpu(dc)) {
+        return true;
+    }
+
+    dst  = tcg_temp_new_i64();
     src1 = gen_load_fpr_D(dc, a->rs1);
     src2 = gen_load_fpr_D(dc, a->rs2);
-    func(dst, src0, src1, src2);
+    src3 = gen_load_fpr_D(dc, a->rs3);
+    func(dst, src1, src2, src3);
     gen_store_fpr_D(dc, a->rd, dst);
     return advance_pc(dc);
 }
 
 TRANS(PDIST, VIS1, do_dddd, a, gen_helper_pdist)
+TRANS(FMADDd, FMAF, do_dddd, a, gen_op_fmaddd)
+TRANS(FMSUBd, FMAF, do_dddd, a, gen_op_fmsubd)
+TRANS(FNMSUBd, FMAF, do_dddd, a, gen_op_fnmsubd)
+TRANS(FNMADDd, FMAF, do_dddd, a, gen_op_fnmaddd)
+TRANS(FPMADDX, IMA, do_dddd, a, gen_op_fpmaddx)
+TRANS(FPMADDXHI, IMA, do_dddd, a, gen_op_fpmaddxhi)
+
+static bool trans_FALIGNDATAi(DisasContext *dc, arg_r_r_r *a)
+{
+    TCGv_i64 dst, src1, src2;
+    TCGv src3;
+
+    if (!avail_VIS4(dc)) {
+        return false;
+    }
+    if (gen_trap_ifnofpu(dc)) {
+        return true;
+    }
+
+    dst  = tcg_temp_new_i64();
+    src1 = gen_load_fpr_D(dc, a->rd);
+    src2 = gen_load_fpr_D(dc, a->rs2);
+    src3 = gen_load_gpr(dc, a->rs1);
+    gen_op_faligndata_i(dst, src1, src2, src3);
+    gen_store_fpr_D(dc, a->rd, dst);
+    return advance_pc(dc);
+}
 
 static bool do_env_qqq(DisasContext *dc, arg_r_r_r *a,
                        void (*func)(TCGv_i128, TCGv_env, TCGv_i128, TCGv_i128))
@@ -4991,6 +5513,76 @@
 TRANS(FCMPq, ALL, do_fcmpq, a, false)
 TRANS(FCMPEq, ALL, do_fcmpq, a, true)
 
+static bool trans_FLCMPs(DisasContext *dc, arg_FLCMPs *a)
+{
+    TCGv_i32 src1, src2;
+
+    if (!avail_VIS3(dc)) {
+        return false;
+    }
+    if (gen_trap_ifnofpu(dc)) {
+        return true;
+    }
+
+    src1 = gen_load_fpr_F(dc, a->rs1);
+    src2 = gen_load_fpr_F(dc, a->rs2);
+    gen_helper_flcmps(cpu_fcc[a->cc], src1, src2);
+    return advance_pc(dc);
+}
+
+static bool trans_FLCMPd(DisasContext *dc, arg_FLCMPd *a)
+{
+    TCGv_i64 src1, src2;
+
+    if (!avail_VIS3(dc)) {
+        return false;
+    }
+    if (gen_trap_ifnofpu(dc)) {
+        return true;
+    }
+
+    src1 = gen_load_fpr_D(dc, a->rs1);
+    src2 = gen_load_fpr_D(dc, a->rs2);
+    gen_helper_flcmpd(cpu_fcc[a->cc], src1, src2);
+    return advance_pc(dc);
+}
+
+static bool do_movf2r(DisasContext *dc, arg_r_r *a,
+                      int (*offset)(unsigned int),
+                      void (*load)(TCGv, TCGv_ptr, tcg_target_long))
+{
+    TCGv dst;
+
+    if (gen_trap_ifnofpu(dc)) {
+        return true;
+    }
+    dst = gen_dest_gpr(dc, a->rd);
+    load(dst, tcg_env, offset(a->rs));
+    gen_store_gpr(dc, a->rd, dst);
+    return advance_pc(dc);
+}
+
+TRANS(MOVsTOsw, VIS3B, do_movf2r, a, gen_offset_fpr_F, tcg_gen_ld32s_tl)
+TRANS(MOVsTOuw, VIS3B, do_movf2r, a, gen_offset_fpr_F, tcg_gen_ld32u_tl)
+TRANS(MOVdTOx, VIS3B, do_movf2r, a, gen_offset_fpr_D, tcg_gen_ld_tl)
+
+static bool do_movr2f(DisasContext *dc, arg_r_r *a,
+                      int (*offset)(unsigned int),
+                      void (*store)(TCGv, TCGv_ptr, tcg_target_long))
+{
+    TCGv src;
+
+    if (gen_trap_ifnofpu(dc)) {
+        return true;
+    }
+    src = gen_load_gpr(dc, a->rs);
+    store(src, tcg_env, offset(a->rd));
+    return advance_pc(dc);
+}
+
+TRANS(MOVwTOs, VIS3B, do_movr2f, a, gen_offset_fpr_F, tcg_gen_st32_tl)
+TRANS(MOVxTOd, VIS3B, do_movr2f, a, gen_offset_fpr_D, tcg_gen_st_tl)
+
 static void sparc_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
 {
     DisasContext *dc = container_of(dcbase, DisasContext, base);
@@ -5172,12 +5764,6 @@
         "l0", "l1", "l2", "l3", "l4", "l5", "l6", "l7",
         "i0", "i1", "i2", "i3", "i4", "i5", "i6", "i7",
     };
-    static const char fregnames[32][4] = {
-        "f0", "f2", "f4", "f6", "f8", "f10", "f12", "f14",
-        "f16", "f18", "f20", "f22", "f24", "f26", "f28", "f30",
-        "f32", "f34", "f36", "f38", "f40", "f42", "f44", "f46",
-        "f48", "f50", "f52", "f54", "f56", "f58", "f60", "f62",
-    };
 
     static const struct { TCGv_i32 *ptr; int off; const char *name; } r32[] = {
 #ifdef TARGET_SPARC64
@@ -5234,12 +5820,6 @@
                                          (i - 8) * sizeof(target_ulong),
                                          gregnames[i]);
     }
-
-    for (i = 0; i < TARGET_DPREGS; i++) {
-        cpu_fpr[i] = tcg_global_mem_new_i64(tcg_env,
-                                            offsetof(CPUSPARCState, fpr[i]),
-                                            fregnames[i]);
-    }
 }
 
 void sparc_restore_state_to_opc(CPUState *cs,
diff --git a/target/sparc/vis_helper.c b/target/sparc/vis_helper.c
index e15c6bb..371f544 100644
--- a/target/sparc/vis_helper.c
+++ b/target/sparc/vis_helper.c
@@ -20,26 +20,44 @@
 #include "qemu/osdep.h"
 #include "cpu.h"
 #include "exec/helper-proto.h"
+#include "crypto/clmul.h"
 
-/* This function uses non-native bit order */
-#define GET_FIELD(X, FROM, TO)                                  \
-    ((X) >> (63 - (TO)) & ((1ULL << ((TO) - (FROM) + 1)) - 1))
-
-/* This function uses the order in the manuals, i.e. bit 0 is 2^0 */
-#define GET_FIELD_SP(X, FROM, TO)               \
-    GET_FIELD(X, 63 - (TO), 63 - (FROM))
-
-target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize)
+target_ulong helper_array8(target_ulong rs1, target_ulong rs2)
 {
-    return (GET_FIELD_SP(pixel_addr, 60, 63) << (17 + 2 * cubesize)) |
-        (GET_FIELD_SP(pixel_addr, 39, 39 + cubesize - 1) << (17 + cubesize)) |
-        (GET_FIELD_SP(pixel_addr, 17 + cubesize - 1, 17) << 17) |
-        (GET_FIELD_SP(pixel_addr, 56, 59) << 13) |
-        (GET_FIELD_SP(pixel_addr, 35, 38) << 9) |
-        (GET_FIELD_SP(pixel_addr, 13, 16) << 5) |
-        (((pixel_addr >> 55) & 1) << 4) |
-        (GET_FIELD_SP(pixel_addr, 33, 34) << 2) |
-        GET_FIELD_SP(pixel_addr, 11, 12);
+    /*
+     * From Oracle SPARC Architecture 2015:
+     * Architecturally, an illegal R[rs2] value (>5) causes the array
+     * instructions to produce undefined results. For historic reference,
+     * past implementations of these instructions have ignored R[rs2]{63:3}
+     * and have treated R[rs2] values of 6 and 7 as if they were 5.
+     */
+    target_ulong n = MIN(rs2 & 7, 5);
+
+    target_ulong x_int = (rs1 >> 11) & 0x7ff;
+    target_ulong y_int = (rs1 >> 33) & 0x7ff;
+    target_ulong z_int = rs1 >> 55;
+
+    target_ulong lower_x = x_int & 3;
+    target_ulong lower_y = y_int & 3;
+    target_ulong lower_z = z_int & 1;
+
+    target_ulong middle_x = (x_int >> 2) & 15;
+    target_ulong middle_y = (y_int >> 2) & 15;
+    target_ulong middle_z = (z_int >> 1) & 15;
+
+    target_ulong upper_x = (x_int >> 6) & ((1 << n) - 1);
+    target_ulong upper_y = (y_int >> 6) & ((1 << n) - 1);
+    target_ulong upper_z = z_int >> 5;
+
+    return (upper_z << (17 + 2 * n))
+         | (upper_y << (17 + n))
+         | (upper_x << 17)
+         | (middle_z << 13)
+         | (middle_y << 9)
+         | (middle_x << 5)
+         | (lower_z << 4)
+         | (lower_y << 2)
+         | lower_x;
 }
 
 #if HOST_BIG_ENDIAN
@@ -48,6 +66,7 @@
 #define VIS_W64(n) w[3 - (n)]
 #define VIS_SW64(n) sw[3 - (n)]
 #define VIS_L64(n) l[1 - (n)]
+#define VIS_SL64(n) sl[1 - (n)]
 #define VIS_B32(n) b[3 - (n)]
 #define VIS_W32(n) w[1 - (n)]
 #else
@@ -56,6 +75,7 @@
 #define VIS_W64(n) w[n]
 #define VIS_SW64(n) sw[n]
 #define VIS_L64(n) l[n]
+#define VIS_SL64(n) sl[n]
 #define VIS_B32(n) b[n]
 #define VIS_W32(n) w[n]
 #endif
@@ -66,6 +86,7 @@
     uint16_t w[4];
     int16_t sw[4];
     uint32_t l[2];
+    int32_t sl[2];
     uint64_t ll;
     float64 d;
 } VIS64;
@@ -157,10 +178,10 @@
     s.ll = src1;
     d.ll = src2;
 
-    d.VIS_W64(0) = do_ms16b(s.VIS_B64(0), d.VIS_SW64(0));
-    d.VIS_W64(1) = do_ms16b(s.VIS_B64(2), d.VIS_SW64(1));
-    d.VIS_W64(2) = do_ms16b(s.VIS_B64(4), d.VIS_SW64(2));
-    d.VIS_W64(3) = do_ms16b(s.VIS_B64(6), d.VIS_SW64(3));
+    d.VIS_W64(0) = (s.VIS_B64(0) * d.VIS_SW64(0) + 0x8000) >> 16;
+    d.VIS_W64(1) = (s.VIS_B64(2) * d.VIS_SW64(1) + 0x8000) >> 16;
+    d.VIS_W64(2) = (s.VIS_B64(4) * d.VIS_SW64(2) + 0x8000) >> 16;
+    d.VIS_W64(3) = (s.VIS_B64(6) * d.VIS_SW64(3) + 0x8000) >> 16;
 
     return d.ll;
 }
@@ -180,46 +201,171 @@
     return d.ll;
 }
 
-#define VIS_CMPHELPER(name, F)                                    \
-    uint64_t name##16(uint64_t src1, uint64_t src2)               \
-    {                                                             \
-        VIS64 s, d;                                               \
-                                                                  \
-        s.ll = src1;                                              \
-        d.ll = src2;                                              \
-                                                                  \
-        d.VIS_W64(0) = F(s.VIS_W64(0), d.VIS_W64(0)) ? 1 : 0;     \
-        d.VIS_W64(0) |= F(s.VIS_W64(1), d.VIS_W64(1)) ? 2 : 0;    \
-        d.VIS_W64(0) |= F(s.VIS_W64(2), d.VIS_W64(2)) ? 4 : 0;    \
-        d.VIS_W64(0) |= F(s.VIS_W64(3), d.VIS_W64(3)) ? 8 : 0;    \
-        d.VIS_W64(1) = d.VIS_W64(2) = d.VIS_W64(3) = 0;           \
-                                                                  \
-        return d.ll;                                              \
-    }                                                             \
-                                                                  \
-    uint64_t name##32(uint64_t src1, uint64_t src2)               \
-    {                                                             \
-        VIS64 s, d;                                               \
-                                                                  \
-        s.ll = src1;                                              \
-        d.ll = src2;                                              \
-                                                                  \
-        d.VIS_L64(0) = F(s.VIS_L64(0), d.VIS_L64(0)) ? 1 : 0;     \
-        d.VIS_L64(0) |= F(s.VIS_L64(1), d.VIS_L64(1)) ? 2 : 0;    \
-        d.VIS_L64(1) = 0;                                         \
-                                                                  \
-        return d.ll;                                              \
+uint64_t helper_fcmpeq8(uint64_t src1, uint64_t src2)
+{
+    uint64_t a = src1 ^ src2;
+    uint64_t m = 0x7f7f7f7f7f7f7f7fULL;
+    uint64_t c = ~(((a & m) + m) | a | m);
+
+    /* a.......b.......c.......d.......e.......f.......g.......h....... */
+    c |= c << 7;
+    /* ab......bc......cd......de......ef......fg......gh......h....... */
+    c |= c << 14;
+    /* abcd....bcde....cdef....defg....efgh....fgh.....gh......h....... */
+    c |= c << 28;
+    /* abcdefghbcdefgh.cdefgh..defgh...efgh....fgh.....gh......h....... */
+    return c >> 56;
+}
+
+uint64_t helper_fcmpne8(uint64_t src1, uint64_t src2)
+{
+    return helper_fcmpeq8(src1, src2) ^ 0xff;
+}
+
+uint64_t helper_fcmple8(uint64_t src1, uint64_t src2)
+{
+    VIS64 s1, s2;
+    uint64_t r = 0;
+
+    s1.ll = src1;
+    s2.ll = src2;
+
+    for (int i = 0; i < 8; ++i) {
+        r |= (s1.VIS_SB64(i) <= s2.VIS_SB64(i)) << i;
     }
+    return r;
+}
 
-#define FCMPGT(a, b) ((a) > (b))
-#define FCMPEQ(a, b) ((a) == (b))
-#define FCMPLE(a, b) ((a) <= (b))
-#define FCMPNE(a, b) ((a) != (b))
+uint64_t helper_fcmpgt8(uint64_t src1, uint64_t src2)
+{
+    return helper_fcmple8(src1, src2) ^ 0xff;
+}
 
-VIS_CMPHELPER(helper_fcmpgt, FCMPGT)
-VIS_CMPHELPER(helper_fcmpeq, FCMPEQ)
-VIS_CMPHELPER(helper_fcmple, FCMPLE)
-VIS_CMPHELPER(helper_fcmpne, FCMPNE)
+uint64_t helper_fcmpule8(uint64_t src1, uint64_t src2)
+{
+    VIS64 s1, s2;
+    uint64_t r = 0;
+
+    s1.ll = src1;
+    s2.ll = src2;
+
+    for (int i = 0; i < 8; ++i) {
+        r |= (s1.VIS_B64(i) <= s2.VIS_B64(i)) << i;
+    }
+    return r;
+}
+
+uint64_t helper_fcmpugt8(uint64_t src1, uint64_t src2)
+{
+    return helper_fcmpule8(src1, src2) ^ 0xff;
+}
+
+uint64_t helper_fcmpeq16(uint64_t src1, uint64_t src2)
+{
+    uint64_t a = src1 ^ src2;
+    uint64_t m = 0x7fff7fff7fff7fffULL;
+    uint64_t c = ~(((a & m) + m) | a | m);
+
+    /* a...............b...............c...............d............... */
+    c |= c << 15;
+    /* ab..............bc..............cd..............d............... */
+    c |= c << 30;
+    /* abcd............bcd.............cd..............d............... */
+    return c >> 60;
+}
+
+uint64_t helper_fcmpne16(uint64_t src1, uint64_t src2)
+{
+    return helper_fcmpeq16(src1, src2) ^ 0xf;
+}
+
+uint64_t helper_fcmple16(uint64_t src1, uint64_t src2)
+{
+    VIS64 s1, s2;
+    uint64_t r = 0;
+
+    s1.ll = src1;
+    s2.ll = src2;
+
+    for (int i = 0; i < 4; ++i) {
+        r |= (s1.VIS_SW64(i) <= s2.VIS_SW64(i)) << i;
+    }
+    return r;
+}
+
+uint64_t helper_fcmpgt16(uint64_t src1, uint64_t src2)
+{
+    return helper_fcmple16(src1, src2) ^ 0xf;
+}
+
+uint64_t helper_fcmpule16(uint64_t src1, uint64_t src2)
+{
+    VIS64 s1, s2;
+    uint64_t r = 0;
+
+    s1.ll = src1;
+    s2.ll = src2;
+
+    for (int i = 0; i < 4; ++i) {
+        r |= (s1.VIS_W64(i) <= s2.VIS_W64(i)) << i;
+    }
+    return r;
+}
+
+uint64_t helper_fcmpugt16(uint64_t src1, uint64_t src2)
+{
+    return helper_fcmpule16(src1, src2) ^ 0xf;
+}
+
+uint64_t helper_fcmpeq32(uint64_t src1, uint64_t src2)
+{
+    uint64_t a = src1 ^ src2;
+    return ((uint32_t)a == 0) | (a >> 32 ? 0 : 2);
+}
+
+uint64_t helper_fcmpne32(uint64_t src1, uint64_t src2)
+{
+    uint64_t a = src1 ^ src2;
+    return ((uint32_t)a != 0) | (a >> 32 ? 2 : 0);
+}
+
+uint64_t helper_fcmple32(uint64_t src1, uint64_t src2)
+{
+    VIS64 s1, s2;
+    uint64_t r = 0;
+
+    s1.ll = src1;
+    s2.ll = src2;
+
+    for (int i = 0; i < 2; ++i) {
+        r |= (s1.VIS_SL64(i) <= s2.VIS_SL64(i)) << i;
+    }
+    return r;
+}
+
+uint64_t helper_fcmpgt32(uint64_t src1, uint64_t src2)
+{
+    return helper_fcmple32(src1, src2) ^ 3;
+}
+
+uint64_t helper_fcmpule32(uint64_t src1, uint64_t src2)
+{
+    VIS64 s1, s2;
+    uint64_t r = 0;
+
+    s1.ll = src1;
+    s2.ll = src2;
+
+    for (int i = 0; i < 2; ++i) {
+        r |= (s1.VIS_L64(i) <= s2.VIS_L64(i)) << i;
+    }
+    return r;
+}
+
+uint64_t helper_fcmpugt32(uint64_t src1, uint64_t src2)
+{
+    return helper_fcmpule32(src1, src2) ^ 3;
+}
 
 uint64_t helper_pdist(uint64_t sum, uint64_t src1, uint64_t src2)
 {
@@ -334,3 +480,131 @@
 
     return r.ll;
 }
+
+uint64_t helper_cmask8(uint64_t gsr, uint64_t src)
+{
+    uint32_t mask = 0;
+
+    mask |= (src & 0x01 ? 0x00000007 : 0x0000000f);
+    mask |= (src & 0x02 ? 0x00000060 : 0x000000e0);
+    mask |= (src & 0x04 ? 0x00000500 : 0x00000d00);
+    mask |= (src & 0x08 ? 0x00004000 : 0x0000c000);
+    mask |= (src & 0x10 ? 0x00030000 : 0x000b0000);
+    mask |= (src & 0x20 ? 0x00200000 : 0x00a00000);
+    mask |= (src & 0x40 ? 0x01000000 : 0x09000000);
+    mask |= (src & 0x80 ? 0x00000000 : 0x80000000);
+
+    return deposit64(gsr, 32, 32, mask);
+}
+
+uint64_t helper_cmask16(uint64_t gsr, uint64_t src)
+{
+    uint32_t mask = 0;
+
+    mask |= (src & 0x1 ? 0x00000067 : 0x000000ef);
+    mask |= (src & 0x2 ? 0x00004500 : 0x0000cd00);
+    mask |= (src & 0x4 ? 0x00230000 : 0x00ab0000);
+    mask |= (src & 0x8 ? 0x01000000 : 0x89000000);
+
+    return deposit64(gsr, 32, 32, mask);
+}
+
+uint64_t helper_cmask32(uint64_t gsr, uint64_t src)
+{
+    uint32_t mask = 0;
+
+    mask |= (src & 0x1 ? 0x00004567 : 0x0000cdef);
+    mask |= (src & 0x2 ? 0x01230000 : 0x89ab0000);
+
+    return deposit64(gsr, 32, 32, mask);
+}
+
+static inline uint16_t do_fchksm16(uint16_t src1, uint16_t src2)
+{
+    uint16_t a = src1 + src2;
+    uint16_t c = a < src1;
+    return a + c;
+}
+
+uint64_t helper_fchksm16(uint64_t src1, uint64_t src2)
+{
+    VIS64 r, s1, s2;
+
+    s1.ll = src1;
+    s2.ll = src2;
+    r.ll = 0;
+
+    r.VIS_W64(0) = do_fchksm16(s1.VIS_W64(0), s2.VIS_W64(0));
+    r.VIS_W64(1) = do_fchksm16(s1.VIS_W64(1), s2.VIS_W64(1));
+    r.VIS_W64(2) = do_fchksm16(s1.VIS_W64(2), s2.VIS_W64(2));
+    r.VIS_W64(3) = do_fchksm16(s1.VIS_W64(3), s2.VIS_W64(3));
+
+    return r.ll;
+}
+
+static inline int16_t do_fmean16(int16_t src1, int16_t src2)
+{
+    return (src1 + src2 + 1) / 2;
+}
+
+uint64_t helper_fmean16(uint64_t src1, uint64_t src2)
+{
+    VIS64 r, s1, s2;
+
+    s1.ll = src1;
+    s2.ll = src2;
+    r.ll = 0;
+
+    r.VIS_SW64(0) = do_fmean16(s1.VIS_SW64(0), s2.VIS_SW64(0));
+    r.VIS_SW64(1) = do_fmean16(s1.VIS_SW64(1), s2.VIS_SW64(1));
+    r.VIS_SW64(2) = do_fmean16(s1.VIS_SW64(2), s2.VIS_SW64(2));
+    r.VIS_SW64(3) = do_fmean16(s1.VIS_SW64(3), s2.VIS_SW64(3));
+
+    return r.ll;
+}
+
+uint64_t helper_fslas16(uint64_t src1, uint64_t src2)
+{
+    VIS64 r, s1, s2;
+
+    s1.ll = src1;
+    s2.ll = src2;
+    r.ll = 0;
+
+    for (int i = 0; i < 4; ++i) {
+        int t = s1.VIS_SW64(i) << (s2.VIS_W64(i) % 16);
+        t = MIN(t, INT16_MAX);
+        t = MAX(t, INT16_MIN);
+        r.VIS_SW64(i) = t;
+    }
+
+    return r.ll;
+}
+
+uint64_t helper_fslas32(uint64_t src1, uint64_t src2)
+{
+    VIS64 r, s1, s2;
+
+    s1.ll = src1;
+    s2.ll = src2;
+    r.ll = 0;
+
+    for (int i = 0; i < 2; ++i) {
+        int64_t t = (int64_t)(int32_t)s1.VIS_L64(i) << (s2.VIS_L64(i) % 32);
+        t = MIN(t, INT32_MAX);
+        t = MAX(t, INT32_MIN);
+        r.VIS_L64(i) = t;
+    }
+
+    return r.ll;
+}
+
+uint64_t helper_xmulx(uint64_t src1, uint64_t src2)
+{
+    return int128_getlo(clmul_64(src1, src2));
+}
+
+uint64_t helper_xmulxhi(uint64_t src1, uint64_t src2)
+{
+    return int128_gethi(clmul_64(src1, src2));
+}
diff --git a/tests/docker/dockerfiles/alpine.docker b/tests/docker/dockerfiles/alpine.docker
index 554464f..b079a83 100644
--- a/tests/docker/dockerfiles/alpine.docker
+++ b/tests/docker/dockerfiles/alpine.docker
@@ -1,10 +1,10 @@
 # THIS FILE WAS AUTO-GENERATED
 #
-#  $ lcitool dockerfile --layers all alpine-318 qemu
+#  $ lcitool dockerfile --layers all alpine-319 qemu
 #
 # https://gitlab.com/libvirt/libvirt-ci
 
-FROM docker.io/library/alpine:3.18
+FROM docker.io/library/alpine:3.19
 
 RUN apk update && \
     apk upgrade && \
diff --git a/tests/docker/dockerfiles/fedora-win64-cross.docker b/tests/docker/dockerfiles/fedora-win64-cross.docker
index 0f78711..007e157 100644
--- a/tests/docker/dockerfiles/fedora-win64-cross.docker
+++ b/tests/docker/dockerfiles/fedora-win64-cross.docker
@@ -1,10 +1,10 @@
 # THIS FILE WAS AUTO-GENERATED
 #
-#  $ lcitool dockerfile --layers all --cross-arch mingw64 fedora-38 qemu,qemu-win-installer
+#  $ lcitool dockerfile --layers all --cross-arch mingw64 fedora-40 qemu,qemu-win-installer
 #
 # https://gitlab.com/libvirt/libvirt-ci
 
-FROM registry.fedoraproject.org/fedora:38
+FROM registry.fedoraproject.org/fedora:40
 
 RUN dnf install -y nosync && \
     printf '#!/bin/sh\n\
@@ -51,6 +51,7 @@
                python3-pip \
                python3-sphinx \
                python3-sphinx_rtd_theme \
+               python3-zombie-imp \
                sed \
                socat \
                sparse \
@@ -74,6 +75,7 @@
 ENV PYTHON "/usr/bin/python3"
 
 RUN nosync dnf install -y \
+               mingw-w64-tools \
                mingw32-nsis \
                mingw64-SDL2 \
                mingw64-SDL2_image \
diff --git a/tests/docker/dockerfiles/fedora.docker b/tests/docker/dockerfiles/fedora.docker
index 098c894..44f239c 100644
--- a/tests/docker/dockerfiles/fedora.docker
+++ b/tests/docker/dockerfiles/fedora.docker
@@ -1,10 +1,10 @@
 # THIS FILE WAS AUTO-GENERATED
 #
-#  $ lcitool dockerfile --layers all fedora-38 qemu
+#  $ lcitool dockerfile --layers all fedora-40 qemu
 #
 # https://gitlab.com/libvirt/libvirt-ci
 
-FROM registry.fedoraproject.org/fedora:38
+FROM registry.fedoraproject.org/fedora:40
 
 RUN dnf install -y nosync && \
     printf '#!/bin/sh\n\
@@ -110,6 +110,7 @@
                python3-pip \
                python3-sphinx \
                python3-sphinx_rtd_theme \
+               python3-zombie-imp \
                rdma-core-devel \
                sed \
                snappy-devel \
diff --git a/tests/lcitool/libvirt-ci b/tests/lcitool/libvirt-ci
index cec6703..0e9490c 160000
--- a/tests/lcitool/libvirt-ci
+++ b/tests/lcitool/libvirt-ci
@@ -1 +1 @@
-Subproject commit cec67039719becbfbab866f9c23574f389cf9559
+Subproject commit 0e9490cebc726ef772b6c9e27dac32e7ae99f9b2
diff --git a/tests/lcitool/projects/qemu-win-installer.yml b/tests/lcitool/projects/qemu-win-installer.yml
index 86aa222..f3663ba 100644
--- a/tests/lcitool/projects/qemu-win-installer.yml
+++ b/tests/lcitool/projects/qemu-win-installer.yml
@@ -2,3 +2,4 @@
 ---
 packages:
  - g++
+ - mingw-w64-tools
diff --git a/tests/lcitool/projects/qemu.yml b/tests/lcitool/projects/qemu.yml
index 7511ec7..070d7f4 100644
--- a/tests/lcitool/projects/qemu.yml
+++ b/tests/lcitool/projects/qemu.yml
@@ -89,6 +89,7 @@
  - pkg-config
  - pulseaudio
  - python3
+ - python3-imp
  - python3-numpy
  - python3-opencv
  - python3-pillow
diff --git a/tests/lcitool/refresh b/tests/lcitool/refresh
index 789acef..b25e3ac 100755
--- a/tests/lcitool/refresh
+++ b/tests/lcitool/refresh
@@ -80,7 +80,7 @@
 
 def generate_cirrus(target, trailer=None):
     filename = Path(src_dir, ".gitlab-ci.d", "cirrus", target + ".vars")
-    cmd = lcitool_cmd + ["variables", target, "qemu"]
+    cmd = lcitool_cmd + ["variables", "--format", "shell", target, "qemu"]
     generate(filename, cmd, trailer)
 
 
@@ -90,6 +90,13 @@
     generate(filename, cmd, None)
 
 
+def generate_yaml(os, target, arch, trailer=None):
+    filename = Path(src_dir, "scripts", "ci", "setup", os, f"{target}-{arch}.yaml")
+    cmd = lcitool_cmd + ["variables", "--format", "yaml", "-a",
+                         arch, target, "qemu"]
+    generate(filename, cmd, trailer)
+
+
 # Netmap still needs to be manually built as it is yet to be packaged
 # into a distro. We also add cscope and gtags which are used in the CI
 # test
@@ -124,11 +131,11 @@
     #
     # Standard native builds
     #
-    generate_dockerfile("alpine", "alpine-318")
+    generate_dockerfile("alpine", "alpine-319")
     generate_dockerfile("centos9", "centos-stream-9")
     generate_dockerfile("debian", "debian-12",
                         trailer="".join(debian12_extras))
-    generate_dockerfile("fedora", "fedora-38")
+    generate_dockerfile("fedora", "fedora-40")
     generate_dockerfile("opensuse-leap", "opensuse-leap-15")
     generate_dockerfile("ubuntu2204", "ubuntu-2204")
 
@@ -191,7 +198,7 @@
                         trailer=cross_build("s390x-linux-gnu-",
                                             "s390x-softmmu,s390x-linux-user"))
 
-    generate_dockerfile("fedora-win64-cross", "fedora-38",
+    generate_dockerfile("fedora-win64-cross", "fedora-40",
                         project='qemu,qemu-win-installer',
                         cross="mingw64",
                         trailer=cross_build("x86_64-w64-mingw32-",
@@ -209,6 +216,13 @@
     #
     generate_pkglist("freebsd", "freebsd-13")
 
+    #
+    # Ansible package lists
+    #
+    generate_yaml("ubuntu", "ubuntu-2204", "aarch64")
+    generate_yaml("ubuntu", "ubuntu-2204", "s390x")
+
+
     sys.exit(0)
 except Exception as ex:
     print(str(ex), file=sys.stderr)
diff --git a/tests/lcitool/targets/centos-stream-8.yml b/tests/lcitool/targets/centos-stream-8.yml
deleted file mode 100644
index 6b11160..0000000
--- a/tests/lcitool/targets/centos-stream-8.yml
+++ /dev/null
@@ -1,3 +0,0 @@
-paths:
-  pip3: /usr/bin/pip3.8
-  python: /usr/bin/python3.8
diff --git a/tests/qtest/libqos/loongarch-virt-machine.c b/tests/qtest/libqos/loongarch-virt-machine.c
new file mode 100644
index 0000000..c12089c
--- /dev/null
+++ b/tests/qtest/libqos/loongarch-virt-machine.c
@@ -0,0 +1,114 @@
+/*
+ * libqos driver framework
+ *
+ * Copyright (c) 2018 Emanuele Giuseppe Esposito <e.emanuelegiuseppe@gmail.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1 as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
+ */
+
+#include "qemu/osdep.h"
+#include "../libqtest.h"
+#include "qemu/module.h"
+#include "libqos-malloc.h"
+#include "qgraph.h"
+#include "virtio-mmio.h"
+#include "generic-pcihost.h"
+#include "hw/pci/pci_regs.h"
+
+#define LOONGARCH_PAGE_SIZE               0x1000
+#define LOONGARCH_VIRT_RAM_ADDR           0x100000
+#define LOONGARCH_VIRT_RAM_SIZE           0xFF00000
+
+#define LOONGARCH_VIRT_PIO_BASE           0x18000000
+#define LOONGARCH_VIRT_PCIE_PIO_OFFSET    0x4000
+#define LOONGARCH_VIRT_PCIE_PIO_LIMIT     0x10000
+#define LOONGARCH_VIRT_PCIE_ECAM_BASE     0x20000000
+#define LOONGARCH_VIRT_PCIE_MMIO32_BASE   0x40000000
+#define LOONGARCH_VIRT_PCIE_MMIO32_LIMIT  0x80000000
+
+typedef struct QVirtMachine QVirtMachine;
+
+struct QVirtMachine {
+    QOSGraphObject obj;
+    QGuestAllocator alloc;
+    QVirtioMMIODevice virtio_mmio;
+    QGenericPCIHost bridge;
+};
+
+static void virt_destructor(QOSGraphObject *obj)
+{
+    QVirtMachine *machine = (QVirtMachine *) obj;
+    alloc_destroy(&machine->alloc);
+}
+
+static void *virt_get_driver(void *object, const char *interface)
+{
+    QVirtMachine *machine = object;
+    if (!g_strcmp0(interface, "memory")) {
+        return &machine->alloc;
+    }
+
+    fprintf(stderr, "%s not present in loongarch/virtio\n", interface);
+    g_assert_not_reached();
+}
+
+static QOSGraphObject *virt_get_device(void *obj, const char *device)
+{
+    QVirtMachine *machine = obj;
+    if (!g_strcmp0(device, "generic-pcihost")) {
+        return &machine->bridge.obj;
+    } else if (!g_strcmp0(device, "virtio-mmio")) {
+        return &machine->virtio_mmio.obj;
+    }
+
+    fprintf(stderr, "%s not present in loongarch/virt\n", device);
+    g_assert_not_reached();
+}
+
+static void loongarch_config_qpci_bus(QGenericPCIBus *qpci)
+{
+    qpci->gpex_pio_base = LOONGARCH_VIRT_PIO_BASE;
+    qpci->bus.pio_alloc_ptr = LOONGARCH_VIRT_PCIE_PIO_OFFSET;
+    qpci->bus.pio_limit = LOONGARCH_VIRT_PCIE_PIO_LIMIT;
+    qpci->bus.mmio_alloc_ptr = LOONGARCH_VIRT_PCIE_MMIO32_BASE;
+    qpci->bus.mmio_limit = LOONGARCH_VIRT_PCIE_MMIO32_LIMIT;
+    qpci->ecam_alloc_ptr = LOONGARCH_VIRT_PCIE_ECAM_BASE;
+}
+
+static void *qos_create_machine_loongarch_virt(QTestState *qts)
+{
+    QVirtMachine *machine = g_new0(QVirtMachine, 1);
+
+    alloc_init(&machine->alloc, 0,
+               LOONGARCH_VIRT_RAM_ADDR,
+               LOONGARCH_VIRT_RAM_ADDR + LOONGARCH_VIRT_RAM_SIZE,
+               LOONGARCH_PAGE_SIZE);
+
+    qos_create_generic_pcihost(&machine->bridge, qts, &machine->alloc);
+    loongarch_config_qpci_bus(&machine->bridge.pci);
+
+    machine->obj.get_device = virt_get_device;
+    machine->obj.get_driver = virt_get_driver;
+    machine->obj.destructor = virt_destructor;
+    return machine;
+}
+
+static void virt_machine_register_nodes(void)
+{
+    qos_node_create_machine_args("loongarch64/virt",
+                                 qos_create_machine_loongarch_virt,
+                                 " -cpu la464");
+    qos_node_contains("loongarch64/virt", "generic-pcihost", NULL);
+}
+
+libqos_init(virt_machine_register_nodes);
diff --git a/tests/qtest/libqos/meson.build b/tests/qtest/libqos/meson.build
index 3aed6ef..558eb4c 100644
--- a/tests/qtest/libqos/meson.build
+++ b/tests/qtest/libqos/meson.build
@@ -61,6 +61,7 @@
         'ppc64_pseries-machine.c',
         'x86_64_pc-machine.c',
         'riscv-virt-machine.c',
+        'loongarch-virt-machine.c',
 )
 
 if have_virtfs
diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index b98fae6..1279294 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -140,7 +140,7 @@
   (config_all_devices.has_key('CONFIG_VGA') ? ['display-vga-test'] : [])
 
 qtests_loongarch64 = qtests_filter + \
-  ['boot-serial-test']
+  ['boot-serial-test', 'numa-test']
 
 qtests_m68k = ['boot-serial-test'] + \
   qtests_filter
diff --git a/tests/qtest/numa-test.c b/tests/qtest/numa-test.c
index 7aa262d..5518f65 100644
--- a/tests/qtest/numa-test.c
+++ b/tests/qtest/numa-test.c
@@ -265,6 +265,54 @@
     qtest_quit(qts);
 }
 
+static void loongarch64_numa_cpu(const void *data)
+{
+    QDict *resp;
+    QList *cpus;
+    QObject *e;
+    QTestState *qts;
+    g_autofree char *cli = NULL;
+
+    cli = make_cli(data, "-machine "
+        "smp.cpus=2,smp.sockets=2,smp.cores=1,smp.threads=1 "
+        "-numa node,nodeid=0,memdev=ram -numa node,nodeid=1 "
+        "-numa cpu,node-id=0,socket-id=1,core-id=0,thread-id=0 "
+        "-numa cpu,node-id=1,socket-id=0,core-id=0,thread-id=0");
+    qts = qtest_init(cli);
+    cpus = get_cpus(qts, &resp);
+    g_assert(cpus);
+
+    while ((e = qlist_pop(cpus))) {
+        QDict *cpu, *props;
+        int64_t socket, core, thread, node;
+
+        cpu = qobject_to(QDict, e);
+        g_assert(qdict_haskey(cpu, "props"));
+        props = qdict_get_qdict(cpu, "props");
+
+        g_assert(qdict_haskey(props, "node-id"));
+        node = qdict_get_int(props, "node-id");
+        g_assert(qdict_haskey(props, "socket-id"));
+        socket = qdict_get_int(props, "socket-id");
+        g_assert(qdict_haskey(props, "core-id"));
+        core = qdict_get_int(props, "core-id");
+        g_assert(qdict_haskey(props, "thread-id"));
+        thread = qdict_get_int(props, "thread-id");
+
+        if (socket == 0 && core == 0 && thread == 0) {
+            g_assert_cmpint(node, ==, 1);
+        } else if (socket == 1 && core == 0 && thread == 0) {
+            g_assert_cmpint(node, ==, 0);
+        } else {
+            g_assert(false);
+        }
+        qobject_unref(e);
+    }
+
+    qobject_unref(resp);
+    qtest_quit(qts);
+}
+
 static void pc_dynamic_cpu_cfg(const void *data)
 {
     QObject *e;
@@ -593,6 +641,11 @@
                             aarch64_numa_cpu);
     }
 
+    if (!strcmp(arch, "loongarch64")) {
+        qtest_add_data_func("/numa/loongarch64/cpu/explicit", args,
+                            loongarch64_numa_cpu);
+    }
+
 out:
     return g_test_run();
 }
diff --git a/tests/vm/Makefile.include b/tests/vm/Makefile.include
index ac56824..13ed80f 100644
--- a/tests/vm/Makefile.include
+++ b/tests/vm/Makefile.include
@@ -45,7 +45,6 @@
 	@echo "  vm-build-netbsd                 - Build QEMU in NetBSD VM"
 	@echo "  vm-build-openbsd                - Build QEMU in OpenBSD VM"
 ifneq ($(GENISOIMAGE),)
-	@echo "  vm-build-centos                 - Build QEMU in CentOS VM, with Docker"
 ifneq ($(EFI_AARCH64),)
 	@echo "  vm-build-ubuntu.aarch64         - Build QEMU in ubuntu aarch64 VM"
 	@echo "  vm-build-centos.aarch64         - Build QEMU in CentOS aarch64 VM"
diff --git a/tests/vm/centos b/tests/vm/centos
deleted file mode 100755
index d25c8f8..0000000
--- a/tests/vm/centos
+++ /dev/null
@@ -1,51 +0,0 @@
-#!/usr/bin/env python3
-#
-# CentOS 8 Stream image
-#
-# Copyright 2018, 2022 Red Hat Inc.
-#
-# Authors:
-#  Fam Zheng <famz@redhat.com>
-#
-# This code is licensed under the GPL version 2 or later.  See
-# the COPYING file in the top-level directory.
-#
-
-import os
-import sys
-import subprocess
-import basevm
-import time
-
-class CentosVM(basevm.BaseVM):
-    name = "centos"
-    arch = "x86_64"
-    BUILD_SCRIPT = """
-        set -e;
-        cd $(mktemp -d);
-        export SRC_ARCHIVE=/dev/vdb;
-        sudo chmod a+r $SRC_ARCHIVE;
-        tar -xf $SRC_ARCHIVE;
-        make docker-test-block@centos9 {verbose} J={jobs} NETWORK=1;
-        make docker-test-quick@centos9 {verbose} J={jobs} NETWORK=1;
-    """
-
-    def build_image(self, img):
-        cimg = self._download_with_cache("https://cloud.centos.org/centos/8-stream/x86_64/images/CentOS-Stream-GenericCloud-8-20220125.1.x86_64.qcow2")
-        img_tmp = img + ".tmp"
-        subprocess.check_call(['cp', '-f', cimg, img_tmp])
-        self.exec_qemu_img("resize", img_tmp, "50G")
-        self.boot(img_tmp, extra_args = ["-cdrom", self.gen_cloud_init_iso()])
-        self.wait_ssh()
-        self.ssh_root_check("touch /etc/cloud/cloud-init.disabled")
-        self.ssh_root_check("dnf update -y")
-        self.ssh_root_check("dnf install -y dnf-plugins-core")
-        self.ssh_root_check("dnf config-manager --set-enabled powertools")
-        self.ssh_root_check("dnf install -y podman make ninja-build git python3")
-        self.ssh_root("poweroff")
-        self.wait()
-        os.rename(img_tmp, img)
-        return 0
-
-if __name__ == "__main__":
-    sys.exit(basevm.main(CentosVM))
diff --git a/tests/vm/centos.aarch64 b/tests/vm/centos.aarch64
index 3f58de1..fcf9e08 100755
--- a/tests/vm/centos.aarch64
+++ b/tests/vm/centos.aarch64
@@ -25,9 +25,9 @@
     'cpu'          : "max",
     'machine'      : "virt,gic-version=max",
     'install_cmds' : (
-        "dnf config-manager --set-enabled powertools, "
+        "dnf config-manager --enable crb, "
         "dnf config-manager --add-repo=https://download.docker.com/linux/centos/docker-ce.repo, "
-        "dnf install -y make ninja-build git python38 gcc gcc-c++ flex bison "\
+        "dnf install -y make ninja-build git python3 gcc gcc-c++ flex bison "\
             "glib2-devel pixman-devel zlib-devel docker-ce.aarch64, "
         "systemctl enable docker, "
     ),
@@ -38,10 +38,10 @@
 
 
 class CentosAarch64VM(basevm.BaseVM):
-    name = "centos8.aarch64"
+    name = "centos9.aarch64"
     arch = "aarch64"
-    image_name = "CentOS-Stream-GenericCloud-8-20220125.1.aarch64.qcow2"
-    image_link = "https://cloud.centos.org/centos/8-stream/aarch64/images/"
+    image_name = "CentOS-Stream-GenericCloud-9-20230501.0.aarch64.qcow2"
+    image_link = "https://cloud.centos.org/centos/9-stream/aarch64/images/"
     image_link += image_name
     BUILD_SCRIPT = """
         set -e;
diff --git a/util/hexdump.c b/util/hexdump.c
index 0f943e3..ae0d499 100644
--- a/util/hexdump.c
+++ b/util/hexdump.c
@@ -1,5 +1,5 @@
 /*
- * Helper to hexdump a buffer
+* Helper to hexdump a buffer
  *
  * Copyright (c) 2013 Red Hat, Inc.
  * Copyright (c) 2013 Gerd Hoffmann <kraxel@redhat.com>
@@ -16,22 +16,47 @@
 #include "qemu/osdep.h"
 #include "qemu/cutils.h"
 
-void qemu_hexdump_line(char *line, const void *bufptr, size_t len)
+static inline char hexdump_nibble(unsigned x)
 {
-    const char *buf = bufptr;
-    int i;
+    return (x < 10 ? '0' : 'a' - 10) + x;
+}
 
-    if (len > QEMU_HEXDUMP_LINE_BYTES) {
-        len = QEMU_HEXDUMP_LINE_BYTES;
-    }
+GString *qemu_hexdump_line(GString *str, const void *vbuf, size_t len,
+                           size_t unit_len, size_t block_len)
+{
+    const uint8_t *buf = vbuf;
+    size_t u, b;
 
-    for (i = 0; i < len; i++) {
-        if (i != 0 && (i % 4) == 0) {
-            *line++ = ' ';
+    if (str == NULL) {
+        /* Estimate the length of the output to avoid reallocs. */
+        size_t est = len * 2;
+        if (unit_len) {
+            est += len / unit_len;
         }
-        line += sprintf(line, " %02x", (unsigned char)buf[i]);
+        if (block_len) {
+            est += len / block_len;
+        }
+        str = g_string_sized_new(est + 1);
     }
-    *line = '\0';
+
+    for (u = 0, b = 0; len; u++, b++, len--, buf++) {
+        uint8_t c;
+
+        if (unit_len && u == unit_len) {
+            g_string_append_c(str, ' ');
+            u = 0;
+        }
+        if (block_len && b == block_len) {
+            g_string_append_c(str, ' ');
+            b = 0;
+        }
+
+        c = *buf;
+        g_string_append_c(str, hexdump_nibble(c / 16));
+        g_string_append_c(str, hexdump_nibble(c % 16));
+    }
+
+    return str;
 }
 
 static void asciidump_line(char *line, const void *bufptr, size_t len)
@@ -49,24 +74,26 @@
     *line = '\0';
 }
 
+#define QEMU_HEXDUMP_LINE_BYTES 16
 #define QEMU_HEXDUMP_LINE_WIDTH \
     (QEMU_HEXDUMP_LINE_BYTES * 2 + QEMU_HEXDUMP_LINE_BYTES / 4)
 
 void qemu_hexdump(FILE *fp, const char *prefix,
                   const void *bufptr, size_t size)
 {
-    char line[QEMU_HEXDUMP_LINE_LEN];
+    g_autoptr(GString) str = g_string_sized_new(QEMU_HEXDUMP_LINE_WIDTH + 1);
     char ascii[QEMU_HEXDUMP_LINE_BYTES + 1];
     size_t b, len;
 
     for (b = 0; b < size; b += len) {
         len = MIN(size - b, QEMU_HEXDUMP_LINE_BYTES);
 
-        qemu_hexdump_line(line, bufptr + b, len);
+        g_string_truncate(str, 0);
+        qemu_hexdump_line(str, bufptr + b, len, 1, 4);
         asciidump_line(ascii, bufptr + b, len);
 
         fprintf(fp, "%s: %04zx: %-*s %s\n",
-                prefix, b, QEMU_HEXDUMP_LINE_WIDTH, line, ascii);
+                prefix, b, QEMU_HEXDUMP_LINE_WIDTH, str->str, ascii);
     }
 
 }